//
// aegis - project change supervisor
// Copyright (C) 2014 Peter Miller
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. If not, see <http://www.gnu.org/licenses/>.
//
#include
bool
nstring::looks_like_utf8(void)
const
{
const char *cp = c_str();
const char *end = cp + size();
unsigned octets_remaining = 0;
while (cp < end)
{
unsigned char c = *cp++;
if (octets_remaining == 0)
{
if ((c & 0x80) == 0)
{
// make sure we like the character
switch (c)
{
case '\b':
case '\t':
case '\n':
case '\v':
case '\f':
case '\r':
case ' ':
case '!':
case '"':
case '#':
case '$':
case '%':
case '&':
case '\'':
case '(':
case ')':
case '*':
case '+':
case ',':
case '-':
case '.':
case '/':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case ':':
case ';':
case '<':
case '=':
case '>':
case '?':
case '@':
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
case '[':
case '\\':
case ']':
case '^':
case '_':
case '`':
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
case '{':
case '|':
case '}':
case '~':
break;
default:
// not a valid utf-8 encoding
not_a_valid_encoding:
return false;
}
continue;
}
if ((c & 0xC0) == 0x80)
{
// unexpected continuation byte
goto not_a_valid_encoding;
}
if ((c & 0xE0) == 0xC0)
{
octets_remaining = 1;
continue;
}
if ((c & 0xF0) == 0xE0)
{
octets_remaining = 2;
continue;
}
if ((c & 0xF8) == 0xF0)
{
octets_remaining = 3;
continue;
}
}
else
{
if ((c & 0xC0) == 0x80)
{
// continuation byte
--octets_remaining;
continue;
}
}
goto not_a_valid_encoding;
}
return true;
}
// vim: set ts=8 sw=4 et :