// // aegis - project change supervisor // Copyright (C) 2004-2008, 2012 Peter Miller // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or (at // your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . // #include #include #include #include #include #include struct table_t { const char *name; const char *value; }; // // The object of this table is not to be an accurate translation, but to // simply make it readable. Most of the time, non-printing characters // will be transmitted as &#DDD; escapes, and so will unpack at this end // correctly. This table is simply for foreign text not produced by // Aegis. // // The table is sorted via "LANG=en sort -u" because we are going to // binary chop across it for efficiency. // static const table_t table[] = { { "AElig", "AE" }, { "Aacute", "A" }, { "Acirc", "A" }, { "Agrave", "A" }, { "Aring", "A" }, { "Atilde", "A" }, { "Auml", "AE" }, { "Ccedil", "C" }, { "Eacute", "E" }, { "Ecirc", "E" }, { "Egrave", "E" }, { "Euml", "EE" }, { "Iacute", "I" }, { "Icirc", "I" }, { "Igrave", "I" }, { "Iuml", "IE" }, { "Ntilde", "N" }, { "OElig", "OE" }, { "Oacute", "O" }, { "Ocirc", "O" }, { "Ograve", "O" }, { "Oslash", "O" }, { "Otilde", "O" }, { "Ouml", "OE" }, { "Scaron", "S" }, { "Uacute", "U" }, { "Ucirc", "U" }, { "Ugrave", "U" }, { "Uuml", "UE" }, { "Yacute", "Y" }, { "Yuml", "YE" }, { "aacute", "a" }, { "acirc", "a" }, { "acute", "a" }, { "aelig", "ae" }, { "agrave", "a" }, { "amp", "&" }, { "aring", "a" }, { "atilde", "a" }, { "auml", "ae" }, { "bdquo", ",," }, { "brvbar", "|" }, { "ccedil", "c" }, { "cedil", "," }, { "cent", "c" }, { "circ", "^" }, { "copy", "(C)" }, { "curren", "$" }, { "deg", "o" }, { "divide", "/" }, { "eacute", "e" }, { "ecirc", "e" }, { "egrave", "e" }, { "emsp", " " }, { "ensp", " " }, { "euml", "ee" }, { "frac12", " 1/2 " }, { "frac14", " 1/4 " }, { "frac34", " 3/4 " }, { "gt", ">" }, { "iacute", "i" }, { "icirc", "i" }, { "iexcl", "!" }, { "igrave", "i" }, { "iquest", "?" }, { "iuml", "ie" }, { "laquo", "<" }, { "ldquo", "\"" }, { "lsaquo", "<" }, { "lsquo", "'" }, { "lt", "<" }, { "mdash", "-" }, { "micro", "u" }, { "middot", "." }, { "nbsp", " " }, { "ndash", "-" }, { "ntilde", "n" }, { "oacute", "o" }, { "ocirc", "o" }, { "oelig", "oe" }, { "ograve", "o" }, { "oslash", "o" }, { "otilde", "o" }, { "ouml", "oe" }, { "permil", "o/oo" }, { "plusmn", "+/-" }, { "pound", "L" }, { "quot", "'" }, { "quot", "\"" }, { "raquo", ">" }, { "rdquo", "\"" }, { "reg", "(R)" }, { "rsaquo", ">" }, { "rsquo", "'" }, { "sbquo", "," }, { "scaron", "s" }, { "sup1", "1" }, { "sup2", "2" }, { "sup3", "3" }, { "szlig", "ss" }, { "thinsp", " " }, { "tilde", "~" }, { "times", "x" }, { "uacute", "u" }, { "ucirc", "u" }, { "ugrave", "u" }, { "uuml", "ue" }, { "yacute", "y" }, { "yuml", "ye" }, { "zwj", "" }, { "zwnj", "" }, }; static int compare(const void *vkey, const void *velem) { const char *key = (const char *)vkey; const table_t *elem = (const table_t *)velem; return strcmp(key, elem->name); } nstring nstring::html_unquote() const { static nstring_accumulator name; static nstring_accumulator output; output.clear(); const char *sp = c_str(); for (;;) { unsigned char c = *sp++; if (!c) break; if (c != '&') { output.push_back(c); continue; } c = *sp++; if (!c) { output.push_back('&'); break; } name.clear(); if (c == '#') { name.push_back(c); int n = 0; for (;;) { c = *sp++; if (!isdigit(c)) break; name.push_back(c); n = n * 10 + c - '0'; } if (c != ';' || name.size() < 2) { --sp; output.push_back('&'); output.push_back(name); } else { if (n > 0 && n < 256) { output.push_back((char)n); } else { output.push_back('&'); output.push_back(name); output.push_back(';'); } } } else { for (;;) { if (!isalnum(c)) break; name.push_back(c); c = *sp++; } if (c != ';' || name.empty()) { --sp; output.push_back('&'); output.push_back(name); } else { name.push_back('\0'); // terminating NUL character table_t *tp = (table_t *) bsearch ( name.get_data(), table, SIZEOF(table), sizeof(table[0]), compare ); if (tp) { output.push_back(tp->value); } else { name.pop_back(); // get rid of the NUL character output.push_back('&'); output.push_back(name); output.push_back(';'); } } } } return output.mkstr(); } // vim: set ts=8 sw=4 et :