// // aegis - project change supervisor // Copyright (C) 2014 Peter Miller // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or (at // your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public License along // with this program. If not, see . // #include #include #include #include #include // // For reasons I don't understand, the MB_CUR_MAX symbol (wich is // defined as a reference to __mb_cur_max) does not get resolved // at link time, despite being present in libc. It's easier to // just dodge the question. // #ifdef __CYGWIN__ #undef MB_CUR_MAX #define MB_CUR_MAX 8 #endif void wcs_to_mbs(const wchar_t *text, size_t text_size, char **result_p, size_t *result_size_p) { trace(("%s\n", __PRETTY_FUNCTION__)); static char *buf; static size_t bufmax; // // Do the conversion "long hand". This is because the wcstombs // function barfs when it sees an invalid wchar_t. This // function treats them literally and keeps going. // size_t buflen = (text_size + 1) * MB_CUR_MAX; if (buflen < text_size + 1) { // // There are some wonderfully brain-dead implementations // out there. The more stupid ones manage to make // MB_CUR_MAX be zero! // buflen = text_size + 1; } if (buflen > bufmax) { for (;;) { bufmax = bufmax * 2 + 8; if (buflen <= bufmax) break; } delete [] buf; buf = new char [bufmax]; } // // perform the conversion in the native language default // language_human(); // // The wctomb function has internal state. It needs to be reset. // wctomb((char *)0, 0); const wchar_t *ip = text; size_t remainder = text_size; char *op = buf; while (remainder > 0) { int n = wctomb(op, *ip); if (n <= 0) { // // Copy the character literally. // Throw away anything that will not fit. // *op++ = *ip++; if (!op[-1]) op[-1] = '?'; --remainder; // // The wctomb function's internal state will now // be "error" or broken, or otherwise useless. // Reset it so that we can keep going. // wctomb((char *)0, 0); } else { op += n; ++ip; --remainder; } } // // The final NUL could require shift state end characters, // meaning that n could be more than 1. // int n = wctomb(op, (wchar_t)0); if (n <= 0) *op = 0; else { op += n - 1; assert(*op == 0); } // // restore the locale to the C locale // language_C(); // // set the output side effects // *result_p = buf; *result_size_p = op - buf; } void mbs_to_wcs(const char *text, size_t text_size, wchar_t **result, size_t *result_size) { trace(("%s\n", __PRETTY_FUNCTION__)); static wchar_t *buf; static size_t bufmax; assert(!text == !text_size); if (!text || !text_size) { *result = 0; *result_size = 0; return; } // // Make sure we have enough room in the output buffer. // if (bufmax < text_size) { for (;;) { bufmax = bufmax * 2 + 8; if (text_size <= bufmax) break; } delete [] buf; buf = new wchar_t [bufmax]; } // // change the locale to the native language default // language_human(); // // Reset the mbtowc internal state. // mbtowc((wchar_t *)0, (char *)0, 0); // // scan the string and extract the wide characters // const char *ip = text; wchar_t *op = buf; size_t remainder = text_size; while (remainder > 0) { int n = mbtowc(op, ip, remainder); if (n == 0) break; if (n < 0) { *op++ = *ip++; --remainder; // // The mbtowc function's internal state will now be "error" // or broken, or otherwise useless. // Reset it so that we can keep going. // mbtowc((wchar_t *)0, (char *)0, 0); } else { // // the one wchar_t used n bytes of input // ip += n; remainder -= n; ++op; } } // // change the locale back to the C locale // language_C(); // // set the output side effects // *result = buf; *result_size = op - buf; } // vim: set ts=8 sw=4 et :