Delete unused source files for 1.98d.

[ffftp/ffftp.git] / putty / CHARSET / UTF8.C
diff --git a/putty/CHARSET/UTF8.C b/putty/CHARSET/UTF8.C

deleted file mode 100644 (file)

index 489ffa2..0000000
--- a/putty/CHARSET/UTF8.C
+++ /dev/null
@@ -1,882 +0,0 @@
-/*\r
- * utf8.c - routines to handle UTF-8.\r
- */\r
-\r
-#ifndef ENUM_CHARSETS\r
-\r
-#include "charset.h"\r
-#include "internal.h"\r
-\r
-void read_utf8(charset_spec const *, long int, charset_state *,\r
-              void (*)(void *, long int), void *);\r
-void write_utf8(charset_spec const *, long int,\r
-               charset_state *, void (*)(void *, long int), void *);\r
-\r
-/*\r
- * UTF-8 has no associated data, so `charset' may be ignored.\r
- */\r
-\r
-void read_utf8(charset_spec const *charset, long int input_chr,\r
-              charset_state *state,\r
-              void (*emit)(void *ctx, long int output), void *emitctx)\r
-{\r
-    UNUSEDARG(charset);\r
-\r
-    /*\r
-     * For reading UTF-8, the `state' word contains:\r
-     * \r
-     *  - in bits 29-31, the number of bytes expected to be in the\r
-     *    current multibyte character (which we can tell instantly\r
-     *    from the first byte, of course).\r
-     * \r
-     *  - in bits 26-28, the number of bytes _seen so far_ in the\r
-     *    current multibyte character.\r
-     * \r
-     *  - in the remainder of the word, the current value of the\r
-     *    character, which is shifted upwards by 6 bits to\r
-     *    accommodate each new byte.\r
-     * \r
-     * As required, the state is zero when we are not in the middle\r
-     * of a multibyte character at all.\r
-     * \r
-     * For example, when reading E9 8D 8B, starting at state=0:\r
-     * \r
-     *  - after E9, the state is 0x64000009\r
-     *  - after 8D, the state is 0x6800024d\r
-     *  - after 8B, the state conceptually becomes 0x6c00934b, at\r
-     *    which point we notice we've got as many characters as we\r
-     *    were expecting, output U+934B, and reset the state to\r
-     *    zero.\r
-     *\r
-     * Note that the maximum number of bits we might need to store\r
-     * in the character value field is 25 (U+7FFFFFFF contains 31\r
-     * bits, but we will never actually store its full value\r
-     * because when we receive the last 6 bits in the final\r
-     * continuation byte we will output it and revert the state to\r
-     * zero). Hence the character value field never collides with\r
-     * the byte counts.\r
-     */\r
-\r
-    if (input_chr < 0x80) {\r
-       /*\r
-        * Single-byte character. If the state is nonzero before\r
-        * coming here, output an error for an incomplete sequence.\r
-        * Then output the character.\r
-        */\r
-       if (state->s0 != 0) {\r
-           emit(emitctx, ERROR);\r
-           state->s0 = 0;\r
-       }\r
-       emit(emitctx, input_chr);\r
-    } else if (input_chr == 0xFE || input_chr == 0xFF) {\r
-       /*\r
-        * FE and FF bytes should _never_ occur in UTF-8. They are\r
-        * automatic errors; if the state was nonzero to start\r
-        * with, output a further error for an incomplete sequence.\r
-        */\r
-       if (state->s0 != 0) {\r
-           emit(emitctx, ERROR);\r
-           state->s0 = 0;\r
-       }\r
-       emit(emitctx, ERROR);\r
-    } else if (input_chr >= 0x80 && input_chr < 0xC0) {\r
-       /*\r
-        * Continuation byte. Output an error for an unexpected\r
-        * continuation byte, if the state is zero.\r
-        */\r
-       if (state->s0 == 0) {\r
-           emit(emitctx, ERROR);\r
-       } else {\r
-           unsigned long charval;\r
-           unsigned long topstuff;\r
-           int bytes;\r
-\r
-           /*\r
-            * Otherwise, accumulate more of the character value.\r
-            */\r
-           charval = state->s0 & 0x03ffffffL;\r
-           charval = (charval << 6) | (input_chr & 0x3F);\r
-\r
-           /*\r
-            * Check the byte counts; if we have not reached the\r
-            * end of the character, update the state and return.\r
-            */\r
-           topstuff = state->s0 & 0xfc000000L;\r
-           topstuff += 0x04000000L;   /* add one to the byte count */\r
-           if (((topstuff << 3) ^ topstuff) & 0xe0000000L) {\r
-               state->s0 = topstuff | charval;\r
-               return;\r
-           }\r
-\r
-           /*\r
-            * Now we know we've reached the end of the character.\r
-            * `charval' is the Unicode value. We should check for\r
-            * various invalid things, and then either output\r
-            * charval or an error. In all cases we reset the state\r
-            * to zero.\r
-            */\r
-           bytes = topstuff >> 29;\r
-           state->s0 = 0;\r
-\r
-           if (charval >= 0xD800 && charval < 0xE000) {\r
-               /*\r
-                * Surrogates (0xD800-0xDFFF) may never be encoded\r
-                * in UTF-8. A surrogate pair in Unicode should\r
-                * have been encoded as a single UTF-8 character\r
-                * occupying more than three bytes.\r
-                */\r
-               emit(emitctx, ERROR);\r
-           } else if (charval == 0xFFFE || charval == 0xFFFF) {\r
-               /*\r
-                * U+FFFE and U+FFFF are invalid Unicode characters\r
-                * and may never be encoded in UTF-8. (This is one\r
-                * reason why U+FFFF is our way of signalling an\r
-                * error to our `emit' function :-)\r
-                */\r
-               emit(emitctx, ERROR);\r
-           } else if ((charval <= 0x7FL /* && bytes > 1 */) ||\r
-                      (charval <= 0x7FFL && bytes > 2) ||\r
-                      (charval <= 0xFFFFL && bytes > 3) ||\r
-                      (charval <= 0x1FFFFFL && bytes > 4) ||\r
-                      (charval <= 0x3FFFFFFL && bytes > 5)) {\r
-               /*\r
-                * Overlong sequences are not to be tolerated,\r
-                * under any circumstances.\r
-                */\r
-               emit(emitctx, ERROR);\r
-           } else {\r
-               /*\r
-                * Oh, all right. We'll let this one off.\r
-                */\r
-               emit(emitctx, charval);\r
-           }\r
-       }\r
-\r
-    } else {\r
-       /*\r
-        * Lead byte. First output an error for an incomplete\r
-        * sequence, if the state is nonzero.\r
-        */\r
-       if (state->s0 != 0)\r
-           emit(emitctx, ERROR);\r
-\r
-       /*\r
-        * Now deal with the lead byte: work out the number of\r
-        * bytes we expect to see in this character, and extract\r
-        * the initial bits of it too.\r
-        */\r
-       if (input_chr >= 0xC0 && input_chr < 0xE0) {\r
-           state->s0 = 0x44000000L | (input_chr & 0x1F);\r
-       } else if (input_chr >= 0xE0 && input_chr < 0xF0) {\r
-           state->s0 = 0x64000000L | (input_chr & 0x0F);\r
-       } else if (input_chr >= 0xF0 && input_chr < 0xF8) {\r
-           state->s0 = 0x84000000L | (input_chr & 0x07);\r
-       } else if (input_chr >= 0xF8 && input_chr < 0xFC) {\r
-           state->s0 = 0xa4000000L | (input_chr & 0x03);\r
-       } else if (input_chr >= 0xFC && input_chr < 0xFE) {\r
-           state->s0 = 0xc4000000L | (input_chr & 0x01);\r
-       }\r
-    }\r
-}\r
-\r
-/*\r
- * UTF-8 is a stateless multi-byte encoding (in the sense that just\r
- * after any character has been completed, the state is always the\r
- * same); hence when writing it, there is no need to use the\r
- * charset_state.\r
- */\r
-\r
-void write_utf8(charset_spec const *charset, long int input_chr,\r
-               charset_state *state,\r
-               void (*emit)(void *ctx, long int output), void *emitctx)\r
-{\r
-    UNUSEDARG(charset);\r
-    UNUSEDARG(state);\r
-\r
-    /*\r
-     * Refuse to output any illegal code points.\r
-     */\r
-    if (input_chr == 0xFFFE || input_chr == 0xFFFF ||\r
-       (input_chr >= 0xD800 && input_chr < 0xE000)) {\r
-       emit(emitctx, ERROR);\r
-    } else if (input_chr < 0x80) {     /* one-byte character */\r
-       emit(emitctx, input_chr);\r
-    } else if (input_chr < 0x800) {    /* two-byte character */\r
-       emit(emitctx, 0xC0 | (0x1F & (input_chr >>  6)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr      )));\r
-    } else if (input_chr < 0x10000) {  /* three-byte character */\r
-       emit(emitctx, 0xE0 | (0x0F & (input_chr >> 12)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >>  6)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr      )));\r
-    } else if (input_chr < 0x200000) { /* four-byte character */\r
-       emit(emitctx, 0xF0 | (0x07 & (input_chr >> 18)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >> 12)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >>  6)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr      )));\r
-    } else if (input_chr < 0x4000000) {/* five-byte character */\r
-       emit(emitctx, 0xF8 | (0x03 & (input_chr >> 24)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >> 18)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >> 12)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >>  6)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr      )));\r
-    } else {                          /* six-byte character */\r
-       emit(emitctx, 0xFC | (0x01 & (input_chr >> 30)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >> 24)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >> 18)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >> 12)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr >>  6)));\r
-       emit(emitctx, 0x80 | (0x3F & (input_chr      )));\r
-    }\r
-}\r
-\r
-#ifdef TESTMODE\r
-\r
-#include <stdio.h>\r
-#include <stdarg.h>\r
-\r
-int total_errs = 0;\r
-\r
-void utf8_emit(void *ctx, long output)\r
-{\r
-    wchar_t **p = (wchar_t **)ctx;\r
-    *(*p)++ = output;\r
-}\r
-\r
-void utf8_read_test(int line, char *input, int inlen, ...)\r
-{\r
-    va_list ap;\r
-    wchar_t *p, str[512];\r
-    int i;\r
-    charset_state state;\r
-    unsigned long l;\r
-\r
-    state.s0 = 0;\r
-    p = str;\r
-\r
-    for (i = 0; i < inlen; i++)\r
-       read_utf8(NULL, input[i] & 0xFF, &state, utf8_emit, &p);\r
-\r
-    va_start(ap, inlen);\r
-    l = 0;\r
-    for (i = 0; i < p - str; i++) {\r
-       l = va_arg(ap, long int);\r
-       if (l == -1) {\r
-           printf("%d: correct string shorter than output\n", line);\r
-           total_errs++;\r
-           break;\r
-       }\r
-       if (l != str[i]) {\r
-           printf("%d: char %d came out as %08x, should be %08x\n",\r
-                   line, i, str[i], l);\r
-           total_errs++;\r
-       }\r
-    }\r
-    if (l != -1) {\r
-       l = va_arg(ap, long int);\r
-       if (l != -1) {\r
-           printf("%d: correct string longer than output\n", line);\r
-           total_errs++;\r
-       }\r
-    }\r
-    va_end(ap);\r
-}\r
-\r
-void utf8_write_test(int line, const long *input, int inlen, ...)\r
-{\r
-    va_list ap;\r
-    wchar_t *p, str[512];\r
-    int i;\r
-    charset_state state;\r
-    unsigned long l;\r
-\r
-    state.s0 = 0;\r
-    p = str;\r
-\r
-    for (i = 0; i < inlen; i++)\r
-       write_utf8(NULL, input[i], &state, utf8_emit, &p);\r
-\r
-    va_start(ap, inlen);\r
-    l = 0;\r
-    for (i = 0; i < p - str; i++) {\r
-       l = va_arg(ap, long int);\r
-       if (l == -1) {\r
-           printf("%d: correct string shorter than output\n", line);\r
-           total_errs++;\r
-           break;\r
-       }\r
-       if (l != str[i]) {\r
-           printf("%d: char %d came out as %08x, should be %08x\n",\r
-                   line, i, str[i], l);\r
-           total_errs++;\r
-       }\r
-    }\r
-    if (l != -1) {\r
-       l = va_arg(ap, long int);\r
-       if (l != -1) {\r
-           printf("%d: correct string longer than output\n", line);\r
-           total_errs++;\r
-       }\r
-    }\r
-    va_end(ap);\r
-}\r
-\r
-/* Macro to concoct the first three parameters of utf8_read_test. */\r
-#define TESTSTR(x) __LINE__, x, lenof(x)\r
-\r
-int main(void)\r
-{\r
-    printf("read tests beginning\n");\r
-    utf8_read_test(TESTSTR("\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"),\r
-                  0x000003BA, /* GREEK SMALL LETTER KAPPA */\r
-                  0x00001F79, /* GREEK SMALL LETTER OMICRON WITH OXIA */\r
-                  0x000003C3, /* GREEK SMALL LETTER SIGMA */\r
-                  0x000003BC, /* GREEK SMALL LETTER MU */\r
-                  0x000003B5, /* GREEK SMALL LETTER EPSILON */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x00"),\r
-                  0x00000000, /* <control> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xC2\x80"),\r
-                  0x00000080, /* <control> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xE0\xA0\x80"),\r
-                  0x00000800, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF0\x90\x80\x80"),\r
-                  0x00010000, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF8\x88\x80\x80\x80"),\r
-                  0x00200000, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFC\x84\x80\x80\x80\x80"),\r
-                  0x04000000, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x7F"),\r
-                  0x0000007F, /* <control> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xDF\xBF"),\r
-                  0x000007FF, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xEF\xBF\xBD"),\r
-                  0x0000FFFD, /* REPLACEMENT CHARACTER */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xEF\xBF\xBF"),\r
-                  ERROR,      /* <no name available> (invalid char) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF7\xBF\xBF\xBF"),\r
-                  0x001FFFFF, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFB\xBF\xBF\xBF\xBF"),\r
-                  0x03FFFFFF, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFD\xBF\xBF\xBF\xBF\xBF"),\r
-                  0x7FFFFFFF, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\x9F\xBF"),\r
-                  0x0000D7FF, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xEE\x80\x80"),\r
-                  0x0000E000, /* <Private Use, First> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xEF\xBF\xBD"),\r
-                  0x0000FFFD, /* REPLACEMENT CHARACTER */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF4\x8F\xBF\xBF"),\r
-                  0x0010FFFF, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF4\x90\x80\x80"),\r
-                  0x00110000, /* <no name available> */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x80"),\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xBF"),\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x80\xBF"),\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x80\xBF\x80"),\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x80\xBF\x80\xBF"),\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80"),\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80\xBF"),\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x80\xBF\x80\xBF\x80\xBF\x80"),\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"),\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  ERROR,      /* (unexpected continuation byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xC0\x20\xC1\x20\xC2\x20\xC3\x20\xC4\x20\xC5\x20\xC6\x20\xC7\x20"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xE0\x20\xE1\x20\xE2\x20\xE3\x20\xE4\x20\xE5\x20\xE6\x20\xE7\x20\xE8\x20\xE9\x20\xEA\x20\xEB\x20\xEC\x20\xED\x20\xEE\x20\xEF\x20"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF0\x20\xF1\x20\xF2\x20\xF3\x20\xF4\x20\xF5\x20\xF6\x20\xF7\x20"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF8\x20\xF9\x20\xFA\x20\xFB\x20"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFC\x20\xFD\x20"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0x00000020, /* SPACE */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xC0"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xE0\x80"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF0\x80\x80"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF8\x80\x80\x80"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xDF"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xEF\xBF"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF7\xBF\xBF"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFB\xBF\xBF\xBF"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFD\xBF\xBF\xBF\xBF"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF"),\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  ERROR,      /* (incomplete sequence) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFE"),\r
-                  ERROR,      /* (invalid UTF-8 byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFF"),\r
-                  ERROR,      /* (invalid UTF-8 byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFE\xFE\xFF\xFF"),\r
-                  ERROR,      /* (invalid UTF-8 byte) */\r
-                  ERROR,      /* (invalid UTF-8 byte) */\r
-                  ERROR,      /* (invalid UTF-8 byte) */\r
-                  ERROR,      /* (invalid UTF-8 byte) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xC0\xAF"),\r
-                  ERROR,      /* SOLIDUS (overlong form of 2F) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xE0\x80\xAF"),\r
-                  ERROR,      /* SOLIDUS (overlong form of 2F) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF0\x80\x80\xAF"),\r
-                  ERROR,      /* SOLIDUS (overlong form of 2F) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF8\x80\x80\x80\xAF"),\r
-                  ERROR,      /* SOLIDUS (overlong form of 2F) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80\xAF"),\r
-                  ERROR,      /* SOLIDUS (overlong form of 2F) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xC1\xBF"),\r
-                  ERROR,      /* <control> (overlong form of 7F) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xE0\x9F\xBF"),\r
-                  ERROR,      /* <no name available> (overlong form of DF BF) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF0\x8F\xBF\xBF"),\r
-                  ERROR,      /* <no name available> (overlong form of EF BF BF) (invalid char) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF8\x87\xBF\xBF\xBF"),\r
-                  ERROR,      /* <no name available> (overlong form of F7 BF BF BF) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFC\x83\xBF\xBF\xBF\xBF"),\r
-                  ERROR,      /* <no name available> (overlong form of FB BF BF BF BF) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xC0\x80"),\r
-                  ERROR,      /* <control> (overlong form of 00) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xE0\x80\x80"),\r
-                  ERROR,      /* <control> (overlong form of 00) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF0\x80\x80\x80"),\r
-                  ERROR,      /* <control> (overlong form of 00) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xF8\x80\x80\x80\x80"),\r
-                  ERROR,      /* <control> (overlong form of 00) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xFC\x80\x80\x80\x80\x80"),\r
-                  ERROR,      /* <control> (overlong form of 00) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xA0\x80"),\r
-                  ERROR,      /* <Non Private Use High Surrogate, First> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xAD\xBF"),\r
-                  ERROR,      /* <Non Private Use High Surrogate, Last> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xAE\x80"),\r
-                  ERROR,      /* <Private Use High Surrogate, First> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xAF\xBF"),\r
-                  ERROR,      /* <Private Use High Surrogate, Last> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xB0\x80"),\r
-                  ERROR,      /* <Low Surrogate, First> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xBE\x80"),\r
-                  ERROR,      /* <no name available> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xBF\xBF"),\r
-                  ERROR,      /* <Low Surrogate, Last> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xA0\x80\xED\xB0\x80"),\r
-                  ERROR,      /* <Non Private Use High Surrogate, First> (surrogate) */\r
-                  ERROR,      /* <Low Surrogate, First> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xA0\x80\xED\xBF\xBF"),\r
-                  ERROR,      /* <Non Private Use High Surrogate, First> (surrogate) */\r
-                  ERROR,      /* <Low Surrogate, Last> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xAD\xBF\xED\xB0\x80"),\r
-                  ERROR,      /* <Non Private Use High Surrogate, Last> (surrogate) */\r
-                  ERROR,      /* <Low Surrogate, First> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xAD\xBF\xED\xBF\xBF"),\r
-                  ERROR,      /* <Non Private Use High Surrogate, Last> (surrogate) */\r
-                  ERROR,      /* <Low Surrogate, Last> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xAE\x80\xED\xB0\x80"),\r
-                  ERROR,      /* <Private Use High Surrogate, First> (surrogate) */\r
-                  ERROR,      /* <Low Surrogate, First> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xAE\x80\xED\xBF\xBF"),\r
-                  ERROR,      /* <Private Use High Surrogate, First> (surrogate) */\r
-                  ERROR,      /* <Low Surrogate, Last> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xAF\xBF\xED\xB0\x80"),\r
-                  ERROR,      /* <Private Use High Surrogate, Last> (surrogate) */\r
-                  ERROR,      /* <Low Surrogate, First> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xED\xAF\xBF\xED\xBF\xBF"),\r
-                  ERROR,      /* <Private Use High Surrogate, Last> (surrogate) */\r
-                  ERROR,      /* <Low Surrogate, Last> (surrogate) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xEF\xBF\xBE"),\r
-                  ERROR,      /* <no name available> (invalid char) */\r
-                  0, -1);\r
-    utf8_read_test(TESTSTR("\xEF\xBF\xBF"),\r
-                  ERROR,      /* <no name available> (invalid char) */\r
-                  0, -1);\r
-    printf("read tests completed\n");\r
-    printf("write tests beginning\n");\r
-    {\r
-       const static long str[] =\r
-       {0x03BAL, 0x1F79L, 0x03C3L, 0x03BCL, 0x03B5L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xCE, 0xBA,\r
-                       0xE1, 0xBD, 0xB9,\r
-                       0xCF, 0x83,\r
-                       0xCE, 0xBC,\r
-                       0xCE, 0xB5,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x0000L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0x00,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x0080L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xC2, 0x80,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x0800L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xE0, 0xA0, 0x80,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x00010000L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xF0, 0x90, 0x80, 0x80,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x00200000L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xF8, 0x88, 0x80, 0x80, 0x80,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x04000000L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xFC, 0x84, 0x80, 0x80, 0x80, 0x80,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x007FL, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0x7F,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x07FFL, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xDF, 0xBF,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0xFFFDL, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xEF, 0xBF, 0xBD,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0xFFFFL, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       ERROR,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x001FFFFFL, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xF7, 0xBF, 0xBF, 0xBF,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x03FFFFFFL, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xFB, 0xBF, 0xBF, 0xBF, 0xBF,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0x7FFFFFFFL, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0xD7FFL, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xED, 0x9F, 0xBF,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0xD800L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       ERROR,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0xD800L, 0xDC00L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       ERROR,\r
-                       ERROR,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0xDFFFL, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       ERROR,\r
-                       0, -1);\r
-    }\r
-    {\r
-       const static long str[] = {0xE000L, 0};\r
-       utf8_write_test(TESTSTR(str),\r
-                       0xEE, 0x80, 0x80,\r
-                       0, -1);\r
-    }\r
-    printf("write tests completed\n");\r
-\r
-    printf("total: %d errors\n", total_errs);\r
-    return (total_errs != 0);\r
-}\r
-#endif /* TESTMODE */\r
-\r
-const charset_spec charset_CS_UTF8 = {   /* the charset_spec object for UTF-8 (only built when ENUM_CHARSETS is not defined) */\r
-    CS_UTF8, read_utf8, write_utf8, NULL /* CS_UTF8 id, then this file's read/write routines; trailing NULL is presumably the per-charset data slot, unused because UTF-8 has no associated data -- confirm field order in internal.h */\r
-};\r
-\r
-#else /* ENUM_CHARSETS */\r
-\r
-ENUM_CHARSET(CS_UTF8) /* sole output of this file when ENUM_CHARSETS is defined; no header is included on this branch, so ENUM_CHARSET must already be defined by whatever includes this file */\r
-\r
-#endif /* ENUM_CHARSETS */\r