2 * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3 * Copyright (c) 1996-2009, The nkf Project.
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 #define NKF_VERSION "2.0.8"
24 #define NKF_RELEASE_DATE "2009-01-05"
26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27 "Copyright (C) 1996-2009, The nkf Project."
38 # define INCL_DOSERRORS
44 /* state of output_mode and input_mode
123 NKF_ENCODING_TABLE_SIZE,
124 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128 JIS_X_0208 = 0x1168, /* @B */
129 JIS_X_0212 = 0x1159, /* D */
130 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131 JIS_X_0213_2 = 0x1229, /* P */
132 JIS_X_0213_1 = 0x1233 /* Q */
/* Forward declarations of the per-encoding converters.
 * The *_iconv routines take three nkf_char arguments (c2, c1, c0) and
 * return an nkf_char; the *_oconv routines take two (c2, c1) and return
 * nothing.  They are referenced by the NkfEncoding* initializers. */
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
149 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
150 void (*oconv)(nkf_char c2, nkf_char c1);
151 } nkf_native_encoding;
/* Native (base) encodings.  Each initializer supplies a name string followed
 * by the input converter (iconv) and the output converter (oconv).
 * ASCII and EUC-JP share the same converter pair (e_iconv/e_oconv);
 * ISO-2022-JP also uses e_iconv for input but j_oconv for output. */
153 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
154 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
155 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
156 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
157 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
158 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
159 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
164 const nkf_native_encoding *base_encoding;
167 nkf_encoding nkf_encoding_table[] = {
168 {ASCII, "US-ASCII", &NkfEncodingASCII},
169 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179 {CP10001, "CP10001", &NkfEncodingShift_JIS},
180 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182 {CP51932, "CP51932", &NkfEncodingEUC_JP},
183 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203 {BINARY, "BINARY", &NkfEncodingASCII},
210 } encoding_name_to_id_table[] = {
213 {"ISO-2022-JP", ISO_2022_JP},
214 {"ISO2022JP-CP932", CP50220},
215 {"CP50220", CP50220},
216 {"CP50221", CP50221},
217 {"CSISO2022JP", CP50221},
218 {"CP50222", CP50222},
219 {"ISO-2022-JP-1", ISO_2022_JP_1},
220 {"ISO-2022-JP-3", ISO_2022_JP_3},
221 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
222 {"SHIFT_JIS", SHIFT_JIS},
224 {"WINDOWS-31J", WINDOWS_31J},
225 {"CSWINDOWS31J", WINDOWS_31J},
226 {"CP932", WINDOWS_31J},
227 {"MS932", WINDOWS_31J},
228 {"CP10001", CP10001},
231 {"EUCJP-NKF", EUCJP_NKF},
232 {"CP51932", CP51932},
233 {"EUC-JP-MS", EUCJP_MS},
234 {"EUCJP-MS", EUCJP_MS},
235 {"EUCJPMS", EUCJP_MS},
236 {"EUC-JP-ASCII", EUCJP_ASCII},
237 {"EUCJP-ASCII", EUCJP_ASCII},
238 {"SHIFT_JISX0213", SHIFT_JISX0213},
239 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
240 {"EUC-JISX0213", EUC_JISX0213},
241 {"EUC-JIS-2004", EUC_JIS_2004},
244 {"UTF-8-BOM", UTF_8_BOM},
245 {"UTF8-MAC", UTF8_MAC},
246 {"UTF-8-MAC", UTF8_MAC},
248 {"UTF-16BE", UTF_16BE},
249 {"UTF-16BE-BOM", UTF_16BE_BOM},
250 {"UTF-16LE", UTF_16LE},
251 {"UTF-16LE-BOM", UTF_16LE_BOM},
253 {"UTF-32BE", UTF_32BE},
254 {"UTF-32BE-BOM", UTF_32BE_BOM},
255 {"UTF-32LE", UTF_32LE},
256 {"UTF-32LE-BOM", UTF_32LE_BOM},
261 #if defined(DEFAULT_CODE_JIS)
262 #define DEFAULT_ENCIDX ISO_2022_JP
263 #elif defined(DEFAULT_CODE_SJIS)
264 #define DEFAULT_ENCIDX SHIFT_JIS
265 #elif defined(DEFAULT_CODE_WINDOWS_31J)
266 #define DEFAULT_ENCIDX WINDOWS_31J
267 #elif defined(DEFAULT_CODE_EUC)
268 #define DEFAULT_ENCIDX EUC_JP
269 #elif defined(DEFAULT_CODE_UTF8)
270 #define DEFAULT_ENCIDX UTF_8
/* ASCII-only character classification and conversion helpers.
 * NOTE(review): these are function-like macros; the argument is expanded
 * without parentheses and, in most of them, more than once.  Pass only a
 * simple, side-effect-free expression (a variable or a constant). */
274 #define is_alnum(c) \
275 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
277 /* I don't trust portability of toupper */
278 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
279 #define nkf_isoctal(c) ('0'<=c && c<='7')
280 #define nkf_isdigit(c) ('0'<=c && c<='9')
281 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
282 #define nkf_isblank(c) (c == SP || c == TAB)
283 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == LF)
284 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
285 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
286 #define nkf_isprint(c) (SP<=c && c<='~')
287 #define nkf_isgraph(c) ('!'<=c && c<='~')
/* Value of a hexadecimal digit character; any other character yields 0. */
288 #define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \
289 ('A'<=c&&c<='F') ? (c-'A'+10) : \
290 ('a'<=c&&c<='f') ? (c-'a'+10) : 0)
/* Uppercase hexadecimal digit for the low four bits of c. */
291 #define bin2hex(c) ("0123456789ABCDEF"[c&15])
/* True when c2, converted to unsigned short and shifted right by 8, equals SS3. */
292 #define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3)
/* True for CR, LF, and for characters strictly between SP and DEL other than
 * '?', '=', '_', '(', ')', '.' and 0x22 (double quote). */
293 #define nkf_noescape_mime(c) ((c == CR) || (c == LF) || \
294 ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \
295 && (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
/* True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END]. */
297 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
/* (0xE0&0x7F) is 0x60, so this accepts SP <= c < 0x60. */
298 #define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c < (0xE0&0x7F))
300 #define HOLD_SIZE 1024
301 #if defined(INT_IS_SHORT)
302 #define IOBUF_SIZE 2048
304 #define IOBUF_SIZE 16384
307 #define DEFAULT_J 'B'
308 #define DEFAULT_R 'B'
315 /* MIME preprocessor */
317 #ifdef EASYWIN /*Easy Win */
318 extern POINT _BufferSize;
327 void (*status_func)(struct input_code *, nkf_char);
328 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
332 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
333 static nkf_encoding *input_encoding = NULL;
334 static nkf_encoding *output_encoding = NULL;
336 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
338 * 0: Shift_JIS, eucJP-ascii
343 #define UCS_MAP_ASCII 0
345 #define UCS_MAP_CP932 2
346 #define UCS_MAP_CP10001 3
347 static int ms_ucs_map_f = UCS_MAP_ASCII;
349 #ifdef UTF8_INPUT_ENABLE
350 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
351 static int no_cp932ext_f = FALSE;
352 /* ignore ZERO WIDTH NO-BREAK SPACE */
/* NOTE(review): the preceding comment talks about ZERO WIDTH NO-BREAK SPACE,
 * which does not match the name of the flag declared next
 * (no_best_fit_chars_f); it may be left over from another declaration --
 * confirm before relying on it. */
353 static int no_best_fit_chars_f = FALSE;
354 static int input_endian = ENDIAN_BIG;
355 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
356 static void (*encode_fallback)(nkf_char c) = NULL;
357 static void w_status(struct input_code *, nkf_char);
359 #ifdef UTF8_OUTPUT_ENABLE
360 static int output_bom_f = FALSE;
361 static int output_endian = ENDIAN_BIG;
364 static void std_putc(nkf_char c);
365 static nkf_char std_getc(FILE *f);
366 static nkf_char std_ungetc(nkf_char c,FILE *f);
368 static nkf_char broken_getc(FILE *f);
369 static nkf_char broken_ungetc(nkf_char c,FILE *f);
371 static nkf_char mime_getc(FILE *f);
373 static void mime_putc(nkf_char c);
377 #if !defined(PERL_XS) && !defined(WIN32DLL)
378 static unsigned char stdibuf[IOBUF_SIZE];
379 static unsigned char stdobuf[IOBUF_SIZE];
/* File-scope option flags and their initial values. */
383 static int unbuf_f = FALSE;
384 static int estab_f = FALSE;
385 static int nop_f = FALSE;
386 static int binmode_f = TRUE; /* binary mode */
387 static int rot_f = FALSE; /* rot13/47 mode */
388 static int hira_f = FALSE; /* hiragana/katakana conversion */
389 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
390 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
391 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
392 static int mimebuf_f = FALSE; /* MIME buffered input */
393 static int broken_f = FALSE; /* convert ESC-less broken JIS */
394 static int iso8859_f = FALSE; /* ISO8859 through */
395 static int mimeout_f = FALSE; /* base64 mode */
396 static int x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
397 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
399 #ifdef UNICODE_NORMALIZATION
400 static int nfc_f = FALSE;
401 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
402 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
406 static int cap_f = FALSE;
407 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
408 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
410 static int url_f = FALSE;
411 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
412 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
/* Bit layout helpers for nkf_char values.
 * CLASS_MASK selects the top byte and VALUE_MASK the low 24 bits;
 * CLASS_UNICODE is the top-byte tag that nkf_char_unicode_new() ORs in and
 * nkf_char_unicode_p() tests for.  PREFIX_EUCG3 (0x8F00) is ORed in by
 * nkf_char_euc3_new(). */
415 #define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
416 #define CLASS_MASK NKF_INT32_C(0xFF000000)
417 #define CLASS_UNICODE NKF_INT32_C(0x01000000)
418 #define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
419 #define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
420 #define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
421 #define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
422 #define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/* NOTE(review): unlike the two *_new macros above, the predicates below use
 * their argument without parentheses; pass a simple expression. */
423 #define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE)
/* The next two compare only the low 24 bits against the BMP / Unicode maximum;
 * they do not themselves check the CLASS_UNICODE tag. */
424 #define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX)
425 #define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX)
427 #ifdef NUMCHAR_OPTION
428 static int numchar_f = FALSE;
429 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
430 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
434 static int noout_f = FALSE;
435 static void no_putc(nkf_char c);
436 static int debug_f = FALSE;
437 static void debug(const char *str);
438 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
441 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
442 static void set_input_codename(const char *codename);
445 static int exec_f = 0;
448 #ifdef SHIFTJIS_CP932
449 /* invert IBM extended characters to others */
450 static int cp51932_f = FALSE;
452 /* invert NEC-selected IBM extended characters to IBM extended characters */
453 static int cp932inv_f = TRUE;
455 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
456 #endif /* SHIFTJIS_CP932 */
458 static int x0212_f = FALSE;
459 static int x0213_f = FALSE;
461 static unsigned char prefix_table[256];
463 static void e_status(struct input_code *, nkf_char);
464 static void s_status(struct input_code *, nkf_char);
466 struct input_code input_code_list[] = {
467 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
468 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
469 #ifdef UTF8_INPUT_ENABLE
470 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
475 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
476 static int base64_count = 0;
478 /* X0208 -> ASCII converter */
481 static int f_line = 0; /* chars in line */
482 static int f_prev = 0;
483 static int fold_preserve_f = FALSE; /* preserve new lines */
484 static int fold_f = FALSE;
485 static int fold_len = 0;
488 static unsigned char kanji_intro = DEFAULT_J;
489 static unsigned char ascii_intro = DEFAULT_R;
493 #define FOLD_MARGIN 10
494 #define DEFAULT_FOLD 60
496 static int fold_margin = FOLD_MARGIN;
498 /* process default */
501 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
503 fprintf(stderr,"nkf internal module connection failure.\n");
509 no_connection(nkf_char c2, nkf_char c1)
511 no_connection2(c2,c1,0);
/* Conversion hooks.  Each pointer initially refers to no_connection2 (input)
 * or no_connection (output); no_connection forwards to no_connection2, which
 * writes "nkf internal module connection failure." to stderr.  The hooks are
 * therefore expected to be reassigned before any conversion runs. */
514 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
515 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
517 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
518 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
519 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
520 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
521 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
522 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
523 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
525 /* static redirections */
527 static void (*o_putc)(nkf_char c) = std_putc;
529 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
530 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
532 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
533 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
535 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
537 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
538 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
540 /* for strict mime */
541 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
542 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
545 static int output_mode = ASCII; /* output kanji mode */
546 static int input_mode = ASCII; /* input kanji mode */
547 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
549 /* X0201 / X0208 conversion tables */
551 /* X0201 kana conversion table */
553 static const unsigned char cv[]= {
554 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
555 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
556 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
557 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
558 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
559 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
560 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
561 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
562 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
563 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
564 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
565 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
566 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
567 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
568 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
569 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
573 /* X0201 kana conversion table for dakuten */
575 static const unsigned char dv[]= {
576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
577 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
579 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
581 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
582 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
583 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
584 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
585 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
586 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
587 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 /* X0201 kana conversion table for handakuten */
596 static const unsigned char ev[]= {
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
608 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 /* X0208 kigou conversion table */
617 /* 0x8140 - 0x819e */
618 static const unsigned char fv[] = {
620 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
621 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
622 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
624 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
625 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
626 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
627 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
628 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
629 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
636 static int option_mode = 0;
637 static int file_out_f = FALSE;
639 static int overwrite_f = FALSE;
640 static int preserve_time_f = FALSE;
641 static int backup_f = FALSE;
642 static char *backup_suffix = "";
645 static int eolmode_f = 0; /* CR, LF, CRLF */
646 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
647 static nkf_char prev_cr = 0; /* CR or 0 */
648 #ifdef EASYWIN /*Easy Win */
649 static int end_check;
653 nkf_xmalloc(size_t size)
657 if (size == 0) size = 1;
661 perror("can't malloc");
669 nkf_xrealloc(void *ptr, size_t size)
671 if (size == 0) size = 1;
673 ptr = realloc(ptr, size);
675 perror("can't realloc");
682 #define nkf_xfree(ptr) free(ptr)
685 nkf_str_caseeql(const char *src, const char *target)
688 for (i = 0; src[i] && target[i]; i++) {
689 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
691 if (src[i] || target[i]) return FALSE;
696 nkf_enc_from_index(int idx)
698 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
701 return &nkf_encoding_table[idx];
/* Look up an encoding id by name in encoding_name_to_id_table. */
705 nkf_enc_find_index(const char *name)
/* A leading "X-" is skipped before the lookup.
 * NOTE(review): only an uppercase 'X' is recognised here, although the table
 * comparison itself is case-insensitive (nkf_str_caseeql upper-cases both
 * sides) -- confirm whether "x-" should be accepted too. */
708 if (name[0] == 'X' && *(name+1) == '-') name += 2;
/* The scan stops at the first table entry whose id is negative. */
709 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
710 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
711 return encoding_name_to_id_table[i].id;
718 nkf_enc_find(const char *name)
721 idx = nkf_enc_find_index(name);
722 if (idx < 0) return 0;
723 return nkf_enc_from_index(idx);
/* Accessors and predicates for nkf_encoding pointers.
 * NOTE(review): the accessor expansions are not wrapped in outer parentheses
 * and every macro dereferences enc, so enc must be a non-null pointer. */
726 #define nkf_enc_name(enc) (enc)->name
727 #define nkf_enc_to_index(enc) (enc)->id
728 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
/* Input / output converter of the encoding's base (native) encoding. */
729 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
730 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
731 #define nkf_enc_asciicompat(enc) (\
732 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
733 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is one of the UTF-8 / UTF-16 / UTF-32 natives. */
734 #define nkf_enc_unicode_p(enc) (\
735 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
736 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
737 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
738 #define nkf_enc_cp5022x_p(enc) (\
739 nkf_enc_to_index(enc) == CP50220 ||\
740 nkf_enc_to_index(enc) == CP50221 ||\
741 nkf_enc_to_index(enc) == CP50222)
743 #ifdef DEFAULT_CODE_LOCALE
747 #ifdef HAVE_LANGINFO_H
748 return nl_langinfo(CODESET);
749 #elif defined(__WIN32__)
751 sprintf(buf, "CP%d", GetACP());
753 #elif defined(__OS2__)
754 # if defined(INT_IS_SHORT)
760 ULONG ulCP[1], ulncp;
761 DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
762 if (ulCP[0] == 932 || ulCP[0] == 943)
763 strcpy(buf, "Shift_JIS");
765 sprintf(buf, "CP%lu", ulCP[0]);
773 nkf_locale_encoding()
775 nkf_encoding *enc = 0;
776 const char *encname = nkf_locale_charmap();
778 enc = nkf_enc_find(encname);
781 #endif /* DEFAULT_CODE_LOCALE */
786 return &nkf_encoding_table[UTF_8];
790 nkf_default_encoding()
792 nkf_encoding *enc = 0;
793 #ifdef DEFAULT_CODE_LOCALE
794 enc = nkf_locale_encoding();
795 #elif defined(DEFAULT_ENCIDX)
796 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
798 if (!enc) enc = nkf_utf8_encoding();
809 nkf_buf_new(int length)
811 nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
812 buf->ptr = nkf_xmalloc(length);
819 nkf_buf_dispose(nkf_buf_t *buf)
825 #define nkf_buf_length(buf) ((buf)->len)
826 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
/* Return the byte stored at position index of buf. */
829 nkf_buf_at(nkf_buf_t *buf, int index)
/* NOTE(review): nkf_buf_push stores at ptr[len++] and nkf_buf_pop reads
 * ptr[--len], so the stored elements are ptr[0..len-1].  This assertion also
 * admits index == len (one past the last stored element) and does not reject
 * a negative index -- it looks like it should be `0 <= index && index < len`;
 * confirm against callers. */
831 assert(index <= buf->len);
832 return buf->ptr[index];
836 nkf_buf_clear(nkf_buf_t *buf)
842 nkf_buf_push(nkf_buf_t *buf, unsigned char c)
844 if (buf->capa <= buf->len) {
847 buf->ptr[buf->len++] = c;
851 nkf_buf_pop(nkf_buf_t *buf)
853 assert(!nkf_buf_empty_p(buf));
854 return buf->ptr[--buf->len];
857 /* Normalization Form C */
860 #define fprintf dllprintf
866 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
873 "USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
875 "b,u Output is buffered (DEFAULT),Output is unbuffered\n"
876 "j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
877 #ifdef UTF8_OUTPUT_ENABLE
878 " After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
880 "J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
881 #ifdef UTF8_INPUT_ENABLE
882 " After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
887 "i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
888 "o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"
889 "r {de/en}crypt ROT13/47\n"
890 "h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
891 "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
892 "M[BQ] MIME encode [B:base64 Q:quoted]\n"
893 "l ISO8859-1 (Latin-1) support\n"
894 "f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
897 "Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
898 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
899 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
900 "X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
901 "B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
905 "T Text mode output\n"
907 "O Output to File (DEFAULT 'nkf.out')\n"
908 "I Convert non ISO-2022-JP charactor to GETA\n"
909 "d,c Convert line breaks -d: LF -c: CRLF\n"
910 "-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
911 "v, V Show this usage. V: show configuration\n"
914 "Long name options\n"
915 " --ic=<input codeset> --oc=<output codeset>\n"
916 " Specify the input or output codeset\n"
917 " --fj --unix --mac --windows\n"
918 " --jis --euc --sjis --utf8 --utf16 --mime --base64\n"
919 " Convert for the system or code\n"
920 " --hiragana --katakana --katakana-hiragana\n"
921 " To Hiragana/Katakana Conversion\n"
922 " --prefix= Insert escape before troublesome characters of Shift_JIS\n"
926 " --cap-input, --url-input Convert hex after ':' or '%%'\n"
928 #ifdef NUMCHAR_OPTION
929 " --numchar-input Convert Unicode Character Reference\n"
931 #ifdef UTF8_INPUT_ENABLE
932 " --fb-{skip, html, xml, perl, java, subchar}\n"
933 " Specify how nkf handles unassigned characters\n"
938 " --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"
939 " Overwrite original listed files by filtered result\n"
940 " --overwrite preserves timestamp of original files\n"
942 " -g --guess Guess the input code\n"
943 " --help --version Show this help/the version\n"
944 " For more information, see also man nkf\n"
950 show_configuration(void)
953 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
954 " Compile-time options:\n"
955 " Compiled at: " __DATE__ " " __TIME__ "\n"
958 " Default output encoding: "
959 #ifdef DEFAULT_CODE_LOCALE
960 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
961 #elif defined(DEFAULT_ENCIDX)
962 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
968 " Default output end of line: "
969 #if DEFAULT_NEWLINE == CR
971 #elif DEFAULT_NEWLINE == CRLF
977 " Decode MIME encoded string: "
978 #if MIME_DECODE_DEFAULT
984 " Convert JIS X 0201 Katakana: "
991 " --help, --version output: "
992 #if HELP_OUTPUT_HELP_OUTPUT
1003 get_backup_filename(const char *suffix, const char *filename)
1005 char *backup_filename;
1006 int asterisk_count = 0;
1008 int filename_length = strlen(filename);
1010 for(i = 0; suffix[i]; i++){
1011 if(suffix[i] == '*') asterisk_count++;
1015 backup_filename = nkf_xmalloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1016 for(i = 0, j = 0; suffix[i];){
1017 if(suffix[i] == '*'){
1018 backup_filename[j] = '\0';
1019 strncat(backup_filename, filename, filename_length);
1021 j += filename_length;
1023 backup_filename[j++] = suffix[i++];
1026 backup_filename[j] = '\0';
1028 j = filename_length + strlen(suffix);
1029 backup_filename = nkf_xmalloc(j + 1);
1030 strcpy(backup_filename, filename);
1031 strcat(backup_filename, suffix);
1032 backup_filename[j] = '\0';
1034 return backup_filename;
1038 #ifdef UTF8_INPUT_ENABLE
1040 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1047 (*f)(0, bin2hex(c>>shift));
1058 encode_fallback_html(nkf_char c)
1063 if(c >= NKF_INT32_C(1000000))
1064 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1065 if(c >= NKF_INT32_C(100000))
1066 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1068 (*oconv)(0, 0x30+(c/10000 )%10);
1070 (*oconv)(0, 0x30+(c/1000 )%10);
1072 (*oconv)(0, 0x30+(c/100 )%10);
1074 (*oconv)(0, 0x30+(c/10 )%10);
1076 (*oconv)(0, 0x30+ c %10);
1082 encode_fallback_xml(nkf_char c)
1087 nkf_each_char_to_hex(oconv, c);
1093 encode_fallback_java(nkf_char c)
1097 if(!nkf_char_unicode_bmp_p(c)){
1101 (*oconv)(0, bin2hex(c>>20));
1102 (*oconv)(0, bin2hex(c>>16));
1106 (*oconv)(0, bin2hex(c>>12));
1107 (*oconv)(0, bin2hex(c>> 8));
1108 (*oconv)(0, bin2hex(c>> 4));
1109 (*oconv)(0, bin2hex(c ));
1114 encode_fallback_perl(nkf_char c)
1119 nkf_each_char_to_hex(oconv, c);
1125 encode_fallback_subchar(nkf_char c)
1127 c = unicode_subchar;
1128 (*oconv)((c>>8)&0xFF, c&0xFF);
1133 static const struct {
1157 {"katakana-hiragana","h3"},
1165 #ifdef UTF8_OUTPUT_ENABLE
1175 {"fb-subchar=", ""},
1177 #ifdef UTF8_INPUT_ENABLE
1178 {"utf8-input", "W"},
1179 {"utf16-input", "W16"},
1180 {"no-cp932ext", ""},
1181 {"no-best-fit-chars",""},
1183 #ifdef UNICODE_NORMALIZATION
1184 {"utf8mac-input", ""},
1196 #ifdef NUMCHAR_OPTION
1197 {"numchar-input", ""},
1203 #ifdef SHIFTJIS_CP932
1214 set_input_encoding(nkf_encoding *enc)
1216 switch (nkf_enc_to_index(enc)) {
1223 #ifdef SHIFTJIS_CP932
1226 #ifdef UTF8_OUTPUT_ENABLE
1227 ms_ucs_map_f = UCS_MAP_CP932;
1237 case ISO_2022_JP_2004:
1244 #ifdef SHIFTJIS_CP932
1247 #ifdef UTF8_OUTPUT_ENABLE
1248 ms_ucs_map_f = UCS_MAP_CP932;
1253 #ifdef SHIFTJIS_CP932
1256 #ifdef UTF8_OUTPUT_ENABLE
1257 ms_ucs_map_f = UCS_MAP_CP10001;
1265 #ifdef SHIFTJIS_CP932
1268 #ifdef UTF8_OUTPUT_ENABLE
1269 ms_ucs_map_f = UCS_MAP_CP932;
1273 #ifdef SHIFTJIS_CP932
1276 #ifdef UTF8_OUTPUT_ENABLE
1277 ms_ucs_map_f = UCS_MAP_MS;
1281 #ifdef SHIFTJIS_CP932
1284 #ifdef UTF8_OUTPUT_ENABLE
1285 ms_ucs_map_f = UCS_MAP_ASCII;
1288 case SHIFT_JISX0213:
1289 case SHIFT_JIS_2004:
1291 #ifdef SHIFTJIS_CP932
1298 #ifdef SHIFTJIS_CP932
1302 #ifdef UTF8_INPUT_ENABLE
1303 #ifdef UNICODE_NORMALIZATION
1311 input_endian = ENDIAN_BIG;
1315 input_endian = ENDIAN_LITTLE;
1320 input_endian = ENDIAN_BIG;
1324 input_endian = ENDIAN_LITTLE;
1331 set_output_encoding(nkf_encoding *enc)
1333 switch (nkf_enc_to_index(enc)) {
1336 #ifdef SHIFTJIS_CP932
1337 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1339 #ifdef UTF8_OUTPUT_ENABLE
1340 ms_ucs_map_f = UCS_MAP_CP932;
1344 #ifdef SHIFTJIS_CP932
1345 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1347 #ifdef UTF8_OUTPUT_ENABLE
1348 ms_ucs_map_f = UCS_MAP_CP932;
1353 #ifdef SHIFTJIS_CP932
1354 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1360 #ifdef SHIFTJIS_CP932
1361 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1367 #ifdef UTF8_OUTPUT_ENABLE
1368 ms_ucs_map_f = UCS_MAP_CP932;
1372 #ifdef UTF8_OUTPUT_ENABLE
1373 ms_ucs_map_f = UCS_MAP_CP10001;
1378 #ifdef SHIFTJIS_CP932
1379 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1381 #ifdef UTF8_OUTPUT_ENABLE
1382 ms_ucs_map_f = UCS_MAP_ASCII;
1387 #ifdef SHIFTJIS_CP932
1388 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1390 #ifdef UTF8_OUTPUT_ENABLE
1391 ms_ucs_map_f = UCS_MAP_ASCII;
1395 #ifdef SHIFTJIS_CP932
1396 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1398 #ifdef UTF8_OUTPUT_ENABLE
1399 ms_ucs_map_f = UCS_MAP_CP932;
1404 #ifdef UTF8_OUTPUT_ENABLE
1405 ms_ucs_map_f = UCS_MAP_MS;
1410 #ifdef UTF8_OUTPUT_ENABLE
1411 ms_ucs_map_f = UCS_MAP_ASCII;
1414 case SHIFT_JISX0213:
1415 case SHIFT_JIS_2004:
1417 #ifdef SHIFTJIS_CP932
1418 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1425 #ifdef SHIFTJIS_CP932
1426 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1429 #ifdef UTF8_OUTPUT_ENABLE
1431 output_bom_f = TRUE;
1435 output_bom_f = TRUE;
1438 output_endian = ENDIAN_LITTLE;
1439 output_bom_f = FALSE;
1442 output_endian = ENDIAN_LITTLE;
1443 output_bom_f = TRUE;
1446 output_bom_f = TRUE;
1449 output_endian = ENDIAN_LITTLE;
1450 output_bom_f = FALSE;
1453 output_endian = ENDIAN_LITTLE;
1454 output_bom_f = TRUE;
1460 static struct input_code*
1461 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1464 struct input_code *p = input_code_list;
1466 if (iconv_func == p->iconv_func){
1476 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1478 #ifdef INPUT_CODE_FIX
1479 if (f || !input_encoding)
1486 #ifdef INPUT_CODE_FIX
1487 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1493 if (estab_f && iconv_for_check != iconv){
1494 struct input_code *p = find_inputcode_byfunc(iconv);
1496 set_input_codename(p->name);
1499 iconv_for_check = iconv;
1506 x0212_shift(nkf_char c)
1511 if (0x75 <= c && c <= 0x7f){
1512 ret = c + (0x109 - 0x75);
1515 if (0x75 <= c && c <= 0x7f){
1516 ret = c + (0x113 - 0x75);
1524 x0212_unshift(nkf_char c)
1527 if (0x7f <= c && c <= 0x88){
1528 ret = c + (0x75 - 0x7f);
1529 }else if (0x89 <= c && c <= 0x92){
1530 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1534 #endif /* X0212_ENABLE */
/*
 * e2s_conv: convert an EUC/JIS-style pair (c2 = row, c1 = cell) to a
 * Shift_JIS pair.
 *   p2 - receives the lead byte;  may be NULL (then nothing is stored).
 *   p1 - receives the trail byte; may be NULL (then nothing is stored).
 * Visible paths:
 *   - ndx in 0x21..0x2F or 0x6E..0x7E: lead byte computed arithmetically;
 *   - any other graphic ndx: looked up in x0212_shiftjis[];
 *   - under X0212_ENABLE, c2 is passed through x0212_shift();
 *   - a row value above 0x7F makes the function return 1;
 *   - otherwise the usual row/cell arithmetic produces the two bytes.
 * How ndx is derived from c2 is not visible in this excerpt.
 */
1537 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1543 if((0x21 <= ndx && ndx <= 0x2F)){
1544 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1545 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1547 }else if(0x6E <= ndx && ndx <= 0x7E){
1548 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1549 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1555 else if(nkf_isgraph(ndx)){
1557 const unsigned short *ptr;
1558 ptr = x0212_shiftjis[ndx - 0x21];
1560 val = ptr[(c1 & 0x7f) - 0x21];
1569 c2 = x0212_shift(c2);
1571 #endif /* X0212_ENABLE */
1573 if(0x7F < c2) return 1;
/* Odd rows map to the first half of a Shift_JIS lead byte, even rows to the second half. */
1574 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1575 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/*
 * s2e_conv: convert a Shift_JIS pair (c2 = lead byte, c1 = trail byte) to
 * an EUC/JIS-style pair.  The stores through p2/p1 are not visible in this
 * excerpt.
 * Visible steps:
 *   - a trail byte above 0xFC is rejected with return value 1;
 *   - under SHIFTJIS_CP932, IBM-extension lead bytes are translated via
 *     shiftjis_cp932[], and (in a second case) via cp932inv[];
 *   - when x0213_f is clear, IBM-extension lead bytes are mapped through
 *     shiftjis_x0212[] and tagged with PREFIX_EUCG3;
 *   - when x0213_f is set, lead bytes >= 0xF0 are mapped arithmetically or
 *     via shift_jisx0213_s1a3_table[];
 *   - otherwise the row is computed from the SJ0162 / SJ6394 offsets and
 *     incremented when the trail byte is above 0x9E;
 *   - c2 is finally passed through x0212_unshift().
 */
1580 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1582 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1585 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1586 if (0xFC < c1) return 1;
1587 #ifdef SHIFTJIS_CP932
1588 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1589 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1596 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1597 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1603 #endif /* SHIFTJIS_CP932 */
1605 if (!x0213_f && is_ibmext_in_sjis(c2)){
1606 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1609 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1622 if(x0213_f && c2 >= 0xF0){
1623 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
/* The second table index is 1 when the trail byte selects the even row. */
1624 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1625 }else{ /* 78<=k<=94 */
1626 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1627 if (0x9E < c1) c2++;
1630 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1631 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1632 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1633 if (0x9E < c1) c2++;
1636 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1643 c2 = x0212_unshift(c2);
1650 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/*
 * nkf_unicode_to_utf8: encode the Unicode value `val` as UTF-8.
 * The encoded bytes are stored through p1..p4 in order (lead byte in *p1).
 * Visible branches:
 *   val < 0x800        -> 2 bytes: 110xxxxx 10xxxxxx
 *   BMP value          -> 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
 *   other valid value  -> 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 * The single-byte branch and the stores for unused outputs are not
 * visible in this excerpt.
 */
1652 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1660 }else if (val < 0x800){
1661 *p1 = 0xc0 | (val >> 6);
1662 *p2 = 0x80 | (val & 0x3f);
1665 } else if (nkf_char_unicode_bmp_p(val)) {
1666 *p1 = 0xe0 | (val >> 12);
1667 *p2 = 0x80 | ((val >> 6) & 0x3f);
1668 *p3 = 0x80 | ( val & 0x3f);
1670 } else if (nkf_char_unicode_value_p(val)) {
/*
 * Four-byte form: the lead byte is 11110xxx and carries bits 18..20.
 * The previous expression (0xe0 | (val >> 16)) produced a three-byte lead
 * byte in front of three trail bytes, i.e. an ill-formed sequence.
 */
1671 *p1 = 0xf0 | (val >> 18);
1672 *p2 = 0x80 | ((val >> 12) & 0x3f);
1673 *p3 = 0x80 | ((val >> 6) & 0x3f);
1674 *p4 = 0x80 | ( val & 0x3f);
/*
 * nkf_utf8_to_unicode: decode one UTF-8 sequence (c1 = lead byte, c2..c4 =
 * following bytes) into a Unicode value accumulated in wc.
 * The branch is chosen from the lead byte c1 alone:
 *   <= 0xC1        trail byte or invalid (0xC0/0xC1 only form overlongs)
 *   0xC2 .. 0xDF   2-byte sequence
 *   0xE0 .. 0xEF   3-byte sequence
 *   0xF0 .. 0xF4   4-byte sequence
 * The single-byte branch, the final low-bits merges and the return
 * statements are not visible in this excerpt.
 */
1684 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/*
 * 0xC2 and 0xC3 are valid two-byte lead bytes (U+0080..U+00FF); only 0xC0
 * and 0xC1 are invalid, matching w_iconv_utf8_1st_byte[] in w_iconv().
 */
1691 else if (c1 <= 0xC1) {
1692 /* trail byte or invalid */
1695 else if (c1 <= 0xDF) {
1697 wc = (c1 & 0x1F) << 6;
1700 else if (c1 <= 0xEF) {
1702 wc = (c1 & 0x0F) << 12;
1703 wc |= (c2 & 0x3F) << 6;
/*
 * Test the lead byte, not c2: c2 is a trail byte (0x80..0xBF) and would
 * always satisfy "<= 0xF4", letting lead bytes 0xF5..0xFF through.
 */
1706 else if (c1 <= 0xF4) {
1708 wc = (c1 & 0x0F) << 18;
1709 wc |= (c2 & 0x3F) << 12;
1710 wc |= (c3 & 0x3F) << 6;
1720 #ifdef UTF8_INPUT_ENABLE
/*
 * unicode_to_jis_common2: second-level table lookup for Unicode -> JIS.
 *   c1, c0 - index bytes (c1 selects a sub-table in pp, c0 an entry in it).
 *   pp     - table of sub-tables; psize is its number of entries.
 *   p2, p1 - outputs (the stores are not visible in this excerpt).
 * Returns 1 when pp or the selected sub-table is null, when an index is
 * out of range, or when the table entry is 0.
 * With no_cp932ext_f set, NEC special characters (high byte 0x2D) and
 * values above 0xF300 (IBM extended characters) get special handling that
 * is not visible here.  A resulting c2 of SO is replaced by
 * JIS_X_0201_1976_K.
 */
1722 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1723 const unsigned short *const *pp, nkf_char psize,
1724 nkf_char *p2, nkf_char *p1)
1727 const unsigned short *p;
1730 if (pp == 0) return 1;
1733 if (c1 < 0 || psize <= c1) return 1;
1735 if (p == 0) return 1;
1738 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1740 if (val == 0) return 1;
1741 if (no_cp932ext_f && (
1742 (val>>8) == 0x2D || /* NEC special characters */
1743 val > NKF_INT32_C(0xF300) /* IBM extended characters */
1751 if (c2 == SO) c2 = JIS_X_0201_1976_K;
/*
 * unicode_to_jis_common: convert a UTF-8 byte triple (c2, c1, c0) to a JIS
 * pair via the utf8_to_euc_* tables chosen by ms_ucs_map_f.
 * When no_best_fit_chars_f is set, sequences listed in the
 * no_best_fit_chars_* tables (or matched by the explicit byte tests) are
 * rejected with return value 1 before the lookup.
 * Under SHIFTJIS_CP932, a successful G3 result is round-tripped through
 * e2s_conv()/s2e_conv() unless cp932inv_f is set.
 */
1759 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1761 const unsigned short *const *pp;
1762 const unsigned short *const *const *ppp;
/* Each table below has 64 entries and is indexed with (c1 & 0x3F). */
1763 static const char no_best_fit_chars_table_C2[] =
1764 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1765 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1766 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1767 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1768 static const char no_best_fit_chars_table_C2_ms[] =
1769 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1770 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1771 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1772 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1773 static const char no_best_fit_chars_table_932_C2[] =
1774 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1775 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1776 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1777 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1778 static const char no_best_fit_chars_table_932_C3[] =
1779 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1780 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1781 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1782 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
/* Two-byte sequences (lead byte below 0xE0). */
1788 }else if(c2 < 0xe0){
1789 if(no_best_fit_chars_f){
1790 if(ms_ucs_map_f == UCS_MAP_CP932){
1793 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1796 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1799 }else if(!cp932inv_f){
1802 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1805 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1808 }else if(ms_ucs_map_f == UCS_MAP_MS){
1809 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1810 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1828 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1829 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1830 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1832 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
/*
 * Three-byte sequences.
 * NOTE(review): this tests c0 (the third byte) while the neighbouring
 * branches test the lead byte c2, and ppp[] below is indexed with
 * c2 - 0xE0.  It looks like "c2 < 0xF0" was intended -- confirm.  The
 * visible callers only pass lead bytes up to 0xEF.
 */
1833 }else if(c0 < 0xF0){
1834 if(no_best_fit_chars_f){
1835 if(ms_ucs_map_f == UCS_MAP_CP932){
1836 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1837 }else if(ms_ucs_map_f == UCS_MAP_MS){
1842 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1845 if(c0 == 0x92) return 1;
1850 if(c1 == 0x80 || c0 == 0x9C) return 1;
1853 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1858 if(c0 == 0x94) return 1;
1861 if(c0 == 0xBB) return 1;
1871 if(c0 == 0x95) return 1;
1874 if(c0 == 0xA5) return 1;
1881 if(c0 == 0x8D) return 1;
1884 if(c0 == 0x9E && !cp932inv_f) return 1;
1887 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1895 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1896 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1897 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1899 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1901 #ifdef SHIFTJIS_CP932
/* Round-trip G3 results through Shift_JIS when cp932inv_f is clear. */
1902 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1904 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1905 s2e_conv(s2, s1, p2, p1);
1914 #ifdef UTF8_OUTPUT_ENABLE
/*
 * e2w_conv: map an internal EUC/JIS pair (c2, c1) to a Unicode value by
 * table lookup.
 *   - c2 == JIS_X_0201_1976_K uses euc_to_utf8_1byte (with a separate case
 *     for UCS_MAP_CP10001);
 *   - G3 values (is_eucg3) use x0212_to_utf8_2bytes[], with a special case
 *     for 0x8F22/0x43 under UCS_MAP_ASCII;
 *   - other values use euc_to_utf8_2bytes[], euc_to_utf8_2bytes_mac[] or
 *     euc_to_utf8_2bytes_ms[] depending on ms_ucs_map_f.
 * Row and cell are reduced to zero-based indices with (x & 0x7f) - 0x21
 * and range-checked before indexing.  The returned values are not visible
 * in this excerpt.
 */
1916 e2w_conv(nkf_char c2, nkf_char c1)
1918 const unsigned short *p;
1920 if (c2 == JIS_X_0201_1976_K) {
1921 if (ms_ucs_map_f == UCS_MAP_CP10001) {
1929 p = euc_to_utf8_1byte;
1931 } else if (is_eucg3(c2)){
1932 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1935 c2 = (c2&0x7f) - 0x21;
1936 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1937 p = x0212_to_utf8_2bytes[c2];
1943 c2 = (c2&0x7f) - 0x21;
1944 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1946 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
1947 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
1948 euc_to_utf8_2bytes_ms[c2];
1953 c1 = (c1 & 0x7f) - 0x21;
1954 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
/*
 * w2e_conv: convert a UTF-8 sequence (c2 = lead byte, c1, c0) to an
 * internal pair stored through p2/p1.
 * Lead bytes 0xC0..0xEF are handed to unicode_to_jis_common().  Under
 * NUMCHAR_OPTION, *p1 can instead receive the decoded code point wrapped
 * with nkf_char_unicode_new(); the condition for that is not visible here.
 */
1961 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1968 }else if (0xc0 <= c2 && c2 <= 0xef) {
1969 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1970 #ifdef NUMCHAR_OPTION
1973 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1981 #ifdef UTF8_INPUT_ENABLE
/*
 * w16e_conv: convert a Unicode value to an internal pair (*p2, *p1).
 * BMP values are first encoded as UTF-8 and then passed to
 * unicode_to_jis_common().  In two further visible places *p1 is set to
 * the value wrapped with nkf_char_unicode_new(); the conditions guarding
 * them are not visible in this excerpt.
 */
1983 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
1985 nkf_char c1, c2, c3, c4;
1992 else if (nkf_char_unicode_bmp_p(val)){
1993 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
1994 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
1997 *p1 = nkf_char_unicode_new(val);
2003 *p1 = nkf_char_unicode_new(val);
/*
 * e_iconv: input converter for EUC-JP.  (c2, c1, c0) are the bytes of one
 * character; c0 is only used for three-byte (0x8f-prefixed) sequences.
 * Visible cases:
 *   - JIS X 0201 kana / SS2: replaced by GETA1/GETA2 when iso2022jp_f is
 *     set and x0201_f is clear, otherwise tagged JIS_X_0201_1976_K;
 *   - 0x8f prefix: user-defined rows 0xF5..0xFE map to the Unicode Private
 *     Use Area starting at 0xE3AC (eucJP-ms); otherwise c2 is combined with
 *     c1 and, under SHIFTJIS_CP932, round-tripped through Shift_JIS;
 *   - EOF, 0, control values and ISO_8859_1 are handled separately;
 *   - two-byte user-defined rows 0xF5..0xFE map to the Private Use Area
 *     starting at 0xE000 (eucJP-ms);
 *   - with cp51932_f, rows 0x79..0x7c are round-tripped through Shift_JIS.
 */
2010 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2012 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
2013 if (iso2022jp_f && !x0201_f) {
2014 c2 = GETA1; c1 = GETA2;
2016 c2 = JIS_X_0201_1976_K;
2020 }else if (c2 == 0x8f){
2024 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2025 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2026 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2029 c2 = (c2 << 8) | (c1 & 0x7f);
2031 #ifdef SHIFTJIS_CP932
2034 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2035 s2e_conv(s2, s1, &c2, &c1);
2042 #endif /* SHIFTJIS_CP932 */
2044 #endif /* X0212_ENABLE */
2045 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2048 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2049 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2050 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2055 #ifdef SHIFTJIS_CP932
2056 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2058 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2059 s2e_conv(s2, s1, &c2, &c1);
2066 #endif /* SHIFTJIS_CP932 */
/*
 * s_iconv: input converter for Shift_JIS.
 * Visible cases:
 *   - JIS X 0201 kana (tag JIS_X_0201_1976_K or byte 0xA1..0xDF): replaced
 *     by GETA1/GETA2 when iso2022jp_f is set and x0201_f is clear;
 *   - EOF, 0 and control values are handled separately;
 *   - when x0213_f is clear, lead bytes 0xF0..0xF9 (user-defined area) map
 *     to the Unicode Private Use Area from 0xE000, 188 cells per lead byte;
 *     trail byte 0x7F is skipped with return value 0;
 *   - everything else goes through s2e_conv(), whose non-zero result is
 *     returned unchanged.
 */
2074 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2076 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2077 if (iso2022jp_f && !x0201_f) {
2078 c2 = GETA1; c1 = GETA2;
2082 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2084 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2086 if(c1 == 0x7F) return 0;
/* (0x7E < c1) closes the gap left by the unused trail byte 0x7F. */
2087 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2090 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2091 if (ret) return ret;
/*
 * w_iconv: input converter for UTF-8 (c1 = lead byte, c2, c3 = following
 * bytes; a fourth byte is kept in the local c4).
 * The lead byte is classified through w_iconv_utf8_1st_byte[], indexed
 * with c1 - 0xC0, and the following bytes are validated per class.
 * Visible return values: 0 for a rejected two-byte sequence, -1 and -2
 * when c3 is still 0 in the three- and four-byte classes respectively.
 * Four-byte sequences are decoded with nkf_utf8_to_unicode() and wrapped
 * with nkf_char_unicode_new(); other sequences go through w2e_conv().
 */
2098 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2100 nkf_char ret = 0, c4 = 0;
2101 static const char w_iconv_utf8_1st_byte[] =
2103 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2104 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2105 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2106 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2113 if (c1 < 0 || 0xff < c1) {
2114 }else if (c1 == 0) { /* 0 : 1 byte*/
2116 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2119 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2121 if (c2 < 0x80 || 0xBF < c2) return 0;
2124 if (c3 == 0) return -1;
2125 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2130 if (c3 == 0) return -1;
2131 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2135 if (c3 == 0) return -1;
2136 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2140 if (c3 == 0) return -2;
2141 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2145 if (c3 == 0) return -2;
2146 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2150 if (c3 == 0) return -2;
2151 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2159 if (c1 == 0 || c1 == EOF){
2160 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2161 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2164 ret = w2e_conv(c1, c2, c3, &c1, &c2);
2172 #define NKF_ICONV_INVALID_CODE_RANGE -13
/*
 * unicode_iconv: convert one Unicode scalar value wc for the input side.
 *   - values in 0xD800..0xDFFF ((wc >> 11) == 27) are unpaired surrogates
 *     and yield NKF_ICONV_INVALID_CODE_RANGE;
 *   - values below 0xFFFF go through w16e_conv(), whose non-zero result is
 *     returned;
 *   - values below 0x10FFFF are wrapped with nkf_char_unicode_new();
 *   - anything else yields NKF_ICONV_INVALID_CODE_RANGE.
 * NOTE(review): both upper bounds are exclusive, so U+FFFF takes the
 * supplementary-plane branch and U+10FFFF is reported as invalid.  Both are
 * noncharacters, so this may be harmless -- confirm whether "<=" was meant.
 */
2174 unicode_iconv(nkf_char wc)
2182 }else if ((wc>>11) == 27) {
2183 /* unpaired surrogate */
2184 return NKF_ICONV_INVALID_CODE_RANGE;
2185 }else if (wc < 0xFFFF) {
2186 ret = w16e_conv(wc, &c2, &c1);
2187 if (ret) return ret;
2188 }else if (wc < 0x10FFFF) {
2190 c1 = nkf_char_unicode_new(wc);
2192 return NKF_ICONV_INVALID_CODE_RANGE;
/* Negative converter results asking the caller for additional input bytes. */
2198 #define NKF_ICONV_NEED_ONE_MORE_BYTE -1
2199 #define NKF_ICONV_NEED_TWO_MORE_BYTES -2
/*
 * Combine a UTF-16 surrogate pair into a code point.  The constant equals
 * (0xD800 << 10) + 0xDC00 - 0x10000, i.e. it removes both surrogate bases
 * and adds the 0x10000 plane offset in a single subtraction.
 */
2200 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
/*
 * nkf_iconv_utf_16: decode one UTF-16 unit or surrogate pair from the
 * bytes c1..c4 according to input_endian and pass the resulting value to
 * (*unicode_iconv)().
 * If the first unit is a high surrogate (high byte 0xD8..0xDB) and the
 * second unit is a low surrogate (high byte 0xDC..0xDF), the pair is
 * combined with UTF16_TO_UTF32; if the second unit is not a low surrogate,
 * NKF_ICONV_NEED_TWO_MORE_BYTES is returned.
 * The handling of non-surrogate units is not visible in this excerpt.
 */
2202 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2211 if (input_endian == ENDIAN_BIG) {
2212 if (0xD8 <= c1 && c1 <= 0xDB) {
2213 if (0xDC <= c3 && c3 <= 0xDF) {
2214 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2215 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
/* Same logic with the byte order of each unit swapped. */
2220 if (0xD8 <= c2 && c2 <= 0xDB) {
2221 if (0xDC <= c4 && c4 <= 0xDF) {
2222 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2223 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2229 return (*unicode_iconv)(wc);
2233 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
2239 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
/*
 * nkf_iconv_utf_32: assemble a code point from the four bytes c1..c4
 * according to input_endian and pass it to (*unicode_iconv)().
 * Each visible case combines three of the four bytes; the remaining byte
 * (the most significant one for that byte order) does not appear in the
 * visible expressions.  One path returns NKF_ICONV_INVALID_CODE_RANGE;
 * the case labels themselves are not visible in this excerpt.
 */
2245 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2254 switch(input_endian){
2256 wc = c2 << 16 | c3 << 8 | c4;
2259 wc = c3 << 16 | c2 << 8 | c1;
2262 wc = c1 << 16 | c4 << 8 | c3;
2265 wc = c4 << 16 | c1 << 8 | c2;
2268 return NKF_ICONV_INVALID_CODE_RANGE;
2271 return (*unicode_iconv)(wc);
/*
 * output_ascii_escape_sequence(mode): when the output is currently in
 * neither ASCII nor ISO_8859_1 mode, emit an escape sequence ending with
 * ascii_intro and record `mode` as the new output_mode.  The leading bytes
 * of the sequence are not visible in this excerpt.
 */
2275 #define output_ascii_escape_sequence(mode) do { \
2276 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2279 (*o_putc)(ascii_intro); \
2280 output_mode = mode; \
/*
 * output_escape_sequence: emit the escape sequence that switches the
 * output to `mode`.  The first statement tests whether output_mode already
 * equals mode (its consequence is not visible here).  Visible cases
 * include JIS_X_0201_1976_K and one that outputs kanji_intro.
 */
2285 output_escape_sequence(int mode)
2287 if (output_mode == mode)
2295 case JIS_X_0201_1976_K:
2303 (*o_putc)(kanji_intro);
/*
 * j_oconv: output converter for ISO-2022-JP style output.
 * (c2, c1) is one internal character; c2 == 0 with a Unicode-tagged c1
 * denotes a raw Unicode value.
 * Visible steps:
 *   - under NUMCHAR_OPTION, Unicode values are first converted with
 *     w16e_conv(); values that stay unconverted and fall in
 *     0xE000..0xE757 (with ms_ucs_map_f set) are mapped to rows from 0x7F
 *     upward, 94 cells per row; others go to encode_fallback if set;
 *   - ASCII, EOF and ISO_8859_1 switch the output with
 *     output_ascii_escape_sequence();
 *   - JIS X 0201 kana and G3 characters switch with
 *     output_escape_sequence() (G3 uses JIS_X_0213_2 or JIS_X_0212);
 *   - remaining pairs are range-checked, returning without output when
 *     out of range, then emitted as JIS_X_0213_1 or JIS_X_0208.
 */
2328 j_oconv(nkf_char c2, nkf_char c1)
2330 #ifdef NUMCHAR_OPTION
2331 if (c2 == 0 && nkf_char_unicode_p(c1)){
2332 w16e_conv(c1, &c2, &c1);
2333 if (c2 == 0 && nkf_char_unicode_p(c1)){
2334 c2 = c1 & VALUE_MASK;
2335 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2338 c2 = 0x7F + c1 / 94;
2339 c1 = 0x21 + c1 % 94;
2341 if (encode_fallback) (*encode_fallback)(c1);
2348 output_ascii_escape_sequence(ASCII);
2351 else if (c2 == EOF) {
2352 output_ascii_escape_sequence(ASCII);
2355 else if (c2 == ISO_8859_1) {
2356 output_ascii_escape_sequence(ISO_8859_1);
2359 else if (c2 == JIS_X_0201_1976_K) {
2360 output_escape_sequence(JIS_X_0201_1976_K);
2363 } else if (is_eucg3(c2)){
2364 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2365 (*o_putc)(c2 & 0x7f);
2370 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2371 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2372 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
/*
 * e_oconv: output converter for EUC-JP.
 * (c2, c1) is one internal character; c2 == 0 with a Unicode-tagged c1
 * denotes a raw Unicode value.
 * Visible steps:
 *   - Unicode values are first converted with w16e_conv(); values that
 *     stay unconverted and fall in 0xE000..0xE757 (with x0212_f set) are
 *     emitted as user-defined rows, others go to encode_fallback if set;
 *   - c2 == 0 sets output_mode to ASCII;
 *   - JIS X 0201 kana is emitted as SS2 followed by c1 | 0x80;
 *   - ISO_8859_1 is emitted as c1 | 0x80;
 *   - G3 characters are, under SHIFTJIS_CP932, round-tripped through
 *     Shift_JIS before being emitted with the high bit set;
 *   - for the remaining pairs, a non-graphic c1 or c2 resets the input
 *     converter with set_iconv(FALSE, 0) and drops the character;
 *     otherwise both bytes are emitted with the high bit set.
 */
2379 e_oconv(nkf_char c2, nkf_char c1)
2381 if (c2 == 0 && nkf_char_unicode_p(c1)){
2382 w16e_conv(c1, &c2, &c1);
2383 if (c2 == 0 && nkf_char_unicode_p(c1)){
2384 c2 = c1 & VALUE_MASK;
2385 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2389 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2390 c1 = 0x21 + c1 % 94;
2393 (*o_putc)((c2 & 0x7f) | 0x080);
2394 (*o_putc)(c1 | 0x080);
2396 (*o_putc)((c2 & 0x7f) | 0x080);
2397 (*o_putc)(c1 | 0x080);
2401 if (encode_fallback) (*encode_fallback)(c1);
2409 } else if (c2 == 0) {
2410 output_mode = ASCII;
2412 } else if (c2 == JIS_X_0201_1976_K) {
2413 output_mode = EUC_JP;
2414 (*o_putc)(SS2); (*o_putc)(c1|0x80);
2415 } else if (c2 == ISO_8859_1) {
2416 output_mode = ISO_8859_1;
2417 (*o_putc)(c1 | 0x080);
2419 } else if (is_eucg3(c2)){
2420 output_mode = EUC_JP;
2421 #ifdef SHIFTJIS_CP932
2424 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2425 s2e_conv(s2, s1, &c2, &c1);
2430 output_mode = ASCII;
2432 }else if (is_eucg3(c2)){
2435 (*o_putc)((c2 & 0x7f) | 0x080);
2436 (*o_putc)(c1 | 0x080);
2439 (*o_putc)((c2 & 0x7f) | 0x080);
2440 (*o_putc)(c1 | 0x080);
2444 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2445 set_iconv(FALSE, 0);
2446 return; /* too late to rescue this char */
2448 output_mode = EUC_JP;
2449 (*o_putc)(c2 | 0x080);
2450 (*o_putc)(c1 | 0x080);
/*
 * s_oconv: output converter for Shift_JIS.
 * (c2, c1) is one internal character; c2 == 0 with a Unicode-tagged c1
 * denotes a raw Unicode value.
 * Visible steps:
 *   - under NUMCHAR_OPTION, Unicode values are first converted with
 *     w16e_conv(); values that stay unconverted and fall in
 *     0xE000..0xE757 (with x0213_f clear) are mapped to lead bytes from
 *     0xF0 (cp932inv_f set) or 0xEB, 188 cells per lead byte; others go to
 *     encode_fallback if set;
 *   - c2 == 0 sets output_mode to ASCII; JIS X 0201 kana sets SHIFT_JIS;
 *     ISO_8859_1 is emitted as c1 | 0x80;
 *   - G3 characters are converted with e2s_conv();
 *   - for the remaining pairs, a non-printable c1 or c2 resets the input
 *     converter with set_iconv(FALSE, 0) and drops the character;
 *     otherwise the pair is converted with e2s_conv();
 *   - under SHIFTJIS_CP932, lead bytes inside the CP932INV table range are
 *     translated through cp932inv[];
 *   - when prefix_table[] has an entry for the trail byte, that entry is
 *     output (the remaining output calls are not visible here).
 */
2455 s_oconv(nkf_char c2, nkf_char c1)
2457 #ifdef NUMCHAR_OPTION
2458 if (c2 == 0 && nkf_char_unicode_p(c1)){
2459 w16e_conv(c1, &c2, &c1);
2460 if (c2 == 0 && nkf_char_unicode_p(c1)){
2461 c2 = c1 & VALUE_MASK;
2462 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2465 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
/* Trail bytes start at 0x40 and skip 0x7F. */
2467 c1 += 0x40 + (c1 > 0x3e);
2472 if(encode_fallback)(*encode_fallback)(c1);
2481 } else if (c2 == 0) {
2482 output_mode = ASCII;
2484 } else if (c2 == JIS_X_0201_1976_K) {
2485 output_mode = SHIFT_JIS;
2487 } else if (c2 == ISO_8859_1) {
2488 output_mode = ISO_8859_1;
2489 (*o_putc)(c1 | 0x080);
2491 } else if (is_eucg3(c2)){
2492 output_mode = SHIFT_JIS;
2493 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2499 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2500 set_iconv(FALSE, 0);
2501 return; /* too late to rescue this char */
2503 output_mode = SHIFT_JIS;
2504 e2s_conv(c2, c1, &c2, &c1);
2506 #ifdef SHIFTJIS_CP932
2508 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2509 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2515 #endif /* SHIFTJIS_CP932 */
2518 if (prefix_table[(unsigned char)c1]){
2519 (*o_putc)(prefix_table[(unsigned char)c1]);
2525 #ifdef UTF8_OUTPUT_ENABLE
/*
 * w_oconv: output converter for UTF-8.
 * output_bom_f is cleared in the BOM-handling part (the BOM bytes
 * themselves are not visible in this excerpt).
 * A raw Unicode value (c2 == 0, Unicode-tagged c1) is masked with
 * VALUE_MASK and encoded directly; any other pair is first mapped to
 * Unicode with e2w_conv().  After encoding, the second to fourth bytes are
 * output only when non-zero.
 */
2527 w_oconv(nkf_char c2, nkf_char c1)
2533 output_bom_f = FALSE;
2544 if (c2 == 0 && nkf_char_unicode_p(c1)){
2545 val = c1 & VALUE_MASK;
2546 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2548 if (c2) (*o_putc)(c2);
2549 if (c3) (*o_putc)(c3);
2550 if (c4) (*o_putc)(c4);
2557 val = e2w_conv(c2, c1);
2559 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2561 if (c2) (*o_putc)(c2);
2562 if (c3) (*o_putc)(c3);
2563 if (c4) (*o_putc)(c4);
/*
 * w_oconv16: output converter for UTF-16.
 * output_bom_f is cleared in the BOM-handling part, whose byte order
 * depends on output_endian.
 * Raw Unicode values in the BMP are split into high and low byte; values
 * up to UNICODE_MAX beyond the BMP are written as a surrogate pair in the
 * byte order selected by output_endian.  Other pairs are mapped to Unicode
 * with e2w_conv() first.
 */
2569 w_oconv16(nkf_char c2, nkf_char c1)
2572 output_bom_f = FALSE;
2573 if (output_endian == ENDIAN_LITTLE){
2587 if (c2 == 0 && nkf_char_unicode_p(c1)) {
2588 if (nkf_char_unicode_bmp_p(c1)) {
2589 c2 = (c1 >> 8) & 0xff;
2593 if (c1 <= UNICODE_MAX) {
/* 0xD7C0 == 0xD800 - (0x10000 >> 10): subtracts the plane offset while adding the surrogate base. */
2594 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2595 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2596 if (output_endian == ENDIAN_LITTLE){
2597 (*o_putc)(c2 & 0xff);
2598 (*o_putc)((c2 >> 8) & 0xff);
2599 (*o_putc)(c1 & 0xff);
2600 (*o_putc)((c1 >> 8) & 0xff);
2602 (*o_putc)((c2 >> 8) & 0xff);
2603 (*o_putc)(c2 & 0xff);
2604 (*o_putc)((c1 >> 8) & 0xff);
2605 (*o_putc)(c1 & 0xff);
2611 nkf_char val = e2w_conv(c2, c1);
2612 c2 = (val >> 8) & 0xff;
2617 if (output_endian == ENDIAN_LITTLE){
/*
 * w_oconv32: output converter for UTF-32.
 * output_bom_f is cleared in the BOM-handling part, whose byte order
 * depends on output_endian.
 * ISO_8859_1 and raw Unicode values are handled in branches whose bodies
 * are not visible here; other pairs are mapped to Unicode with
 * e2w_conv().  The three low-order bytes of the value are then written in
 * the order selected by output_endian (the write of the remaining byte is
 * not visible in this excerpt).
 */
2627 w_oconv32(nkf_char c2, nkf_char c1)
2630 output_bom_f = FALSE;
2631 if (output_endian == ENDIAN_LITTLE){
2649 if (c2 == ISO_8859_1) {
2651 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2654 c1 = e2w_conv(c2, c1);
2657 if (output_endian == ENDIAN_LITTLE){
2658 (*o_putc)( c1 & 0xFF);
2659 (*o_putc)((c1 >> 8) & 0xFF);
2660 (*o_putc)((c1 >> 16) & 0xFF);
2664 (*o_putc)((c1 >> 16) & 0xFF);
2665 (*o_putc)((c1 >> 8) & 0xFF);
2666 (*o_putc)( c1 & 0xFF);
/*
 * Score flags used for input-encoding detection.  Each flag is the
 * previous one shifted left by one bit, so they can be OR-ed into a single
 * score; h_conv() selects the candidate with the lowest score.
 */
2671 #define SCORE_L2 (1) /* Kanji Level 2 */
2672 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2673 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */
2674 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2675 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2676 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* Undefined Characters */
2677 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2678 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
/* Initial score of every candidate (assigned in status_reset). */
2680 #define SCORE_INIT (SCORE_iMIME)
/*
 * Per-row score tables used by code_score(), which indexes them with the
 * low nibble of the row byte (c2 & 0x0f).  score_table_A0 is used when
 * (c2 & 0x70) == 0x20 and score_table_F0 when (c2 & 0x70) == 0x70.
 * Only part of each initializer is visible in this excerpt.
 */
2682 static const nkf_char score_table_A0[] = {
2685 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2686 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
2689 static const nkf_char score_table_F0[] = {
2690 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2691 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
2692 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2693 SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/* set_code_score: OR the given SCORE_* flag(s) into ptr->score. */
2697 set_code_score(struct input_code *ptr, nkf_char score)
2700 ptr->score |= score;
/* clr_code_score: clear the given SCORE_* flag(s) in ptr->score. */
2705 clr_code_score(struct input_code *ptr, nkf_char score)
2708 ptr->score &= ~score;
/*
 * code_score: update ptr->score from the character held in ptr->buf
 * (buf[0] = c2, buf[1] = c1).
 * The branches are tried in order:
 *   - an error case (condition not visible) sets SCORE_ERROR;
 *   - SS2 sets SCORE_KANA; 0x8f sets SCORE_X0212;
 *   - under UTF8_OUTPUT_ENABLE, a pair for which e2w_conv() returns 0 sets
 *     SCORE_NO_EXIST;
 *   - rows with (c2 & 0x70) == 0x20 or 0x70 use score_table_A0 / _F0;
 *   - rows with (c2 & 0x70) >= 0x50 set SCORE_L2.
 */
2713 code_score(struct input_code *ptr)
2715 nkf_char c2 = ptr->buf[0];
2716 #ifdef UTF8_OUTPUT_ENABLE
2717 nkf_char c1 = ptr->buf[1];
2720 set_code_score(ptr, SCORE_ERROR);
2721 }else if (c2 == SS2){
2722 set_code_score(ptr, SCORE_KANA);
2723 }else if (c2 == 0x8f){
2724 set_code_score(ptr, SCORE_X0212);
2725 #ifdef UTF8_OUTPUT_ENABLE
2726 }else if (!e2w_conv(c2, c1)){
2727 set_code_score(ptr, SCORE_NO_EXIST);
2729 }else if ((c2 & 0x70) == 0x20){
2730 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2731 }else if ((c2 & 0x70) == 0x70){
2732 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2733 }else if ((c2 & 0x70) >= 0x50){
2734 set_code_score(ptr, SCORE_L2);
/*
 * status_disable: take this candidate out of the detection.  If the
 * currently selected converter is this candidate's iconv_func, the
 * selection is reset with set_iconv(FALSE, 0).  Other state changes are
 * not visible in this excerpt.
 */
2739 status_disable(struct input_code *ptr)
2744 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
/*
 * status_push_ch: append c to ptr->buf and advance ptr->index.
 * No bounds check is visible here.
 */
2748 status_push_ch(struct input_code *ptr, nkf_char c)
2750 ptr->buf[ptr->index++] = c;
2754 status_clear(struct input_code *ptr)
/*
 * status_reset: reset the candidate's score to SCORE_INIT.  Any further
 * resets are not visible in this excerpt.
 */
2761 status_reset(struct input_code *ptr)
2764 ptr->score = SCORE_INIT;
/*
 * status_reinit: clear ptr->_file_stat.  Any further re-initialisation is
 * not visible in this excerpt.
 */
2768 status_reinit(struct input_code *ptr)
2771 ptr->_file_stat = 0;
/*
 * status_check: acts only when c is at most DEL and the input code is
 * already established (estab_f); the action itself is not visible in this
 * excerpt.
 */
2775 status_check(struct input_code *ptr, nkf_char c)
2777 if (c <= DEL && estab_f){
/*
 * s_status: per-byte state machine that tests whether the input can be
 * Shift_JIS.  ptr is the Shift_JIS candidate, c the next input byte.
 * First-byte classification (visible ranges):
 *   0xa1..0xdf              halfwidth kana, recorded as SS2 + c
 *   0x81..0x9f, 0xe0..0xea  lead byte of a double-byte character
 *   0xed..0xee              lead byte (separate state)
 *   IBM extension lead      only under SHIFTJIS_CP932
 *   0xf0..0xfc              only under X0212_ENABLE
 *   anything else           candidate is disabled
 * Second-byte states accept 0x40..0x7e and 0x80..0xfc, convert the pair
 * with s2e_conv() into ptr->buf, and in some states add SCORE_CP932;
 * other bytes disable the candidate.
 * The state numbers and transitions are not visible in this excerpt.
 */
2783 s_status(struct input_code *ptr, nkf_char c)
2787 status_check(ptr, c);
2792 }else if (nkf_char_unicode_p(c)){
2794 }else if (0xa1 <= c && c <= 0xdf){
2795 status_push_ch(ptr, SS2);
2796 status_push_ch(ptr, c);
2799 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2801 status_push_ch(ptr, c);
2802 }else if (0xed <= c && c <= 0xee){
2804 status_push_ch(ptr, c);
2805 #ifdef SHIFTJIS_CP932
2806 }else if (is_ibmext_in_sjis(c)){
2808 status_push_ch(ptr, c);
2809 #endif /* SHIFTJIS_CP932 */
2811 }else if (0xf0 <= c && c <= 0xfc){
2813 status_push_ch(ptr, c);
2814 #endif /* X0212_ENABLE */
2816 status_disable(ptr);
2820 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2821 status_push_ch(ptr, c);
2822 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2826 status_disable(ptr);
2830 #ifdef SHIFTJIS_CP932
2831 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2832 status_push_ch(ptr, c);
2833 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2834 set_code_score(ptr, SCORE_CP932);
2839 #endif /* SHIFTJIS_CP932 */
2840 status_disable(ptr);
2843 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2844 status_push_ch(ptr, c);
2845 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2846 set_code_score(ptr, SCORE_CP932);
2849 status_disable(ptr);
/*
 * e_status: per-byte state machine that tests whether the input can be
 * EUC-JP.  ptr is the EUC-JP candidate, c the next input byte.
 * First byte: SS2 or 0xa1..0xfe starts a double-byte character; 0x8f
 * (under X0212_ENABLE) starts a three-byte character; anything else
 * disables the candidate.  Following bytes must be in 0xa1..0xfe,
 * otherwise the candidate is disabled.
 * The state numbers and transitions are not visible in this excerpt.
 */
2856 e_status(struct input_code *ptr, nkf_char c)
2860 status_check(ptr, c);
2865 }else if (nkf_char_unicode_p(c)){
2867 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2869 status_push_ch(ptr, c);
2871 }else if (0x8f == c){
2873 status_push_ch(ptr, c);
2874 #endif /* X0212_ENABLE */
2876 status_disable(ptr);
2880 if (0xa1 <= c && c <= 0xfe){
2881 status_push_ch(ptr, c);
2885 status_disable(ptr);
2890 if (0xa1 <= c && c <= 0xfe){
2892 status_push_ch(ptr, c);
2894 status_disable(ptr);
2896 #endif /* X0212_ENABLE */
2900 #ifdef UTF8_INPUT_ENABLE
/*
 * w_status: per-byte state machine that tests whether the input can be
 * UTF-8.  ptr is the UTF-8 candidate, c the next input byte.
 * First byte: 0xc0..0xdf, 0xe0..0xef and 0xf0..0xf4 start two-, three-
 * and four-byte sequences; anything else disables the candidate.
 * Following bytes must be in 0x80..0xbf.  Once more bytes are buffered
 * than ptr->stat, the sequence is converted with w2e_conv() into
 * ptr->buf; a sequence equal to EF BB BF is flagged as a BOM (its use is
 * not visible in this excerpt).
 */
2902 w_status(struct input_code *ptr, nkf_char c)
2906 status_check(ptr, c);
2911 }else if (nkf_char_unicode_p(c)){
2913 }else if (0xc0 <= c && c <= 0xdf){
2915 status_push_ch(ptr, c);
2916 }else if (0xe0 <= c && c <= 0xef){
2918 status_push_ch(ptr, c);
2919 }else if (0xf0 <= c && c <= 0xf4){
2921 status_push_ch(ptr, c);
2923 status_disable(ptr);
2928 if (0x80 <= c && c <= 0xbf){
2929 status_push_ch(ptr, c);
2930 if (ptr->index > ptr->stat){
2931 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2932 && ptr->buf[2] == 0xbf);
2933 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2934 &ptr->buf[0], &ptr->buf[1]);
2941 status_disable(ptr);
2945 if (0x80 <= c && c <= 0xbf){
2946 if (ptr->index < ptr->stat){
2947 status_push_ch(ptr, c);
2952 status_disable(ptr);
/*
 * code_status: feed the input byte c to the status_func of every entry in
 * input_code_list.  If a single candidate emerges (result) while the input
 * code is not yet established, it is selected with
 * set_iconv(TRUE, result->iconv_func).  Otherwise, for c <= DEL, the list
 * is walked again; what is done per entry is not visible in this excerpt.
 */
2960 code_status(nkf_char c)
2962 int action_flag = 1;
2963 struct input_code *result = 0;
2964 struct input_code *p = input_code_list;
2966 if (!p->status_func) {
2970 if (!p->status_func)
2972 (p->status_func)(p, c);
2975 }else if(p->stat == 0){
2986 if (result && !estab_f){
2987 set_iconv(TRUE, result->iconv_func);
2988 }else if (c <= DEL){
2989 struct input_code *ptr = input_code_list;
2999 nkf_buf_t *std_gc_buf;
3000 nkf_char broken_state;
3001 nkf_buf_t *broken_buf;
3002 nkf_char mimeout_state;
3005 static nkf_state_t *nkf_state = NULL;
3007 #define STD_GC_BUFSIZE (256)
/*
 * nkf_state_init: prepare the global nkf_state.
 * One path clears the existing std_gc_buf and broken_buf; the other
 * allocates nkf_state and creates the two buffers (STD_GC_BUFSIZE and 3
 * entries).  The test choosing between the paths is not visible here, but
 * is presumably whether nkf_state is already allocated.
 * broken_state and mimeout_state are then set to 0.
 */
3010 nkf_state_init(void)
3013 nkf_buf_clear(nkf_state->std_gc_buf);
3014 nkf_buf_clear(nkf_state->broken_buf);
3017 nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3018 nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3019 nkf_state->broken_buf = nkf_buf_new(3);
3021 nkf_state->broken_state = 0;
3022 nkf_state->mimeout_state = 0;
3029 if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3030 return nkf_buf_pop(nkf_state->std_gc_buf);
/*
 * std_ungetc: push c back by storing it in nkf_state->std_gc_buf.
 * The FILE argument is not used in the visible code.
 */
3037 std_ungetc(nkf_char c, FILE *f)
3039 nkf_buf_push(nkf_state->std_gc_buf, c);
3045 std_putc(nkf_char c)
/* Look-ahead buffer filled while the input encoding is still undecided. */
3052 static unsigned char hold_buf[HOLD_SIZE*2];
3053 static int hold_count = 0;
/*
 * push_hold_buf: append the low byte of c2 to hold_buf.
 * Returns EOF when the buffer is full after the store, otherwise the new
 * number of held bytes.  An already full buffer is detected up front; the
 * value returned in that case is not visible in this excerpt.
 */
3055 push_hold_buf(nkf_char c2)
3057 if (hold_count >= HOLD_SIZE*2)
3059 hold_buf[hold_count++] = (unsigned char)c2;
3060 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
/*
 * h_conv: hold input bytes until the input encoding can be decided, then
 * replay them through the selected converter.
 * Phase 1 reads bytes with (*i_getc)() and stores them via push_hold_buf()
 * until EOF, until the buffer is full, or until estab_f is set.
 * If no decision was reached, the candidate with the lowest score among
 * the entries that have a status_func is selected with set_iconv(TRUE, ...).
 * Phase 2 walks hold_buf and feeds the bytes to (*iconv)(), fetching the
 * third and fourth bytes of a character from hold_buf or, once that is
 * exhausted, directly from the input.
 */
3064 h_conv(FILE *f, int c1, int c2)
3070 /** it must NOT be in the kanji shift sequence */
3071 /** it must NOT be written in JIS7 */
3072 /** and it must be after 2 byte 8bit code */
3078 while ((c2 = (*i_getc)(f)) != EOF) {
3084 if (push_hold_buf(c2) == EOF || estab_f) {
3090 struct input_code *p = input_code_list;
3091 struct input_code *result = p;
3096 if (p->status_func && p->score < result->score) {
3101 set_iconv(TRUE, result->iconv_func);
3106 ** 1) EOF is detected, or
3107 ** 2) Code is established, or
3108 ** 3) Buffer is FULL (but last word is pushed)
3110 ** in 1) and 3) cases, we continue to use
3111 ** Kanji codes by oconv and leave estab_f unchanged.
3116 while (hold_index < hold_count){
3117 c1 = hold_buf[hold_index++];
3121 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3122 (*iconv)(JIS_X_0201_1976_K, c1, 0);
3125 if (hold_index < hold_count){
3126 c2 = hold_buf[hold_index++];
3136 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3139 if (hold_index < hold_count){
3140 c3 = hold_buf[hold_index++];
3141 } else if ((c3 = (*i_getc)(f)) == EOF) {
3146 if (hold_index < hold_count){
3147 c4 = hold_buf[hold_index++];
3148 } else if ((c4 = (*i_getc)(f)) == EOF) {
/* The third and fourth bytes are packed into the single c0 argument. */
3153 (*iconv)(c1, c2, (c3<<8)|c4);
3158 /* 3 bytes EUC or UTF-8 */
3159 if (hold_index < hold_count){
3160 c3 = hold_buf[hold_index++];
3161 } else if ((c3 = (*i_getc)(f)) == EOF) {
3167 (*iconv)(c1, c2, c3);
3170 if (c3 == EOF) break;
3176 * Check and Ignore BOM
/*
 * BOM detection: read up to four bytes and compare them with the known
 * byte-order marks.
 *   00 00 FE FF  UTF-32, ENDIAN_BIG
 *   00 00 FF FE  UTF-32, ENDIAN_2143
 *   EF BB BF     UTF-8
 *   FE FF 00 00  UTF-32, ENDIAN_3412
 *   FE FF        UTF-16, ENDIAN_BIG
 *   FF FE 00 00  UTF-32, ENDIAN_LITTLE
 *   FF FE        UTF-16, ENDIAN_LITTLE
 * When a mark matches and no input_encoding was given, the matching
 * converter is selected with set_iconv(TRUE, ...); the endianness is
 * recorded only if that converter is the active one.
 * Bytes that turn out not to belong to a mark are pushed back with
 * (*i_ungetc)() in reverse order, so they are read again in their
 * original order.
 */
3182 switch(c2 = (*i_getc)(f)){
3184 if((c2 = (*i_getc)(f)) == 0x00){
3185 if((c2 = (*i_getc)(f)) == 0xFE){
3186 if((c2 = (*i_getc)(f)) == 0xFF){
3187 if(!input_encoding){
3188 set_iconv(TRUE, w_iconv32);
3190 if (iconv == w_iconv32) {
3191 input_endian = ENDIAN_BIG;
3194 (*i_ungetc)(0xFF,f);
3195 }else (*i_ungetc)(c2,f);
3196 (*i_ungetc)(0xFE,f);
3197 }else if(c2 == 0xFF){
3198 if((c2 = (*i_getc)(f)) == 0xFE){
3199 if(!input_encoding){
3200 set_iconv(TRUE, w_iconv32);
3202 if (iconv == w_iconv32) {
3203 input_endian = ENDIAN_2143;
3206 (*i_ungetc)(0xFF,f);
3207 }else (*i_ungetc)(c2,f);
3208 (*i_ungetc)(0xFF,f);
3209 }else (*i_ungetc)(c2,f);
3210 (*i_ungetc)(0x00,f);
3211 }else (*i_ungetc)(c2,f);
3212 (*i_ungetc)(0x00,f);
3215 if((c2 = (*i_getc)(f)) == 0xBB){
3216 if((c2 = (*i_getc)(f)) == 0xBF){
3217 if(!input_encoding){
3218 set_iconv(TRUE, w_iconv);
3220 if (iconv == w_iconv) {
3223 (*i_ungetc)(0xBF,f);
3224 }else (*i_ungetc)(c2,f);
3225 (*i_ungetc)(0xBB,f);
3226 }else (*i_ungetc)(c2,f);
3227 (*i_ungetc)(0xEF,f);
3230 if((c2 = (*i_getc)(f)) == 0xFF){
3231 if((c2 = (*i_getc)(f)) == 0x00){
3232 if((c2 = (*i_getc)(f)) == 0x00){
3233 if(!input_encoding){
3234 set_iconv(TRUE, w_iconv32);
3236 if (iconv == w_iconv32) {
3237 input_endian = ENDIAN_3412;
3240 (*i_ungetc)(0x00,f);
3241 }else (*i_ungetc)(c2,f);
3242 (*i_ungetc)(0x00,f);
3243 }else (*i_ungetc)(c2,f);
3244 if(!input_encoding){
3245 set_iconv(TRUE, w_iconv16);
3247 if (iconv == w_iconv16) {
3248 input_endian = ENDIAN_BIG;
3251 (*i_ungetc)(0xFF,f);
3252 }else (*i_ungetc)(c2,f);
3253 (*i_ungetc)(0xFE,f);
3256 if((c2 = (*i_getc)(f)) == 0xFE){
3257 if((c2 = (*i_getc)(f)) == 0x00){
3258 if((c2 = (*i_getc)(f)) == 0x00){
3259 if(!input_encoding){
3260 set_iconv(TRUE, w_iconv32);
3262 if (iconv == w_iconv32) {
3263 input_endian = ENDIAN_LITTLE;
3266 (*i_ungetc)(0x00,f);
3267 }else (*i_ungetc)(c2,f);
3268 (*i_ungetc)(0x00,f);
3269 }else (*i_ungetc)(c2,f);
3270 if(!input_encoding){
3271 set_iconv(TRUE, w_iconv16);
3273 if (iconv == w_iconv16) {
3274 input_endian = ENDIAN_LITTLE;
3277 (*i_ungetc)(0xFE,f);
3278 }else (*i_ungetc)(c2,f);
3279 (*i_ungetc)(0xFF,f);
/*
 * broken_getc: input filter that recognises JIS escape sequences whose
 * leading ESC byte is missing.
 * Bytes pending in nkf_state->broken_buf are returned first.
 * A '$' followed by '@' or 'B' (while in ASCII or JIS X 0201 kana mode),
 * or a '(' followed by 'J' or 'B' (while in JIS X 0208 or JIS X 0201 kana
 * mode), is treated as such a sequence: both bytes are pushed to
 * broken_buf.  The value returned in that case is not visible in this
 * excerpt; presumably it is ESC.
 * Neither rule applies when the previous byte was a real ESC
 * (broken_state == ESC).  broken_state remembers the last byte read.
 */
3288 broken_getc(FILE *f)
3292 if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3293 return nkf_buf_pop(nkf_state->broken_buf);
3296 if (c=='$' && nkf_state->broken_state != ESC
3297 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3299 nkf_state->broken_state = 0;
3300 if (c1=='@'|| c1=='B') {
3301 nkf_buf_push(nkf_state->broken_buf, c1);
3302 nkf_buf_push(nkf_state->broken_buf, c);
3308 } else if (c=='(' && nkf_state->broken_state != ESC
3309 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3311 nkf_state->broken_state = 0;
3312 if (c1=='J'|| c1=='B') {
3313 nkf_buf_push(nkf_state->broken_buf, c1);
3314 nkf_buf_push(nkf_state->broken_buf, c);
3321 nkf_state->broken_state = c;
/*
 * broken_ungetc: push c back onto nkf_state->broken_buf, but only while
 * the buffer holds fewer than two bytes; otherwise the byte is not
 * stored by the visible code.
 */
3327 broken_ungetc(nkf_char c, FILE *f)
3329 if (nkf_buf_length(nkf_state->broken_buf) < 2)
3330 nkf_buf_push(nkf_state->broken_buf, c);
/*
 * eol_conv: end-of-line detection and conversion stage.
 * Detection (only with guess_f, and until input_eol becomes EOF):
 * input_eol records the first line ending seen (LF, CRLF or CR) and is set
 * to EOF as soon as a different kind is seen, i.e. the input is mixed.
 * Conversion: a pending CR (prev_cr) or an LF is replaced by the line
 * ending selected by eolmode_f -- CR is written unless the mode is LF, and
 * LF unless the mode is CR.  A CR is held back in prev_cr rather than
 * output immediately; every character other than CR and LF is passed on
 * to (*o_eol_conv)().
 */
3335 eol_conv(nkf_char c2, nkf_char c1)
3337 if (guess_f && input_eol != EOF) {
3338 if (c2 == 0 && c1 == LF) {
3339 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3340 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3341 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3343 else if (!input_eol) input_eol = CR;
3344 else if (input_eol != CR) input_eol = EOF;
3346 if (prev_cr || (c2 == 0 && c1 == LF)) {
3348 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3349 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3351 if (c2 == 0 && c1 == CR) prev_cr = CR;
3352 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3356 Return value of fold_conv()
3358 LF add newline and output char
3359 CR add newline and output nothing
3362 1 (or else) normal output
3364 fold state in prev (previous character)
3366 >0x80 Japanese (X0208/X0201)
3371 This fold algorithm does not preserve leading spaces in a line.
3372 This is the main difference from fmt.
3375 #define char_size(c2,c1) (c2?2:1)
/*
 * fold_conv: line-folding stage.  (c2, c1) is one character; c2 == 0 means
 * a single-byte character.
 * The function computes fold_state for the character and then acts on it
 * in the "terminator process" switch at the end:
 *   0   output nothing
 *   LF  output a newline, then the character
 *   CR  output a newline and nothing else
 *   SP  output an ASCII space
 *   1   output the character normally
 * f_line is the current column and f_prev the previous character, with
 * bit 0x80 set for Japanese characters.  Lines are folded at fold_len;
 * kinsoku characters (characters that must not start a line) may overrun
 * by up to fold_margin columns.
 */
3378 fold_conv(nkf_char c2, nkf_char c1)
3381 nkf_char fold_state;
3383 if (c1== CR && !fold_preserve_f) {
3384 fold_state=0; /* ignore cr */
3385 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3387 fold_state=0; /* ignore cr */
3388 } else if (c1== BS) {
3389 if (f_line>0) f_line--;
3391 } else if (c2==EOF && f_line != 0) { /* close open last line */
3393 } else if ((c1==LF && !fold_preserve_f)
3394 || ((c1==CR||(c1==LF&&f_prev!=CR))
3395 && fold_preserve_f)) {
3397 if (fold_preserve_f) {
3401 } else if ((f_prev == c1 && !fold_preserve_f)
3402 || (f_prev == LF && fold_preserve_f)
3403 ) { /* duplicate newline */
3406 fold_state = LF; /* output two newline */
3412 if (f_prev&0x80) { /* Japanese? */
3414 fold_state = 0; /* ignore given single newline */
3415 } else if (f_prev==SP) {
3419 if (++f_line<=fold_len)
3423 fold_state = CR; /* fold and output nothing */
3427 } else if (c1=='\f') {
3430 fold_state = LF; /* output newline and clear */
3431 } else if ( (c2==0 && c1==SP)||
3432 (c2==0 && c1==TAB)||
3433 (c2=='!'&& c1=='!')) {
3434 /* X0208 kankaku or ascii space */
3436 fold_state = 0; /* remove duplicate spaces */
3439 if (++f_line<=fold_len)
3440 fold_state = SP; /* output ASCII space only */
3442 f_prev = SP; f_line = 0;
3443 fold_state = CR; /* fold and output nothing */
3447 prev0 = f_prev; /* we still need this one... , but almost done */
/* NOTE(review): the second operand is redundant when JIS_X_0201_1976_K is non-zero. */
3449 if (c2 || c2 == JIS_X_0201_1976_K)
3450 f_prev |= 0x80; /* this is Japanese */
3451 f_line += char_size(c2,c1);
3452 if (f_line<=fold_len) { /* normal case */
3455 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3456 f_line = char_size(c2,c1);
3457 fold_state = LF; /* We can't wait, do fold now */
3458 } else if (c2 == JIS_X_0201_1976_K) {
3459 /* simple kinsoku rules return 1 means no folding */
3460 if (c1==(0xde&0x7f)) fold_state = 1; /* dakuten (voiced sound mark) */
3461 else if (c1==(0xdf&0x7f)) fold_state = 1; /* handakuten (semi-voiced sound mark) */
3462 else if (c1==(0xa4&0x7f)) fold_state = 1; /* original comment: ideographic full stop */
3463 else if (c1==(0xa3&0x7f)) fold_state = 1; /* original comment: fullwidth comma */
3464 else if (c1==(0xa1&0x7f)) fold_state = 1; /* original comment: right corner bracket */
3465 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3466 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3468 fold_state = LF;/* add one new f_line before this character */
3471 fold_state = LF;/* add one new f_line before this character */
3474 /* kinsoku point in ASCII */
3475 if ( c1==')'|| /* { [ ( */
3486 /* just after special */
3487 } else if (!is_alnum(prev0)) {
3488 f_line = char_size(c2,c1);
3490 } else if ((prev0==SP) || /* ignored new f_line */
3491 (prev0==LF)|| /* ignored new f_line */
3492 (prev0&0x80)) { /* X0208 - ASCII */
3493 f_line = char_size(c2,c1);
3494 fold_state = LF;/* add one new f_line before this character */
3496 fold_state = 1; /* default no fold in ASCII */
3500 if (c1=='"') fold_state = 1; /* ideographic comma */
3501 else if (c1=='#') fold_state = 1; /* ideographic full stop */
3502 else if (c1=='W') fold_state = 1; /* right corner bracket */
3503 else if (c1=='K') fold_state = 1; /* fullwidth right parenthesis */
3504 else if (c1=='$') fold_state = 1; /* fullwidth comma */
3505 else if (c1=='%') fold_state = 1; /* fullwidth full stop */
3506 else if (c1=='\'') fold_state = 1; /* original comment decodes to the fullwidth plus sign */
3507 else if (c1=='(') fold_state = 1; /* fullwidth semicolon */
3508 else if (c1==')') fold_state = 1; /* fullwidth question mark */
3509 else if (c1=='*') fold_state = 1; /* fullwidth exclamation mark */
3510 else if (c1=='+') fold_state = 1; /* dakuten (voiced sound mark) */
3511 else if (c1==',') fold_state = 1; /* handakuten (semi-voiced sound mark) */
3512 /* default no fold in kinsoku */
3515 f_line = char_size(c2,c1);
3516 /* add one new f_line before this character */
3519 f_line = char_size(c2,c1);
3521 /* add one new f_line before this character */
3526 /* terminator process */
3527 switch(fold_state) {
3529 OCONV_NEWLINE((*o_fconv));
3535 OCONV_NEWLINE((*o_fconv));
/* z_conv -- conversion stage whose results are passed on through o_zconv.
 * z_prev2/z_prev1 are file-scope carry-over state: the visible code compares
 * z_prev2 with JIS_X_0201_1976_K and uses z_prev1 to index the cv/dv/ev
 * tables, so they appear to hold a pending X0201 character.
 * From the visible lines the function handles, depending on the flags tested:
 *   - JIS X 0201 katakana (c2 == JIS_X_0201_1976_K) mapped through cv/dv/ev,
 *     combining a following voiced (0xde) or semi-voiced (0xdf) mark with the
 *     pending character;
 *   - JIS X 0208 rows 0x23 and 0x21 under alpha_f bits;
 *   - ASCII characters replaced by entity strings under alpha_f&8;
 *   - JIS X 0208 katakana (c2 == 0x25) to JIS X 0201 katakana.
 * NOTE(review): many lines of this function are missing from this listing, so
 * this summary covers only what is shown.
 * NOTE(review): the entity strings assigned in the alpha_f&8 branch look as if
 * their character-entity text was decoded somewhere upstream of this listing;
 * verify them against the upstream source. */
3546 static nkf_char z_prev2=0,z_prev1=0;
3549 z_conv(nkf_char c2, nkf_char c1)
3552 /* if (c2) c1 &= 0x7f; assertion */
3554 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
/* a previous X0201 character is pending */
3560 if (z_prev2 == JIS_X_0201_1976_K) {
3561 if (c2 == JIS_X_0201_1976_K) {
3562 if (c1 == (0xde&0x7f)) { /* dakuten (voiced sound mark) */
3564 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3566 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* handakuten (semi-voiced sound mark) */
3568 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
/* no combining mark followed: emit the plain form from cv */
3573 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3575 if (c2 == JIS_X_0201_1976_K) {
3576 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3577 /* wait for dakuten or handakuten */
3582 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3593 if (alpha_f&1 && c2 == 0x23) {
3594 /* JISX0208 Alphabet */
3596 } else if (c2 == 0x21) {
3597 /* JISX0208 Kigou */
3602 } else if (alpha_f&4) {
3607 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3613 if (alpha_f&8 && c2 == 0) {
3615 const char *entity = 0;
3617 case '>': entity = ">"; break;
3618 case '<': entity = "<"; break;
3619 case '\"': entity = """; break;
3620 case '&': entity = "&"; break;
3623 while (*entity) (*o_zconv)(0, *entity++);
3629 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3634 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3638 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3642 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3646 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3650 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3654 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3658 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3662 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3667 (*o_zconv)(JIS_X_0201_1976_K, c);
3670 } else if (c2 == 0x25) {
3671 /* JISX0208 Katakana */
3672 static const int fullwidth_to_halfwidth[] =
3674 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3675 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3676 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3677 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3678 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3679 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3680 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3681 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3682 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3683 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3684 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3685 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3687 if (fullwidth_to_halfwidth[c1-0x20]){
3688 c2 = fullwidth_to_halfwidth[c1-0x20];
3689 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3691 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
/* rot13(c): the visible arms map characters up to M (and up to m) to c + 13
 * and characters up to Z (and up to z) to c - 13.  The lower-bound guards and
 * the fall-through arm are on lines missing from this listing.
 * rot47(c): the visible arms map characters up to O to c + 47 and characters
 * up to the tilde to c - 47.
 * NOTE(review): the macro argument is expanded unparenthesized and more than
 * once, so callers should pass a simple, side-effect-free expression. */
3701 #define rot13(c) ( \
3703 (c <= 'M') ? (c + 13): \
3704 (c <= 'Z') ? (c - 13): \
3706 (c <= 'm') ? (c + 13): \
3707 (c <= 'z') ? (c - 13): \
3711 #define rot47(c) ( \
3713 ( c <= 'O') ? (c + 47) : \
3714 ( c <= '~') ? (c - 47) : \
/* rot_conv: filter stage that forwards (c2, c1) to o_rot_conv.
 * The visible condition selects single-byte input (c2 is 0,
 * JIS_X_0201_1976_K or ISO_8859_1).  The transformation applied in each branch
 * is on lines missing from this listing (presumably the rot13/rot47 macros --
 * confirm against the full file). */
3719 rot_conv(nkf_char c2, nkf_char c1)
3721 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3727 (*o_rot_conv)(c2,c1);
/* hira_conv: filter stage that forwards its result to o_hira_conv.
 * The visible tests work on JIS X 0208 cells 0x21..0x73 plus cell 0x74, and on
 * row 0x21 cells 0x33/0x34 in one direction and 0x35/0x36 in the other, which
 * suggests a hiragana <-> katakana swap (row 0x25 is labelled katakana
 * elsewhere in this file; row 0x24 is presumably hiragana -- confirm).
 * The conditions selecting each direction and the statements that rewrite
 * c2/c1 are on lines missing from this listing. */
3731 hira_conv(nkf_char c2, nkf_char c1)
3735 if (0x20 < c1 && c1 < 0x74) {
3737 (*o_hira_conv)(c2,c1);
/* cell 0x74 is only converted when the output encoding is Unicode; it is
 * emitted as code point U+3094 (HIRAGANA LETTER VU) */
3739 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3741 c1 = nkf_char_unicode_new(0x3094);
3742 (*o_hira_conv)(c2,c1);
3745 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3747 (*o_hira_conv)(c2,c1);
/* reverse direction: U+3094 and row 0x24 input */
3752 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3755 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3757 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3761 (*o_hira_conv)(c2,c1);
/* iso2022jp_check_conv: filter stage placed in front of
 * o_iso2022jp_check_conv.  It tests (c2, c1) against fixed byte ranges and
 * against a table of RANGE_NUM_MAX [start, end] pairs before forwarding.
 * What happens to a character that matches (substitution or rejection) is on
 * lines missing from this listing, as are the table contents and the
 * computation of c. */
3766 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
3768 #define RANGE_NUM_MAX 18
3769 static const nkf_char range[RANGE_NUM_MAX][2] = {
3790 nkf_char start, end, c;
/* c2 in 0x00..0x20 together with c1 in 0x7f..0xff */
3792 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
/* rows 0x29..0x2f and 0x75..0x7e */
3796 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
/* linear scan of the range table */
3801 for (i = 0; i < RANGE_NUM_MAX; i++) {
3802 start = range[i][0];
3805 if (c >= start && c <= end) {
3810 (*o_iso2022jp_check_conv)(c2,c1);
3814 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
/* Recognised encoded-word prefixes.  \075 is the octal escape for the equals
 * sign, so each entry reads =?CHARSET?METHOD? with the charset in upper case.
 * Callers in this file iterate until a null entry, so the array is expected to
 * be NULL-terminated (the terminator line is not visible in this listing).
 * The entries line up by index with mime_priority_func, mime_encode and
 * mime_encode_method. */
3816 static const unsigned char *mime_pattern[] = {
3817 (const unsigned char *)"\075?EUC-JP?B?",
3818 (const unsigned char *)"\075?SHIFT_JIS?B?",
3819 (const unsigned char *)"\075?ISO-8859-1?Q?",
3820 (const unsigned char *)"\075?ISO-8859-1?B?",
3821 (const unsigned char *)"\075?ISO-2022-JP?B?",
3822 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3823 #if defined(UTF8_INPUT_ENABLE)
3824 (const unsigned char *)"\075?UTF-8?B?",
3825 (const unsigned char *)"\075?UTF-8?Q?",
3827 (const unsigned char *)"\075?US-ASCII?Q?",
3832 /* marker used to raise the priority of the corresponding encoding */
/* One input-converter pointer per mime_pattern entry (same index).  A zero
 * entry means no converter is associated with that pattern.  mime_begin_strict
 * passes the selected entry to set_iconv. */
3833 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
3834 e_iconv, s_iconv, 0, 0, 0, 0,
3835 #if defined(UTF8_INPUT_ENABLE)
/* Output mode associated with each mime_pattern entry (same index).
 * open_mime searches this table for the requested mode to pick the pattern
 * to emit. */
3841 static const nkf_char mime_encode[] = {
3842 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K,
3843 #if defined(UTF8_INPUT_ENABLE)
/* Transfer encoding letter (B or Q) of each mime_pattern entry, same index;
 * the visible row matches the method letter embedded in the first six
 * patterns.  open_mime copies the selected entry into mimeout_mode. */
3850 static const nkf_char mime_encode_method[] = {
3851 'B', 'B','Q', 'B', 'B', 'Q',
3852 #if defined(UTF8_INPUT_ENABLE)
3860 /* MIME preprocessor fifo */
/* The buffer size must stay a power of two: indices are free-running
 * unsigned counters that are reduced with MIME_BUF_MASK on every access. */
3862 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3863 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
/* element access with wrap-around */
3864 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
/* Members of mime_input_state.  The struct opener and the top member used by
 * the functions below are on lines missing from this listing. */
3866 unsigned char buf[MIME_BUF_SIZE];
3868 unsigned int last; /* decoded */
3869 unsigned int input; /* undecoded */
/* input converter saved by mime_begin_strict and restored by
 * unswitch_mime_getc; NULL when nothing is saved */
3871 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
/* size of the recovery buffer and scan limit used while matching a MIME
 * preamble */
3873 #define MAXRECOVER 20
/* mime_input_buf_unshift: push the low byte of c back onto the front of the
 * MIME ring buffer by moving top one slot backwards. */
3876 mime_input_buf_unshift(nkf_char c)
3878 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
/* mime_ungetc: ungetc counterpart of mime_getc.  The character goes back into
 * the MIME ring buffer; f is not used on the visible line. */
3882 mime_ungetc(nkf_char c, FILE *f)
3884 mime_input_buf_unshift(c);
/* mime_ungetc_buf: push back one undecoded character, either through the
 * saved i_mungetc_buf hook or by stepping the ring buffer input index
 * backwards.  The condition choosing between the two is on a line missing
 * from this listing (presumably mimebuf_f, as in mime_getc_buf -- confirm). */
3889 mime_ungetc_buf(nkf_char c, FILE *f)
3892 (*i_mungetc_buf)(c,f);
3894 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
/* mime_getc_buf: fetch one undecoded character.  When mimebuf_f is set it is
 * read through the saved i_mgetc_buf hook, otherwise it is taken from the ring
 * buffer at the input index, which is then advanced. */
3899 mime_getc_buf(FILE *f)
3901 /* we don't keep eof of mime_input_buf, because it contains ?= as
3902 a terminator. It was checked in mime_integrity. */
3903 return ((mimebuf_f)?
3904 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
/* switch_mime_getc: install mime_getc/mime_ungetc as the active input hooks,
 * saving the previous pair in i_mgetc/i_mungetc.  Does nothing if mime_getc
 * is already installed.  In STRICT_MIME mode a second layer is added: the
 * saved pair moves to i_mgetc_buf/i_mungetc_buf and the ring-buffer readers
 * take its place. */
3908 switch_mime_getc(void)
3910 if (i_getc!=mime_getc) {
3911 i_mgetc = i_getc; i_getc = mime_getc;
3912 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3913 if(mime_f==STRICT_MIME) {
3914 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3915 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
/* unswitch_mime_getc: undo switch_mime_getc.  The STRICT_MIME layer is removed
 * first, then the saved unget hook is restored (the matching restore of i_getc
 * is presumably on a line missing from this listing).  If an input converter
 * was saved in mime_iconv_back it is reinstalled and the slot cleared. */
3921 unswitch_mime_getc(void)
3923 if(mime_f==STRICT_MIME) {
3924 i_mgetc = i_mgetc_buf;
3925 i_mungetc = i_mungetc_buf;
3928 i_ungetc = i_mungetc;
3929 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3930 mime_iconv_back = NULL;
/* mime_integrity: pre-read an encoded word into the MIME ring buffer and check
 * that it looks complete before decoding starts.
 * f is the input stream and p the preamble pattern that was already matched;
 * p is copied into the buffer first.
 * Input is then read through i_getc until EOF or until the buffer is full.
 * Seeing the closing pair (tested as c and d) rewinds the input index to the
 * position saved in q so decoding starts after the preamble.  Characters
 * outside the Base64 alphabet, the equals sign and the question mark end the
 * scan.
 * On failure the data stays in the buffer, last is moved to input, and
 * mime_decode_mode is set to 1 so mime_getc replays it undecoded.
 * NOTE(review): return values, the update of d and several closing lines are
 * missing from this listing, and the comment opened on the line numbered 3938
 * has no visible terminator here. */
3934 mime_integrity(FILE *f, const unsigned char *p)
3938 /* In buffered mode, read until =? or NL or buffer full
3940 mime_input_state.input = mime_input_state.top;
3941 mime_input_state.last = mime_input_state.top;
3943 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
3945 q = mime_input_state.input;
3946 while((c=(*i_getc)(f))!=EOF) {
3947 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3948 break; /* buffer full */
3950 if (c=='=' && d=='?') {
3951 /* checked. skip header, start decode */
3952 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3953 /* mime_last_input = mime_input_state.input; */
3954 mime_input_state.input = q;
3958 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3960 /* Should we check length mod 4? */
3961 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3964 /* In case of Incomplete MIME, no MIME decode */
3965 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3966 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
3967 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
3968 switch_mime_getc(); /* anyway we need buffered getc */
/* mime_begin_strict: called after the first two preamble characters were seen;
 * matches the rest of the input against mime_pattern, comparing in upper case.
 * Every character read is also kept in r so it can be replayed if no pattern
 * matches.  When the current pattern stops matching, the following patterns
 * are tried, provided they share the prefix matched so far.
 * On a match the method letter two positions before the end of the pattern
 * becomes mime_decode_mode, the current input converter is saved in
 * mime_iconv_back and replaced by the entry of mime_priority_func, and the
 * result of mime_integrity is returned.
 * NOTE(review): the recovery path and some loop bookkeeping are on lines
 * missing from this listing. */
3973 mime_begin_strict(FILE *f)
3977 const unsigned char *p,*q;
3978 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
3980 mime_decode_mode = FALSE;
3981 /* =? has been checked */
3983 p = mime_pattern[j];
/* pattern text ends at the first character not greater than SP (the NUL) */
3986 for(i=2;p[i]>SP;i++) { /* start at =? */
3987 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
3988 /* pattern fails, try next one */
3990 while (mime_pattern[++j]) {
3991 p = mime_pattern[j];
3992 for(k=2;k<i;k++) /* assume length(p) > i */
3993 if (p[k]!=q[k]) break;
3994 if (k==i && nkf_toupper(c1)==p[k]) break;
3996 p = mime_pattern[j];
3997 if (p) continue; /* found next one, continue */
3998 /* all fails, output from recovery buffer */
/* p[i-2] is the B or Q letter in front of the final question mark */
4006 mime_decode_mode = p[i-2];
4008 mime_iconv_back = iconv;
4009 set_iconv(FALSE, mime_priority_func[j]);
4010 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4012 if (mime_decode_mode=='B') {
4013 mimebuf_f = unbuf_f;
4015 /* do MIME integrity check */
4016 return mime_integrity(f,mime_pattern[j]);
/* Body of the non-strict preamble scanner (function header not visible in
 * this listing; a comment below refers to it as mime_begin).
 * Every character read is appended to the MIME ring buffer at last, so a
 * false start can be replayed from the buffer.  k remembers where the preamble
 * began; on success last is reset to k, which discards the preamble. */
4030 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4031 /* re-read and convert again from mime_buffer. */
4033 /* =? has been checked */
4034 k = mime_input_state.last;
4035 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
4036 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4037 /* We accept any character type even if it is broken by new lines */
4038 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4039 if (c1==LF||c1==SP||c1==CR||
4040 c1=='-'||c1=='_'||is_alnum(c1)) continue;
4042 /* Failed. But this could be another MIME preamble */
4044 mime_input_state.last--;
4050 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4051 if (!(++i<MAXRECOVER) || c1==EOF) break;
/* the method letter is accepted in either case */
4052 if (c1=='b'||c1=='B') {
4053 mime_decode_mode = 'B';
4054 } else if (c1=='q'||c1=='Q') {
4055 mime_decode_mode = 'Q';
4059 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4060 if (!(++i<MAXRECOVER) || c1==EOF) break;
4062 mime_decode_mode = FALSE;
4068 if (!mime_decode_mode) {
4069 /* false MIME preamble, restart from mime_buffer */
4070 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4071 /* Since we are in MIME mode until buffer becomes empty, */
4072 /* we never go into mime_begin again for a while. */
4075 /* discard mime preamble, and goto MIME mode */
4076 mime_input_state.last = k;
4077 /* do no MIME integrity check */
4078 return c1; /* used only for checking EOF */
/* debug: write str and a newline to stderr; a null pointer is printed as the
 * text NULL.  Any guard around the call is on a line missing from this
 * listing. */
4089 debug(const char *str)
4092 fprintf(stderr, "%s\n", str ? str : "NULL");
/* set_input_codename: record the name of the detected input encoding.
 * The first name seen is stored as is (the pointer is kept, not copied).
 * A later call with a different name replaces it with the empty string,
 * which get_guessed_code reports as BINARY. */
4098 set_input_codename(const char *codename)
4100 if (!input_codename) {
4101 input_codename = codename;
4102 } else if (strcmp(codename, input_codename) != 0) {
4103 input_codename = "";
/* get_guessed_code: turn the recorded input_codename into the name that is
 * reported, store it back into input_codename and return it.
 *   - empty string (conflicting detections)  -> BINARY
 *   - nothing recorded                       -> ASCII
 *   - Shift_JIS / EUC-JP / ISO-2022-JP are refined to a code page name
 *     according to the score bits of the active input converter.
 * Any other name is returned unchanged. */
4108 get_guessed_code(void)
4110 if (input_codename && !*input_codename) {
4111 input_codename = "BINARY";
/* score of the converter currently installed in iconv */
4113 struct input_code *p = find_inputcode_byfunc(iconv);
4114 if (!input_codename) {
4115 input_codename = "ASCII";
4116 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4117 if (p->score & (SCORE_DEPEND|SCORE_CP932))
4118 input_codename = "CP932";
4119 } else if (strcmp(input_codename, "EUC-JP") == 0) {
/* the X0212 flag is tested before the CP932 flags */
4120 if (p->score & (SCORE_X0212))
4121 input_codename = "EUCJP-MS";
4122 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4123 input_codename = "CP51932";
4124 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
/* the kana flag is tested before the CP932 flags */
4125 if (p->score & (SCORE_KANA))
4126 input_codename = "CP50221";
4127 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4128 input_codename = "CP50220";
4131 return input_codename;
/* print_guessed_code: print the guessed input encoding on stdout, prefixed by
 * the file name and a colon when filename is not NULL.  Only built outside the
 * PERL_XS and WIN32DLL configurations.
 * The trailing conditional chain selects an end-of-line suffix from input_eol;
 * the statement it belongs to and its guard are on lines missing from this
 * listing, as is the body of the empty-name branch. */
4134 #if !defined(PERL_XS) && !defined(WIN32DLL)
4136 print_guessed_code(char *filename)
4138 if (filename != NULL) printf("%s: ", filename);
4139 if (input_codename && !*input_codename) {
4142 input_codename = get_guessed_code();
4144 printf("%s\n", input_codename);
4148 input_eol == CR ? " (CR)" :
4149 input_eol == LF ? " (LF)" :
4150 input_eol == CRLF ? " (CRLF)" :
4151 input_eol == EOF ? " (MIXED NL)" :
/* hex_getc: shared reader for escapes of the form ch followed by two hex
 * digits.
 *   ch  escape introducer (callers in this file pass a colon or a percent sign)
 *   f   input stream
 *   g   getc hook used to read characters
 *   u   ungetc hook used to push characters back
 * Returns the byte built from the two hex digits.  The paths taken when the
 * first character is not ch or a digit is not hexadecimal are on lines missing
 * from this listing (presumably the characters are pushed back with u and the
 * first one returned -- confirm). */
4161 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4163 nkf_char c1, c2, c3;
4169 if (!nkf_isxdigit(c2)){
4174 if (!nkf_isxdigit(c3)){
/* high nibble from c2, low nibble from c3 */
4179 return (hex2bin(c2) << 4) | hex2bin(c3);
/* Body of the colon-escape reader (header not visible in this listing;
 * module_connection installs it under the name cap_getc): decodes a colon
 * followed by two hex digits via hex_getc, reading through the i_cgetc and
 * i_cungetc hooks. */
4185 return hex_getc(':', f, i_cgetc, i_cungetc);
/* cap_ungetc: unget counterpart of cap_getc; passes the character straight to
 * the saved i_cungetc hook and returns its result. */
4189 cap_ungetc(nkf_char c, FILE *f)
4191 return (*i_cungetc)(c, f);
/* Body of the percent-escape reader (header not visible in this listing;
 * module_connection installs it under the name url_getc): decodes a percent
 * sign followed by two hex digits via hex_getc, reading through the i_ugetc
 * and i_uungetc hooks. */
4197 return hex_getc('%', f, i_ugetc, i_uungetc);
/* url_ungetc: unget counterpart of url_getc; passes the character straight to
 * the saved i_uungetc hook and returns its result. */
4201 url_ungetc(nkf_char c, FILE *f)
4203 return (*i_uungetc)(c, f);
/* numchar_getc: input filter that decodes a numeric character notation into a
 * Unicode nkf_char (presumably numeric character references -- the code that
 * recognises the introducer and terminator is on lines missing from this
 * listing).  Reads through i_ngetc and pushes back through i_nungetc.
 * Two forms are visible: an x or X marker followed by up to 7 hex digits, and
 * a decimal form of up to 8 digits.  On success the accumulated value is
 * wrapped with nkf_char_unicode_new. */
4207 #ifdef NUMCHAR_OPTION
4209 numchar_getc(FILE *f)
4211 nkf_char (*g)(FILE *) = i_ngetc;
4212 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
/* hexadecimal form */
4223 if (buf[i] == 'x' || buf[i] == 'X'){
4224 for (j = 0; j < 7; j++){
4226 if (!nkf_isxdigit(buf[i])){
4233 c |= hex2bin(buf[i]);
/* decimal form; hex2bin is also used for the decimal digit value */
4236 for (j = 0; j < 8; j++){
4240 if (!nkf_isdigit(buf[i])){
4247 c += hex2bin(buf[i]);
4253 return nkf_char_unicode_new(c);
/* numchar_ungetc: unget counterpart of numchar_getc; passes the character
 * straight to the saved i_nungetc hook and returns its result. */
4263 numchar_ungetc(nkf_char c, FILE *f)
4265 return (*i_nungetc)(c, f);
/* Body of the normalising input filter (header not visible in this listing;
 * module_connection installs it under the name nfc_getc).
 * It reads bytes through i_nfc_getc and looks for a byte sequence that equals
 * the nfd field of a normalization_table entry; on a match the buffered bytes
 * are replaced by the nfc field of that entry.  Whatever ends up in the buffer
 * is pushed back through i_nfc_ungetc except for the first byte, which is
 * returned.
 * The table is searched by bisection on lower/upper, so it is presumably
 * sorted by its nfd byte strings -- confirm. */
4269 #ifdef UNICODE_NORMALIZATION
4274 nkf_char (*g)(FILE *f) = i_nfc_getc;
4275 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4276 nkf_buf_t *buf = nkf_buf_new(9);
4277 const unsigned char *array;
4278 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4279 nkf_char c = (*g)(f);
/* EOF, values above 0xFF and bytes of the form 10xxxxxx are passed through
 * without a table lookup */
4281 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4283 nkf_buf_push(buf, (unsigned char)c);
4285 while (lower <= upper) {
4286 int mid = (lower+upper) / 2;
4288 array = normalization_table[mid].nfd;
/* compare byte by byte, reading more input when the buffer is too short */
4289 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4290 if (len >= nkf_buf_length(buf)) {
/* makes lower > upper, which ends the search */
4294 lower = 1, upper = 0;
4297 nkf_buf_push(buf, c);
4299 if (array[len] != nkf_buf_at(buf, len)) {
4300 if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4301 else upper = mid - 1;
/* full match: substitute the composed form */
4308 array = normalization_table[mid].nfc;
4310 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4311 nkf_buf_push(buf, array[i]);
4315 } while (lower <= upper);
/* push back everything but the first byte, last byte first */
4317 while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4318 c = nkf_buf_pop(buf);
4319 nkf_buf_dispose(buf);
/* nfc_ungetc: unget counterpart of nfc_getc; passes the character straight to
 * the saved i_nfc_ungetc hook and returns its result. */
4325 nfc_ungetc(nkf_char c, FILE *f)
4327 return (*i_nfc_ungetc)(c, f);
4329 #endif /* UNICODE_NORMALIZATION */
/* base64decode: map one Base64 alphabet character to its 6-bit value.
 * The arithmetic uses character constants as plain ASCII numbers: the question
 * mark is 63, the greater-than sign is 62, G is 71 (lower-case a minus 26) and
 * the digit 4 is 52.  Besides the standard alphabet, the minus sign is
 * accepted for 62 and the underscore for 63.
 * The range tests that lead into the first and third assignments, and the
 * return statement, are on lines missing from this listing. */
4333 base64decode(nkf_char c)
4338 i = c - 'A'; /* A..Z 0-25 */
4339 } else if (c == '_') {
4340 i = '?' /* 63 */ ; /* _ 63 */
4342 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4344 } else if (c > '/') {
4345 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4346 } else if (c == '+' || c == '-') {
4347 i = '>' /* 62 */ ; /* + and - 62 */
4349 i = '?' /* 63 */ ; /* / 63 */
/* Body of the MIME-decoding getc (header not visible in this listing; other
 * functions in this file install it under the name mime_getc).
 * Visible behaviour, in order:
 *   1. If decoded bytes are waiting in the ring buffer (top differs from
 *      last), the next one is returned.
 *   2. If mime_decode_mode is 1 or FALSE, MIME mode is left through
 *      unswitch_mime_getc and the character comes from i_getc.
 *   3. Mode Q: quoted-printable style decoding.  An underscore becomes a
 *      space, an equals sign followed by two hex digits becomes one byte, an
 *      equals sign followed by a control character is a soft line break, and
 *      the closing pair ends the encoded word.
 *   4. Mode B: four Base64 characters are read, whitespace is skipped or
 *      ends the word depending on mime_f, and the decoded bytes are appended
 *      to the ring buffer; the first of them is returned.
 * After a closing pair, linear white space is collected in lwsp_buf.  It is
 * pushed back to the input unless the next character is an equals sign and
 * the collected run ends in a space or tab.
 * exit_mode is the mode restored when decoding stops.
 * NOTE(review): many lines, including labels, returns and the guards around
 * the Base64 byte stores, are missing from this listing. */
4357 nkf_char c1, c2, c3, c4, cc;
4358 nkf_char t1, t2, t3, t4, mode, exit_mode;
4359 nkf_char lwsp_count;
4362 nkf_char lwsp_size = 128;
4364 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4365 return mime_input_buf(mime_input_state.top++);
4367 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4368 mime_decode_mode=FALSE;
4369 unswitch_mime_getc();
4370 return (*i_getc)(f);
4373 if (mimebuf_f == FIXED_MIME)
4374 exit_mode = mime_decode_mode;
4377 if (mime_decode_mode == 'Q') {
4378 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4380 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4381 if (c1<=SP || DEL<=c1) {
4382 mime_decode_mode = exit_mode; /* prepare for quit */
4385 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4389 mime_decode_mode = exit_mode; /* prepare for quit */
4390 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4391 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4392 /* end Q encoding */
4393 input_mode = exit_mode;
4395 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4396 while ((c1=(*i_getc)(f))!=EOF) {
4401 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4409 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4410 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4425 lwsp_buf[lwsp_count] = (unsigned char)c1;
4426 if (lwsp_count++>lwsp_size){
4428 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4429 lwsp_buf = lwsp_buf_new;
4435 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4437 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4438 i_ungetc(lwsp_buf[lwsp_count],f);
4441 nkf_xfree(lwsp_buf);
4444 if (c1=='='&&c2<SP) { /* this is soft wrap */
4445 while((c1 = (*i_mgetc)(f)) <=SP) {
4446 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4448 mime_decode_mode = 'Q'; /* still in MIME */
4449 goto restart_mime_q;
4452 mime_decode_mode = 'Q'; /* still in MIME */
4456 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4457 if (c2<=SP) return c2;
4458 mime_decode_mode = 'Q'; /* still in MIME */
4459 return ((hex2bin(c2)<<4) + hex2bin(c3));
4462 if (mime_decode_mode != 'B') {
4463 mime_decode_mode = FALSE;
4464 return (*i_mgetc)(f);
4468 /* Base64 encoding */
4470 MIME allows line break in the middle of
4471 Base64, but we are very pessimistic in decoding
4472 in unbuf mode because MIME encoded code may broken by
4473 less or editor's control sequence (such as ESC-[-K in unbuffered
4474 mode. ignore incomplete MIME.
4476 mode = mime_decode_mode;
4477 mime_decode_mode = exit_mode; /* prepare for quit */
4479 while ((c1 = (*i_mgetc)(f))<=SP) {
4484 if ((c2 = (*i_mgetc)(f))<=SP) {
4487 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4488 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4491 if ((c1 == '?') && (c2 == '=')) {
4494 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4495 while ((c1=(*i_getc)(f))!=EOF) {
4500 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4508 if ((c1=(*i_getc)(f))!=EOF) {
4512 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4527 lwsp_buf[lwsp_count] = (unsigned char)c1;
4528 if (lwsp_count++>lwsp_size){
4530 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4531 lwsp_buf = lwsp_buf_new;
4537 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4539 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4540 i_ungetc(lwsp_buf[lwsp_count],f);
4543 nkf_xfree(lwsp_buf);
4547 if ((c3 = (*i_mgetc)(f))<=SP) {
4550 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4551 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4555 if ((c4 = (*i_mgetc)(f))<=SP) {
4558 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4559 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4563 mime_decode_mode = mode; /* still in MIME sigh... */
4565 /* BASE 64 decoding */
4567 t1 = 0x3f & base64decode(c1);
4568 t2 = 0x3f & base64decode(c2);
4569 t3 = 0x3f & base64decode(c3);
4570 t4 = 0x3f & base64decode(c4);
4571 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4573 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4574 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4576 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4577 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4579 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4584 return mime_input_buf(mime_input_state.top++);
/* Base64 output alphabet, indexed by 6-bit value. */
4587 static const char basis_64[] =
4588 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Capacity of the MIME output look-ahead buffer; buf below has one spare
 * byte.  The struct opener and the count member used elsewhere in this file
 * are on lines missing from this listing. */
4590 #define MIMEOUT_BUF_LENGTH 74
4592 char buf[MIMEOUT_BUF_LENGTH+1];
4596 /*nkf_char mime_lastchar2, mime_lastchar1;*/
/* open_mime: start an encoded word for output mode `mode`.
 * The pattern whose mime_encode entry equals mode is selected, falling back to
 * the first pattern, and mimeout_mode is set from mime_encode_method.
 * Pending characters in mimeout_state.buf are handled around the preamble:
 * leading white space is written through o_mputc as is, a newline is emitted
 * first when base64_count exceeds 45, and the remaining buffered characters
 * are fed back through mime_putc after the buffer count was reset.
 * NOTE(review): the statements that emit the preamble and that reset i are on
 * lines missing from this listing. */
4599 open_mime(nkf_char mode)
4601 const unsigned char *p;
4604 p = mime_pattern[0];
4605 for(i=0;mime_pattern[i];i++) {
4606 if (mode == mime_encode[i]) {
4607 p = mime_pattern[i];
4611 mimeout_mode = mime_encode_method[i];
4613 if (base64_count>45) {
4614 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4615 (*o_mputc)(mimeout_state.buf[i]);
4618 PUT_NEWLINE((*o_mputc));
4621 if (mimeout_state.count>0
4622 && (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4623 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)) {
/* copy the run of buffered white space to the output unencoded */
4627 for (;i<mimeout_state.count;i++) {
4628 if (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4629 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF) {
4630 (*o_mputc)(mimeout_state.buf[i]);
/* remember the old length, empty the buffer, then re-feed the rest */
4640 j = mimeout_state.count;
4641 mimeout_state.count = 0;
4643 mime_putc(mimeout_state.buf[i]);
/* mime_prechar: called by base64_conv before each character is forwarded;
 * decides whether the current output line has to be folded first.
 * The projected line length is base64_count plus the encoded size of the
 * buffered characters (count/3*4).  When it exceeds the limit of the active
 * branch (73, 66 or 60 on the visible lines) the current encoded word is
 * closed by sending EOF through o_base64conv, a newline is written and a
 * single space starts the continuation line.
 * NOTE(review): the conditions that choose between the three branches are on
 * lines missing from this listing. */
4648 mime_prechar(nkf_char c2, nkf_char c1)
4650 if (mimeout_mode > 0){
4652 if (base64_count + mimeout_state.count/3*4> 73){
4653 (*o_base64conv)(EOF,0);
4654 OCONV_NEWLINE((*o_base64conv));
4655 (*o_base64conv)(0,SP);
4659 if (base64_count + mimeout_state.count/3*4> 66) {
4660 (*o_base64conv)(EOF,0);
4661 OCONV_NEWLINE((*o_base64conv));
4662 (*o_base64conv)(0,SP);
/* not inside an encoded word yet: pick Q for ASCII and ISO-8859-1 output,
 * B otherwise, and open a word before folding */
4668 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4669 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4670 open_mime(output_mode);
4671 (*o_base64conv)(EOF,0);
4672 OCONV_NEWLINE((*o_base64conv));
4673 (*o_base64conv)(0,SP);
/* Fragment of the routine that finishes an encoded word (function header and
 * case labels are not visible in this listing; the name is unconfirmed).
 * The two o_mputc calls flush the bits left over in nkf_state->mimeout_state
 * as a final Base64 digit: the low 2 bits shifted left by 4, or the low 4 bits
 * shifted left by 2.  Which mimeout_mode value selects which call, and any
 * padding output, is on lines missing from this listing. */
4692 switch(mimeout_mode) {
4697 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
4703 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
4708 if (mimeout_mode > 0) {
4709 if (mimeout_f!=FIXED_MIME) {
4711 } else if (mimeout_mode != 'Q')
/* mimeout_addchar: encode one byte in the current MIME output mode and write
 * the result through o_mputc.
 * In the Q branch a non-alphanumeric byte is written as two hex digits (the
 * escape character that precedes them is presumably written on a line missing
 * from this listing).
 * In the B branches the byte is split across Base64 digits, with the bits
 * that do not yet fill a digit carried in nkf_state->mimeout_state:
 *   first byte:  emit the top 6 bits, keep the byte;
 *   second byte: emit 2 carried bits plus the top 4 bits, keep the byte;
 *   third byte:  emit 4 carried bits plus the top 2 bits, then the low 6 bits.
 * NOTE(review): the case labels and mode transitions are on lines missing
 * from this listing. */
4717 mimeout_addchar(nkf_char c)
4719 switch(mimeout_mode) {
4724 } else if(!nkf_isalnum(c)) {
4726 (*o_mputc)(bin2hex(((c>>4)&0xf)));
4727 (*o_mputc)(bin2hex((c&0xf)));
4735 nkf_state->mimeout_state=c;
4736 (*o_mputc)(basis_64[c>>2]);
4741 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4742 nkf_state->mimeout_state=c;
4747 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4748 (*o_mputc)(basis_64[c & 0x3F]);
/* mime_putc: output-side MIME encoder; receives one character (or EOF) and
 * writes plain or encoded output through o_mputc.
 * Two operating modes are visible:
 *   - mimeout_f == FIXED_MIME: the input is encoded unconditionally, and a
 *     newline is inserted once base64_count exceeds 71.
 *   - otherwise: characters are collected in mimeout_state.buf so that the
 *     function can decide, at white space and line ends, whether the pending
 *     run is written as plain text or inside an encoded word (opened with
 *     open_mime).  EOF flushes whatever is pending.
 * lastchar is the most recently buffered character; it is used to detect
 * header continuation lines.
 * NOTE(review): a large part of the control flow, including all returns, is
 * on lines missing from this listing, so the comments below describe only the
 * visible statements. */
4760 mime_putc(nkf_char c)
4765 if (mimeout_f == FIXED_MIME){
4766 if (mimeout_mode == 'Q'){
4767 if (base64_count > 71){
4768 if (c!=CR && c!=LF) {
4770 PUT_NEWLINE((*o_mputc));
4775 if (base64_count > 71){
4777 PUT_NEWLINE((*o_mputc));
4780 if (c == EOF) { /* c==EOF */
4784 if (c != EOF) { /* c!=EOF */
4790 /* mimeout_f != FIXED_MIME */
4792 if (c == EOF) { /* c==EOF */
/* more than one character is pending but no word was opened yet */
4793 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4794 j = mimeout_state.count;
4795 mimeout_state.count = 0;
4797 if (mimeout_mode > 0) {
4798 if (!nkf_isblank(mimeout_state.buf[j-1])) {
4800 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4803 mimeout_addchar(mimeout_state.buf[i]);
4807 mimeout_addchar(mimeout_state.buf[i]);
4811 mimeout_addchar(mimeout_state.buf[i]);
4817 mimeout_addchar(mimeout_state.buf[i]);
4823 if (mimeout_state.count > 0){
4824 lastchar = mimeout_state.buf[mimeout_state.count - 1];
4829 if (mimeout_mode=='Q') {
4830 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4831 if (c == CR || c == LF) {
4836 } else if (c <= SP) {
4838 if (base64_count > 70) {
4839 PUT_NEWLINE((*o_mputc));
4842 if (!nkf_isblank(c)) {
4847 if (base64_count > 70) {
4849 PUT_NEWLINE((*o_mputc));
4852 open_mime(output_mode);
4854 if (!nkf_noescape_mime(c)) {
/* no encoded word is open */
4865 if (mimeout_mode <= 0) {
4866 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4867 if (nkf_isspace(c)) {
4869 if (mimeout_mode == -1) {
4872 if (c==CR || c==LF) {
4874 open_mime(output_mode);
/* write the pending run unencoded */
4880 for (i=0;i<mimeout_state.count;i++) {
4881 (*o_mputc)(mimeout_state.buf[i]);
4882 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4893 mimeout_state.buf[0] = (char)c;
4894 mimeout_state.count = 1;
/* the pending run would push the line past 76 columns */
4896 if (base64_count > 1
4897 && base64_count + mimeout_state.count > 76
4898 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
/* look for a boundary parameter inside the pending run */
4899 static const char *str = "boundary=\"";
4900 static int len = 10;
4903 for (; i < mimeout_state.count - len; ++i) {
4904 if (!strncmp(mimeout_state.buf+i, str, len)) {
4910 if (i == 0 || i == mimeout_state.count - len) {
4911 PUT_NEWLINE((*o_mputc));
4913 if (!nkf_isspace(mimeout_state.buf[0])){
/* write everything up to index i, break the line, then shift the remainder
 * of the buffer down to the front */
4920 for (j = 0; j <= i; ++j) {
4921 (*o_mputc)(mimeout_state.buf[j]);
4923 PUT_NEWLINE((*o_mputc));
4925 for (; j <= mimeout_state.count; ++j) {
4926 mimeout_state.buf[j - i] = mimeout_state.buf[j];
4928 mimeout_state.count -= i;
4931 mimeout_state.buf[mimeout_state.count++] = (char)c;
4932 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4933 open_mime(output_mode);
4938 if (lastchar==CR || lastchar == LF){
4939 for (i=0;i<mimeout_state.count;i++) {
4940 (*o_mputc)(mimeout_state.buf[i]);
4943 mimeout_state.count = 0;
/* write all pending characters except the last one */
4946 for (i=0;i<mimeout_state.count-1;i++) {
4947 (*o_mputc)(mimeout_state.buf[i]);
4950 mimeout_state.buf[0] = SP;
4951 mimeout_state.count = 1;
4953 open_mime(output_mode);
4956 /* mimeout_mode == 'B', 1, 2 */
4957 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4958 if (lastchar == CR || lastchar == LF){
4959 if (nkf_isblank(c)) {
4960 for (i=0;i<mimeout_state.count;i++) {
4961 mimeout_addchar(mimeout_state.buf[i]);
4963 mimeout_state.count = 0;
4964 } else if (SP<c && c<DEL) {
4966 for (i=0;i<mimeout_state.count;i++) {
4967 (*o_mputc)(mimeout_state.buf[i]);
4970 mimeout_state.count = 0;
4972 mimeout_state.buf[mimeout_state.count++] = (char)c;
4975 if (c==SP || c==TAB || c==CR || c==LF) {
4976 for (i=0;i<mimeout_state.count;i++) {
4977 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
4979 for (i=0;i<mimeout_state.count;i++) {
4980 (*o_mputc)(mimeout_state.buf[i]);
4983 mimeout_state.count = 0;
4986 mimeout_state.buf[mimeout_state.count++] = (char)c;
4987 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4989 for (i=0;i<mimeout_state.count;i++) {
4990 (*o_mputc)(mimeout_state.buf[i]);
4993 mimeout_state.count = 0;
4997 if (mimeout_state.count>0 && SP<c && c!='=') {
4998 mimeout_state.buf[mimeout_state.count++] = (char)c;
4999 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5000 j = mimeout_state.count;
5001 mimeout_state.count = 0;
5003 mimeout_addchar(mimeout_state.buf[i]);
5010 if (mimeout_state.count>0) {
5011 j = mimeout_state.count;
5012 mimeout_state.count = 0;
5014 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5016 mimeout_addchar(mimeout_state.buf[i]);
5022 (*o_mputc)(mimeout_state.buf[i]);
5024 open_mime(output_mode);
/* base64_conv: output filter installed by module_connection when MIME output
 * is enabled.  It gives mime_prechar the chance to fold the line, then
 * forwards the character unchanged to o_base64conv. */
5031 base64_conv(nkf_char c2, nkf_char c1)
5033 mime_prechar(c2, c1);
5034 (*o_base64conv)(c2,c1);
/* nkf_iconv_t: state of one iconv-based conversion.  Visible members are the
 * sizes of the input and output staging buffers and the output buffer itself.
 * The other functions in this group also use cd and input_buffer, which are
 * presumably declared on lines missing from this listing. */
5038 typedef struct nkf_iconv_t {
5041 size_t input_buffer_size;
5042 char *output_buffer;
5043 size_t output_buffer_size;
/* nkf_iconv_new: set up a converter from encoding fromcode to encoding tocode.
 * Allocates an input buffer of IOBUF_SIZE bytes and an output buffer of twice
 * that size, then opens the conversion descriptor with iconv_open and reports
 * a failure through perror.
 * NOTE(review): as shown, this code does not look compilable.  converter is
 * declared as an object but used with the arrow operator, and the result of
 * fprintf (called without a stream argument) is passed to perror.  Presumably
 * this section is excluded from normal builds -- confirm before relying on
 * it. */
5047 nkf_iconv_new(char *tocode, char *fromcode)
5049 nkf_iconv_t converter;
5051 converter->input_buffer_size = IOBUF_SIZE;
5052 converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5053 converter->output_buffer_size = IOBUF_SIZE * 2;
5054 converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5055 converter->cd = iconv_open(tocode, fromcode);
/* iconv_open signals failure with (iconv_t)-1 */
5056 if (converter->cd == (iconv_t)-1)
5060 perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5063 perror("can't iconv_open");
/* nkf_iconv_convert: read input through i_getc into the converter input
 * buffer, run iconv over it and write the converted bytes through o_putc.
 * On an iconv error the visible code moves unconsumed input to the front of
 * the input buffer, or doubles the output buffer with realloc, or reports the
 * error with perror; the errno tests selecting these paths are on lines
 * missing from this listing.
 * NOTE(review): several visible details look wrong and should be checked
 * against a version that is known to build:
 *   - the stream parameter is named input but the read uses f;
 *   - the fill loop breaks while the buffer still has room
 *     (input_length < input_buffer_size);
 *   - the output loop counts output_length, which iconv sets to the space
 *     left rather than the bytes produced;
 *   - realloc is applied to converter->outbuf, a member name that differs
 *     from output_buffer used everywhere else. */
5069 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5071 size_t invalid = (size_t)0;
5072 char *input_buffer = converter->input_buffer;
5073 size_t input_length = (size_t)0;
5074 char *output_buffer = converter->output_buffer;
5075 size_t output_length = converter->output_buffer_size;
5080 while ((c = (*i_getc)(f)) != EOF) {
5081 input_buffer[input_length++] = c;
5082 if (input_length < converter->input_buffer_size) break;
5086 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5087 while (output_length-- > 0) {
5088 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
/* iconv signals failure with (size_t)-1 */
5090 if (ret == (size_t) - 1) {
5093 if (input_buffer != converter->input_buffer)
5094 memmove(converter->input_buffer, input_buffer, input_length);
5097 converter->output_buffer_size *= 2;
5098 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5099 if (output_buffer == NULL) {
5100 perror("can't realloc");
5103 converter->output_buffer = output_buffer;
5106 perror("can't iconv");
/* nkf_iconv_close: release both staging buffers and close the conversion
 * descriptor.
 * NOTE(review): the parameter is named convert while the visible body uses
 * converter, and the members freed here (inbuf, outbuf) differ from the
 * input_buffer/output_buffer names used by nkf_iconv_new -- confirm whether
 * this code is ever compiled. */
5119 nkf_iconv_close(nkf_iconv_t *convert)
5121 nkf_xfree(converter->inbuf);
5122 nkf_xfree(converter->outbuf);
5123 iconv_close(converter->cd);
/* Body of the routine that puts the converter back into its start-up state
 * (function header not visible in this listing; the name is unconfirmed).
 * Every visible statement assigns a default to a file-scope option flag,
 * state variable or hook:
 *   - option flags and encoding selections are cleared or set to their
 *     *_DEFAULT values;
 *   - all output filter hooks are set to no_connection, and the input hooks
 *     to the std_getc/std_ungetc pair;
 *   - carry-over state (MIME buffer count, z_prev1/z_prev2, prefix_table,
 *     decode mode) is zeroed.
 * p walks input_code_list; what is reset per entry is on lines missing from
 * this listing. */
5132 struct input_code *p = input_code_list;
5144 mime_f = MIME_DECODE_DEFAULT;
5145 mime_decode_f = FALSE;
5150 x0201_f = X0201_DEFAULT;
5151 iso2022jp_f = FALSE;
5152 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5153 ms_ucs_map_f = UCS_MAP_ASCII;
5155 #ifdef UTF8_INPUT_ENABLE
5156 no_cp932ext_f = FALSE;
5157 no_best_fit_chars_f = FALSE;
5158 encode_fallback = NULL;
5159 unicode_subchar = '?';
5160 input_endian = ENDIAN_BIG;
5162 #ifdef UTF8_OUTPUT_ENABLE
5163 output_bom_f = FALSE;
5164 output_endian = ENDIAN_BIG;
5166 #ifdef UNICODE_NORMALIZATION
5182 #ifdef SHIFTJIS_CP932
5192 for (i = 0; i < 256; i++){
5193 prefix_table[i] = 0;
5197 mimeout_state.count = 0;
5202 fold_preserve_f = FALSE;
5205 kanji_intro = DEFAULT_J;
5206 ascii_intro = DEFAULT_R;
5207 fold_margin = FOLD_MARGIN;
/* detach every output filter stage */
5208 o_zconv = no_connection;
5209 o_fconv = no_connection;
5210 o_eol_conv = no_connection;
5211 o_rot_conv = no_connection;
5212 o_hira_conv = no_connection;
5213 o_base64conv = no_connection;
5214 o_iso2022jp_check_conv = no_connection;
/* input hooks back to the plain stream readers */
5217 i_ungetc = std_ungetc;
5219 i_bungetc = std_ungetc;
5222 i_mungetc = std_ungetc;
5223 i_mgetc_buf = std_getc;
5224 i_mungetc_buf = std_ungetc;
5225 output_mode = ASCII;
5227 mime_decode_mode = FALSE;
5233 z_prev2=0,z_prev1=0;
5235 iconv_for_check = 0;
5237 input_codename = NULL;
5238 input_encoding = NULL;
5239 output_encoding = NULL;
/* module_connection: wire up the conversion pipeline for the current options.
 * Output side: oconv starts as the converter of the output encoding; each
 * enabled filter then saves the current oconv in its own o_* hook and
 * installs itself as the new oconv.  A filter connected later therefore runs
 * earlier, and z_conv, connected last, is the first stage to see a character.
 * Input side: the same pattern is applied to the i_getc/i_ungetc pair, each
 * enabled reader saving the previous pair in its own hooks.
 * Finally the input converter is chosen: the one belonging to input_encoding
 * if set, otherwise e_iconv as the starting guess.
 * When no output encoding is given the default is used; if there is still
 * none, ISO_2022_JP is substituted in no-output and guess modes.
 * A caller in this file tests the result with < 0, so a negative value is
 * presumably returned when no output encoding can be determined (the return
 * statements are on lines missing from this listing, as are most of the
 * option tests guarding each stage). */
5247 module_connection(void)
5249 if (input_encoding) set_input_encoding(input_encoding);
5250 if (!output_encoding) {
5251 output_encoding = nkf_default_encoding();
5253 if (!output_encoding) {
5254 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5257 set_output_encoding(output_encoding);
5258 oconv = nkf_enc_to_oconv(output_encoding);
5261 /* replace continuation module, from output side */
5263 /* output redirection */
5265 if (noout_f || guess_f){
5272 if (mimeout_f == TRUE) {
5273 o_base64conv = oconv; oconv = base64_conv;
5275 /* base64_count = 0; */
5278 if (eolmode_f || guess_f) {
5279 o_eol_conv = oconv; oconv = eol_conv;
5282 o_rot_conv = oconv; oconv = rot_conv;
5285 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5288 o_hira_conv = oconv; oconv = hira_conv;
5291 o_fconv = oconv; oconv = fold_conv;
5294 if (alpha_f || x0201_f) {
5295 o_zconv = oconv; oconv = z_conv;
5299 i_ungetc = std_ungetc;
5300 /* input redirection */
5303 i_cgetc = i_getc; i_getc = cap_getc;
5304 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5307 i_ugetc = i_getc; i_getc = url_getc;
5308 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5311 #ifdef NUMCHAR_OPTION
5313 i_ngetc = i_getc; i_getc = numchar_getc;
5314 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5317 #ifdef UNICODE_NORMALIZATION
5319 i_nfc_getc = i_getc; i_getc = nfc_getc;
5320 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
/* with a fixed MIME setting the MIME reader is part of the chain from the
 * start instead of being switched in on demand */
5323 if (mime_f && mimebuf_f==FIXED_MIME) {
5324 i_mgetc = i_getc; i_getc = mime_getc;
5325 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5328 i_bgetc = i_getc; i_getc = broken_getc;
5329 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5331 if (input_encoding) {
5332 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5334 set_iconv(FALSE, e_iconv);
5338 struct input_code *p = input_code_list;
5347 Conversion main loop. Code detection only.
5350 #if !defined(PERL_XS) && !defined(WIN32DLL)
5357 module_connection();
5358 while ((c = (*i_getc)(f)) != EOF)
/*
 * Loop-control shorthands for the conversion loop.  They expand to bare
 * continue / break statements, so they are only meaningful inside a loop
 * body.  SKIP and MORE also assign to c2 (and MORE reads c1), so those
 * variables must be in scope where the macros are used.  SKIP and MORE expand
 * to two statements and are not wrapped in do/while, so they need braces when
 * used as the body of an if.
 */
5365 #define NEXT continue /* no output, get next */
5366 #define SKIP c2=0;continue /* no output, get next */
5367 #define MORE c2=c1;continue /* need one more byte */
5368 #define SEND ; /* output c1 and c2, get next */
5369 #define LAST break /* end of loop, go closing */
/*
 * set_input_mode(mode): stores mode in input_mode and labels the input as
 * ISO-2022-JP through set_input_codename() and debug().
 */
5370 #define set_input_mode(mode) do { \
5371 input_mode = mode; \
5373 set_input_codename("ISO-2022-JP"); \
5374 debug("ISO-2022-JP"); \
/*
 * kanji_convert: main conversion loop for one input stream.
 *
 * f is the stream to read; all reads go through the i_getc / i_ungetc
 * function pointers.  The function first connects the conversion modules
 * (reporting an error on stderr when module_connection() returns a negative
 * value), then runs one of three loops:
 *   - iconv == w_iconv32: consumes four units at a time for
 *     nkf_iconv_utf_32();
 *   - iconv == w_iconv16: consumes two units at a time for
 *     nkf_iconv_utf_16(), and two more when that call returns -2;
 *   - otherwise: a unit-by-unit state machine that tracks escape sequences,
 *     SI/SO, MIME starts and line ends, and passes pairs to iconv / oconv.
 * After the loop it flushes iconv with EOF and, if no input code name was
 * recorded, takes the entry of input_code_list with the lowest score.
 *
 * NOTE(review): the return statements are not visible here; confirm the
 * return convention before relying on it.
 */
5378 kanji_convert(FILE *f)
5380 nkf_char c1=0, c2=0, c3=0, c4=0;
5381 int shift_mode = 0; /* 0, 1, 2, 3 */
5383 int is_8bit = FALSE;
5385 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5390 output_mode = ASCII;
5392 if (module_connection() < 0) {
5393 #if !defined(PERL_XS) && !defined(WIN32DLL)
5394 fprintf(stderr, "no output encoding given\n");
5400 #ifdef UTF8_INPUT_ENABLE
/* UTF-32 input: a short read at EOF ends the loop without converting */
5401 if(iconv == w_iconv32){
5402 while ((c1 = (*i_getc)(f)) != EOF &&
5403 (c2 = (*i_getc)(f)) != EOF &&
5404 (c3 = (*i_getc)(f)) != EOF &&
5405 (c4 = (*i_getc)(f)) != EOF) {
5406 nkf_iconv_utf_32(c1, c2, c3, c4);
5408 (*i_ungetc)(EOF, f);
5410 else if (iconv == w_iconv16) {
5411 while ((c1 = (*i_getc)(f)) != EOF &&
5412 (c2 = (*i_getc)(f)) != EOF) {
/* -2 asks for two more units (presumably the second half of a surrogate pair) */
5413 if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
5414 (c3 = (*i_getc)(f)) != EOF &&
5415 (c4 = (*i_getc)(f)) != EOF) {
5416 nkf_iconv_utf_16(c1, c2, c3, c4);
5419 (*i_ungetc)(EOF, f);
5423 while ((c1 = (*i_getc)(f)) != EOF) {
5424 #ifdef INPUT_CODE_FIX
5425 if (!input_encoding)
5431 /* in case of 8th bit is on */
5432 if (!estab_f&&!mime_decode_mode) {
5433 /* in case of not established yet */
5434 /* It is still ambiguous */
5435 if (h_conv(f, c2, c1)==EOF) {
5443 /* in case of already established */
5445 /* ignore bogus code */
5453 /* 2nd byte of 7 bit code or SJIS */
5457 else if (nkf_char_unicode_p(c1)) {
5463 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5466 } else if (c1 > DEL) {
5468 if (!estab_f && !iso8859_f) {
5469 /* not established yet */
5471 } else { /* estab_f==TRUE */
5477 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5478 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5480 c2 = JIS_X_0201_1976_K;
5485 /* already established */
5489 } else if (SP < c1 && c1 < DEL) {
5490 /* in case of Roman characters */
5492 /* output 1 shifted byte */
5496 } else if (nkf_byte_jisx0201_katakana_p(c1)){
5497 /* output 1 shifted byte */
5498 c2 = JIS_X_0201_1976_K;
5501 /* look like bogus code */
5504 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5505 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5506 /* in case of Kanji shifted */
5508 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5509 /* Check MIME code */
5510 if ((c1 = (*i_getc)(f)) == EOF) {
5513 } else if (c1 == '?') {
5514 /* =? is mime conversion start sequence */
5515 if(mime_f == STRICT_MIME) {
5516 /* check in real detail */
5517 if (mime_begin_strict(f) == EOF)
5520 } else if (mime_begin(f) == EOF)
5529 /* normal ASCII code */
/* SI, SO and ESC are only honored on 7-bit input or while decoding MIME */
5532 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5535 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
5538 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
5539 if ((c1 = (*i_getc)(f)) == EOF) {
5540 /* (*oconv)(0, ESC); don't send bogus code */
5543 else if (c1 == '&') {
5545 if ((c1 = (*i_getc)(f)) == EOF) {
5551 else if (c1 == '$') {
5553 if ((c1 = (*i_getc)(f)) == EOF) {
5554 /* don't send bogus code
5556 (*oconv)(0, '$'); */
5558 } else if (c1 == '@' || c1 == 'B') {
5560 set_input_mode(JIS_X_0208);
5562 } else if (c1 == '(') {
5564 if ((c1 = (*i_getc)(f)) == EOF) {
5565 /* don't send bogus code
5571 } else if (c1 == '@'|| c1 == 'B') {
5573 set_input_mode(JIS_X_0208);
5576 } else if (c1 == 'D'){
5577 set_input_mode(JIS_X_0212);
5579 #endif /* X0212_ENABLE */
5580 } else if (c1 == 'O' || c1 == 'Q'){
5581 set_input_mode(JIS_X_0213_1);
5583 } else if (c1 == 'P'){
5584 set_input_mode(JIS_X_0213_2);
5587 /* could be some special code */
5594 } else if (broken_f&0x2) {
5595 /* accept any ESC-(-x as broken code ... */
5596 input_mode = JIS_X_0208;
5605 } else if (c1 == '(') {
5607 if ((c1 = (*i_getc)(f)) == EOF) {
5608 /* don't send bogus code
5610 (*oconv)(0, '('); */
5613 else if (c1 == 'I') {
5614 /* JIS X 0201 Katakana */
5615 set_input_mode(JIS_X_0201_1976_K);
5618 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5619 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5620 set_input_mode(ASCII);
5623 else if (broken_f&0x2) {
5624 set_input_mode(ASCII);
5633 else if (c1 == '.') {
5635 if ((c1 = (*i_getc)(f)) == EOF) {
5638 else if (c1 == 'A') {
5649 else if (c1 == 'N') {
5652 if (g2 == ISO_8859_1) {
5667 } else if (c1 == ESC && iconv == s_iconv) {
5668 /* ESC in Shift_JIS */
5669 if ((c1 = (*i_getc)(f)) == EOF) {
5670 /* (*oconv)(0, ESC); don't send bogus code */
5672 } else if (c1 == '$') {
5674 if ((c1 = (*i_getc)(f)) == EOF) {
5676 } else if (('E' <= c1 && c1 <= 'G') ||
5677 ('O' <= c1 && c1 <= 'Q')) {
/*
 * The selector byte picks a base value from the table by c1 % 7; every
 * following byte in the range SP through z is emitted offset by that base.
 */
5685 static const nkf_char jphone_emoji_first_table[7] =
5686 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5687 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5688 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5689 while (SP <= c1 && c1 <= 'z') {
5690 (*oconv)(0, c1 + c3);
5691 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5706 } else if (c1 == LF || c1 == CR) {
5708 input_mode = ASCII; set_iconv(FALSE, 0);
5710 } else if (mime_decode_f && !mime_decode_mode){
5712 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5720 } else { /* if (c1 == CR)*/
5721 if ((c1=(*i_getc)(f))!=EOF) {
5725 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
5745 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5748 if ((c3 = (*i_getc)(f)) != EOF) {
5751 if ((c4 = (*i_getc)(f)) != EOF) {
5753 (*iconv)(c2, c1, c3|c4);
5758 /* 3 bytes EUC or UTF-8 */
5759 if ((c3 = (*i_getc)(f)) != EOF) {
5761 (*iconv)(c2, c1, c3);
5769 0x7F <= c2 && c2 <= 0x92 &&
5770 0x21 <= c1 && c1 <= 0x7E) {
/* rows 0x7F..0x92 of 94 cells each are mapped linearly from U+E000 upward */
5772 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5775 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5779 (*oconv)(PREFIX_EUCG3 | c2, c1);
5781 #endif /* X0212_ENABLE */
5783 (*oconv)(PREFIX_EUCG3 | c2, c1);
5786 (*oconv)(input_mode, c1); /* other special case */
5792 /* goto next_word */
/* end of input: flush the input converter */
5796 (*iconv)(EOF, 0, 0);
5797 if (!input_codename)
/* no code name was established: keep the candidate with the lowest score */
5800 struct input_code *p = input_code_list;
5801 struct input_code *result = p;
5803 if (p->score < result->score) result = p;
5806 set_input_codename(result->name);
5808 debug(result->name);
5816 * int options(unsigned char *cp)
/*
 * options: parse one option string and update the global conversion settings.
 *
 * cp points at the option text.  Everything up to and including the first
 * dash is skipped, then each following character selects one of the cases
 * below.  A second dash introduces a long option: it is looked up in
 * long_option[], and the entry is either continued through its short-option
 * alias string or handled by name in the strcmp() chain.
 *
 * NOTE(review): the return statements are not visible here; confirm the
 * return convention before relying on it.
 */
5823 options(unsigned char *cp)
5827 unsigned char *cp_back = NULL;
5832 while(*cp && *cp++!='-');
5833 while (*cp || cp_back) {
5841 case '-': /* literal options */
5842 if (!*cp || *cp == SP) { /* ignore the rest of arguments */
/* match cp against each table name, comparing up to the end of the name or its = sign */
5846 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
5847 p = (unsigned char *)long_option[i].name;
5848 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5849 if (*p == cp[j] || cp[j] == SP){
5856 #if !defined(PERL_XS) && !defined(WIN32DLL)
5857 fprintf(stderr, "unknown long option: --%s\n", cp);
/* step cp past this long option, up to the next space or the terminator */
5861 while(*cp && *cp != SP && cp++);
5862 if (long_option[i].alias[0]){
5864 cp = (unsigned char *)long_option[i].alias;
5866 if (strcmp(long_option[i].name, "ic=") == 0){
5867 enc = nkf_enc_find((char *)p);
5869 input_encoding = enc;
5872 if (strcmp(long_option[i].name, "oc=") == 0){
5873 enc = nkf_enc_find((char *)p);
5874 /* if (enc <= 0) continue; */
5876 output_encoding = enc;
5879 if (strcmp(long_option[i].name, "guess=") == 0){
5880 if (p[0] == '0' || p[0] == '1') {
/* overwrite keeps the original timestamp, in-place does not */
5888 if (strcmp(long_option[i].name, "overwrite") == 0){
5891 preserve_time_f = TRUE;
5894 if (strcmp(long_option[i].name, "overwrite=") == 0){
5897 preserve_time_f = TRUE;
5899 backup_suffix = (char *)p;
5902 if (strcmp(long_option[i].name, "in-place") == 0){
5905 preserve_time_f = FALSE;
5908 if (strcmp(long_option[i].name, "in-place=") == 0){
5911 preserve_time_f = FALSE;
5913 backup_suffix = (char *)p;
5918 if (strcmp(long_option[i].name, "cap-input") == 0){
5922 if (strcmp(long_option[i].name, "url-input") == 0){
5927 #ifdef NUMCHAR_OPTION
5928 if (strcmp(long_option[i].name, "numchar-input") == 0){
5934 if (strcmp(long_option[i].name, "no-output") == 0){
5938 if (strcmp(long_option[i].name, "debug") == 0){
5943 if (strcmp(long_option[i].name, "cp932") == 0){
5944 #ifdef SHIFTJIS_CP932
5948 #ifdef UTF8_OUTPUT_ENABLE
5949 ms_ucs_map_f = UCS_MAP_CP932;
5953 if (strcmp(long_option[i].name, "no-cp932") == 0){
5954 #ifdef SHIFTJIS_CP932
5958 #ifdef UTF8_OUTPUT_ENABLE
5959 ms_ucs_map_f = UCS_MAP_ASCII;
5963 #ifdef SHIFTJIS_CP932
5964 if (strcmp(long_option[i].name, "cp932inv") == 0){
5971 if (strcmp(long_option[i].name, "x0212") == 0){
5978 if (strcmp(long_option[i].name, "exec-in") == 0){
5982 if (strcmp(long_option[i].name, "exec-out") == 0){
5987 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
5988 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
5989 no_cp932ext_f = TRUE;
5992 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
5993 no_best_fit_chars_f = TRUE;
/* the fb-* options choose how characters without a mapping are written */
5996 if (strcmp(long_option[i].name, "fb-skip") == 0){
5997 encode_fallback = NULL;
6000 if (strcmp(long_option[i].name, "fb-html") == 0){
6001 encode_fallback = encode_fallback_html;
6004 if (strcmp(long_option[i].name, "fb-xml") == 0){
6005 encode_fallback = encode_fallback_xml;
6008 if (strcmp(long_option[i].name, "fb-java") == 0){
6009 encode_fallback = encode_fallback_java;
6012 if (strcmp(long_option[i].name, "fb-perl") == 0){
6013 encode_fallback = encode_fallback_perl;
6016 if (strcmp(long_option[i].name, "fb-subchar") == 0){
6017 encode_fallback = encode_fallback_subchar;
/*
 * fb-subchar= takes the substitution character as a number: decimal,
 * hexadecimal (x or X in second position) or octal.  Note that i, the
 * long_option[] index, is reused as the digit index from here on.
 */
6020 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6021 encode_fallback = encode_fallback_subchar;
6022 unicode_subchar = 0;
6024 /* decimal number */
6025 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6026 unicode_subchar *= 10;
6027 unicode_subchar += hex2bin(p[i]);
6029 }else if(p[1] == 'x' || p[1] == 'X'){
6030 /* hexadecimal number */
6031 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6032 unicode_subchar <<= 4;
6033 unicode_subchar |= hex2bin(p[i]);
6037 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6038 unicode_subchar *= 8;
6039 unicode_subchar += hex2bin(p[i]);
/* convert the value with w16e_conv() and repack the two results into one value */
6042 w16e_conv(unicode_subchar, &i, &j);
6043 unicode_subchar = i<<8 | j;
6047 #ifdef UTF8_OUTPUT_ENABLE
6048 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6049 ms_ucs_map_f = UCS_MAP_MS;
6053 #ifdef UNICODE_NORMALIZATION
6054 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
/* prefix=: p[0] is the prefix character, registered for every graphic character after it */
6059 if (strcmp(long_option[i].name, "prefix=") == 0){
6060 if (nkf_isgraph(p[0])){
6061 for (i = 1; nkf_isgraph(p[i]); i++){
6062 prefix_table[p[i]] = p[0];
6067 #if !defined(PERL_XS) && !defined(WIN32DLL)
6068 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6073 case 'b': /* buffered mode */
6076 case 'u': /* non-buffered mode */
6079 case 't': /* transparent mode */
6084 } else if (*cp=='2') {
6088 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6096 case 'j': /* JIS output */
6098 output_encoding = nkf_enc_from_index(ISO_2022_JP);
6100 case 'e': /* AT&T EUC output */
6101 output_encoding = nkf_enc_from_index(EUCJP_NKF);
6103 case 's': /* SJIS output */
6104 output_encoding = nkf_enc_from_index(WINDOWS_31J);
6106 case 'l': /* ISO8859 Latin-1 support, no conversion */
6107 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6108 input_encoding = nkf_enc_from_index(ISO_8859_1);
6110 case 'i': /* Kanji IN ESC-$-@/B */
6111 if (*cp=='@'||*cp=='B')
6112 kanji_intro = *cp++;
6114 case 'o': /* ASCII IN ESC-(-J/B */
6115 if (*cp=='J'||*cp=='B'||*cp=='H')
6116 ascii_intro = *cp++;
6120 bit:1 katakana->hiragana
6121 bit:2 hiragana->katakana
6123 if ('9'>= *cp && *cp>='0')
6124 hira_f |= (*cp++ -'0');
6131 #if defined(MSDOS) || defined(__OS2__)
6138 show_configuration();
6146 #ifdef UTF8_OUTPUT_ENABLE
6147 case 'w': /* UTF-8 output */
6152 output_encoding = nkf_enc_from_index(UTF_8N);
6154 output_bom_f = TRUE;
6155 output_encoding = nkf_enc_from_index(UTF_8_BOM);
6159 if ('1'== cp[0] && '6'==cp[1]) {
6162 } else if ('3'== cp[0] && '2'==cp[1]) {
6166 output_encoding = nkf_enc_from_index(UTF_8);
6171 output_endian = ENDIAN_LITTLE;
6172 } else if (cp[0] == 'B') {
6175 output_encoding = nkf_enc_from_index(enc_idx);
/* narrow the generic UTF-16 / UTF-32 index to the variant for the chosen byte order */
6180 enc_idx = enc_idx == UTF_16
6181 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6182 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6184 output_bom_f = TRUE;
6185 enc_idx = enc_idx == UTF_16
6186 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6187 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6189 output_encoding = nkf_enc_from_index(enc_idx);
6193 #ifdef UTF8_INPUT_ENABLE
6194 case 'W': /* UTF input */
6197 input_encoding = nkf_enc_from_index(UTF_8);
6200 if ('1'== cp[0] && '6'==cp[1]) {
6202 input_endian = ENDIAN_BIG;
6204 } else if ('3'== cp[0] && '2'==cp[1]) {
6206 input_endian = ENDIAN_BIG;
6209 input_encoding = nkf_enc_from_index(UTF_8);
6214 input_endian = ENDIAN_LITTLE;
6215 } else if (cp[0] == 'B') {
6217 input_endian = ENDIAN_BIG;
6219 enc_idx = (enc_idx == UTF_16
6220 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6221 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6222 input_encoding = nkf_enc_from_index(enc_idx);
6226 /* Input code assumption */
6227 case 'J': /* ISO-2022-JP input */
6228 input_encoding = nkf_enc_from_index(ISO_2022_JP);
6230 case 'E': /* EUC-JP input */
6231 input_encoding = nkf_enc_from_index(EUCJP_NKF);
6233 case 'S': /* Windows-31J input */
6234 input_encoding = nkf_enc_from_index(WINDOWS_31J);
6236 case 'Z': /* Convert X0208 alphabet to ASCII */
6238 bit:0 Convert JIS X 0208 Alphabet to ASCII
6239 bit:1 Convert Kankaku to one space
6240 bit:2 Convert Kankaku to two spaces
6241 bit:3 Convert HTML Entity
6242 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
/* every digit after -Z sets the bit of that number; no digit at all means bit 0 */
6244 while ('0'<= *cp && *cp <='9') {
6245 alpha_f |= 1 << (*cp++ - '0');
6247 if (!alpha_f) alpha_f = 1;
6249 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6250 x0201_f = FALSE; /* No X0201->X0208 conversion */
6252 ESC-(-I in JIS, EUC, MS Kanji
6253 SI/SO in JIS, EUC, MS Kanji
6254 SS2 in EUC, JIS, not in MS Kanji
6255 MS Kanji (0xa0-0xdf)
6257 ESC-(-I in JIS (0x20-0x5f)
6258 SS2 in EUC (0xa0-0xdf)
6259 0xa0-0xd in MS Kanji (0xa0-0xdf)
6262 case 'X': /* Convert X0201 kana to X0208 */
6265 case 'F': /* preserve new lines */
6266 fold_preserve_f = TRUE;
/* no break here: -F continues into the -f handling below */
6267 case 'f': /* folding -f60 or -f */
6270 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6272 fold_len += *cp++ - '0';
/* lengths outside 1..BUFSIZ-1 fall back to the default */
6274 if (!(0<fold_len && fold_len<BUFSIZ))
6275 fold_len = DEFAULT_FOLD;
6279 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6281 fold_margin += *cp++ - '0';
6285 case 'm': /* MIME support */
6286 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6287 if (*cp=='B'||*cp=='Q') {
6288 mime_decode_mode = *cp++;
6289 mimebuf_f = FIXED_MIME;
6290 } else if (*cp=='N') {
6291 mime_f = TRUE; cp++;
6292 } else if (*cp=='S') {
6293 mime_f = STRICT_MIME; cp++;
6294 } else if (*cp=='0') {
6295 mime_decode_f = FALSE;
6296 mime_f = FALSE; cp++;
6298 mime_f = STRICT_MIME;
6301 case 'M': /* MIME output */
6304 mimeout_f = FIXED_MIME; cp++;
6305 } else if (*cp=='Q') {
6307 mimeout_f = FIXED_MIME; cp++;
6312 case 'B': /* Broken JIS support */
6314 bit:1 allow any x on ESC-(-x or ESC-$-x
6315 bit:2 reset to ascii on NL
6317 if ('9'>= *cp && *cp>='0')
6318 broken_f |= 1<<(*cp++ -'0');
6323 case 'O':/* for Output file */
6327 case 'c':/* add cr code */
6330 case 'd':/* delete cr code */
6333 case 'I': /* ISO-2022-JP output */
6336 case 'L': /* line mode */
6337 if (*cp=='u') { /* unix */
6338 eolmode_f = LF; cp++;
6339 } else if (*cp=='m') { /* mac */
6340 eolmode_f = CR; cp++;
6341 } else if (*cp=='w') { /* windows */
6342 eolmode_f = CRLF; cp++;
6343 } else if (*cp=='0') { /* no conversion */
6344 eolmode_f = 0; cp++;
6349 if ('2' <= *cp && *cp <= '9') {
6352 } else if (*cp == '0' || *cp == '1') {
6361 /* multiple options in a string are allowed for the Perl module */
6362 while(*cp && *cp++!='-');
6365 #if !defined(PERL_XS) && !defined(WIN32DLL)
6366 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6368 /* bogus option but ignored */
6376 #include "nkf32dll.c"
6377 #elif defined(PERL_XS)
6378 #else /* WIN32DLL */
/*
 * main: command-line entry point (compiled only when neither WIN32DLL nor
 * PERL_XS is defined).
 *
 * Leading arguments that begin with a dash are consumed first.  The input is
 * then either stdin, converted with kanji_convert(stdin), or each remaining
 * argument opened as a file.  When file_out_f is set, stdout is redirected
 * to an output file; on the temporary-file path the result later replaces
 * the original file: the original mode is copied with chmod(), the
 * timestamp is copied with utime() when preserve_time_f is set, a backup is
 * made through get_backup_filename() and rename(), and finally the
 * temporary file is renamed over the original.
 *
 * A file that cannot be opened sets is_argument_error, which is tested
 * again near the end of the function.
 * NOTE(review): the return statements are mostly not visible here; confirm
 * the exit codes before relying on them.
 */
6380 main(int argc, char **argv)
6385 char *outfname = NULL;
6388 #ifdef EASYWIN /*Easy Win */
6389 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6391 #ifdef DEFAULT_CODE_LOCALE
6392 setlocale(LC_CTYPE, "");
/* consume the leading arguments that begin with a dash */
6396 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6397 cp = (unsigned char *)*argv;
/* pipe + fork, then execvp of the remaining command line starting at argv[1] */
6402 if (pipe(fds) < 0 || (pid = fork()) < 0){
6413 execvp(argv[1], &argv[1]);
/*
 * Selected flags are copied aside and written back below.
 * NOTE(review): presumably around a reinitialization that is not visible here.
 */
6430 int debug_f_back = debug_f;
6433 int exec_f_back = exec_f;
6436 int x0212_f_back = x0212_f;
6438 int x0213_f_back = x0213_f;
6439 int guess_f_back = guess_f;
6441 guess_f = guess_f_back;
6444 debug_f = debug_f_back;
6447 exec_f = exec_f_back;
6449 x0212_f = x0212_f_back;
6450 x0213_f = x0213_f_back;
6453 if (binmode_f == TRUE)
6454 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6455 if (freopen("","wb",stdout) == NULL)
6462 setbuf(stdout, (char *) NULL);
6464 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
6467 if (binmode_f == TRUE)
6468 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6469 if (freopen("","rb",stdin) == NULL) return (-1);
6473 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
6477 kanji_convert(stdin);
6478 if (guess_f) print_guessed_code(NULL);
6482 int is_argument_error = FALSE;
6484 input_codename = NULL;
6487 iconv_for_check = 0;
6489 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6491 is_argument_error = TRUE;
6499 /* reopen file for stdout */
6500 if (file_out_f == TRUE) {
/* build the temporary output name from the original file name */
6503 outfname = nkf_xmalloc(strlen(origfname)
6504 + strlen(".nkftmpXXXXXX")
6506 strcpy(outfname, origfname);
/* scan backwards for the last path separator (slash or backslash) */
6510 for (i = strlen(outfname); i; --i){
6511 if (outfname[i - 1] == '/'
6512 || outfname[i - 1] == '\\'){
6518 strcat(outfname, "ntXXXXXX");
6520 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6521 S_IREAD | S_IWRITE);
6523 strcat(outfname, ".nkftmpXXXXXX");
6524 fd = mkstemp(outfname);
/* keep a duplicate of stdout so it can be restored after this file */
6527 || (fd_backup = dup(fileno(stdout))) < 0
6528 || dup2(fd, fileno(stdout)) < 0
6539 outfname = "nkf.out";
6542 if(freopen(outfname, "w", stdout) == NULL) {
6546 if (binmode_f == TRUE) {
6547 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6548 if (freopen("","wb",stdout) == NULL)
6555 if (binmode_f == TRUE)
6556 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6557 if (freopen("","rb",fin) == NULL)
6562 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
6566 char *filename = NULL;
/* the guessed code is labelled with the file name only when several files are given */
6568 if (nfiles > 1) filename = origfname;
6569 if (guess_f) print_guessed_code(filename);
6575 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
/* put the saved stdout descriptor back */
6583 if (dup2(fd_backup, fileno(stdout)) < 0){
6586 if (stat(origfname, &sb)) {
6587 fprintf(stderr, "Can't stat %s\n", origfname);
6589 /* restore the permissions */
6590 if (chmod(outfname, sb.st_mode)) {
6591 fprintf(stderr, "Can't set permission %s\n", outfname);
6594 /* restore the timestamp */
6595 if(preserve_time_f){
6596 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
/* array form of the utime argument under the guard above */
6597 tb[0] = tb[1] = sb.st_mtime;
6598 if (utime(outfname, tb)) {
6599 fprintf(stderr, "Can't set timestamp %s\n", outfname);
/* struct form of the utime argument (presumably the branch for all other platforms) */
6602 tb.actime = sb.st_atime;
6603 tb.modtime = sb.st_mtime;
6604 if (utime(outfname, &tb)) {
6605 fprintf(stderr, "Can't set timestamp %s\n", outfname);
/* move the original aside under its backup name; the name buffer is freed afterwards */
6610 char *backup_filename = get_backup_filename(backup_suffix, origfname);
6612 unlink(backup_filename);
6614 if (rename(origfname, backup_filename)) {
6615 perror(backup_filename);
6616 fprintf(stderr, "Can't rename %s to %s\n",
6617 origfname, backup_filename);
6619 nkf_xfree(backup_filename);
6622 if (unlink(origfname)){
/* put the converted output in place of the original */
6627 if (rename(outfname, origfname)) {
6629 fprintf(stderr, "Can't rename %s to %s\n",
6630 outfname, origfname);
6632 nkf_xfree(outfname);
6637 if (is_argument_error)
6640 #ifdef EASYWIN /*Easy Win */
6641 if (file_out_f == FALSE)
6642 scanf("%d",&end_check);
6645 #else /* for Other OS */
6646 if (file_out_f == TRUE)
6648 #endif /*Easy Win */
6651 #endif /* WIN32DLL */