2 * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3 * Copyright (c) 1996-2010, The nkf Project.
5 * This software is provided 'as-is', without any express or implied
6 * warranty. In no event will the authors be held liable for any damages
7 * arising from the use of this software.
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software
15 * in a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
18 * 2. Altered source versions must be plainly marked as such, and must not be
19 * misrepresented as being the original software.
21 * 3. This notice may not be removed or altered from any source distribution.
23 #define NKF_VERSION "2.1.1"
24 #define NKF_RELEASE_DATE "2010-04-14"
26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27 "Copyright (C) 1996-2010, The nkf Project."
38 # define INCL_DOSERRORS
44 /* state of output_mode and input_mode
123 NKF_ENCODING_TABLE_SIZE,
124 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128 JIS_X_0208 = 0x1168, /* @B */
129 JIS_X_0212 = 0x1159, /* D */
130 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131 JIS_X_0213_2 = 0x1229, /* P */
132 JIS_X_0213_1 = 0x1233 /* Q */
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
149 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
150 void (*oconv)(nkf_char c2, nkf_char c1);
151 } nkf_native_encoding;
153 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
154 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
155 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
156 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
157 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
158 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
159 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
164 const nkf_native_encoding *base_encoding;
167 nkf_encoding nkf_encoding_table[] = {
168 {ASCII, "US-ASCII", &NkfEncodingASCII},
169 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179 {CP10001, "CP10001", &NkfEncodingShift_JIS},
180 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182 {CP51932, "CP51932", &NkfEncodingEUC_JP},
183 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203 {BINARY, "BINARY", &NkfEncodingASCII},
210 } encoding_name_to_id_table[] = {
215 {"ISO-2022-JP", ISO_2022_JP},
216 {"ISO2022JP-CP932", CP50220},
217 {"CP50220", CP50220},
218 {"CP50221", CP50221},
219 {"CSISO2022JP", CP50221},
220 {"CP50222", CP50222},
221 {"ISO-2022-JP-1", ISO_2022_JP_1},
222 {"ISO-2022-JP-3", ISO_2022_JP_3},
223 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
224 {"SHIFT_JIS", SHIFT_JIS},
226 {"MS_Kanji", SHIFT_JIS},
228 {"WINDOWS-31J", WINDOWS_31J},
229 {"CSWINDOWS31J", WINDOWS_31J},
230 {"CP932", WINDOWS_31J},
231 {"MS932", WINDOWS_31J},
232 {"CP10001", CP10001},
235 {"EUCJP-NKF", EUCJP_NKF},
236 {"CP51932", CP51932},
237 {"EUC-JP-MS", EUCJP_MS},
238 {"EUCJP-MS", EUCJP_MS},
239 {"EUCJPMS", EUCJP_MS},
240 {"EUC-JP-ASCII", EUCJP_ASCII},
241 {"EUCJP-ASCII", EUCJP_ASCII},
242 {"SHIFT_JISX0213", SHIFT_JISX0213},
243 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
244 {"EUC-JISX0213", EUC_JISX0213},
245 {"EUC-JIS-2004", EUC_JIS_2004},
248 {"UTF-8-BOM", UTF_8_BOM},
249 {"UTF8-MAC", UTF8_MAC},
250 {"UTF-8-MAC", UTF8_MAC},
252 {"UTF-16BE", UTF_16BE},
253 {"UTF-16BE-BOM", UTF_16BE_BOM},
254 {"UTF-16LE", UTF_16LE},
255 {"UTF-16LE-BOM", UTF_16LE_BOM},
257 {"UTF-32BE", UTF_32BE},
258 {"UTF-32BE-BOM", UTF_32BE_BOM},
259 {"UTF-32LE", UTF_32LE},
260 {"UTF-32LE-BOM", UTF_32LE_BOM},
265 #if defined(DEFAULT_CODE_JIS)
266 #define DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define DEFAULT_ENCIDX UTF_8
/*
 * Locale-independent character classification and conversion helpers.
 *
 * Every use of a macro parameter is parenthesized so that an argument
 * containing a low-precedence operator (for example `x & 0xff` or `n + 1`)
 * is evaluated as a single unit.  The parameter is still expanded more
 * than once, so do not pass expressions with side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Numeric value of one hexadecimal digit; evaluates to 0 for any other character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when bits 8..15 of c2 (after truncation to unsigned short) equal SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and characters strictly between SP and DEL other than ? = _ ( ) . and '"' (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
304 #define HOLD_SIZE 1024
305 #if defined(INT_IS_SHORT)
306 #define IOBUF_SIZE 2048
308 #define IOBUF_SIZE 16384
311 #define DEFAULT_J 'B'
312 #define DEFAULT_R 'B'
319 /* MIME preprocessor */
321 #ifdef EASYWIN /*Easy Win */
322 extern POINT _BufferSize;
331 void (*status_func)(struct input_code *, nkf_char);
332 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
337 static nkf_encoding *input_encoding = NULL;
338 static nkf_encoding *output_encoding = NULL;
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
342 * 0: Shift_JIS, eucJP-ascii
347 #define UCS_MAP_ASCII 0
349 #define UCS_MAP_CP932 2
350 #define UCS_MAP_CP10001 3
351 static int ms_ucs_map_f = UCS_MAP_ASCII;
353 #ifdef UTF8_INPUT_ENABLE
354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355 static int no_cp932ext_f = FALSE;
356 /* ignore ZERO WIDTH NO-BREAK SPACE */
357 static int no_best_fit_chars_f = FALSE;
358 static int input_endian = ENDIAN_BIG;
359 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
360 static void (*encode_fallback)(nkf_char c) = NULL;
361 static void w_status(struct input_code *, nkf_char);
363 #ifdef UTF8_OUTPUT_ENABLE
364 static int output_bom_f = FALSE;
365 static int output_endian = ENDIAN_BIG;
368 static void std_putc(nkf_char c);
369 static nkf_char std_getc(FILE *f);
370 static nkf_char std_ungetc(nkf_char c,FILE *f);
372 static nkf_char broken_getc(FILE *f);
373 static nkf_char broken_ungetc(nkf_char c,FILE *f);
375 static nkf_char mime_getc(FILE *f);
377 static void mime_putc(nkf_char c);
381 #if !defined(PERL_XS) && !defined(WIN32DLL)
382 static unsigned char stdibuf[IOBUF_SIZE];
383 static unsigned char stdobuf[IOBUF_SIZE];
386 #define NKF_UNSPECIFIED (-TRUE)
/* Mode flags and their initial values (presumably updated by option
 * parsing, which is not visible in this part of the file). */
389 static int unbuf_f = FALSE;
390 static int estab_f = FALSE;
391 static int nop_f = FALSE;
392 static int binmode_f = TRUE; /* binary mode */
393 static int rot_f = FALSE; /* rot14/43 mode */
394 static int hira_f = FALSE; /* hiragana/katakana conversion */
395 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
396 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
397 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
398 static int mimebuf_f = FALSE; /* MIME buffered input */
399 static int broken_f = FALSE; /* convert ESC-less broken JIS */
400 static int iso8859_f = FALSE; /* ISO8859 through */
401 static int mimeout_f = FALSE; /* base64 mode */
402 static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201; stays NKF_UNSPECIFIED until decided (set_input_encoding/set_output_encoding turn it into FALSE for some encodings) */
403 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
405 #ifdef UNICODE_NORMALIZATION
406 static int nfc_f = FALSE;
407 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
408 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
412 static int cap_f = FALSE;
413 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
414 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
416 static int url_f = FALSE;
417 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
418 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
/*
 * Bit layout helpers for nkf_char values.
 *
 * CLASS_MASK selects the top byte and VALUE_MASK the low 24 bits; a value
 * whose top byte equals CLASS_UNICODE is treated as a Unicode code point
 * stored in the low 24 bits.  PREFIX_EUCG3 is OR-ed into a value by
 * nkf_char_euc3_new().
 *
 * Macro parameters are parenthesized so that arguments such as `a | b`
 * are masked as a whole instead of being split by operator precedence.
 */
#define PREFIX_EUCG3	NKF_INT32_C(0x8F00)
#define CLASS_MASK	NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE	NKF_INT32_C(0x01000000)
#define VALUE_MASK	NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX	NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX	NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/* True when the class byte of c is CLASS_UNICODE. */
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the value part of c does not exceed UNICODE_BMP_MAX. */
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
/* True when the value part of c does not exceed UNICODE_MAX. */
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
433 #ifdef NUMCHAR_OPTION
434 static int numchar_f = FALSE;
435 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
436 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
440 static int noout_f = FALSE;
441 static void no_putc(nkf_char c);
442 static int debug_f = FALSE;
443 static void debug(const char *str);
444 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
447 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
448 static void set_input_codename(const char *codename);
451 static int exec_f = 0;
454 #ifdef SHIFTJIS_CP932
455 /* invert IBM extended characters to others */
456 static int cp51932_f = FALSE;
458 /* invert NEC-selected IBM extended characters to IBM extended characters */
459 static int cp932inv_f = TRUE;
461 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
462 #endif /* SHIFTJIS_CP932 */
464 static int x0212_f = FALSE;
465 static int x0213_f = FALSE;
467 static unsigned char prefix_table[256];
469 static void e_status(struct input_code *, nkf_char);
470 static void s_status(struct input_code *, nkf_char);
472 struct input_code input_code_list[] = {
473 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
474 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
475 #ifdef UTF8_INPUT_ENABLE
476 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
477 {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
478 {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
483 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
484 static int base64_count = 0;
486 /* X0208 -> ASCII converter */
489 static int f_line = 0; /* chars in line */
490 static int f_prev = 0;
491 static int fold_preserve_f = FALSE; /* preserve new lines */
492 static int fold_f = FALSE;
493 static int fold_len = 0;
496 static unsigned char kanji_intro = DEFAULT_J;
497 static unsigned char ascii_intro = DEFAULT_R;
501 #define FOLD_MARGIN 10
502 #define DEFAULT_FOLD 60
504 static int fold_margin = FOLD_MARGIN;
506 /* process default */
509 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
511 fprintf(stderr,"nkf internal module connection failure.\n");
517 no_connection(nkf_char c2, nkf_char c1)
519 no_connection2(c2,c1,0);
522 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
523 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
525 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
526 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
527 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
528 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
533 /* static redirections */
535 static void (*o_putc)(nkf_char c) = std_putc;
537 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
538 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
540 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
541 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
543 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
545 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
546 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
548 /* for strict mime */
549 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
550 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
553 static int output_mode = ASCII; /* output kanji mode */
554 static int input_mode = ASCII; /* input kanji mode */
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
557 /* X0201 / X0208 conversion tables */
559 /* X0201 kana conversion table */
561 static const unsigned char cv[]= {
562 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
563 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
564 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
565 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
566 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
567 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
568 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
569 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
570 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
571 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
572 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
573 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
574 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
575 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
576 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
577 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
581 /* X0201 kana conversion table for dakuten */
583 static const unsigned char dv[]= {
584 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
585 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
589 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
590 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
591 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
592 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
593 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
595 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 /* X0201 kana conversion table for handakuten */
604 static const unsigned char ev[]= {
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
616 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624 /* X0208 kigou conversion table */
625 /* 0x8140 - 0x819e */
626 static const unsigned char fv[] = {
628 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
629 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
630 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
632 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
633 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
634 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
636 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
637 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
638 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
644 static int option_mode = 0;
645 static int file_out_f = FALSE;
647 static int overwrite_f = FALSE;
648 static int preserve_time_f = FALSE;
649 static int backup_f = FALSE;
650 static char *backup_suffix = "";
653 static int eolmode_f = 0; /* CR, LF, CRLF */
654 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
655 static nkf_char prev_cr = 0; /* CR or 0 */
656 #ifdef EASYWIN /*Easy Win */
657 static int end_check;
661 nkf_xmalloc(size_t size)
665 if (size == 0) size = 1;
669 perror("can't malloc");
677 nkf_xrealloc(void *ptr, size_t size)
679 if (size == 0) size = 1;
681 ptr = realloc(ptr, size);
683 perror("can't realloc");
690 #define nkf_xfree(ptr) free(ptr)
693 nkf_str_caseeql(const char *src, const char *target)
696 for (i = 0; src[i] && target[i]; i++) {
697 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
699 if (src[i] || target[i]) return FALSE;
704 nkf_enc_from_index(int idx)
706 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
709 return &nkf_encoding_table[idx];
713 nkf_enc_find_index(const char *name)
716 if (name[0] == 'X' && *(name+1) == '-') name += 2;
717 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
718 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
719 return encoding_name_to_id_table[i].id;
726 nkf_enc_find(const char *name)
729 idx = nkf_enc_find_index(name);
730 if (idx < 0) return 0;
731 return nkf_enc_from_index(idx);
/* Field accessors for an nkf_encoding entry; enc must be a pointer to one. */
734 #define nkf_enc_name(enc) (enc)->name
735 #define nkf_enc_to_index(enc) (enc)->id
736 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
/* Input and output converter functions taken from the entry's base encoding. */
737 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
738 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
739 #define nkf_enc_asciicompat(enc) (\
740 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
741 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is one of NkfEncodingUTF_8, NkfEncodingUTF_16 or NkfEncodingUTF_32. */
742 #define nkf_enc_unicode_p(enc) (\
743 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
744 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
745 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the entry's id is CP50220, CP50221 or CP50222. */
746 #define nkf_enc_cp5022x_p(enc) (\
747 nkf_enc_to_index(enc) == CP50220 ||\
748 nkf_enc_to_index(enc) == CP50221 ||\
749 nkf_enc_to_index(enc) == CP50222)
751 #ifdef DEFAULT_CODE_LOCALE
755 #ifdef HAVE_LANGINFO_H
756 return nl_langinfo(CODESET);
757 #elif defined(__WIN32__)
759 sprintf(buf, "CP%d", GetACP());
761 #elif defined(__OS2__)
762 # if defined(INT_IS_SHORT)
768 ULONG ulCP[1], ulncp;
769 DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
770 if (ulCP[0] == 932 || ulCP[0] == 943)
771 strcpy(buf, "Shift_JIS");
773 sprintf(buf, "CP%lu", ulCP[0]);
781 nkf_locale_encoding()
783 nkf_encoding *enc = 0;
784 const char *encname = nkf_locale_charmap();
786 enc = nkf_enc_find(encname);
789 #endif /* DEFAULT_CODE_LOCALE */
794 return &nkf_encoding_table[UTF_8];
798 nkf_default_encoding()
800 nkf_encoding *enc = 0;
801 #ifdef DEFAULT_CODE_LOCALE
802 enc = nkf_locale_encoding();
803 #elif defined(DEFAULT_ENCIDX)
804 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
806 if (!enc) enc = nkf_utf8_encoding();
817 nkf_buf_new(int length)
819 nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
820 buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
828 nkf_buf_dispose(nkf_buf_t *buf)
835 #define nkf_buf_length(buf) ((buf)->len)
836 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
839 nkf_buf_at(nkf_buf_t *buf, int index)
841 assert(index <= buf->len);
842 return buf->ptr[index];
846 nkf_buf_clear(nkf_buf_t *buf)
852 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
854 if (buf->capa <= buf->len) {
857 buf->ptr[buf->len++] = c;
861 nkf_buf_pop(nkf_buf_t *buf)
863 assert(!nkf_buf_empty_p(buf));
864 return buf->ptr[--buf->len];
867 /* Normalization Form C */
870 #define fprintf dllprintf
876 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
883 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
884 #ifdef UTF8_OUTPUT_ENABLE
885 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
886 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
889 #ifdef UTF8_INPUT_ENABLE
890 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
891 " UTF option is -W[8,[16,32][B,L]]\n"
893 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
897 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
898 " M[BQ] MIME encode [B:base64 Q:quoted]\n"
899 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
902 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
903 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
904 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
905 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
908 " O Output to File (DEFAULT 'nkf.out')\n"
909 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
912 " --ic=<encoding> Specify the input encoding\n"
913 " --oc=<encoding> Specify the output encoding\n"
914 " --hiragana --katakana Hiragana/Katakana Conversion\n"
915 " --katakana-hiragana Converts each other\n"
919 " --{cap, url}-input Convert hex after ':' or '%%'\n"
921 #ifdef NUMCHAR_OPTION
922 " --numchar-input Convert Unicode Character Reference\n"
924 #ifdef UTF8_INPUT_ENABLE
925 " --fb-{skip, html, xml, perl, java, subchar}\n"
926 " Specify unassigned character's replacement\n"
931 " --in-place[=SUF] Overwrite original files\n"
932 " --overwrite[=SUF] Preserve timestamp of original files\n"
934 " -g --guess Guess the input code\n"
935 " -v --version Print the version\n"
936 " --help/-V Print this help / configuration\n"
942 show_configuration(void)
945 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
946 " Compile-time options:\n"
947 " Compiled at: " __DATE__ " " __TIME__ "\n"
950 " Default output encoding: "
951 #ifdef DEFAULT_CODE_LOCALE
952 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
953 #elif defined(DEFAULT_ENCIDX)
954 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
960 " Default output end of line: "
961 #if DEFAULT_NEWLINE == CR
963 #elif DEFAULT_NEWLINE == CRLF
969 " Decode MIME encoded string: "
970 #if MIME_DECODE_DEFAULT
976 " Convert JIS X 0201 Katakana: "
983 " --help, --version output: "
984 #if HELP_OUTPUT_HELP_OUTPUT
995 get_backup_filename(const char *suffix, const char *filename)
997 char *backup_filename;
998 int asterisk_count = 0;
1000 int filename_length = strlen(filename);
1002 for(i = 0; suffix[i]; i++){
1003 if(suffix[i] == '*') asterisk_count++;
1007 backup_filename = nkf_xmalloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1008 for(i = 0, j = 0; suffix[i];){
1009 if(suffix[i] == '*'){
1010 backup_filename[j] = '\0';
1011 strncat(backup_filename, filename, filename_length);
1013 j += filename_length;
1015 backup_filename[j++] = suffix[i++];
1018 backup_filename[j] = '\0';
1020 j = filename_length + strlen(suffix);
1021 backup_filename = nkf_xmalloc(j + 1);
1022 strcpy(backup_filename, filename);
1023 strcat(backup_filename, suffix);
1024 backup_filename[j] = '\0';
1026 return backup_filename;
1030 #ifdef UTF8_INPUT_ENABLE
1032 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1037 if(c >= NKF_INT32_C(1)<<shift){
1039 (*f)(0, bin2hex(c>>shift));
1050 encode_fallback_html(nkf_char c)
1055 if(c >= NKF_INT32_C(1000000))
1056 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1057 if(c >= NKF_INT32_C(100000))
1058 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1060 (*oconv)(0, 0x30+(c/10000 )%10);
1062 (*oconv)(0, 0x30+(c/1000 )%10);
1064 (*oconv)(0, 0x30+(c/100 )%10);
1066 (*oconv)(0, 0x30+(c/10 )%10);
1068 (*oconv)(0, 0x30+ c %10);
1074 encode_fallback_xml(nkf_char c)
1079 nkf_each_char_to_hex(oconv, c);
1085 encode_fallback_java(nkf_char c)
1089 if(!nkf_char_unicode_bmp_p(c)){
1093 (*oconv)(0, bin2hex(c>>20));
1094 (*oconv)(0, bin2hex(c>>16));
1098 (*oconv)(0, bin2hex(c>>12));
1099 (*oconv)(0, bin2hex(c>> 8));
1100 (*oconv)(0, bin2hex(c>> 4));
1101 (*oconv)(0, bin2hex(c ));
1106 encode_fallback_perl(nkf_char c)
1111 nkf_each_char_to_hex(oconv, c);
1117 encode_fallback_subchar(nkf_char c)
1119 c = unicode_subchar;
1120 (*oconv)((c>>8)&0xFF, c&0xFF);
1125 static const struct {
1149 {"katakana-hiragana","h3"},
1157 #ifdef UTF8_OUTPUT_ENABLE
1167 {"fb-subchar=", ""},
1169 #ifdef UTF8_INPUT_ENABLE
1170 {"utf8-input", "W"},
1171 {"utf16-input", "W16"},
1172 {"no-cp932ext", ""},
1173 {"no-best-fit-chars",""},
1175 #ifdef UNICODE_NORMALIZATION
1176 {"utf8mac-input", ""},
1188 #ifdef NUMCHAR_OPTION
1189 {"numchar-input", ""},
1195 #ifdef SHIFTJIS_CP932
1206 set_input_encoding(nkf_encoding *enc)
1208 switch (nkf_enc_to_index(enc)) {
1215 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1216 #ifdef SHIFTJIS_CP932
1219 #ifdef UTF8_OUTPUT_ENABLE
1220 ms_ucs_map_f = UCS_MAP_CP932;
1230 case ISO_2022_JP_2004:
1237 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1238 #ifdef SHIFTJIS_CP932
1241 #ifdef UTF8_OUTPUT_ENABLE
1242 ms_ucs_map_f = UCS_MAP_CP932;
1247 #ifdef SHIFTJIS_CP932
1250 #ifdef UTF8_OUTPUT_ENABLE
1251 ms_ucs_map_f = UCS_MAP_CP10001;
1259 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1260 #ifdef SHIFTJIS_CP932
1263 #ifdef UTF8_OUTPUT_ENABLE
1264 ms_ucs_map_f = UCS_MAP_CP932;
1268 #ifdef SHIFTJIS_CP932
1271 #ifdef UTF8_OUTPUT_ENABLE
1272 ms_ucs_map_f = UCS_MAP_MS;
1276 #ifdef SHIFTJIS_CP932
1279 #ifdef UTF8_OUTPUT_ENABLE
1280 ms_ucs_map_f = UCS_MAP_ASCII;
1283 case SHIFT_JISX0213:
1284 case SHIFT_JIS_2004:
1286 #ifdef SHIFTJIS_CP932
1293 #ifdef SHIFTJIS_CP932
1297 #ifdef UTF8_INPUT_ENABLE
1298 #ifdef UNICODE_NORMALIZATION
1306 input_endian = ENDIAN_BIG;
1310 input_endian = ENDIAN_LITTLE;
1315 input_endian = ENDIAN_BIG;
1319 input_endian = ENDIAN_LITTLE;
1326 set_output_encoding(nkf_encoding *enc)
1328 switch (nkf_enc_to_index(enc)) {
1330 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1331 #ifdef SHIFTJIS_CP932
1332 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1334 #ifdef UTF8_OUTPUT_ENABLE
1335 ms_ucs_map_f = UCS_MAP_CP932;
1339 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1340 #ifdef SHIFTJIS_CP932
1341 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1343 #ifdef UTF8_OUTPUT_ENABLE
1344 ms_ucs_map_f = UCS_MAP_CP932;
1348 #ifdef SHIFTJIS_CP932
1349 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1354 #ifdef SHIFTJIS_CP932
1355 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1361 #ifdef SHIFTJIS_CP932
1362 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1368 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1369 #ifdef UTF8_OUTPUT_ENABLE
1370 ms_ucs_map_f = UCS_MAP_CP932;
1374 #ifdef UTF8_OUTPUT_ENABLE
1375 ms_ucs_map_f = UCS_MAP_CP10001;
1380 #ifdef SHIFTJIS_CP932
1381 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1383 #ifdef UTF8_OUTPUT_ENABLE
1384 ms_ucs_map_f = UCS_MAP_ASCII;
1389 #ifdef SHIFTJIS_CP932
1390 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1392 #ifdef UTF8_OUTPUT_ENABLE
1393 ms_ucs_map_f = UCS_MAP_ASCII;
1397 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1398 #ifdef SHIFTJIS_CP932
1399 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1401 #ifdef UTF8_OUTPUT_ENABLE
1402 ms_ucs_map_f = UCS_MAP_CP932;
1407 #ifdef UTF8_OUTPUT_ENABLE
1408 ms_ucs_map_f = UCS_MAP_MS;
1413 #ifdef UTF8_OUTPUT_ENABLE
1414 ms_ucs_map_f = UCS_MAP_ASCII;
1417 case SHIFT_JISX0213:
1418 case SHIFT_JIS_2004:
1420 #ifdef SHIFTJIS_CP932
1421 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1428 #ifdef SHIFTJIS_CP932
1429 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1432 #ifdef UTF8_OUTPUT_ENABLE
1434 output_bom_f = TRUE;
1438 output_bom_f = TRUE;
1441 output_endian = ENDIAN_LITTLE;
1442 output_bom_f = FALSE;
1445 output_endian = ENDIAN_LITTLE;
1446 output_bom_f = TRUE;
1450 output_bom_f = TRUE;
1453 output_endian = ENDIAN_LITTLE;
1454 output_bom_f = FALSE;
1457 output_endian = ENDIAN_LITTLE;
1458 output_bom_f = TRUE;
1464 static struct input_code*
1465 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1468 struct input_code *p = input_code_list;
1470 if (iconv_func == p->iconv_func){
1480 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1482 #ifdef INPUT_CODE_FIX
1483 if (f || !input_encoding)
1490 #ifdef INPUT_CODE_FIX
1491 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1497 if (estab_f && iconv_for_check != iconv){
1498 struct input_code *p = find_inputcode_byfunc(iconv);
1500 set_input_codename(p->name);
1503 iconv_for_check = iconv;
1510 x0212_shift(nkf_char c)
1515 if (0x75 <= c && c <= 0x7f){
1516 ret = c + (0x109 - 0x75);
1519 if (0x75 <= c && c <= 0x7f){
1520 ret = c + (0x113 - 0x75);
1528 x0212_unshift(nkf_char c)
1531 if (0x7f <= c && c <= 0x88){
1532 ret = c + (0x75 - 0x7f);
1533 }else if (0x89 <= c && c <= 0x92){
1534 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1538 #endif /* X0212_ENABLE */
1541 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1547 if((0x21 <= ndx && ndx <= 0x2F)){
1548 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1549 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1551 }else if(0x6E <= ndx && ndx <= 0x7E){
1552 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1553 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1559 else if(nkf_isgraph(ndx)){
1561 const unsigned short *ptr;
1562 ptr = x0212_shiftjis[ndx - 0x21];
1564 val = ptr[(c1 & 0x7f) - 0x21];
1573 c2 = x0212_shift(c2);
1575 #endif /* X0212_ENABLE */
1577 if(0x7F < c2) return 1;
1578 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1579 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1584 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1586 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1589 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1590 if (0xFC < c1) return 1;
1591 #ifdef SHIFTJIS_CP932
1592 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1593 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1600 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1601 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1607 #endif /* SHIFTJIS_CP932 */
1609 if (!x0213_f && is_ibmext_in_sjis(c2)){
1610 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1613 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1626 if(x0213_f && c2 >= 0xF0){
1627 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1628 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1629 }else{ /* 78<=k<=94 */
1630 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1631 if (0x9E < c1) c2++;
1634 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1635 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1636 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1637 if (0x9E < c1) c2++;
1640 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1647 c2 = x0212_unshift(c2);
1654 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1656 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1664 }else if (val < 0x800){
1665 *p1 = 0xc0 | (val >> 6);
1666 *p2 = 0x80 | (val & 0x3f);
1669 } else if (nkf_char_unicode_bmp_p(val)) {
1670 *p1 = 0xe0 | (val >> 12);
1671 *p2 = 0x80 | ((val >> 6) & 0x3f);
1672 *p3 = 0x80 | ( val & 0x3f);
1674 } else if (nkf_char_unicode_value_p(val)) {
1675 *p1 = 0xf0 | (val >> 18);
1676 *p2 = 0x80 | ((val >> 12) & 0x3f);
1677 *p3 = 0x80 | ((val >> 6) & 0x3f);
1678 *p4 = 0x80 | ( val & 0x3f);
1688 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
1695 else if (c1 <= 0xC3) {
1696 /* trail byte or invalid */
1699 else if (c1 <= 0xDF) {
1701 wc = (c1 & 0x1F) << 6;
1704 else if (c1 <= 0xEF) {
1706 wc = (c1 & 0x0F) << 12;
1707 wc |= (c2 & 0x3F) << 6;
1710 else if (c2 <= 0xF4) {
1712 wc = (c1 & 0x0F) << 18;
1713 wc |= (c2 & 0x3F) << 12;
1714 wc |= (c3 & 0x3F) << 6;
1724 #ifdef UTF8_INPUT_ENABLE
1726 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1727 const unsigned short *const *pp, nkf_char psize,
1728 nkf_char *p2, nkf_char *p1)
1731 const unsigned short *p;
1734 if (pp == 0) return 1;
1737 if (c1 < 0 || psize <= c1) return 1;
1739 if (p == 0) return 1;
1742 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1744 if (val == 0) return 1;
1745 if (no_cp932ext_f && (
1746 (val>>8) == 0x2D || /* NEC special characters */
1747 val > NKF_INT32_C(0xF300) /* IBM extended characters */
1755 if (c2 == SO) c2 = JIS_X_0201_1976_K;
1763 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1765 const unsigned short *const *pp;
1766 const unsigned short *const *const *ppp;
1767 static const char no_best_fit_chars_table_C2[] =
1768 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1769 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1770 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1771 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1772 static const char no_best_fit_chars_table_C2_ms[] =
1773 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1774 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1775 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1776 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1777 static const char no_best_fit_chars_table_932_C2[] =
1778 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1779 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1780 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1781 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1782 static const char no_best_fit_chars_table_932_C3[] =
1783 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1784 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1785 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1786 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1792 }else if(c2 < 0xe0){
1793 if(no_best_fit_chars_f){
1794 if(ms_ucs_map_f == UCS_MAP_CP932){
1797 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1800 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1803 }else if(!cp932inv_f){
1806 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1809 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1812 }else if(ms_ucs_map_f == UCS_MAP_MS){
1813 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1814 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1832 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1833 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1834 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1836 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1837 }else if(c0 < 0xF0){
1838 if(no_best_fit_chars_f){
1839 if(ms_ucs_map_f == UCS_MAP_CP932){
1840 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1841 }else if(ms_ucs_map_f == UCS_MAP_MS){
1846 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1849 if(c0 == 0x92) return 1;
1854 if(c1 == 0x80 || c0 == 0x9C) return 1;
1857 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1862 if(c0 == 0x94) return 1;
1865 if(c0 == 0xBB) return 1;
1875 if(c0 == 0x95) return 1;
1878 if(c0 == 0xA5) return 1;
1885 if(c0 == 0x8D) return 1;
1888 if(c0 == 0x9E && !cp932inv_f) return 1;
1891 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1899 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1900 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1901 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1903 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1905 #ifdef SHIFTJIS_CP932
1906 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1908 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1909 s2e_conv(s2, s1, p2, p1);
1918 #ifdef UTF8_OUTPUT_ENABLE
1920 e2w_conv(nkf_char c2, nkf_char c1)
1922 const unsigned short *p;
1924 if (c2 == JIS_X_0201_1976_K) {
1925 if (ms_ucs_map_f == UCS_MAP_CP10001) {
1933 p = euc_to_utf8_1byte;
1935 } else if (is_eucg3(c2)){
1936 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1939 c2 = (c2&0x7f) - 0x21;
1940 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1941 p = x0212_to_utf8_2bytes[c2];
1947 c2 = (c2&0x7f) - 0x21;
1948 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1950 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
1951 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
1952 euc_to_utf8_2bytes_ms[c2];
1957 c1 = (c1 & 0x7f) - 0x21;
1958 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
1965 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1972 }else if (0xc0 <= c2 && c2 <= 0xef) {
1973 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1974 #ifdef NUMCHAR_OPTION
1977 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1985 #ifdef UTF8_INPUT_ENABLE
1987 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
1989 nkf_char c1, c2, c3, c4;
1996 else if (nkf_char_unicode_bmp_p(val)){
1997 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
1998 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2001 *p1 = nkf_char_unicode_new(val);
2007 *p1 = nkf_char_unicode_new(val);
2014 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2016 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
2017 if (iso2022jp_f && !x0201_f) {
2018 c2 = GETA1; c1 = GETA2;
2020 c2 = JIS_X_0201_1976_K;
2024 }else if (c2 == 0x8f){
2028 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2029 /* encoding is eucJP-ms, so map to the Unicode Private Use Area */
2030 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2033 c2 = (c2 << 8) | (c1 & 0x7f);
2035 #ifdef SHIFTJIS_CP932
2038 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2039 s2e_conv(s2, s1, &c2, &c1);
2046 #endif /* SHIFTJIS_CP932 */
2048 #endif /* X0212_ENABLE */
2049 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2052 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2053 /* encoding is eucJP-ms, so map to the Unicode Private Use Area */
2054 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2059 #ifdef SHIFTJIS_CP932
2060 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2062 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2063 s2e_conv(s2, s1, &c2, &c1);
2070 #endif /* SHIFTJIS_CP932 */
2078 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2080 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2081 if (iso2022jp_f && !x0201_f) {
2082 c2 = GETA1; c1 = GETA2;
2086 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2088 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2090 if(c1 == 0x7F) return 0;
2091 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2094 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2095 if (ret) return ret;
2102 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2104 nkf_char ret = 0, c4 = 0;
2105 static const char w_iconv_utf8_1st_byte[] =
2107 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2108 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2109 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2110 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2117 if (c1 < 0 || 0xff < c1) {
2118 }else if (c1 == 0) { /* 0 : 1 byte*/
2120 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2123 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2125 if (c2 < 0x80 || 0xBF < c2) return 0;
2128 if (c3 == 0) return -1;
2129 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2134 if (c3 == 0) return -1;
2135 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2139 if (c3 == 0) return -1;
2140 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2144 if (c3 == 0) return -2;
2145 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2149 if (c3 == 0) return -2;
2150 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2154 if (c3 == 0) return -2;
2155 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2163 if (c1 == 0 || c1 == EOF){
2164 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2165 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2168 ret = w2e_conv(c1, c2, c3, &c1, &c2);
2176 #define NKF_ICONV_INVALID_CODE_RANGE -13 /* returned by unicode_iconv() for an unpaired surrogate or a value it does not accept as a code point */
2178 unicode_iconv(nkf_char wc)
2186 }else if ((wc>>11) == 27) {
2187 /* unpaired surrogate */
2188 return NKF_ICONV_INVALID_CODE_RANGE;
2189 }else if (wc < 0xFFFF) {
2190 ret = w16e_conv(wc, &c2, &c1);
2191 if (ret) return ret;
2192 }else if (wc < 0x10FFFF) {
2194 c1 = nkf_char_unicode_new(wc);
2196 return NKF_ICONV_INVALID_CODE_RANGE;
2202 #define NKF_ICONV_NEED_ONE_MORE_BYTE -1 /* NOTE(review): no use visible in this section; by its name, asks the caller for one more input byte - confirm */
2203 #define NKF_ICONV_NEED_TWO_MORE_BYTES -2 /* returned by nkf_iconv_utf_16() when a high-surrogate byte (0xD8..0xDB) is not yet followed by a low-surrogate byte (0xDC..0xDF) */
2204 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00)) /* combine a UTF-16 surrogate pair into one code point; 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000 */
2206 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2215 if (input_endian == ENDIAN_BIG) {
2216 if (0xD8 <= c1 && c1 <= 0xDB) {
2217 if (0xDC <= c3 && c3 <= 0xDF) {
2218 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2219 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2224 if (0xD8 <= c2 && c2 <= 0xDB) {
2225 if (0xDC <= c4 && c4 <= 0xDF) {
2226 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2227 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2233 return (*unicode_iconv)(wc);
2237 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
2240 return 16; /* different from w_iconv32 */
2244 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
2247 return 32; /* different from w_iconv16 */
2251 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2260 switch(input_endian){
2262 wc = c2 << 16 | c3 << 8 | c4;
2265 wc = c3 << 16 | c2 << 8 | c1;
2268 wc = c1 << 16 | c4 << 8 | c3;
2271 wc = c4 << 16 | c1 << 8 | c2;
2274 return NKF_ICONV_INVALID_CODE_RANGE;
2277 return (*unicode_iconv)(wc);
2281 #define output_ascii_escape_sequence(mode) do { \
2282 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2285 (*o_putc)(ascii_intro); \
2286 output_mode = mode; \
2291 output_escape_sequence(int mode)
2293 if (output_mode == mode)
2301 case JIS_X_0201_1976_K:
2309 (*o_putc)(kanji_intro);
2334 j_oconv(nkf_char c2, nkf_char c1)
2336 #ifdef NUMCHAR_OPTION
2337 if (c2 == 0 && nkf_char_unicode_p(c1)){
2338 w16e_conv(c1, &c2, &c1);
2339 if (c2 == 0 && nkf_char_unicode_p(c1)){
2340 c2 = c1 & VALUE_MASK;
2341 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2344 c2 = 0x7F + c1 / 94;
2345 c1 = 0x21 + c1 % 94;
2347 if (encode_fallback) (*encode_fallback)(c1);
2354 output_ascii_escape_sequence(ASCII);
2357 else if (c2 == EOF) {
2358 output_ascii_escape_sequence(ASCII);
2361 else if (c2 == ISO_8859_1) {
2362 output_ascii_escape_sequence(ISO_8859_1);
2365 else if (c2 == JIS_X_0201_1976_K) {
2366 output_escape_sequence(JIS_X_0201_1976_K);
2369 } else if (is_eucg3(c2)){
2370 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2371 (*o_putc)(c2 & 0x7f);
2376 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2377 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2378 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
2385 e_oconv(nkf_char c2, nkf_char c1)
2387 if (c2 == 0 && nkf_char_unicode_p(c1)){
2388 w16e_conv(c1, &c2, &c1);
2389 if (c2 == 0 && nkf_char_unicode_p(c1)){
2390 c2 = c1 & VALUE_MASK;
2391 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2395 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2396 c1 = 0x21 + c1 % 94;
2399 (*o_putc)((c2 & 0x7f) | 0x080);
2400 (*o_putc)(c1 | 0x080);
2402 (*o_putc)((c2 & 0x7f) | 0x080);
2403 (*o_putc)(c1 | 0x080);
2407 if (encode_fallback) (*encode_fallback)(c1);
2415 } else if (c2 == 0) {
2416 output_mode = ASCII;
2418 } else if (c2 == JIS_X_0201_1976_K) {
2419 output_mode = EUC_JP;
2420 (*o_putc)(SS2); (*o_putc)(c1|0x80);
2421 } else if (c2 == ISO_8859_1) {
2422 output_mode = ISO_8859_1;
2423 (*o_putc)(c1 | 0x080);
2425 } else if (is_eucg3(c2)){
2426 output_mode = EUC_JP;
2427 #ifdef SHIFTJIS_CP932
2430 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2431 s2e_conv(s2, s1, &c2, &c1);
2436 output_mode = ASCII;
2438 }else if (is_eucg3(c2)){
2441 (*o_putc)((c2 & 0x7f) | 0x080);
2442 (*o_putc)(c1 | 0x080);
2445 (*o_putc)((c2 & 0x7f) | 0x080);
2446 (*o_putc)(c1 | 0x080);
2450 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2451 set_iconv(FALSE, 0);
2452 return; /* too late to rescue this char */
2454 output_mode = EUC_JP;
2455 (*o_putc)(c2 | 0x080);
2456 (*o_putc)(c1 | 0x080);
2461 s_oconv(nkf_char c2, nkf_char c1)
2463 #ifdef NUMCHAR_OPTION
2464 if (c2 == 0 && nkf_char_unicode_p(c1)){
2465 w16e_conv(c1, &c2, &c1);
2466 if (c2 == 0 && nkf_char_unicode_p(c1)){
2467 c2 = c1 & VALUE_MASK;
2468 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2471 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2473 c1 += 0x40 + (c1 > 0x3e);
2478 if(encode_fallback)(*encode_fallback)(c1);
2487 } else if (c2 == 0) {
2488 output_mode = ASCII;
2490 } else if (c2 == JIS_X_0201_1976_K) {
2491 output_mode = SHIFT_JIS;
2493 } else if (c2 == ISO_8859_1) {
2494 output_mode = ISO_8859_1;
2495 (*o_putc)(c1 | 0x080);
2497 } else if (is_eucg3(c2)){
2498 output_mode = SHIFT_JIS;
2499 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2505 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2506 set_iconv(FALSE, 0);
2507 return; /* too late to rescue this char */
2509 output_mode = SHIFT_JIS;
2510 e2s_conv(c2, c1, &c2, &c1);
2512 #ifdef SHIFTJIS_CP932
2514 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2515 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2521 #endif /* SHIFTJIS_CP932 */
2524 if (prefix_table[(unsigned char)c1]){
2525 (*o_putc)(prefix_table[(unsigned char)c1]);
2531 #ifdef UTF8_OUTPUT_ENABLE
2533 w_oconv(nkf_char c2, nkf_char c1)
2539 output_bom_f = FALSE;
2550 if (c2 == 0 && nkf_char_unicode_p(c1)){
2551 val = c1 & VALUE_MASK;
2552 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2554 if (c2) (*o_putc)(c2);
2555 if (c3) (*o_putc)(c3);
2556 if (c4) (*o_putc)(c4);
2563 val = e2w_conv(c2, c1);
2565 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2567 if (c2) (*o_putc)(c2);
2568 if (c3) (*o_putc)(c3);
2569 if (c4) (*o_putc)(c4);
2575 w_oconv16(nkf_char c2, nkf_char c1)
2578 output_bom_f = FALSE;
2579 if (output_endian == ENDIAN_LITTLE){
2593 if (c2 == 0 && nkf_char_unicode_p(c1)) {
2594 if (nkf_char_unicode_bmp_p(c1)) {
2595 c2 = (c1 >> 8) & 0xff;
2599 if (c1 <= UNICODE_MAX) {
2600 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2601 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2602 if (output_endian == ENDIAN_LITTLE){
2603 (*o_putc)(c2 & 0xff);
2604 (*o_putc)((c2 >> 8) & 0xff);
2605 (*o_putc)(c1 & 0xff);
2606 (*o_putc)((c1 >> 8) & 0xff);
2608 (*o_putc)((c2 >> 8) & 0xff);
2609 (*o_putc)(c2 & 0xff);
2610 (*o_putc)((c1 >> 8) & 0xff);
2611 (*o_putc)(c1 & 0xff);
2617 nkf_char val = e2w_conv(c2, c1);
2618 c2 = (val >> 8) & 0xff;
2623 if (output_endian == ENDIAN_LITTLE){
2633 w_oconv32(nkf_char c2, nkf_char c1)
2636 output_bom_f = FALSE;
2637 if (output_endian == ENDIAN_LITTLE){
2655 if (c2 == ISO_8859_1) {
2657 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2660 c1 = e2w_conv(c2, c1);
2663 if (output_endian == ENDIAN_LITTLE){
2664 (*o_putc)( c1 & 0xFF);
2665 (*o_putc)((c1 >> 8) & 0xFF);
2666 (*o_putc)((c1 >> 16) & 0xFF);
2670 (*o_putc)((c1 >> 16) & 0xFF);
2671 (*o_putc)((c1 >> 8) & 0xFF);
2672 (*o_putc)( c1 & 0xFF);
2677 #define SCORE_L2 (1) /* Kanji Level 2 (lowest bit; every following flag is the previous one shifted left by one, so flags can be OR-ed into a score) */
2678 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2679 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */
2680 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2681 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2682 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* Undefined Characters */
2683 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2684 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
2686 #define SCORE_INIT (SCORE_iMIME) /* initial score assigned by status_reset() */
2688 static const nkf_char score_table_A0[] = {
2691 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2692 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
2695 static const nkf_char score_table_F0[] = {
2696 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2697 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
2698 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2699 SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
2703 set_code_score(struct input_code *ptr, nkf_char score)
2706 ptr->score |= score;
2711 clr_code_score(struct input_code *ptr, nkf_char score)
2714 ptr->score &= ~score;
2719 code_score(struct input_code *ptr)
2721 nkf_char c2 = ptr->buf[0];
2722 #ifdef UTF8_OUTPUT_ENABLE
2723 nkf_char c1 = ptr->buf[1];
2726 set_code_score(ptr, SCORE_ERROR);
2727 }else if (c2 == SS2){
2728 set_code_score(ptr, SCORE_KANA);
2729 }else if (c2 == 0x8f){
2730 set_code_score(ptr, SCORE_X0212);
2731 #ifdef UTF8_OUTPUT_ENABLE
2732 }else if (!e2w_conv(c2, c1)){
2733 set_code_score(ptr, SCORE_NO_EXIST);
2735 }else if ((c2 & 0x70) == 0x20){
2736 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2737 }else if ((c2 & 0x70) == 0x70){
2738 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2739 }else if ((c2 & 0x70) >= 0x50){
2740 set_code_score(ptr, SCORE_L2);
2745 status_disable(struct input_code *ptr)
2750 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2754 status_push_ch(struct input_code *ptr, nkf_char c)
2756 ptr->buf[ptr->index++] = c;
2760 status_clear(struct input_code *ptr)
2767 status_reset(struct input_code *ptr)
2770 ptr->score = SCORE_INIT;
2774 status_reinit(struct input_code *ptr)
2777 ptr->_file_stat = 0;
2781 status_check(struct input_code *ptr, nkf_char c)
2783 if (c <= DEL && estab_f){
2789 s_status(struct input_code *ptr, nkf_char c)
2793 status_check(ptr, c);
2798 }else if (nkf_char_unicode_p(c)){
2800 }else if (0xa1 <= c && c <= 0xdf){
2801 status_push_ch(ptr, SS2);
2802 status_push_ch(ptr, c);
2805 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2807 status_push_ch(ptr, c);
2808 }else if (0xed <= c && c <= 0xee){
2810 status_push_ch(ptr, c);
2811 #ifdef SHIFTJIS_CP932
2812 }else if (is_ibmext_in_sjis(c)){
2814 status_push_ch(ptr, c);
2815 #endif /* SHIFTJIS_CP932 */
2817 }else if (0xf0 <= c && c <= 0xfc){
2819 status_push_ch(ptr, c);
2820 #endif /* X0212_ENABLE */
2822 status_disable(ptr);
2826 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2827 status_push_ch(ptr, c);
2828 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2832 status_disable(ptr);
2836 #ifdef SHIFTJIS_CP932
2837 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2838 status_push_ch(ptr, c);
2839 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2840 set_code_score(ptr, SCORE_CP932);
2845 #endif /* SHIFTJIS_CP932 */
2846 status_disable(ptr);
2849 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2850 status_push_ch(ptr, c);
2851 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2852 set_code_score(ptr, SCORE_CP932);
2855 status_disable(ptr);
2862 e_status(struct input_code *ptr, nkf_char c)
2866 status_check(ptr, c);
2871 }else if (nkf_char_unicode_p(c)){
2873 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2875 status_push_ch(ptr, c);
2877 }else if (0x8f == c){
2879 status_push_ch(ptr, c);
2880 #endif /* X0212_ENABLE */
2882 status_disable(ptr);
2886 if (0xa1 <= c && c <= 0xfe){
2887 status_push_ch(ptr, c);
2891 status_disable(ptr);
2896 if (0xa1 <= c && c <= 0xfe){
2898 status_push_ch(ptr, c);
2900 status_disable(ptr);
2902 #endif /* X0212_ENABLE */
2906 #ifdef UTF8_INPUT_ENABLE
2908 w_status(struct input_code *ptr, nkf_char c)
2912 status_check(ptr, c);
2917 }else if (nkf_char_unicode_p(c)){
2919 }else if (0xc0 <= c && c <= 0xdf){
2921 status_push_ch(ptr, c);
2922 }else if (0xe0 <= c && c <= 0xef){
2924 status_push_ch(ptr, c);
2925 }else if (0xf0 <= c && c <= 0xf4){
2927 status_push_ch(ptr, c);
2929 status_disable(ptr);
2934 if (0x80 <= c && c <= 0xbf){
2935 status_push_ch(ptr, c);
2936 if (ptr->index > ptr->stat){
2937 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2938 && ptr->buf[2] == 0xbf);
2939 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2940 &ptr->buf[0], &ptr->buf[1]);
2947 status_disable(ptr);
2951 if (0x80 <= c && c <= 0xbf){
2952 if (ptr->index < ptr->stat){
2953 status_push_ch(ptr, c);
2958 status_disable(ptr);
2966 code_status(nkf_char c)
2968 int action_flag = 1;
2969 struct input_code *result = 0;
2970 struct input_code *p = input_code_list;
2972 if (!p->status_func) {
2976 if (!p->status_func)
2978 (p->status_func)(p, c);
2981 }else if(p->stat == 0){
2992 if (result && !estab_f){
2993 set_iconv(TRUE, result->iconv_func);
2994 }else if (c <= DEL){
2995 struct input_code *ptr = input_code_list;
3005 nkf_buf_t *std_gc_buf;
3006 nkf_char broken_state;
3007 nkf_buf_t *broken_buf;
3008 nkf_char mimeout_state;
3012 static nkf_state_t *nkf_state = NULL;
3014 #define STD_GC_BUFSIZE (256) /* size passed to nkf_buf_new() for nkf_state->std_gc_buf, the buffer std_ungetc() pushes into */
3017 nkf_state_init(void)
3020 nkf_buf_clear(nkf_state->std_gc_buf);
3021 nkf_buf_clear(nkf_state->broken_buf);
3022 nkf_buf_clear(nkf_state->nfc_buf);
3025 nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3026 nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3027 nkf_state->broken_buf = nkf_buf_new(3);
3028 nkf_state->nfc_buf = nkf_buf_new(9);
3030 nkf_state->broken_state = 0;
3031 nkf_state->mimeout_state = 0;
3038 if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3039 return nkf_buf_pop(nkf_state->std_gc_buf);
3046 std_ungetc(nkf_char c, FILE *f)
3048 nkf_buf_push(nkf_state->std_gc_buf, c);
3054 std_putc(nkf_char c)
3061 static nkf_char hold_buf[HOLD_SIZE*2];
3062 static int hold_count = 0;
3064 push_hold_buf(nkf_char c2)
3066 if (hold_count >= HOLD_SIZE*2)
3068 hold_buf[hold_count++] = c2;
3069 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3073 h_conv(FILE *f, nkf_char c1, nkf_char c2)
3079 /** it must NOT be in the kanji shift sequence */
3080 /** it must NOT be written in JIS7 */
3081 /** and it must be after 2 byte 8bit code */
3087 while ((c2 = (*i_getc)(f)) != EOF) {
3093 if (push_hold_buf(c2) == EOF || estab_f) {
3099 struct input_code *p = input_code_list;
3100 struct input_code *result = p;
3105 if (p->status_func && p->score < result->score) {
3110 set_iconv(TRUE, result->iconv_func);
3115 ** 1) EOF is detected, or
3116 ** 2) Code is established, or
3117 ** 3) Buffer is FULL (but last word is pushed)
3119 ** in 1) and 3) cases, we continue to use
3120 ** Kanji codes by oconv and leave estab_f unchanged.
3125 while (hold_index < hold_count){
3126 c1 = hold_buf[hold_index++];
3127 if (nkf_char_unicode_p(c1)) {
3131 else if (c1 <= DEL){
3134 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3135 (*iconv)(JIS_X_0201_1976_K, c1, 0);
3138 if (hold_index < hold_count){
3139 c2 = hold_buf[hold_index++];
3149 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3152 if (hold_index < hold_count){
3153 c3 = hold_buf[hold_index++];
3154 } else if ((c3 = (*i_getc)(f)) == EOF) {
3159 if (hold_index < hold_count){
3160 c4 = hold_buf[hold_index++];
3161 } else if ((c4 = (*i_getc)(f)) == EOF) {
3166 (*iconv)(c1, c2, (c3<<8)|c4);
3169 /* 3 bytes EUC or UTF-8 */
3170 if (hold_index < hold_count){
3171 c3 = hold_buf[hold_index++];
3172 } else if ((c3 = (*i_getc)(f)) == EOF) {
3178 (*iconv)(c1, c2, c3);
3181 if (c3 == EOF) break;
3187 * Check and Ignore BOM
3193 switch(c2 = (*i_getc)(f)){
3195 if((c2 = (*i_getc)(f)) == 0x00){
3196 if((c2 = (*i_getc)(f)) == 0xFE){
3197 if((c2 = (*i_getc)(f)) == 0xFF){
3198 if(!input_encoding){
3199 set_iconv(TRUE, w_iconv32);
3201 if (iconv == w_iconv32) {
3202 input_endian = ENDIAN_BIG;
3205 (*i_ungetc)(0xFF,f);
3206 }else (*i_ungetc)(c2,f);
3207 (*i_ungetc)(0xFE,f);
3208 }else if(c2 == 0xFF){
3209 if((c2 = (*i_getc)(f)) == 0xFE){
3210 if(!input_encoding){
3211 set_iconv(TRUE, w_iconv32);
3213 if (iconv == w_iconv32) {
3214 input_endian = ENDIAN_2143;
3217 (*i_ungetc)(0xFF,f);
3218 }else (*i_ungetc)(c2,f);
3219 (*i_ungetc)(0xFF,f);
3220 }else (*i_ungetc)(c2,f);
3221 (*i_ungetc)(0x00,f);
3222 }else (*i_ungetc)(c2,f);
3223 (*i_ungetc)(0x00,f);
3226 if((c2 = (*i_getc)(f)) == 0xBB){
3227 if((c2 = (*i_getc)(f)) == 0xBF){
3228 if(!input_encoding){
3229 set_iconv(TRUE, w_iconv);
3231 if (iconv == w_iconv) {
3234 (*i_ungetc)(0xBF,f);
3235 }else (*i_ungetc)(c2,f);
3236 (*i_ungetc)(0xBB,f);
3237 }else (*i_ungetc)(c2,f);
3238 (*i_ungetc)(0xEF,f);
3241 if((c2 = (*i_getc)(f)) == 0xFF){
3242 if((c2 = (*i_getc)(f)) == 0x00){
3243 if((c2 = (*i_getc)(f)) == 0x00){
3244 if(!input_encoding){
3245 set_iconv(TRUE, w_iconv32);
3247 if (iconv == w_iconv32) {
3248 input_endian = ENDIAN_3412;
3251 (*i_ungetc)(0x00,f);
3252 }else (*i_ungetc)(c2,f);
3253 (*i_ungetc)(0x00,f);
3254 }else (*i_ungetc)(c2,f);
3255 if(!input_encoding){
3256 set_iconv(TRUE, w_iconv16);
3258 if (iconv == w_iconv16) {
3259 input_endian = ENDIAN_BIG;
3262 (*i_ungetc)(0xFF,f);
3263 }else (*i_ungetc)(c2,f);
3264 (*i_ungetc)(0xFE,f);
3267 if((c2 = (*i_getc)(f)) == 0xFE){
3268 if((c2 = (*i_getc)(f)) == 0x00){
3269 if((c2 = (*i_getc)(f)) == 0x00){
3270 if(!input_encoding){
3271 set_iconv(TRUE, w_iconv32);
3273 if (iconv == w_iconv32) {
3274 input_endian = ENDIAN_LITTLE;
3277 (*i_ungetc)(0x00,f);
3278 }else (*i_ungetc)(c2,f);
3279 (*i_ungetc)(0x00,f);
3280 }else (*i_ungetc)(c2,f);
3281 if(!input_encoding){
3282 set_iconv(TRUE, w_iconv16);
3284 if (iconv == w_iconv16) {
3285 input_endian = ENDIAN_LITTLE;
3288 (*i_ungetc)(0xFE,f);
3289 }else (*i_ungetc)(c2,f);
3290 (*i_ungetc)(0xFF,f);
3299 broken_getc(FILE *f)
3303 if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3304 return nkf_buf_pop(nkf_state->broken_buf);
3307 if (c=='$' && nkf_state->broken_state != ESC
3308 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3310 nkf_state->broken_state = 0;
3311 if (c1=='@'|| c1=='B') {
3312 nkf_buf_push(nkf_state->broken_buf, c1);
3313 nkf_buf_push(nkf_state->broken_buf, c);
3319 } else if (c=='(' && nkf_state->broken_state != ESC
3320 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3322 nkf_state->broken_state = 0;
3323 if (c1=='J'|| c1=='B') {
3324 nkf_buf_push(nkf_state->broken_buf, c1);
3325 nkf_buf_push(nkf_state->broken_buf, c);
3332 nkf_state->broken_state = c;
3338 broken_ungetc(nkf_char c, FILE *f)
3340 if (nkf_buf_length(nkf_state->broken_buf) < 2)
3341 nkf_buf_push(nkf_state->broken_buf, c);
3346 eol_conv(nkf_char c2, nkf_char c1)
3348 if (guess_f && input_eol != EOF) {
3349 if (c2 == 0 && c1 == LF) {
3350 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3351 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3352 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3354 else if (!input_eol) input_eol = CR;
3355 else if (input_eol != CR) input_eol = EOF;
3357 if (prev_cr || (c2 == 0 && c1 == LF)) {
3359 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3360 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3362 if (c2 == 0 && c1 == CR) prev_cr = CR;
3363 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3367 put_newline(void (*func)(nkf_char))
3369 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3384 oconv_newline(void (*func)(nkf_char, nkf_char))
3386 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3401 Return value of fold_conv()
3403 LF add newline and output char
3404 CR add newline and output nothing
3407 1 (or else) normal output
3409 fold state in prev (previous character)
3411 >0x80 Japanese (X0208/X0201)
3416 This fold algorithm does not preserve leading spaces in a line.
3417 This is the main difference from fmt.
3420 #define char_size(c2,c1) (c2?2:1) /* width added to f_line by fold_conv(): 2 when c2 is non-zero, otherwise 1; c1 is not evaluated */
3423 fold_conv(nkf_char c2, nkf_char c1)
3426 nkf_char fold_state;
3428 if (c1== CR && !fold_preserve_f) {
3429 fold_state=0; /* ignore cr */
3430 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3432 fold_state=0; /* ignore cr */
3433 } else if (c1== BS) {
3434 if (f_line>0) f_line--;
3436 } else if (c2==EOF && f_line != 0) { /* close open last line */
3438 } else if ((c1==LF && !fold_preserve_f)
3439 || ((c1==CR||(c1==LF&&f_prev!=CR))
3440 && fold_preserve_f)) {
3442 if (fold_preserve_f) {
3446 } else if ((f_prev == c1 && !fold_preserve_f)
3447 || (f_prev == LF && fold_preserve_f)
3448 ) { /* duplicate newline */
3451 fold_state = LF; /* output two newline */
3457 if (f_prev&0x80) { /* Japanese? */
3459 fold_state = 0; /* ignore given single newline */
3460 } else if (f_prev==SP) {
3464 if (++f_line<=fold_len)
3468 fold_state = CR; /* fold and output nothing */
3472 } else if (c1=='\f') {
3475 fold_state = LF; /* output newline and clear */
3476 } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
3477 /* JIS X 0208 ideographic space ("kankaku", c2 == '!' && c1 == '!') or ASCII blank */
3479 fold_state = 0; /* remove duplicate spaces */
3482 if (++f_line<=fold_len)
3483 fold_state = SP; /* output ASCII space only */
3485 f_prev = SP; f_line = 0;
3486 fold_state = CR; /* fold and output nothing */
3490 prev0 = f_prev; /* we still need this one... , but almost done */
3492 if (c2 || c2 == JIS_X_0201_1976_K)
3493 f_prev |= 0x80; /* this is Japanese */
3494 f_line += char_size(c2,c1);
3495 if (f_line<=fold_len) { /* normal case */
3498 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3499 f_line = char_size(c2,c1);
3500 fold_state = LF; /* We can't wait, do fold now */
3501 } else if (c2 == JIS_X_0201_1976_K) {
3502 /* simple kinsoku rules: fold_state 1 means no folding */
3503 if (c1==(0xde&0x7f)) fold_state = 1; /* ゛ */
3504 else if (c1==(0xdf&0x7f)) fold_state = 1; /* ゜ */
3505 else if (c1==(0xa4&0x7f)) fold_state = 1; /* 。 */
3506 else if (c1==(0xa3&0x7f)) fold_state = 1; /* ， */
3507 else if (c1==(0xa1&0x7f)) fold_state = 1; /* 」 */
3508 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3509 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3511 fold_state = LF;/* add one new f_line before this character */
3514 fold_state = LF;/* add one new f_line before this character */
3517 /* kinsoku point in ASCII */
3518 if ( c1==')'|| /* { [ ( */
3529 /* just after special */
3530 } else if (!is_alnum(prev0)) {
3531 f_line = char_size(c2,c1);
3533 } else if ((prev0==SP) || /* ignored new f_line */
3534 (prev0==LF)|| /* ignored new f_line */
3535 (prev0&0x80)) { /* X0208 - ASCII */
3536 f_line = char_size(c2,c1);
3537 fold_state = LF;/* add one new f_line before this character */
3539 fold_state = 1; /* default no fold in ASCII */
3543 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3544 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3545 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3546 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3547 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3548 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3549 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3550 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3551 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3552 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3553 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3554 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3555 /* default no fold in kinsoku */
3558 f_line = char_size(c2,c1);
3559 /* add one new f_line before this character */
3562 f_line = char_size(c2,c1);
3564 /* add one new f_line before this character */
3569 /* terminator process */
3570 switch(fold_state) {
3572 oconv_newline(o_fconv);
3578 oconv_newline(o_fconv);
3589 static nkf_char z_prev2=0,z_prev1=0;
/*
 * z_conv: width/alphabet conversion stage; every result is forwarded
 * through o_zconv.
 *
 * NOTE: this listing omits some of the original lines, so only the
 * statements that are visible here are described.
 *
 *  - A JIS X 0201 kana remembered in z_prev2/z_prev1 is combined with a
 *    following dakuten / handakuten through the dv / ev tables; when no
 *    combination applies the cv table entry is emitted instead.
 *  - alpha_f bit 1 covers the JIS X 0208 alphabet row (c2 == 0x23) and
 *    the fv lookup; c2 == 0x21 is the JIS X 0208 symbol ("kigou") row;
 *    bit 4 selects a further branch whose body is not visible here.
 *  - alpha_f bit 8 with c2 == 0 replaces the ASCII characters > < " &
 *    by the HTML entities &gt; &lt; &quot; &amp;.
 *  - JIS X 0208 katakana (c2 == 0x25) is mapped to JIS X 0201 katakana
 *    through fullwidth_to_halfwidth: the high byte of an entry is the
 *    kana, the low byte (0x5E / 0x5F) an optional trailing sound mark.
 *
 * c2: character-set / first-byte selector, c1: character byte.
 */
3592 z_conv(nkf_char c2, nkf_char c1)
3595 /* if (c2) c1 &= 0x7f; assertion */
3597 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3603 if (z_prev2 == JIS_X_0201_1976_K) {
3604 if (c2 == JIS_X_0201_1976_K) {
3605 if (c1 == (0xde&0x7f)) { /* dakuten (voiced sound mark) */
3607 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3609 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* handakuten (semi-voiced sound mark) */
3611 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3616 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3618 if (c2 == JIS_X_0201_1976_K) {
3619 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3620 /* wait for a following dakuten or handakuten */
3625 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3636 if (alpha_f&1 && c2 == 0x23) {
3637 /* JISX0208 Alphabet */
3639 } else if (c2 == 0x21) {
3640 /* JISX0208 Kigou */
3645 } else if (alpha_f&4) {
3650 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3656 if (alpha_f&8 && c2 == 0) {
/* HTML entity text for the markup-significant ASCII characters */
3658 const char *entity = 0;
3660 case '>': entity = "&gt;"; break;
3661 case '<': entity = "&lt;"; break;
3662 case '\"': entity = "&quot;"; break;
3663 case '&': entity = "&amp;"; break;
3666 while (*entity) (*o_zconv)(0, *entity++);
3672 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3677 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3681 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3685 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3689 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3693 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3697 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3701 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3705 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3710 (*o_zconv)(JIS_X_0201_1976_K, c);
3713 } else if (c2 == 0x25) {
3714 /* JISX0208 Katakana */
3715 static const int fullwidth_to_halfwidth[] =
3717 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3718 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3719 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3720 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3721 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3722 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3723 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3724 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3725 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3726 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3727 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3728 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3730 if (fullwidth_to_halfwidth[c1-0x20]){
3731 c2 = fullwidth_to_halfwidth[c1-0x20];
3732 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3734 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
3744 #define rot13(c) ( \
3746 (c <= 'M') ? (c + 13): \
3747 (c <= 'Z') ? (c - 13): \
3749 (c <= 'm') ? (c + 13): \
3750 (c <= 'z') ? (c - 13): \
3754 #define rot47(c) ( \
3756 ( c <= 'O') ? (c + 47) : \
3757 ( c <= '~') ? (c - 47) : \
3762 rot_conv(nkf_char c2, nkf_char c1)
3764 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3770 (*o_rot_conv)(c2,c1);
3774 hira_conv(nkf_char c2, nkf_char c1)
3778 if (0x20 < c1 && c1 < 0x74) {
3780 (*o_hira_conv)(c2,c1);
3782 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3784 c1 = nkf_char_unicode_new(0x3094);
3785 (*o_hira_conv)(c2,c1);
3788 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3790 (*o_hira_conv)(c2,c1);
3795 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3798 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3800 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3804 (*o_hira_conv)(c2,c1);
3809 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
3811 #define RANGE_NUM_MAX 18
3812 static const nkf_char range[RANGE_NUM_MAX][2] = {
3833 nkf_char start, end, c;
3835 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3839 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3844 for (i = 0; i < RANGE_NUM_MAX; i++) {
3845 start = range[i][0];
3848 if (c >= start && c <= end) {
3853 (*o_iso2022jp_check_conv)(c2,c1);
3857 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3859 static const unsigned char *mime_pattern[] = {
3860 (const unsigned char *)"\075?EUC-JP?B?",
3861 (const unsigned char *)"\075?SHIFT_JIS?B?",
3862 (const unsigned char *)"\075?ISO-8859-1?Q?",
3863 (const unsigned char *)"\075?ISO-8859-1?B?",
3864 (const unsigned char *)"\075?ISO-2022-JP?B?",
3865 (const unsigned char *)"\075?ISO-2022-JP?B?",
3866 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3867 #if defined(UTF8_INPUT_ENABLE)
3868 (const unsigned char *)"\075?UTF-8?B?",
3869 (const unsigned char *)"\075?UTF-8?Q?",
3871 (const unsigned char *)"\075?US-ASCII?Q?",
3876 /* marker used to raise the priority of the matching input code */
3877 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
3878 e_iconv, s_iconv, 0, 0, 0, 0,
3879 #if defined(UTF8_INPUT_ENABLE)
3885 static const nkf_char mime_encode[] = {
3886 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
3887 #if defined(UTF8_INPUT_ENABLE)
3894 static const nkf_char mime_encode_method[] = {
3895 'B', 'B','Q', 'B', 'B', 'B', 'Q',
3896 #if defined(UTF8_INPUT_ENABLE)
3904 /* MIME preprocessor fifo */
3906 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3907 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
3908 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
3910 unsigned char buf[MIME_BUF_SIZE];
3912 unsigned int last; /* decoded */
3913 unsigned int input; /* undecoded */
3915 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
3917 #define MAXRECOVER 20
3920 mime_input_buf_unshift(nkf_char c)
3922 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
3926 mime_ungetc(nkf_char c, FILE *f)
3928 mime_input_buf_unshift(c);
3933 mime_ungetc_buf(nkf_char c, FILE *f)
3936 (*i_mungetc_buf)(c,f);
3938 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
3943 mime_getc_buf(FILE *f)
3945 /* we don't keep eof of mime_input_buf, because it contains ?= as
3946 a terminator. It was checked in mime_integrity. */
3947 return ((mimebuf_f)?
3948 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
3952 switch_mime_getc(void)
3954 if (i_getc!=mime_getc) {
3955 i_mgetc = i_getc; i_getc = mime_getc;
3956 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3957 if(mime_f==STRICT_MIME) {
3958 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3959 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3965 unswitch_mime_getc(void)
3967 if(mime_f==STRICT_MIME) {
3968 i_mgetc = i_mgetc_buf;
3969 i_mungetc = i_mungetc_buf;
3972 i_ungetc = i_mungetc;
3973 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3974 mime_iconv_back = NULL;
3978 mime_integrity(FILE *f, const unsigned char *p)
3982 /* In buffered mode, read until =? or NL or buffer full
3984 mime_input_state.input = mime_input_state.top;
3985 mime_input_state.last = mime_input_state.top;
3987 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
3989 q = mime_input_state.input;
3990 while((c=(*i_getc)(f))!=EOF) {
3991 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3992 break; /* buffer full */
3994 if (c=='=' && d=='?') {
3995 /* checked. skip header, start decode */
3996 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3997 /* mime_last_input = mime_input_state.input; */
3998 mime_input_state.input = q;
4002 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4004 /* Should we check length mod 4? */
4005 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4008 /* In case of Incomplete MIME, no MIME decode */
4009 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4010 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
4011 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
4012 switch_mime_getc(); /* anyway we need buffered getc */
4017 mime_begin_strict(FILE *f)
4021 const unsigned char *p,*q;
4022 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4024 mime_decode_mode = FALSE;
4025 /* =? has been checked */
4027 p = mime_pattern[j];
4030 for(i=2;p[i]>SP;i++) { /* start at =? */
4031 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
4032 /* pattern fails, try next one */
4034 while (mime_pattern[++j]) {
4035 p = mime_pattern[j];
4036 for(k=2;k<i;k++) /* assume length(p) > i */
4037 if (p[k]!=q[k]) break;
4038 if (k==i && nkf_toupper(c1)==p[k]) break;
4040 p = mime_pattern[j];
4041 if (p) continue; /* found next one, continue */
4042 /* all fails, output from recovery buffer */
4050 mime_decode_mode = p[i-2];
4052 mime_iconv_back = iconv;
4053 set_iconv(FALSE, mime_priority_func[j]);
4054 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4056 if (mime_decode_mode=='B') {
4057 mimebuf_f = unbuf_f;
4059 /* do MIME integrity check */
4060 return mime_integrity(f,mime_pattern[j]);
4074 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4075 /* re-read and convert again from mime_buffer. */
4077 /* =? has been checked */
4078 k = mime_input_state.last;
4079 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
4080 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4081 /* We accept any character type even if it is broken by new lines */
4082 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4083 if (c1==LF||c1==SP||c1==CR||
4084 c1=='-'||c1=='_'||is_alnum(c1)) continue;
4086 /* Failed. But this could be another MIME preamble */
4088 mime_input_state.last--;
4094 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4095 if (!(++i<MAXRECOVER) || c1==EOF) break;
4096 if (c1=='b'||c1=='B') {
4097 mime_decode_mode = 'B';
4098 } else if (c1=='q'||c1=='Q') {
4099 mime_decode_mode = 'Q';
4103 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4104 if (!(++i<MAXRECOVER) || c1==EOF) break;
4106 mime_decode_mode = FALSE;
4112 if (!mime_decode_mode) {
4113 /* false MIME preamble, restart from mime_buffer */
4114 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4115 /* Since we are in MIME mode until buffer becomes empty, */
4116 /* we never go into mime_begin again for a while. */
4119 /* discard mime preamble, and goto MIME mode */
4120 mime_input_state.last = k;
4121 /* do no MIME integrity check */
4122 return c1; /* used only for checking EOF */
4133 debug(const char *str)
4136 fprintf(stderr, "%s\n", str ? str : "NULL");
4142 set_input_codename(const char *codename)
4144 if (!input_codename) {
4145 input_codename = codename;
4146 } else if (strcmp(codename, input_codename) != 0) {
4147 input_codename = "";
4152 get_guessed_code(void)
4154 if (input_codename && !*input_codename) {
4155 input_codename = "BINARY";
4157 struct input_code *p = find_inputcode_byfunc(iconv);
4158 if (!input_codename) {
4159 input_codename = "ASCII";
4160 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4161 if (p->score & (SCORE_DEPEND|SCORE_CP932))
4162 input_codename = "CP932";
4163 } else if (strcmp(input_codename, "EUC-JP") == 0) {
4164 if (p->score & (SCORE_X0212))
4165 input_codename = "EUCJP-MS";
4166 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4167 input_codename = "CP51932";
4168 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4169 if (p->score & (SCORE_KANA))
4170 input_codename = "CP50221";
4171 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4172 input_codename = "CP50220";
4175 return input_codename;
4178 #if !defined(PERL_XS) && !defined(WIN32DLL)
4180 print_guessed_code(char *filename)
4182 if (filename != NULL) printf("%s: ", filename);
4183 if (input_codename && !*input_codename) {
4186 input_codename = get_guessed_code();
4188 printf("%s\n", input_codename);
4192 input_eol == CR ? " (CR)" :
4193 input_eol == LF ? " (LF)" :
4194 input_eol == CRLF ? " (CRLF)" :
4195 input_eol == EOF ? " (MIXED NL)" :
4205 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4207 nkf_char c1, c2, c3;
4213 if (!nkf_isxdigit(c2)){
4218 if (!nkf_isxdigit(c3)){
4223 return (hex2bin(c2) << 4) | hex2bin(c3);
4229 return hex_getc(':', f, i_cgetc, i_cungetc);
4233 cap_ungetc(nkf_char c, FILE *f)
4235 return (*i_cungetc)(c, f);
4241 return hex_getc('%', f, i_ugetc, i_uungetc);
4245 url_ungetc(nkf_char c, FILE *f)
4247 return (*i_uungetc)(c, f);
4251 #ifdef NUMCHAR_OPTION
4253 numchar_getc(FILE *f)
4255 nkf_char (*g)(FILE *) = i_ngetc;
4256 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4267 if (buf[i] == 'x' || buf[i] == 'X'){
4268 for (j = 0; j < 7; j++){
4270 if (!nkf_isxdigit(buf[i])){
4277 c |= hex2bin(buf[i]);
4280 for (j = 0; j < 8; j++){
4284 if (!nkf_isdigit(buf[i])){
4291 c += hex2bin(buf[i]);
4297 return nkf_char_unicode_new(c);
4307 numchar_ungetc(nkf_char c, FILE *f)
4309 return (*i_nungetc)(c, f);
4313 #ifdef UNICODE_NORMALIZATION
4318 nkf_char (*g)(FILE *f) = i_nfc_getc;
4319 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4320 nkf_buf_t *buf = nkf_state->nfc_buf;
4321 const unsigned char *array;
4322 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4323 nkf_char c = (*g)(f);
4325 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4327 nkf_buf_push(buf, c);
4329 while (lower <= upper) {
4330 int mid = (lower+upper) / 2;
4332 array = normalization_table[mid].nfd;
4333 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4334 if (len >= nkf_buf_length(buf)) {
4338 lower = 1, upper = 0;
4341 nkf_buf_push(buf, c);
4343 if (array[len] != nkf_buf_at(buf, len)) {
4344 if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4345 else upper = mid - 1;
4352 array = normalization_table[mid].nfc;
4354 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4355 nkf_buf_push(buf, array[i]);
4359 } while (lower <= upper);
4361 while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4362 c = nkf_buf_pop(buf);
4368 nfc_ungetc(nkf_char c, FILE *f)
4370 return (*i_nfc_ungetc)(c, f);
4372 #endif /* UNICODE_NORMALIZATION */
4376 base64decode(nkf_char c)
4381 i = c - 'A'; /* A..Z 0-25 */
4382 } else if (c == '_') {
4383 i = '?' /* 63 */ ; /* _ 63 */
4385 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4387 } else if (c > '/') {
4388 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4389 } else if (c == '+' || c == '-') {
4390 i = '>' /* 62 */ ; /* + and - 62 */
4392 i = '?' /* 63 */ ; /* / 63 */
4400 nkf_char c1, c2, c3, c4, cc;
4401 nkf_char t1, t2, t3, t4, mode, exit_mode;
4402 nkf_char lwsp_count;
4405 nkf_char lwsp_size = 128;
4407 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4408 return mime_input_buf(mime_input_state.top++);
4410 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4411 mime_decode_mode=FALSE;
4412 unswitch_mime_getc();
4413 return (*i_getc)(f);
4416 if (mimebuf_f == FIXED_MIME)
4417 exit_mode = mime_decode_mode;
4420 if (mime_decode_mode == 'Q') {
4421 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4423 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4424 if (c1<=SP || DEL<=c1) {
4425 mime_decode_mode = exit_mode; /* prepare for quit */
4428 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4432 mime_decode_mode = exit_mode; /* prepare for quit */
4433 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4434 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4435 /* end Q encoding */
4436 input_mode = exit_mode;
4438 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4439 while ((c1=(*i_getc)(f))!=EOF) {
4444 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4452 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4453 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4468 lwsp_buf[lwsp_count] = (unsigned char)c1;
4469 if (lwsp_count++>lwsp_size){
4471 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4472 lwsp_buf = lwsp_buf_new;
4478 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4480 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4481 i_ungetc(lwsp_buf[lwsp_count],f);
4484 nkf_xfree(lwsp_buf);
4487 if (c1=='='&&c2<SP) { /* this is soft wrap */
4488 while((c1 = (*i_mgetc)(f)) <=SP) {
4489 if (c1 == EOF) return (EOF);
4491 mime_decode_mode = 'Q'; /* still in MIME */
4492 goto restart_mime_q;
4495 mime_decode_mode = 'Q'; /* still in MIME */
4499 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4500 if (c2<=SP) return c2;
4501 mime_decode_mode = 'Q'; /* still in MIME */
4502 return ((hex2bin(c2)<<4) + hex2bin(c3));
4505 if (mime_decode_mode != 'B') {
4506 mime_decode_mode = FALSE;
4507 return (*i_mgetc)(f);
4511 /* Base64 encoding */
4513 MIME allows line breaks in the middle of
4514 Base64, but we are very pessimistic in decoding
4515 in unbuf mode because MIME encoded code may be broken by
4516 less or an editor's control sequence (such as ESC-[-K) in unbuffered
4517 mode. Ignore incomplete MIME.
4519 mode = mime_decode_mode;
4520 mime_decode_mode = exit_mode; /* prepare for quit */
4522 while ((c1 = (*i_mgetc)(f))<=SP) {
4527 if ((c2 = (*i_mgetc)(f))<=SP) {
4530 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4531 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4534 if ((c1 == '?') && (c2 == '=')) {
4537 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4538 while ((c1=(*i_getc)(f))!=EOF) {
4543 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4551 if ((c1=(*i_getc)(f))!=EOF) {
4555 } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4570 lwsp_buf[lwsp_count] = (unsigned char)c1;
4571 if (lwsp_count++>lwsp_size){
4573 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4574 lwsp_buf = lwsp_buf_new;
4580 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4582 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4583 i_ungetc(lwsp_buf[lwsp_count],f);
4586 nkf_xfree(lwsp_buf);
4590 if ((c3 = (*i_mgetc)(f))<=SP) {
4593 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4594 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4598 if ((c4 = (*i_mgetc)(f))<=SP) {
4601 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4602 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4606 mime_decode_mode = mode; /* still in MIME sigh... */
4608 /* BASE 64 decoding */
4610 t1 = 0x3f & base64decode(c1);
4611 t2 = 0x3f & base64decode(c2);
4612 t3 = 0x3f & base64decode(c3);
4613 t4 = 0x3f & base64decode(c4);
4614 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4616 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4617 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4619 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4620 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4622 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4627 return mime_input_buf(mime_input_state.top++);
4630 static const char basis_64[] =
4631 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4633 #define MIMEOUT_BUF_LENGTH 74
4635 char buf[MIMEOUT_BUF_LENGTH+1];
4639 /*nkf_char mime_lastchar2, mime_lastchar1;*/
4642 open_mime(nkf_char mode)
4644 const unsigned char *p;
4647 p = mime_pattern[0];
4648 for(i=0;mime_pattern[i];i++) {
4649 if (mode == mime_encode[i]) {
4650 p = mime_pattern[i];
4654 mimeout_mode = mime_encode_method[i];
4656 if (base64_count>45) {
4657 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4658 (*o_mputc)(mimeout_state.buf[i]);
4661 put_newline(o_mputc);
4664 if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
4668 for (;i<mimeout_state.count;i++) {
4669 if (nkf_isspace(mimeout_state.buf[i])) {
4670 (*o_mputc)(mimeout_state.buf[i]);
4680 j = mimeout_state.count;
4681 mimeout_state.count = 0;
4683 mime_putc(mimeout_state.buf[i]);
4688 mime_prechar(nkf_char c2, nkf_char c1)
4690 if (mimeout_mode > 0){
4692 if (base64_count + mimeout_state.count/3*4> 73){
4693 (*o_base64conv)(EOF,0);
4694 oconv_newline(o_base64conv);
4695 (*o_base64conv)(0,SP);
4699 if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
4700 (*o_base64conv)(EOF,0);
4701 oconv_newline(o_base64conv);
4702 (*o_base64conv)(0,SP);
4708 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4709 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4710 open_mime(output_mode);
4711 (*o_base64conv)(EOF,0);
4712 oconv_newline(o_base64conv);
4713 (*o_base64conv)(0,SP);
4732 switch(mimeout_mode) {
4737 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
4743 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
4748 if (mimeout_mode > 0) {
4749 if (mimeout_f!=FIXED_MIME) {
4751 } else if (mimeout_mode != 'Q')
4757 mimeout_addchar(nkf_char c)
4759 switch(mimeout_mode) {
4764 } else if(!nkf_isalnum(c)) {
4766 (*o_mputc)(bin2hex(((c>>4)&0xf)));
4767 (*o_mputc)(bin2hex((c&0xf)));
4775 nkf_state->mimeout_state=c;
4776 (*o_mputc)(basis_64[c>>2]);
4781 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4782 nkf_state->mimeout_state=c;
4787 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4788 (*o_mputc)(basis_64[c & 0x3F]);
4800 mime_putc(nkf_char c)
4805 if (mimeout_f == FIXED_MIME){
4806 if (mimeout_mode == 'Q'){
4807 if (base64_count > 71){
4808 if (c!=CR && c!=LF) {
4810 put_newline(o_mputc);
4815 if (base64_count > 71){
4817 put_newline(o_mputc);
4820 if (c == EOF) { /* c==EOF */
4824 if (c != EOF) { /* c==EOF */
4830 /* mimeout_f != FIXED_MIME */
4832 if (c == EOF) { /* c==EOF */
4833 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4834 j = mimeout_state.count;
4835 mimeout_state.count = 0;
4837 if (mimeout_mode > 0) {
4838 if (!nkf_isblank(mimeout_state.buf[j-1])) {
4840 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4843 mimeout_addchar(mimeout_state.buf[i]);
4847 mimeout_addchar(mimeout_state.buf[i]);
4851 mimeout_addchar(mimeout_state.buf[i]);
4857 mimeout_addchar(mimeout_state.buf[i]);
4863 if (mimeout_state.count > 0){
4864 lastchar = mimeout_state.buf[mimeout_state.count - 1];
4869 if (mimeout_mode=='Q') {
4870 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4871 if (c == CR || c == LF) {
4876 } else if (c <= SP) {
4878 if (base64_count > 70) {
4879 put_newline(o_mputc);
4882 if (!nkf_isblank(c)) {
4887 if (base64_count > 70) {
4889 put_newline(o_mputc);
4892 open_mime(output_mode);
4894 if (!nkf_noescape_mime(c)) {
4907 if (mimeout_mode <= 0) {
4908 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
4909 output_mode == UTF_8)) {
4910 if (nkf_isspace(c)) {
4912 if (mimeout_mode == -1) {
4915 if (c==CR || c==LF) {
4917 open_mime(output_mode);
4923 for (i=0;i<mimeout_state.count;i++) {
4924 (*o_mputc)(mimeout_state.buf[i]);
4925 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4936 mimeout_state.buf[0] = (char)c;
4937 mimeout_state.count = 1;
4939 if (base64_count > 1
4940 && base64_count + mimeout_state.count > 76
4941 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
4942 static const char *str = "boundary=\"";
4943 static int len = 10;
4946 for (; i < mimeout_state.count - len; ++i) {
4947 if (!strncmp(mimeout_state.buf+i, str, len)) {
4953 if (i == 0 || i == mimeout_state.count - len) {
4954 put_newline(o_mputc);
4956 if (!nkf_isspace(mimeout_state.buf[0])){
4963 for (j = 0; j <= i; ++j) {
4964 (*o_mputc)(mimeout_state.buf[j]);
4966 put_newline(o_mputc);
4968 for (; j <= mimeout_state.count; ++j) {
4969 mimeout_state.buf[j - i] = mimeout_state.buf[j];
4971 mimeout_state.count -= i;
4974 mimeout_state.buf[mimeout_state.count++] = (char)c;
4975 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4976 open_mime(output_mode);
4981 if (lastchar==CR || lastchar == LF){
4982 for (i=0;i<mimeout_state.count;i++) {
4983 (*o_mputc)(mimeout_state.buf[i]);
4986 mimeout_state.count = 0;
4989 for (i=0;i<mimeout_state.count-1;i++) {
4990 (*o_mputc)(mimeout_state.buf[i]);
4993 mimeout_state.buf[0] = SP;
4994 mimeout_state.count = 1;
4996 open_mime(output_mode);
4999 /* mimeout_mode == 'B', 1, 2 */
5000 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5001 output_mode == UTF_8)) {
5002 if (lastchar == CR || lastchar == LF){
5003 if (nkf_isblank(c)) {
5004 for (i=0;i<mimeout_state.count;i++) {
5005 mimeout_addchar(mimeout_state.buf[i]);
5007 mimeout_state.count = 0;
5010 for (i=0;i<mimeout_state.count;i++) {
5011 (*o_mputc)(mimeout_state.buf[i]);
5014 mimeout_state.count = 0;
5016 mimeout_state.buf[mimeout_state.count++] = (char)c;
5019 if (nkf_isspace(c)) {
5020 for (i=0;i<mimeout_state.count;i++) {
5021 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5023 for (i=0;i<mimeout_state.count;i++) {
5024 (*o_mputc)(mimeout_state.buf[i]);
5027 mimeout_state.count = 0;
5030 mimeout_state.buf[mimeout_state.count++] = (char)c;
5031 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5033 for (i=0;i<mimeout_state.count;i++) {
5034 (*o_mputc)(mimeout_state.buf[i]);
5037 mimeout_state.count = 0;
5041 if (mimeout_state.count>0 && SP<c && c!='=') {
5042 mimeout_state.buf[mimeout_state.count++] = (char)c;
5043 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5044 j = mimeout_state.count;
5045 mimeout_state.count = 0;
5047 mimeout_addchar(mimeout_state.buf[i]);
5054 if (mimeout_state.count>0) {
5055 j = mimeout_state.count;
5056 mimeout_state.count = 0;
5058 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5060 mimeout_addchar(mimeout_state.buf[i]);
5066 (*o_mputc)(mimeout_state.buf[i]);
5068 open_mime(output_mode);
5075 base64_conv(nkf_char c2, nkf_char c1)
5077 mime_prechar(c2, c1);
5078 (*o_base64conv)(c2,c1);
5082 typedef struct nkf_iconv_t {
5085 size_t input_buffer_size;
5086 char *output_buffer;
5087 size_t output_buffer_size;
5091 nkf_iconv_new(char *tocode, char *fromcode)
5093 nkf_iconv_t converter;
5095 converter->input_buffer_size = IOBUF_SIZE;
5096 converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5097 converter->output_buffer_size = IOBUF_SIZE * 2;
5098 converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5099 converter->cd = iconv_open(tocode, fromcode);
5100 if (converter->cd == (iconv_t)-1)
5104 perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5107 perror("can't iconv_open");
5113 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5115 size_t invalid = (size_t)0;
5116 char *input_buffer = converter->input_buffer;
5117 size_t input_length = (size_t)0;
5118 char *output_buffer = converter->output_buffer;
5119 size_t output_length = converter->output_buffer_size;
5124 while ((c = (*i_getc)(f)) != EOF) {
5125 input_buffer[input_length++] = c;
5126 if (input_length < converter->input_buffer_size) break;
5130 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5131 while (output_length-- > 0) {
5132 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5134 if (ret == (size_t) - 1) {
5137 if (input_buffer != converter->input_buffer)
5138 memmove(converter->input_buffer, input_buffer, input_length);
5141 converter->output_buffer_size *= 2;
5142 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5143 if (output_buffer == NULL) {
5144 perror("can't realloc");
5147 converter->output_buffer = output_buffer;
5150 perror("can't iconv");
/*
 * nkf_iconv_close: tear down a converter.
 * Frees the input and output buffers and closes the iconv descriptor,
 * i.e. the same three fields that nkf_iconv_new fills in
 * (input_buffer, output_buffer, cd).
 *
 * convert: converter to release. The body previously referred to an
 * undeclared `converter` and to fields named inbuf/outbuf, which do not
 * match the parameter name or the field names used elsewhere.
 */
5163 nkf_iconv_close(nkf_iconv_t *convert)
5165 nkf_xfree(convert->input_buffer);
5166 nkf_xfree(convert->output_buffer);
5167 iconv_close(convert->cd);
5176 struct input_code *p = input_code_list;
5188 mime_f = MIME_DECODE_DEFAULT;
5189 mime_decode_f = FALSE;
5194 x0201_f = NKF_UNSPECIFIED;
5195 iso2022jp_f = FALSE;
5196 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5197 ms_ucs_map_f = UCS_MAP_ASCII;
5199 #ifdef UTF8_INPUT_ENABLE
5200 no_cp932ext_f = FALSE;
5201 no_best_fit_chars_f = FALSE;
5202 encode_fallback = NULL;
5203 unicode_subchar = '?';
5204 input_endian = ENDIAN_BIG;
5206 #ifdef UTF8_OUTPUT_ENABLE
5207 output_bom_f = FALSE;
5208 output_endian = ENDIAN_BIG;
5210 #ifdef UNICODE_NORMALIZATION
5226 #ifdef SHIFTJIS_CP932
5236 for (i = 0; i < 256; i++){
5237 prefix_table[i] = 0;
5241 mimeout_state.count = 0;
5246 fold_preserve_f = FALSE;
5249 kanji_intro = DEFAULT_J;
5250 ascii_intro = DEFAULT_R;
5251 fold_margin = FOLD_MARGIN;
5252 o_zconv = no_connection;
5253 o_fconv = no_connection;
5254 o_eol_conv = no_connection;
5255 o_rot_conv = no_connection;
5256 o_hira_conv = no_connection;
5257 o_base64conv = no_connection;
5258 o_iso2022jp_check_conv = no_connection;
5261 i_ungetc = std_ungetc;
5263 i_bungetc = std_ungetc;
5266 i_mungetc = std_ungetc;
5267 i_mgetc_buf = std_getc;
5268 i_mungetc_buf = std_ungetc;
5269 output_mode = ASCII;
5271 mime_decode_mode = FALSE;
5277 z_prev2=0,z_prev1=0;
5279 iconv_for_check = 0;
5281 input_codename = NULL;
5282 input_encoding = NULL;
5283 output_encoding = NULL;
5291 module_connection(void)
5293 if (input_encoding) set_input_encoding(input_encoding);
5294 if (!output_encoding) {
5295 output_encoding = nkf_default_encoding();
5297 if (!output_encoding) {
5298 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5301 set_output_encoding(output_encoding);
5302 oconv = nkf_enc_to_oconv(output_encoding);
5304 if (nkf_enc_unicode_p(output_encoding))
5305 output_mode = UTF_8;
5307 if (x0201_f == NKF_UNSPECIFIED) {
5308 x0201_f = X0201_DEFAULT;
5311 /* replace continuation module, from output side */
5313 /* output redirection */
5315 if (noout_f || guess_f){
5322 if (mimeout_f == TRUE) {
5323 o_base64conv = oconv; oconv = base64_conv;
5325 /* base64_count = 0; */
5328 if (eolmode_f || guess_f) {
5329 o_eol_conv = oconv; oconv = eol_conv;
5332 o_rot_conv = oconv; oconv = rot_conv;
5335 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5338 o_hira_conv = oconv; oconv = hira_conv;
5341 o_fconv = oconv; oconv = fold_conv;
5344 if (alpha_f || x0201_f) {
5345 o_zconv = oconv; oconv = z_conv;
5349 i_ungetc = std_ungetc;
5350 /* input redicrection */
5353 i_cgetc = i_getc; i_getc = cap_getc;
5354 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5357 i_ugetc = i_getc; i_getc = url_getc;
5358 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5361 #ifdef NUMCHAR_OPTION
5363 i_ngetc = i_getc; i_getc = numchar_getc;
5364 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5367 #ifdef UNICODE_NORMALIZATION
5369 i_nfc_getc = i_getc; i_getc = nfc_getc;
5370 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5373 if (mime_f && mimebuf_f==FIXED_MIME) {
5374 i_mgetc = i_getc; i_getc = mime_getc;
5375 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5378 i_bgetc = i_getc; i_getc = broken_getc;
5379 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5381 if (input_encoding) {
5382 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5384 set_iconv(FALSE, e_iconv);
5388 struct input_code *p = input_code_list;
5397 Conversion main loop. Code detection only.
5400 #if !defined(PERL_XS) && !defined(WIN32DLL)
5407 module_connection();
5408 while ((c = (*i_getc)(f)) != EOF)
/*
 * Loop-control shorthands used in the body of kanji_convert().
 * SKIP and MORE each expand to two statements (an assignment followed by
 * continue), so they are only safe inside a braced block.
 */
5415 #define NEXT continue /* no output, get next */
5416 #define SKIP c2=0;continue /* clear c2, no output, get next */
5417 #define MORE c2=c1;continue /* keep c1 in c2, need one more byte */
5418 #define SEND ; /* output c1 and c2, get next */
5419 #define LAST break /* end of loop, go closing */
/*
 * set_input_mode(mode): store mode in input_mode and pass "ISO-2022-JP"
 * to set_input_codename() and debug().
 * NOTE(review): parts of this macro are missing from this listing.
 */
5420 #define set_input_mode(mode) do { \
5421 input_mode = mode; \
5423 set_input_codename("ISO-2022-JP"); \
5424 debug("ISO-2022-JP"); \
/*
 * kanji_convert: read the stream f through i_getc and feed it to the
 * converters.
 *
 * - When iconv is w_iconv32 or w_iconv16 the input is consumed in groups
 *   of 4 or 2 bytes and passed to nkf_iconv_utf_32() / nkf_iconv_utf_16().
 * - Otherwise bytes are read one at a time; the loop looks at input_mode,
 *   at ESC / SI / SO sequences and at MIME starts, and passes characters
 *   to (*iconv) or (*oconv).
 * - After the loop (*iconv)(EOF, 0, 0) is called and, if no input code
 *   name was recorded, one is taken from input_code_list, where an entry
 *   with a lower score replaces the current pick.
 *
 * NOTE(review): many lines (braces, NEXT/SKIP/MORE/SEND statements, the
 * return type and return statements) are absent from this listing; the
 * comments describe only the statements that are shown.
 */
5428 kanji_convert(FILE *f)
5430 nkf_char c1=0, c2=0, c3=0, c4=0;
5431 int shift_mode = 0; /* 0, 1, 2, 3 */
5433 int is_8bit = FALSE;
5435 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5440 output_mode = ASCII;
/* A negative result from module_connection() is reported as a missing output encoding. */
5442 if (module_connection() < 0) {
5443 #if !defined(PERL_XS) && !defined(WIN32DLL)
5444 fprintf(stderr, "no output encoding given\n");
5450 #ifdef UTF8_INPUT_ENABLE
/* UTF-32 input: four bytes per call; an EOF on any of the four reads ends the loop. */
5451 if(iconv == w_iconv32){
5452 while ((c1 = (*i_getc)(f)) != EOF &&
5453 (c2 = (*i_getc)(f)) != EOF &&
5454 (c3 = (*i_getc)(f)) != EOF &&
5455 (c4 = (*i_getc)(f)) != EOF) {
5456 nkf_iconv_utf_32(c1, c2, c3, c4);
/*
 * UTF-16 input: two bytes per call.  When nkf_iconv_utf_16() returns -2,
 * two more bytes are read and it is called again with all four
 * (presumably a surrogate pair -- confirm against nkf_iconv_utf_16).
 */
5460 else if (iconv == w_iconv16) {
5461 while ((c1 = (*i_getc)(f)) != EOF &&
5462 (c2 = (*i_getc)(f)) != EOF) {
5463 if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
5464 (c3 = (*i_getc)(f)) != EOF &&
5465 (c4 = (*i_getc)(f)) != EOF) {
5466 nkf_iconv_utf_16(c1, c2, c3, c4);
/* All other input: one byte per iteration. */
5473 while ((c1 = (*i_getc)(f)) != EOF) {
5474 #ifdef INPUT_CODE_FIX
5475 if (!input_encoding)
5481 /* in case of 8th bit is on */
5482 if (!estab_f&&!mime_decode_mode) {
5483 /* in case of not established yet */
5484 /* It is still ambiguous */
5485 if (h_conv(f, c2, c1)==EOF) {
5493 /* in case of already established */
5495 /* ignore bogus code */
5503 /* 2nd byte of 7 bit code or SJIS */
5507 else if (nkf_char_unicode_p(c1)) {
5513 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5516 }else if (input_codename && input_codename[0] == 'I' &&
5517 0xA1 <= c1 && c1 <= 0xDF) {
5518 /* JIS X 0201 Katakana in 8bit JIS */
5519 c2 = JIS_X_0201_1976_K;
5522 } else if (c1 > DEL) {
5524 if (!estab_f && !iso8859_f) {
5525 /* not established yet */
5527 } else { /* estab_f==TRUE */
5533 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5534 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5536 c2 = JIS_X_0201_1976_K;
5541 /* already established */
5545 } else if (SP < c1 && c1 < DEL) {
5546 /* in case of Roman characters */
5548 /* output 1 shifted byte */
5552 } else if (nkf_byte_jisx0201_katakana_p(c1)){
5553 /* output 1 shifted byte */
5554 c2 = JIS_X_0201_1976_K;
5557 /* look like bogus code */
5560 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5561 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5562 /* in case of Kanji shifted */
5564 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5565 /* Check MIME code */
5566 if ((c1 = (*i_getc)(f)) == EOF) {
5569 } else if (c1 == '?') {
5570 /* =? is mime conversion start sequence */
5571 if(mime_f == STRICT_MIME) {
5572 /* check in real detail */
5573 if (mime_begin_strict(f) == EOF)
5576 } else if (mime_begin(f) == EOF)
5585 /* normal ASCII code */
5588 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5591 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
5594 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
5595 if ((c1 = (*i_getc)(f)) == EOF) {
5599 else if (c1 == '&') {
5601 if ((c1 = (*i_getc)(f)) == EOF) {
5607 else if (c1 == '$') {
5609 if ((c1 = (*i_getc)(f)) == EOF) {
5610 /* don't send bogus code
5612 (*oconv)(0, '$'); */
5614 } else if (c1 == '@' || c1 == 'B') {
5616 set_input_mode(JIS_X_0208);
5618 } else if (c1 == '(') {
5620 if ((c1 = (*i_getc)(f)) == EOF) {
5621 /* don't send bogus code
5627 } else if (c1 == '@'|| c1 == 'B') {
5629 set_input_mode(JIS_X_0208);
5632 } else if (c1 == 'D'){
5633 set_input_mode(JIS_X_0212);
5635 #endif /* X0212_ENABLE */
5636 } else if (c1 == 'O' || c1 == 'Q'){
5637 set_input_mode(JIS_X_0213_1);
5639 } else if (c1 == 'P'){
5640 set_input_mode(JIS_X_0213_2);
5643 /* could be some special code */
5650 } else if (broken_f&0x2) {
5651 /* accept any ESC-(-x as broken code ... */
5652 input_mode = JIS_X_0208;
5661 } else if (c1 == '(') {
5663 if ((c1 = (*i_getc)(f)) == EOF) {
5664 /* don't send bogus code
5666 (*oconv)(0, '('); */
5669 else if (c1 == 'I') {
5670 /* JIS X 0201 Katakana */
5671 set_input_mode(JIS_X_0201_1976_K);
5674 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5675 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5676 set_input_mode(ASCII);
5679 else if (broken_f&0x2) {
5680 set_input_mode(ASCII);
5689 else if (c1 == '.') {
5691 if ((c1 = (*i_getc)(f)) == EOF) {
5694 else if (c1 == 'A') {
5705 else if (c1 == 'N') {
5708 if (g2 == ISO_8859_1) {
5723 } else if (c1 == ESC && iconv == s_iconv) {
5724 /* ESC in Shift_JIS */
5725 if ((c1 = (*i_getc)(f)) == EOF) {
5728 } else if (c1 == '$') {
5730 if ((c1 = (*i_getc)(f)) == EOF) {
5732 } else if (('E' <= c1 && c1 <= 'G') ||
5733 ('O' <= c1 && c1 <= 'Q')) {
/*
 * c1 % 7 is 6, 0, 1 for 'E', 'F', 'G' and 2, 3, 4 for 'O', 'P', 'Q';
 * index 5 of the table is never selected by these letters.
 */
5741 static const nkf_char jphone_emoji_first_table[7] =
5742 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5743 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5744 if ((c1 = (*i_getc)(f)) == EOF) LAST;
/* Each following byte in SP..'z' is emitted as c1 + c3. */
5745 while (SP <= c1 && c1 <= 'z') {
5746 (*oconv)(0, c1 + c3);
5747 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5762 } else if (c1 == LF || c1 == CR) {
5764 input_mode = ASCII; set_iconv(FALSE, 0);
5766 } else if (mime_decode_f && !mime_decode_mode){
5768 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5776 } else { /* if (c1 == CR)*/
5777 if ((c1=(*i_getc)(f))!=EOF) {
5781 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
5801 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5804 if ((c3 = (*i_getc)(f)) != EOF) {
5807 if ((c4 = (*i_getc)(f)) != EOF) {
5809 (*iconv)(c2, c1, c3|c4);
5814 /* 3 bytes EUC or UTF-8 */
5815 if ((c3 = (*i_getc)(f)) != EOF) {
5817 (*iconv)(c2, c1, c3);
5825 0x7F <= c2 && c2 <= 0x92 &&
5826 0x21 <= c1 && c1 <= 0x7E) {
/* Linear index (94 cells per row, rows counted from 0x7F) offset into U+E000 and up. */
5828 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5831 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5835 (*oconv)(PREFIX_EUCG3 | c2, c1);
5837 #endif /* X0212_ENABLE */
5839 (*oconv)(PREFIX_EUCG3 | c2, c1);
5842 (*oconv)(input_mode, c1); /* other special case */
5848 /* goto next_word */
/* End of input is signalled to the input converter. */
5853 (*iconv)(EOF, 0, 0);
5854 if (!input_codename)
5857 struct input_code *p = input_code_list;
5858 struct input_code *result = p;
5860 if (p->score < result->score) result = p;
5863 set_input_codename(result->name);
5865 debug(result->name);
5873 * int options(unsigned char *cp)
/*
 * options: parse one option string and set the corresponding global
 * flags and encodings.
 *
 * Everything up to and including the first '-' in cp is skipped; the
 * characters that follow select the cases below.  After "--" the text is
 * matched against the names in long_option[]: an entry with a non-empty
 * alias continues parsing at the alias string, the remaining names are
 * handled by the strcmp() chain, where p points just past the matched
 * name (the argument of "name=" options).
 *
 * NOTE(review): the switch statement, most break/continue lines, the
 * return type and the return statements are missing from this listing.
 */
5880 options(unsigned char *cp)
5884 unsigned char *cp_back = NULL;
/* Skip up to and including the first '-'. */
5889 while(*cp && *cp++!='-');
5890 while (*cp || cp_back) {
5898 case '-': /* literal options */
5899 if (!*cp || *cp == SP) { /* ignore the rest of arguments */
5903 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
5904 p = (unsigned char *)long_option[i].name;
/* Advance while cp matches the option name, stopping at its end or at '='. */
5905 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5906 if (*p == cp[j] || cp[j] == SP){
5913 #if !defined(PERL_XS) && !defined(WIN32DLL)
5914 fprintf(stderr, "unknown long option: --%s\n", cp);
5918 while(*cp && *cp != SP && cp++);
5919 if (long_option[i].alias[0]){
5921 cp = (unsigned char *)long_option[i].alias;
5924 if (strcmp(long_option[i].name, "help") == 0){
5929 if (strcmp(long_option[i].name, "ic=") == 0){
5930 enc = nkf_enc_find((char *)p);
5932 input_encoding = enc;
5935 if (strcmp(long_option[i].name, "oc=") == 0){
5936 enc = nkf_enc_find((char *)p);
5937 /* if (enc <= 0) continue; */
5939 output_encoding = enc;
5942 if (strcmp(long_option[i].name, "guess=") == 0){
5943 if (p[0] == '0' || p[0] == '1') {
5951 if (strcmp(long_option[i].name, "overwrite") == 0){
5954 preserve_time_f = TRUE;
5957 if (strcmp(long_option[i].name, "overwrite=") == 0){
5960 preserve_time_f = TRUE;
5962 backup_suffix = (char *)p;
5965 if (strcmp(long_option[i].name, "in-place") == 0){
5968 preserve_time_f = FALSE;
5971 if (strcmp(long_option[i].name, "in-place=") == 0){
5974 preserve_time_f = FALSE;
5976 backup_suffix = (char *)p;
5981 if (strcmp(long_option[i].name, "cap-input") == 0){
5985 if (strcmp(long_option[i].name, "url-input") == 0){
5990 #ifdef NUMCHAR_OPTION
5991 if (strcmp(long_option[i].name, "numchar-input") == 0){
5997 if (strcmp(long_option[i].name, "no-output") == 0){
6001 if (strcmp(long_option[i].name, "debug") == 0){
6006 if (strcmp(long_option[i].name, "cp932") == 0){
6007 #ifdef SHIFTJIS_CP932
6011 #ifdef UTF8_OUTPUT_ENABLE
6012 ms_ucs_map_f = UCS_MAP_CP932;
6016 if (strcmp(long_option[i].name, "no-cp932") == 0){
6017 #ifdef SHIFTJIS_CP932
6021 #ifdef UTF8_OUTPUT_ENABLE
6022 ms_ucs_map_f = UCS_MAP_ASCII;
6026 #ifdef SHIFTJIS_CP932
6027 if (strcmp(long_option[i].name, "cp932inv") == 0){
6034 if (strcmp(long_option[i].name, "x0212") == 0){
6041 if (strcmp(long_option[i].name, "exec-in") == 0){
6045 if (strcmp(long_option[i].name, "exec-out") == 0){
6050 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6051 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6052 no_cp932ext_f = TRUE;
6055 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6056 no_best_fit_chars_f = TRUE;
/* The fb-* options choose the function stored in encode_fallback. */
6059 if (strcmp(long_option[i].name, "fb-skip") == 0){
6060 encode_fallback = NULL;
6063 if (strcmp(long_option[i].name, "fb-html") == 0){
6064 encode_fallback = encode_fallback_html;
6067 if (strcmp(long_option[i].name, "fb-xml") == 0){
6068 encode_fallback = encode_fallback_xml;
6071 if (strcmp(long_option[i].name, "fb-java") == 0){
6072 encode_fallback = encode_fallback_java;
6075 if (strcmp(long_option[i].name, "fb-perl") == 0){
6076 encode_fallback = encode_fallback_perl;
6079 if (strcmp(long_option[i].name, "fb-subchar") == 0){
6080 encode_fallback = encode_fallback_subchar;
6083 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6084 encode_fallback = encode_fallback_subchar;
6085 unicode_subchar = 0;
/*
 * The argument is accumulated digit by digit as decimal, hexadecimal
 * (second character 'x' or 'X') or octal.
 * NOTE(review): i, the long_option index above, is reused here as the
 * digit counter.
 */
6087 /* decimal number */
6088 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6089 unicode_subchar *= 10;
6090 unicode_subchar += hex2bin(p[i]);
6092 }else if(p[1] == 'x' || p[1] == 'X'){
6093 /* hexadecimal number */
6094 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6095 unicode_subchar <<= 4;
6096 unicode_subchar |= hex2bin(p[i]);
6100 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6101 unicode_subchar *= 8;
6102 unicode_subchar += hex2bin(p[i]);
6105 w16e_conv(unicode_subchar, &i, &j);
6106 unicode_subchar = i<<8 | j;
6110 #ifdef UTF8_OUTPUT_ENABLE
6111 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6112 ms_ucs_map_f = UCS_MAP_MS;
6116 #ifdef UNICODE_NORMALIZATION
6117 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6122 if (strcmp(long_option[i].name, "prefix=") == 0){
/* p[0] is the prefix character; it is stored for every graphic character that follows it. */
6123 if (nkf_isgraph(p[0])){
6124 for (i = 1; nkf_isgraph(p[i]); i++){
6125 prefix_table[p[i]] = p[0];
6130 #if !defined(PERL_XS) && !defined(WIN32DLL)
6131 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6136 case 'b': /* buffered mode */
6139 case 'u': /* non buffered mode */
6142 case 't': /* transparent mode */
6147 } else if (*cp=='2') {
6151 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6159 case 'j': /* JIS output */
6161 output_encoding = nkf_enc_from_index(ISO_2022_JP);
6163 case 'e': /* AT&T EUC output */
6164 output_encoding = nkf_enc_from_index(EUCJP_NKF);
6166 case 's': /* SJIS output */
6167 output_encoding = nkf_enc_from_index(SHIFT_JIS);
6169 case 'l': /* ISO8859 Latin-1 support, no conversion */
6170 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6171 input_encoding = nkf_enc_from_index(ISO_8859_1);
6173 case 'i': /* Kanji IN ESC-$-@/B */
6174 if (*cp=='@'||*cp=='B')
6175 kanji_intro = *cp++;
6177 case 'o': /* ASCII IN ESC-(-J/B/H */
6178 /* ESC ( H was used in initial JUNET messages */
6179 if (*cp=='J'||*cp=='B'||*cp=='H')
6180 ascii_intro = *cp++;
6184 bit:1 katakana->hiragana
6185 bit:2 hiragana->katakana
/* The digit value itself is OR-ed into hira_f. */
6187 if ('9'>= *cp && *cp>='0')
6188 hira_f |= (*cp++ -'0');
6195 #if defined(MSDOS) || defined(__OS2__)
6202 show_configuration();
6210 #ifdef UTF8_OUTPUT_ENABLE
6211 case 'w': /* UTF-{8,16,32} output */
6216 output_encoding = nkf_enc_from_index(UTF_8N);
6218 output_bom_f = TRUE;
6219 output_encoding = nkf_enc_from_index(UTF_8_BOM);
6223 if ('1'== cp[0] && '6'==cp[1]) {
6226 } else if ('3'== cp[0] && '2'==cp[1]) {
6230 output_encoding = nkf_enc_from_index(UTF_8);
6235 output_endian = ENDIAN_LITTLE;
6236 output_bom_f = TRUE;
6237 } else if (cp[0] == 'B') {
6239 output_bom_f = TRUE;
6242 output_bom_f = FALSE;
/* The generic UTF_16 / UTF_32 index is narrowed to the endian-specific one (plain, then BOM variants). */
6244 enc_idx = enc_idx == UTF_16
6245 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6246 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6248 enc_idx = enc_idx == UTF_16
6249 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6250 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6252 output_encoding = nkf_enc_from_index(enc_idx);
6256 #ifdef UTF8_INPUT_ENABLE
6257 case 'W': /* UTF input */
6260 input_encoding = nkf_enc_from_index(UTF_8);
6263 if ('1'== cp[0] && '6'==cp[1]) {
6265 input_endian = ENDIAN_BIG;
6267 } else if ('3'== cp[0] && '2'==cp[1]) {
6269 input_endian = ENDIAN_BIG;
6272 input_encoding = nkf_enc_from_index(UTF_8);
6277 input_endian = ENDIAN_LITTLE;
6278 } else if (cp[0] == 'B') {
6280 input_endian = ENDIAN_BIG;
6282 enc_idx = (enc_idx == UTF_16
6283 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6284 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6285 input_encoding = nkf_enc_from_index(enc_idx);
6289 /* Input code assumption */
6290 case 'J': /* ISO-2022-JP input */
6291 input_encoding = nkf_enc_from_index(ISO_2022_JP);
6293 case 'E': /* EUC-JP input */
6294 input_encoding = nkf_enc_from_index(EUCJP_NKF);
6296 case 'S': /* Shift_JIS input */
6297 input_encoding = nkf_enc_from_index(SHIFT_JIS);
6299 case 'Z': /* Convert X0208 alphabet to ASCII */
6301 bit:0 Convert JIS X 0208 Alphabet to ASCII
6302 bit:1 Convert Kankaku to one space
6303 bit:2 Convert Kankaku to two spaces
6304 bit:3 Convert HTML Entity
6305 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
/* Each digit 0..4 sets the bit of that number in alpha_f. */
6307 while ('0'<= *cp && *cp <='4') {
6308 alpha_f |= 1 << (*cp++ - '0');
6312 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6313 x0201_f = FALSE; /* No X0201->X0208 conversion */
6315 ESC-(-I in JIS, EUC, MS Kanji
6316 SI/SO in JIS, EUC, MS Kanji
6317 SS2 in EUC, JIS, not in MS Kanji
6318 MS Kanji (0xa0-0xdf)
6320 ESC-(-I in JIS (0x20-0x5f)
6321 SS2 in EUC (0xa0-0xdf)
6322 0xa0-0xd in MS Kanji (0xa0-0xdf)
6325 case 'X': /* Convert X0201 kana to X0208 */
6328 case 'F': /* preserve new lines */
6329 fold_preserve_f = TRUE;
/* FALLTHROUGH: -F continues into the -f handling below. */
6330 case 'f': /* folding -f60 or -f */
6333 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6335 fold_len += *cp++ - '0';
/* A width outside 1..BUFSIZ-1 is replaced by DEFAULT_FOLD. */
6337 if (!(0<fold_len && fold_len<BUFSIZ))
6338 fold_len = DEFAULT_FOLD;
6342 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6344 fold_margin += *cp++ - '0';
6348 case 'm': /* MIME support */
6349 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6350 if (*cp=='B'||*cp=='Q') {
6351 mime_decode_mode = *cp++;
6352 mimebuf_f = FIXED_MIME;
6353 } else if (*cp=='N') {
6354 mime_f = TRUE; cp++;
6355 } else if (*cp=='S') {
6356 mime_f = STRICT_MIME; cp++;
6357 } else if (*cp=='0') {
6358 mime_decode_f = FALSE;
6359 mime_f = FALSE; cp++;
6361 mime_f = STRICT_MIME;
6364 case 'M': /* MIME output */
6367 mimeout_f = FIXED_MIME; cp++;
6368 } else if (*cp=='Q') {
6370 mimeout_f = FIXED_MIME; cp++;
6375 case 'B': /* Broken JIS support */
6377 bit:1 allow any x on ESC-(-x or ESC-$-x
6378 bit:2 reset to ascii on NL
/* The digit selects which bit of broken_f is set. */
6380 if ('9'>= *cp && *cp>='0')
6381 broken_f |= 1<<(*cp++ -'0');
6386 case 'O':/* for Output file */
6390 case 'c':/* add cr code */
6393 case 'd':/* delete cr code */
6396 case 'I': /* ISO-2022-JP output */
6399 case 'L': /* line mode */
6400 if (*cp=='u') { /* unix */
6401 eolmode_f = LF; cp++;
6402 } else if (*cp=='m') { /* mac */
6403 eolmode_f = CR; cp++;
6404 } else if (*cp=='w') { /* windows */
6405 eolmode_f = CRLF; cp++;
6406 } else if (*cp=='0') { /* no conversion */
6407 eolmode_f = 0; cp++;
6412 if ('2' <= *cp && *cp <= '9') {
6415 } else if (*cp == '0' || *cp == '1') {
6424 /* multiple options in a string are allowed for the Perl module */
6425 while(*cp && *cp++!='-');
6428 #if !defined(PERL_XS) && !defined(WIN32DLL)
6429 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6431 /* bogus option but ignored */
6439 #include "nkf32dll.c"
6440 #elif defined(PERL_XS)
6441 #else /* WIN32DLL */
/*
 * main: command-line entry point; it sits in the #else branch of the
 * WIN32DLL / PERL_XS conditional.
 *
 * Visible steps:
 *  - the leading arguments that start with '-' are consumed;
 *  - standard input is converted with kanji_convert(stdin), or each
 *    remaining argument is opened with fopen() and converted;
 *  - when file_out_f is TRUE, stdout is redirected (dup / dup2) to a
 *    temporary file created next to the input; afterwards stdout is
 *    restored, the mode of the original is copied with stat() / chmod(),
 *    the timestamps with utime() when preserve_time_f is set, the
 *    original is renamed to a backup name or unlinked, and the temporary
 *    file is renamed to the original name.
 *
 * NOTE(review): many lines (braces, the options() call site, error
 * returns, the conditions selecting between branches) are missing from
 * this listing; presumably each '-' argument is handed to options() --
 * confirm against the full source.
 */
6443 main(int argc, char **argv)
6448 char *outfname = NULL;
6451 #ifdef EASYWIN /*Easy Win */
6452 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6454 #ifdef DEFAULT_CODE_LOCALE
6455 setlocale(LC_CTYPE, "");
/* Walk the leading arguments that begin with '-'. */
6459 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6460 cp = (unsigned char *)*argv;
6465 if (pipe(fds) < 0 || (pid = fork()) < 0){
6476 execvp(argv[1], &argv[1]);
/* These flags are saved in *_back copies and written back afterwards. */
6493 int debug_f_back = debug_f;
6496 int exec_f_back = exec_f;
6499 int x0212_f_back = x0212_f;
6501 int x0213_f_back = x0213_f;
6502 int guess_f_back = guess_f;
6504 guess_f = guess_f_back;
6507 debug_f = debug_f_back;
6510 exec_f = exec_f_back;
6512 x0212_f = x0212_f_back;
6513 x0213_f = x0213_f_back;
6516 if (binmode_f == TRUE)
6517 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6518 if (freopen("","wb",stdout) == NULL)
6525 setbuf(stdout, (char *) NULL);
6527 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
6530 if (binmode_f == TRUE)
6531 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6532 if (freopen("","rb",stdin) == NULL) return (-1);
6536 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
6540 kanji_convert(stdin);
6541 if (guess_f) print_guessed_code(NULL);
6545 int is_argument_error = FALSE;
6547 input_codename = NULL;
6550 iconv_for_check = 0;
/* A file that cannot be opened is remembered in is_argument_error. */
6552 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6554 is_argument_error = TRUE;
6562 /* reopen file for stdout */
6563 if (file_out_f == TRUE) {
/* The temporary name is built from the original name plus a suffix. */
6566 outfname = nkf_xmalloc(strlen(origfname)
6567 + strlen(".nkftmpXXXXXX")
6569 strcpy(outfname, origfname);
/* Scan backwards for the last '/' or '\\' in the path. */
6573 for (i = strlen(outfname); i; --i){
6574 if (outfname[i - 1] == '/'
6575 || outfname[i - 1] == '\\'){
6581 strcat(outfname, "ntXXXXXX");
6583 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6584 S_IREAD | S_IWRITE);
6586 strcat(outfname, ".nkftmpXXXXXX");
6587 fd = mkstemp(outfname);
/* stdout is duplicated into fd_backup before being pointed at the temporary file. */
6590 || (fd_backup = dup(fileno(stdout))) < 0
6591 || dup2(fd, fileno(stdout)) < 0
6602 outfname = "nkf.out";
6605 if(freopen(outfname, "w", stdout) == NULL) {
6609 if (binmode_f == TRUE) {
6610 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6611 if (freopen("","wb",stdout) == NULL)
6618 if (binmode_f == TRUE)
6619 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6620 if (freopen("","rb",fin) == NULL)
6625 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
6629 char *filename = NULL;
/* The file name is passed to print_guessed_code() only when nfiles > 1. */
6631 if (nfiles > 1) filename = origfname;
6632 if (guess_f) print_guessed_code(filename);
6638 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
/* Point stdout back at the descriptor saved in fd_backup. */
6646 if (dup2(fd_backup, fileno(stdout)) < 0){
6649 if (stat(origfname, &sb)) {
6650 fprintf(stderr, "Can't stat %s\n", origfname);
6652 /* restore the permissions */
6653 if (chmod(outfname, sb.st_mode)) {
6654 fprintf(stderr, "Can't set permission %s\n", outfname);
6657 /* restore the timestamp */
6658 if(preserve_time_f){
6659 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6660 tb[0] = tb[1] = sb.st_mtime;
6661 if (utime(outfname, tb)) {
6662 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6665 tb.actime = sb.st_atime;
6666 tb.modtime = sb.st_mtime;
6667 if (utime(outfname, &tb)) {
6668 fprintf(stderr, "Can't set timestamp %s\n", outfname);
/* Any existing backup file is removed before the original is renamed to the backup name. */
6673 char *backup_filename = get_backup_filename(backup_suffix, origfname);
6675 unlink(backup_filename);
6677 if (rename(origfname, backup_filename)) {
6678 perror(backup_filename);
6679 fprintf(stderr, "Can't rename %s to %s\n",
6680 origfname, backup_filename);
6682 nkf_xfree(backup_filename);
6685 if (unlink(origfname)){
/* The converted temporary file takes the place of the original. */
6690 if (rename(outfname, origfname)) {
6692 fprintf(stderr, "Can't rename %s to %s\n",
6693 outfname, origfname);
6695 nkf_xfree(outfname);
6700 if (is_argument_error)
6703 #ifdef EASYWIN /*Easy Win */
6704 if (file_out_f == FALSE)
6705 scanf("%d",&end_check);
6708 #else /* for Other OS */
6709 if (file_out_f == TRUE)
6711 #endif /*Easy Win */
6714 #endif /* WIN32DLL */