2 * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3 * Copyright (c) 1996-2010, The nkf Project.
5 * This software is provided 'as-is', without any express or implied
6 * warranty. In no event will the authors be held liable for any damages
7 * arising from the use of this software.
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software
15 * in a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
18 * 2. Altered source versions must be plainly marked as such, and must not be
19 * misrepresented as being the original software.
21 * 3. This notice may not be removed or altered from any source distribution.
23 #define NKF_VERSION "2.1.1"
24 #define NKF_RELEASE_DATE "2010-08-08"
26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27 "Copyright (C) 1996-2010, The nkf Project."
38 # define INCL_DOSERRORS
44 /* state of output_mode and input_mode
123 NKF_ENCODING_TABLE_SIZE,
124 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128 JIS_X_0208 = 0x1168, /* @B */
129 JIS_X_0212 = 0x1159, /* D */
130 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131 JIS_X_0213_2 = 0x1229, /* P */
132 JIS_X_0213_1 = 0x1233 /* Q */
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
149 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
150 void (*oconv)(nkf_char c2, nkf_char c1);
151 } nkf_native_encoding;
153 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
154 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
155 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
156 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
157 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
158 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
159 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
164 const nkf_native_encoding *base_encoding;
167 nkf_encoding nkf_encoding_table[] = {
168 {ASCII, "US-ASCII", &NkfEncodingASCII},
169 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179 {CP10001, "CP10001", &NkfEncodingShift_JIS},
180 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182 {CP51932, "CP51932", &NkfEncodingEUC_JP},
183 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203 {BINARY, "BINARY", &NkfEncodingASCII},
210 } encoding_name_to_id_table[] = {
215 {"ISO-2022-JP", ISO_2022_JP},
216 {"ISO2022JP-CP932", CP50220},
217 {"CP50220", CP50220},
218 {"CP50221", CP50221},
219 {"CSISO2022JP", CP50221},
220 {"CP50222", CP50222},
221 {"ISO-2022-JP-1", ISO_2022_JP_1},
222 {"ISO-2022-JP-3", ISO_2022_JP_3},
223 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
224 {"SHIFT_JIS", SHIFT_JIS},
226 {"MS_Kanji", SHIFT_JIS},
228 {"WINDOWS-31J", WINDOWS_31J},
229 {"CSWINDOWS31J", WINDOWS_31J},
230 {"CP932", WINDOWS_31J},
231 {"MS932", WINDOWS_31J},
232 {"CP10001", CP10001},
235 {"EUCJP-NKF", EUCJP_NKF},
236 {"CP51932", CP51932},
237 {"EUC-JP-MS", EUCJP_MS},
238 {"EUCJP-MS", EUCJP_MS},
239 {"EUCJPMS", EUCJP_MS},
240 {"EUC-JP-ASCII", EUCJP_ASCII},
241 {"EUCJP-ASCII", EUCJP_ASCII},
242 {"SHIFT_JISX0213", SHIFT_JISX0213},
243 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
244 {"EUC-JISX0213", EUC_JISX0213},
245 {"EUC-JIS-2004", EUC_JIS_2004},
248 {"UTF-8-BOM", UTF_8_BOM},
249 {"UTF8-MAC", UTF8_MAC},
250 {"UTF-8-MAC", UTF8_MAC},
252 {"UTF-16BE", UTF_16BE},
253 {"UTF-16BE-BOM", UTF_16BE_BOM},
254 {"UTF-16LE", UTF_16LE},
255 {"UTF-16LE-BOM", UTF_16LE_BOM},
257 {"UTF-32BE", UTF_32BE},
258 {"UTF-32BE-BOM", UTF_32BE_BOM},
259 {"UTF-32LE", UTF_32LE},
260 {"UTF-32LE-BOM", UTF_32LE_BOM},
265 #if defined(DEFAULT_CODE_JIS)
266 #define DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define DEFAULT_ENCIDX UTF_8
/*
 * Character classification and conversion helpers.
 *
 * Every macro parameter is parenthesized so that an expression argument
 * (e.g. `a | b` or `x ? y : z`) is treated as a single operand.  Arguments
 * are still evaluated more than once, so do not pass expressions with side
 * effects such as `*p++`.
 */

/* True for ASCII letters and decimal digits. */
#define is_alnum(c) \
    (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z') || ('0' <= (c) && (c) <= '9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c) (('a' <= (c) && (c) <= 'z') ? ((c) - ('a' - 'A')) : (c))
#define nkf_isoctal(c) ('0' <= (c) && (c) <= '7')
#define nkf_isdigit(c) ('0' <= (c) && (c) <= '9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP <= (c) && (c) <= '~')
#define nkf_isgraph(c) ('!' <= (c) && (c) <= '~')

/* Value of a hexadecimal digit character; any other character yields 0. */
#define hex2bin(c) (('0' <= (c) && (c) <= '9') ? ((c) - '0') : \
                    ('A' <= (c) && (c) <= 'F') ? ((c) - 'A' + 10) : \
                    ('a' <= (c) && (c) <= 'f') ? ((c) - 'a' + 10) : 0)

/* Upper-case hexadecimal digit for the low 4 bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c) & 15])

/* True when bits 8..15 of c2 (taken as unsigned short) equal SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)

/*
 * True for CR, LF, and for characters strictly between SP and DEL other
 * than '?', '=', '_', '(', ')', '.' and the double quote (0x22).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

/* True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END]. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)

/* True when c lies in [SP, 0x5F]. */
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
304 #define HOLD_SIZE 1024
305 #if defined(INT_IS_SHORT)
306 #define IOBUF_SIZE 2048
308 #define IOBUF_SIZE 16384
311 #define DEFAULT_J 'B'
312 #define DEFAULT_R 'B'
319 /* MIME preprocessor */
321 #ifdef EASYWIN /*Easy Win */
322 extern POINT _BufferSize;
331 void (*status_func)(struct input_code *, nkf_char);
332 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
337 static nkf_encoding *input_encoding = NULL;
338 static nkf_encoding *output_encoding = NULL;
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
342 * 0: Shift_JIS, eucJP-ascii
347 #define UCS_MAP_ASCII 0
349 #define UCS_MAP_CP932 2
350 #define UCS_MAP_CP10001 3
351 static int ms_ucs_map_f = UCS_MAP_ASCII;
353 #ifdef UTF8_INPUT_ENABLE
354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355 static int no_cp932ext_f = FALSE;
356 /* ignore ZERO WIDTH NO-BREAK SPACE */
357 static int no_best_fit_chars_f = FALSE;
358 static int input_endian = ENDIAN_BIG;
359 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
360 static void (*encode_fallback)(nkf_char c) = NULL;
361 static void w_status(struct input_code *, nkf_char);
363 #ifdef UTF8_OUTPUT_ENABLE
364 static int output_bom_f = FALSE;
365 static int output_endian = ENDIAN_BIG;
368 static void std_putc(nkf_char c);
369 static nkf_char std_getc(FILE *f);
370 static nkf_char std_ungetc(nkf_char c,FILE *f);
372 static nkf_char broken_getc(FILE *f);
373 static nkf_char broken_ungetc(nkf_char c,FILE *f);
375 static nkf_char mime_getc(FILE *f);
377 static void mime_putc(nkf_char c);
381 #if !defined(PERL_XS) && !defined(WIN32DLL)
382 static unsigned char stdibuf[IOBUF_SIZE];
383 static unsigned char stdobuf[IOBUF_SIZE];
386 #define NKF_UNSPECIFIED (-TRUE)
389 static int unbuf_f = FALSE;
390 static int estab_f = FALSE;
391 static int nop_f = FALSE;
392 static int binmode_f = TRUE; /* binary mode */
393 static int rot_f = FALSE; /* rot14/43 mode */
394 static int hira_f = FALSE; /* hira/kata henkan */
395 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
396 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
397 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
398 static int mimebuf_f = FALSE; /* MIME buffered input */
399 static int broken_f = FALSE; /* convert ESC-less broken JIS */
400 static int iso8859_f = FALSE; /* ISO8859 through */
401 static int mimeout_f = FALSE; /* base64 mode */
402 static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */
403 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
405 #ifdef UNICODE_NORMALIZATION
406 static int nfc_f = FALSE;
407 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
408 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
412 static int cap_f = FALSE;
413 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
414 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
416 static int url_f = FALSE;
417 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
418 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
/*
 * Bit layout helpers for nkf_char values.
 *
 * CLASS_MASK selects the top byte and VALUE_MASK the low 24 bits; a value
 * whose top byte equals CLASS_UNICODE is recognised by nkf_char_unicode_p().
 * The macro parameter is parenthesized so that arguments such as `a | b`
 * are masked as a whole rather than only their right-hand operand.
 */
#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
#define CLASS_MASK NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE NKF_INT32_C(0x01000000)
#define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX NKF_INT32_C(0x0010FFFF)

/* Tag c with the PREFIX_EUCG3 bits. */
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
/* Tag c with the CLASS_UNICODE bits. */
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/* True when the class byte of c is CLASS_UNICODE. */
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the 24-bit value of c does not exceed UNICODE_BMP_MAX. */
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
/* True when the 24-bit value of c does not exceed UNICODE_MAX. */
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
433 #ifdef NUMCHAR_OPTION
434 static int numchar_f = FALSE;
435 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
436 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
440 static int noout_f = FALSE;
441 static void no_putc(nkf_char c);
442 static int debug_f = FALSE;
443 static void debug(const char *str);
444 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
447 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
448 static void set_input_codename(const char *codename);
451 static int exec_f = 0;
454 #ifdef SHIFTJIS_CP932
455 /* invert IBM extended characters to others */
456 static int cp51932_f = FALSE;
458 /* invert NEC-selected IBM extended characters to IBM extended characters */
459 static int cp932inv_f = TRUE;
461 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
462 #endif /* SHIFTJIS_CP932 */
464 static int x0212_f = FALSE;
465 static int x0213_f = FALSE;
467 static unsigned char prefix_table[256];
469 static void e_status(struct input_code *, nkf_char);
470 static void s_status(struct input_code *, nkf_char);
472 struct input_code input_code_list[] = {
473 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
474 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
475 #ifdef UTF8_INPUT_ENABLE
476 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
477 {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
478 {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
480 {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
483 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
484 static int base64_count = 0;
486 /* X0208 -> ASCII converter */
489 static int f_line = 0; /* chars in line */
490 static int f_prev = 0;
491 static int fold_preserve_f = FALSE; /* preserve new lines */
492 static int fold_f = FALSE;
493 static int fold_len = 0;
496 static unsigned char kanji_intro = DEFAULT_J;
497 static unsigned char ascii_intro = DEFAULT_R;
501 #define FOLD_MARGIN 10
502 #define DEFAULT_FOLD 60
504 static int fold_margin = FOLD_MARGIN;
506 /* process default */
509 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
511 fprintf(stderr,"nkf internal module connection failure.\n");
517 no_connection(nkf_char c2, nkf_char c1)
519 no_connection2(c2,c1,0);
522 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
523 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
525 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
526 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
527 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
528 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
533 /* static redirections */
535 static void (*o_putc)(nkf_char c) = std_putc;
537 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
538 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
540 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
541 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
543 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
545 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
546 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
548 /* for strict mime */
549 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
550 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
553 static int output_mode = ASCII; /* output kanji mode */
554 static int input_mode = ASCII; /* input kanji mode */
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
557 /* X0201 / X0208 conversion tables */
559 /* X0201 kana conversion table */
561 static const unsigned char cv[]= {
562 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
563 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
564 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
565 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
566 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
567 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
568 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
569 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
570 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
571 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
572 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
573 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
574 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
575 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
576 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
577 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
581 /* X0201 kana conversion table for dakuten */
583 static const unsigned char dv[]= {
584 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
585 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
589 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
590 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
591 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
592 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
593 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
595 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 /* X0201 kana conversion table for han-dakuten */
604 static const unsigned char ev[]= {
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
616 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624 /* X0208 kigou conversion table */
625 /* 0x8140 - 0x819e */
626 static const unsigned char fv[] = {
628 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
629 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
630 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
632 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
633 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
634 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
636 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
637 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
638 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
644 static int option_mode = 0;
645 static int file_out_f = FALSE;
647 static int overwrite_f = FALSE;
648 static int preserve_time_f = FALSE;
649 static int backup_f = FALSE;
650 static char *backup_suffix = "";
653 static int eolmode_f = 0; /* CR, LF, CRLF */
654 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
655 static nkf_char prev_cr = 0; /* CR or 0 */
656 #ifdef EASYWIN /*Easy Win */
657 static int end_check;
661 nkf_xmalloc(size_t size)
665 if (size == 0) size = 1;
669 perror("can't malloc");
677 nkf_xrealloc(void *ptr, size_t size)
679 if (size == 0) size = 1;
681 ptr = realloc(ptr, size);
683 perror("can't realloc");
690 #define nkf_xfree(ptr) free(ptr)
693 nkf_str_caseeql(const char *src, const char *target)
696 for (i = 0; src[i] && target[i]; i++) {
697 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
699 if (src[i] || target[i]) return FALSE;
704 nkf_enc_from_index(int idx)
706 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
709 return &nkf_encoding_table[idx];
713 nkf_enc_find_index(const char *name)
716 if (name[0] == 'X' && *(name+1) == '-') name += 2;
717 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
718 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
719 return encoding_name_to_id_table[i].id;
726 nkf_enc_find(const char *name)
729 idx = nkf_enc_find_index(name);
730 if (idx < 0) return 0;
731 return nkf_enc_from_index(idx);
734 #define nkf_enc_name(enc) (enc)->name
735 #define nkf_enc_to_index(enc) (enc)->id
736 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
737 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
738 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
739 #define nkf_enc_asciicompat(enc) (\
740 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
741 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
742 #define nkf_enc_unicode_p(enc) (\
743 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
744 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
745 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
746 #define nkf_enc_cp5022x_p(enc) (\
747 nkf_enc_to_index(enc) == CP50220 ||\
748 nkf_enc_to_index(enc) == CP50221 ||\
749 nkf_enc_to_index(enc) == CP50222)
751 #ifdef DEFAULT_CODE_LOCALE
755 #ifdef HAVE_LANGINFO_H
756 return nl_langinfo(CODESET);
757 #elif defined(__WIN32__)
759 sprintf(buf, "CP%d", GetACP());
761 #elif defined(__OS2__)
762 # if defined(INT_IS_SHORT)
768 ULONG ulCP[1], ulncp;
769 DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
770 if (ulCP[0] == 932 || ulCP[0] == 943)
771 strcpy(buf, "Shift_JIS");
773 sprintf(buf, "CP%lu", ulCP[0]);
781 nkf_locale_encoding()
783 nkf_encoding *enc = 0;
784 const char *encname = nkf_locale_charmap();
786 enc = nkf_enc_find(encname);
789 #endif /* DEFAULT_CODE_LOCALE */
794 return &nkf_encoding_table[UTF_8];
798 nkf_default_encoding()
800 nkf_encoding *enc = 0;
801 #ifdef DEFAULT_CODE_LOCALE
802 enc = nkf_locale_encoding();
803 #elif defined(DEFAULT_ENCIDX)
804 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
806 if (!enc) enc = nkf_utf8_encoding();
817 nkf_buf_new(int length)
819 nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
820 buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
828 nkf_buf_dispose(nkf_buf_t *buf)
835 #define nkf_buf_length(buf) ((buf)->len)
836 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
839 nkf_buf_at(nkf_buf_t *buf, int index)
841 assert(index <= buf->len);
842 return buf->ptr[index];
846 nkf_buf_clear(nkf_buf_t *buf)
852 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
854 if (buf->capa <= buf->len) {
857 buf->ptr[buf->len++] = c;
861 nkf_buf_pop(nkf_buf_t *buf)
863 assert(!nkf_buf_empty_p(buf));
864 return buf->ptr[--buf->len];
867 /* Normalization Form C */
870 #define fprintf dllprintf
876 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
883 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
884 #ifdef UTF8_OUTPUT_ENABLE
885 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
886 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
889 #ifdef UTF8_INPUT_ENABLE
890 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
891 " UTF option is -W[8,[16,32][B,L]]\n"
893 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
897 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
898 " M[BQ] MIME encode [B:base64 Q:quoted]\n"
899 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
902 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
903 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
904 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
905 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
908 " O Output to File (DEFAULT 'nkf.out')\n"
909 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
912 " --ic=<encoding> Specify the input encoding\n"
913 " --oc=<encoding> Specify the output encoding\n"
914 " --hiragana --katakana Hiragana/Katakana Conversion\n"
915 " --katakana-hiragana Converts each other\n"
919 " --{cap, url}-input Convert hex after ':' or '%%'\n"
921 #ifdef NUMCHAR_OPTION
922 " --numchar-input Convert Unicode Character Reference\n"
924 #ifdef UTF8_INPUT_ENABLE
925 " --fb-{skip, html, xml, perl, java, subchar}\n"
926 " Specify unassigned character's replacement\n"
931 " --in-place[=SUF] Overwrite original files\n"
932 " --overwrite[=SUF] Preserve timestamp of original files\n"
934 " -g --guess Guess the input code\n"
935 " -v --version Print the version\n"
936 " --help/-V Print this help / configuration\n"
942 show_configuration(void)
945 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
946 " Compile-time options:\n"
947 " Compiled at: " __DATE__ " " __TIME__ "\n"
950 " Default output encoding: "
951 #ifdef DEFAULT_CODE_LOCALE
952 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
953 #elif defined(DEFAULT_ENCIDX)
954 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
960 " Default output end of line: "
961 #if DEFAULT_NEWLINE == CR
963 #elif DEFAULT_NEWLINE == CRLF
969 " Decode MIME encoded string: "
970 #if MIME_DECODE_DEFAULT
976 " Convert JIS X 0201 Katakana: "
983 " --help, --version output: "
984 #if HELP_OUTPUT_HELP_OUTPUT
995 get_backup_filename(const char *suffix, const char *filename)
997 char *backup_filename;
998 int asterisk_count = 0;
1000 int filename_length = strlen(filename);
1002 for(i = 0; suffix[i]; i++){
1003 if(suffix[i] == '*') asterisk_count++;
1007 backup_filename = nkf_xmalloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1008 for(i = 0, j = 0; suffix[i];){
1009 if(suffix[i] == '*'){
1010 backup_filename[j] = '\0';
1011 strncat(backup_filename, filename, filename_length);
1013 j += filename_length;
1015 backup_filename[j++] = suffix[i++];
1018 backup_filename[j] = '\0';
1020 j = filename_length + strlen(suffix);
1021 backup_filename = nkf_xmalloc(j + 1);
1022 strcpy(backup_filename, filename);
1023 strcat(backup_filename, suffix);
1024 backup_filename[j] = '\0';
1026 return backup_filename;
1030 #ifdef UTF8_INPUT_ENABLE
1032 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1037 if(c >= NKF_INT32_C(1)<<shift){
1039 (*f)(0, bin2hex(c>>shift));
1050 encode_fallback_html(nkf_char c)
1055 if(c >= NKF_INT32_C(1000000))
1056 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1057 if(c >= NKF_INT32_C(100000))
1058 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1060 (*oconv)(0, 0x30+(c/10000 )%10);
1062 (*oconv)(0, 0x30+(c/1000 )%10);
1064 (*oconv)(0, 0x30+(c/100 )%10);
1066 (*oconv)(0, 0x30+(c/10 )%10);
1068 (*oconv)(0, 0x30+ c %10);
1074 encode_fallback_xml(nkf_char c)
1079 nkf_each_char_to_hex(oconv, c);
1085 encode_fallback_java(nkf_char c)
1089 if(!nkf_char_unicode_bmp_p(c)){
1093 (*oconv)(0, bin2hex(c>>20));
1094 (*oconv)(0, bin2hex(c>>16));
1098 (*oconv)(0, bin2hex(c>>12));
1099 (*oconv)(0, bin2hex(c>> 8));
1100 (*oconv)(0, bin2hex(c>> 4));
1101 (*oconv)(0, bin2hex(c ));
1106 encode_fallback_perl(nkf_char c)
1111 nkf_each_char_to_hex(oconv, c);
1117 encode_fallback_subchar(nkf_char c)
1119 c = unicode_subchar;
1120 (*oconv)((c>>8)&0xFF, c&0xFF);
1125 static const struct {
1149 {"katakana-hiragana","h3"},
1157 #ifdef UTF8_OUTPUT_ENABLE
1167 {"fb-subchar=", ""},
1169 #ifdef UTF8_INPUT_ENABLE
1170 {"utf8-input", "W"},
1171 {"utf16-input", "W16"},
1172 {"no-cp932ext", ""},
1173 {"no-best-fit-chars",""},
1175 #ifdef UNICODE_NORMALIZATION
1176 {"utf8mac-input", ""},
1188 #ifdef NUMCHAR_OPTION
1189 {"numchar-input", ""},
1195 #ifdef SHIFTJIS_CP932
1206 set_input_encoding(nkf_encoding *enc)
1208 switch (nkf_enc_to_index(enc)) {
1214 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1216 #ifdef SHIFTJIS_CP932
1219 #ifdef UTF8_OUTPUT_ENABLE
1220 ms_ucs_map_f = UCS_MAP_CP932;
1230 case ISO_2022_JP_2004:
1237 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1238 #ifdef SHIFTJIS_CP932
1241 #ifdef UTF8_OUTPUT_ENABLE
1242 ms_ucs_map_f = UCS_MAP_CP932;
1247 #ifdef SHIFTJIS_CP932
1250 #ifdef UTF8_OUTPUT_ENABLE
1251 ms_ucs_map_f = UCS_MAP_CP10001;
1259 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1260 #ifdef SHIFTJIS_CP932
1263 #ifdef UTF8_OUTPUT_ENABLE
1264 ms_ucs_map_f = UCS_MAP_CP932;
1268 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1269 #ifdef SHIFTJIS_CP932
1272 #ifdef UTF8_OUTPUT_ENABLE
1273 ms_ucs_map_f = UCS_MAP_MS;
1277 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1278 #ifdef SHIFTJIS_CP932
1281 #ifdef UTF8_OUTPUT_ENABLE
1282 ms_ucs_map_f = UCS_MAP_ASCII;
1285 case SHIFT_JISX0213:
1286 case SHIFT_JIS_2004:
1288 #ifdef SHIFTJIS_CP932
1295 #ifdef SHIFTJIS_CP932
1299 #ifdef UTF8_INPUT_ENABLE
1300 #ifdef UNICODE_NORMALIZATION
1308 input_endian = ENDIAN_BIG;
1312 input_endian = ENDIAN_LITTLE;
1317 input_endian = ENDIAN_BIG;
1321 input_endian = ENDIAN_LITTLE;
1328 set_output_encoding(nkf_encoding *enc)
1330 switch (nkf_enc_to_index(enc)) {
1332 #ifdef SHIFTJIS_CP932
1333 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1335 #ifdef UTF8_OUTPUT_ENABLE
1336 ms_ucs_map_f = UCS_MAP_CP932;
1340 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1341 #ifdef SHIFTJIS_CP932
1342 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1344 #ifdef UTF8_OUTPUT_ENABLE
1345 ms_ucs_map_f = UCS_MAP_CP932;
1349 #ifdef SHIFTJIS_CP932
1350 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1355 #ifdef SHIFTJIS_CP932
1356 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1362 #ifdef SHIFTJIS_CP932
1363 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1369 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1370 #ifdef UTF8_OUTPUT_ENABLE
1371 ms_ucs_map_f = UCS_MAP_CP932;
1375 #ifdef UTF8_OUTPUT_ENABLE
1376 ms_ucs_map_f = UCS_MAP_CP10001;
1381 #ifdef SHIFTJIS_CP932
1382 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1384 #ifdef UTF8_OUTPUT_ENABLE
1385 ms_ucs_map_f = UCS_MAP_ASCII;
1390 #ifdef SHIFTJIS_CP932
1391 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1393 #ifdef UTF8_OUTPUT_ENABLE
1394 ms_ucs_map_f = UCS_MAP_ASCII;
1398 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1399 #ifdef SHIFTJIS_CP932
1400 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1402 #ifdef UTF8_OUTPUT_ENABLE
1403 ms_ucs_map_f = UCS_MAP_CP932;
1407 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1409 #ifdef UTF8_OUTPUT_ENABLE
1410 ms_ucs_map_f = UCS_MAP_MS;
1414 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1416 #ifdef UTF8_OUTPUT_ENABLE
1417 ms_ucs_map_f = UCS_MAP_ASCII;
1420 case SHIFT_JISX0213:
1421 case SHIFT_JIS_2004:
1423 #ifdef SHIFTJIS_CP932
1424 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1431 #ifdef SHIFTJIS_CP932
1432 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1435 #ifdef UTF8_OUTPUT_ENABLE
1437 output_bom_f = TRUE;
1441 output_bom_f = TRUE;
1444 output_endian = ENDIAN_LITTLE;
1445 output_bom_f = FALSE;
1448 output_endian = ENDIAN_LITTLE;
1449 output_bom_f = TRUE;
1453 output_bom_f = TRUE;
1456 output_endian = ENDIAN_LITTLE;
1457 output_bom_f = FALSE;
1460 output_endian = ENDIAN_LITTLE;
1461 output_bom_f = TRUE;
1467 static struct input_code*
1468 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1471 struct input_code *p = input_code_list;
1473 if (iconv_func == p->iconv_func){
1483 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1485 #ifdef INPUT_CODE_FIX
1486 if (f || !input_encoding)
1493 #ifdef INPUT_CODE_FIX
1494 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1500 if (estab_f && iconv_for_check != iconv){
1501 struct input_code *p = find_inputcode_byfunc(iconv);
1503 set_input_codename(p->name);
1506 iconv_for_check = iconv;
1513 x0212_shift(nkf_char c)
1518 if (0x75 <= c && c <= 0x7f){
1519 ret = c + (0x109 - 0x75);
1522 if (0x75 <= c && c <= 0x7f){
1523 ret = c + (0x113 - 0x75);
1531 x0212_unshift(nkf_char c)
1534 if (0x7f <= c && c <= 0x88){
1535 ret = c + (0x75 - 0x7f);
1536 }else if (0x89 <= c && c <= 0x92){
1537 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1541 #endif /* X0212_ENABLE */
/*
 * Convert a (c2, c1) pair to a two-byte result stored through p2/p1.
 * Each output pointer is written only when it is non-NULL.
 * Returns 1 when c2 is above 0x7F at the final check; callers in this file
 * treat a return of 0 as success.
 */
1544 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
/* Rows 0x21..0x2F: first byte is computed by formula from ndx. */
1550 if((0x21 <= ndx && ndx <= 0x2F)){
1551 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
/* Second byte: odd rows add 0x1f (c1 < 0x60) or 0x20, even rows add 0x7e. */
1552 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/* Rows 0x6E..0x7E: same scheme with a fixed 0xbe base. */
1554 }else if(0x6E <= ndx && ndx <= 0x7E){
1555 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1556 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/* Other graphic rows are looked up in the x0212_shiftjis table. */
1562 else if(nkf_isgraph(ndx)){
1564 const unsigned short *ptr;
1565 ptr = x0212_shiftjis[ndx - 0x21];
1567 val = ptr[(c1 & 0x7f) - 0x21];
1576 c2 = x0212_shift(c2);
1578 #endif /* X0212_ENABLE */
/* Anything still above 0x7F cannot be expressed; report failure. */
1580 if(0x7F < c2) return 1;
/* General case: base 0x71 for rows up to 0x5e, 0xb1 for the rest. */
1581 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1582 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/*
 * Convert a two-byte (c2, c1) pair, with results delivered through p2/p1.
 * Returns 1 when c1 is above 0xFC.
 */
1587 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1589 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
/* Indexed by [c2 - 0xF0][0x9E < c1] in the x0213 branch below. */
1592 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1593 if (0xFC < c1) return 1;
1594 #ifdef SHIFTJIS_CP932
/* NOTE(review): the table lookups below index with c1 - 0x40, but only the
 * upper bound of c1 is checked above; confirm callers guarantee c1 >= 0x40. */
1595 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1596 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1603 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1604 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1610 #endif /* SHIFTJIS_CP932 */
/* Without x0213_f, IBM-extension lead bytes go through shiftjis_x0212. */
1612 if (!x0213_f && is_ibmext_in_sjis(c2)){
1613 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1616 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
/* With x0213_f, lead bytes from 0xF0 are tagged with PREFIX_EUCG3. */
1629 if(x0213_f && c2 >= 0xF0){
1630 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1631 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1632 }else{ /* 78<=k<=94 */
1633 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1634 if (0x9E < c1) c2++;
1637 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1638 #define SJ6394 0x0161 /* 63 - 94 ku offset */
/* Regular case: double the lead byte, subtract the ku offset, and add one
 * when the trail byte is above 0x9E. */
1639 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1640 if (0x9E < c1) c2++;
/* Trail byte: subtract SP when above DEL, otherwise 0x1F. */
1643 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1650 c2 = x0212_unshift(c2);
1657 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/*
 * Encode the code point val as UTF-8 bytes stored through p1..p4.
 * The branches shown produce 2-byte (val < 0x800), 3-byte (BMP) and
 * 4-byte (other valid Unicode values) sequences.
 */
1659 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
/* Two bytes: 110xxxxx 10xxxxxx. */
1667 }else if (val < 0x800){
1668 *p1 = 0xc0 | (val >> 6);
1669 *p2 = 0x80 | (val & 0x3f);
/* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
1672 } else if (nkf_char_unicode_bmp_p(val)) {
1673 *p1 = 0xe0 | (val >> 12);
1674 *p2 = 0x80 | ((val >> 6) & 0x3f);
1675 *p3 = 0x80 | ( val & 0x3f);
/* Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
1677 } else if (nkf_char_unicode_value_p(val)) {
1678 *p1 = 0xf0 | (val >> 18);
1679 *p2 = 0x80 | ((val >> 12) & 0x3f);
1680 *p3 = 0x80 | ((val >> 6) & 0x3f);
1681 *p4 = 0x80 | ( val & 0x3f);
/*
 * Decode a UTF-8 sequence (lead byte c1, then c2, c3, c4) into a code point.
 * The sequence length is chosen from the lead byte c1.
 */
1691 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/* NOTE(review): 0xC2 and 0xC3 are valid UTF-8 lead bytes (U+0080..U+00FF),
 * yet this bound classes them as invalid; 0xC1 looks like the intended
 * limit - confirm. */
1698 else if (c1 <= 0xC3) {
1699 /* trail byte or invalid */
/* Two-byte sequence: five payload bits from the lead byte. */
1702 else if (c1 <= 0xDF) {
1704 wc = (c1 & 0x1F) << 6;
/* Three-byte sequence: four payload bits from the lead byte. */
1707 else if (c1 <= 0xEF) {
1709 wc = (c1 & 0x0F) << 12;
1710 wc |= (c2 & 0x3F) << 6;
/* NOTE(review): every other branch in this chain tests the lead byte c1;
 * this one tests c2.  A trail byte (0x80..0xBF) always satisfies
 * c2 <= 0xF4, so lead bytes 0xF5..0xFF would be accepted here.  Probably
 * meant c1 <= 0xF4 - confirm. */
1713 else if (c2 <= 0xF4) {
1715 wc = (c1 & 0x0F) << 18;
1716 wc |= (c2 & 0x3F) << 12;
1717 wc |= (c3 & 0x3F) << 6;
1727 #ifdef UTF8_INPUT_ENABLE
/*
 * Two-level table lookup shared by the UTF-8 to JIS converters.
 *   c1, c0 - index bytes; c1 is range-checked against psize and c0 against
 *            sizeof_utf8_to_euc_C2.
 *   pp     - table of row pointers, psize entries.
 *   p2, p1 - output locations.
 * Returns 1 when the table or row is missing, an index is out of range,
 * or the table entry is 0.
 */
1729 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1730 const unsigned short *const *pp, nkf_char psize,
1731 nkf_char *p2, nkf_char *p1)
1734 const unsigned short *p;
1737 if (pp == 0) return 1;
1740 if (c1 < 0 || psize <= c1) return 1;
1742 if (p == 0) return 1;
1745 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1747 if (val == 0) return 1;
/* With no_cp932ext_f set, NEC special characters and IBM extended
 * characters are singled out by the tests below. */
1748 if (no_cp932ext_f && (
1749 (val>>8) == 0x2D || /* NEC special characters */
1750 val > NKF_INT32_C(0xF300) /* IBM extended characters */
/* A result row of SO is rewritten to the JIS X 0201 kana marker. */
1758 if (c2 == SO) c2 = JIS_X_0201_1976_K;
/*
 * Convert a UTF-8 sequence (c2 = lead byte, then c1, c0) to a JIS pair
 * stored through p2/p1, using the table set chosen by ms_ucs_map_f.
 * Several paths return 1 when no_best_fit_chars_f excludes the character.
 */
1766 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1768 const unsigned short *const *pp;
1769 const unsigned short *const *const *ppp;
/* The four tables below each hold 64 flags indexed by c1 & 0x3F; a nonzero
 * flag makes the lookups further down return 1. */
1770 static const char no_best_fit_chars_table_C2[] =
1771 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1772 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1773 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1774 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1775 static const char no_best_fit_chars_table_C2_ms[] =
1776 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1777 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1778 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1779 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1780 static const char no_best_fit_chars_table_932_C2[] =
1781 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1782 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1783 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1784 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1785 static const char no_best_fit_chars_table_932_C3[] =
1786 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1787 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1788 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1789 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
/* Two-byte sequences (lead byte below 0xE0). */
1795 }else if(c2 < 0xe0){
1796 if(no_best_fit_chars_f){
1797 if(ms_ucs_map_f == UCS_MAP_CP932){
1800 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1803 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1806 }else if(!cp932inv_f){
1809 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1812 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1815 }else if(ms_ucs_map_f == UCS_MAP_MS){
1816 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1817 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
/* The two-byte table set follows ms_ucs_map_f. */
1835 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1836 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1837 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1839 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
/* NOTE(review): the preceding branch classifies on the lead byte c2, but
 * this one tests c0 (the third byte), while ppp is indexed by c2 - 0xE0
 * further down.  It looks like c2 < 0xF0 was intended; a lead byte of
 * 0xF0 or above would index past the 3-byte table set - confirm that
 * callers never pass one. */
1840 }else if(c0 < 0xF0){
1841 if(no_best_fit_chars_f){
1842 if(ms_ucs_map_f == UCS_MAP_CP932){
1843 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1844 }else if(ms_ucs_map_f == UCS_MAP_MS){
1849 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1852 if(c0 == 0x92) return 1;
1857 if(c1 == 0x80 || c0 == 0x9C) return 1;
1860 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1865 if(c0 == 0x94) return 1;
1868 if(c0 == 0xBB) return 1;
1878 if(c0 == 0x95) return 1;
1881 if(c0 == 0xA5) return 1;
1888 if(c0 == 0x8D) return 1;
1891 if(c0 == 0x9E && !cp932inv_f) return 1;
1894 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
/* The three-byte table set follows ms_ucs_map_f; the row is picked by c2. */
1902 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1903 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1904 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1906 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1908 #ifdef SHIFTJIS_CP932
/* On success without cp932inv_f, a G3-tagged result is passed through
 * e2s_conv and then s2e_conv. */
1909 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1911 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1912 s2e_conv(s2, s1, p2, p1);
1921 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Look up the Unicode value for a (c2, c1) pair.  The table row is selected
 * from c2 (JIS X 0201 kana, G3-tagged, or ordinary) and from ms_ucs_map_f.
 */
1923 e2w_conv(nkf_char c2, nkf_char c1)
1925 const unsigned short *p;
/* JIS X 0201 kana use the one-byte table. */
1927 if (c2 == JIS_X_0201_1976_K) {
1928 if (ms_ucs_map_f == UCS_MAP_CP10001) {
1936 p = euc_to_utf8_1byte;
/* G3-tagged rows use the x0212 table. */
1938 } else if (is_eucg3(c2)){
/* Special case for 0x8F22/0x43 under the ASCII mapping. */
1939 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
/* Turn the row into a zero-based index and bounds-check it. */
1942 c2 = (c2&0x7f) - 0x21;
1943 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1944 p = x0212_to_utf8_2bytes[c2];
1950 c2 = (c2&0x7f) - 0x21;
1951 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1953 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
1954 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
1955 euc_to_utf8_2bytes_ms[c2];
/* Column index, also zero-based and bounds-checked. */
1960 c1 = (c1 & 0x7f) - 0x21;
1961 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
/*
 * Convert a UTF-8 sequence (c2, c1, c0) to a pair stored through p2/p1.
 * Lead bytes 0xc0..0xef go through unicode_to_jis_common.
 */
1968 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1975 }else if (0xc0 <= c2 && c2 <= 0xef) {
1976 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1977 #ifdef NUMCHAR_OPTION
/* With NUMCHAR_OPTION the decoded code point can be stored in *p1 as a
 * tagged Unicode value. */
1980 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1988 #ifdef UTF8_INPUT_ENABLE
/*
 * Convert the code point val to a pair stored through p2/p1.  BMP values
 * are encoded as UTF-8 and passed to unicode_to_jis_common; other paths
 * store val in *p1 as a tagged Unicode value.
 */
1990 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
1992 nkf_char c1, c2, c3, c4;
1999 else if (nkf_char_unicode_bmp_p(val)){
2000 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2001 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2004 *p1 = nkf_char_unicode_new(val);
2010 *p1 = nkf_char_unicode_new(val);
/*
 * Input converter taking (c2, c1, c0).  It distinguishes JIS X 0201 kana
 * (or SS2), the 0x8f three-byte prefix, control/Latin-1 values and
 * ordinary two-byte characters.
 */
2017 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2019 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
/* Under iso2022jp_f without x0201_f, kana is replaced by GETA1/GETA2. */
2020 if (iso2022jp_f && !x0201_f) {
2021 c2 = GETA1; c1 = GETA2;
2023 c2 = JIS_X_0201_1976_K;
2027 }else if (c2 == 0x8f){
/* Rows 0xF5..0xFE map linearly (94 cells per row) from 0xE3AC. */
2031 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2032 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2033 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
/* Fold the prefix and the 7-bit row into one value. */
2036 c2 = (c2 << 8) | (c1 & 0x7f);
2038 #ifdef SHIFTJIS_CP932
/* Pass the pair through e2s_conv and then s2e_conv. */
2041 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2042 s2e_conv(s2, s1, &c2, &c1);
2049 #endif /* SHIFTJIS_CP932 */
2051 #endif /* X0212_ENABLE */
2052 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
/* Two-byte rows 0xF5..0xFE map linearly from 0xE000. */
2055 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2056 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2057 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2062 #ifdef SHIFTJIS_CP932
/* Under cp51932_f, rows 0x79..0x7c also take the e2s/s2e round trip. */
2063 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2065 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2066 s2e_conv(s2, s1, &c2, &c1);
2073 #endif /* SHIFTJIS_CP932 */
/*
 * Input converter taking (c2, c1, c0).  It handles single-byte kana
 * (0xA1..0xDF), control values, lead bytes 0xF0..0xF9 and ordinary
 * two-byte characters, the last via s2e_conv.
 */
2081 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2083 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
/* Under iso2022jp_f without x0201_f, kana is replaced by GETA1/GETA2. */
2084 if (iso2022jp_f && !x0201_f) {
2085 c2 = GETA1; c1 = GETA2;
2089 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2091 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
/* 0x7F is not a usable trail byte; return 0 for it. */
2093 if(c1 == 0x7F) return 0;
/* Linear map from 0xE000 with 188 cells per lead byte; trail bytes above
 * 0x7E are shifted down by one to skip 0x7F. */
2094 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
/* A nonzero result from s2e_conv is returned to the caller. */
2097 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2098 if (ret) return ret;
/*
 * UTF-8 input converter.  Here c1 is the lead byte, followed by c2 and c3.
 * The lead byte is classified through w_iconv_utf8_1st_byte and the trail
 * bytes are range-checked for that class.
 */
2105 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2107 nkf_char ret = 0, c4 = 0;
/* Class codes, indexed by c1 - 0xC0. */
2108 static const char w_iconv_utf8_1st_byte[] =
2110 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2111 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2112 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2113 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2120 if (c1 < 0 || 0xff < c1) {
2121 }else if (c1 == 0) { /* 0 : 1 byte*/
2123 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2126 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2128 if (c2 < 0x80 || 0xBF < c2) return 0;
/* NOTE(review): the -1 and -2 results for c3 == 0 presumably ask the
 * caller for more input bytes - confirm against the call sites. */
2131 if (c3 == 0) return -1;
2132 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2137 if (c3 == 0) return -1;
2138 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2142 if (c3 == 0) return -1;
2143 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2147 if (c3 == 0) return -2;
2148 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2152 if (c3 == 0) return -2;
2153 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2157 if (c3 == 0) return -2;
2158 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2166 if (c1 == 0 || c1 == EOF){
/* Four-byte sequences become a tagged Unicode value; others go to w2e_conv. */
2167 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2168 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2171 ret = w2e_conv(c1, c2, c3, &c1, &c2);
/* Result code for a code point outside the accepted range.  The value is
 * parenthesized so the unary minus cannot combine with neighbouring
 * operators at the expansion site. */
#define NKF_ICONV_INVALID_CODE_RANGE (-13)
/*
 * Dispatch the code point wc.  Surrogates are rejected, values in the low
 * range go through w16e_conv, higher values become tagged Unicode values,
 * and everything else yields NKF_ICONV_INVALID_CODE_RANGE.
 */
2181 unicode_iconv(nkf_char wc)
/* wc >> 11 == 27 selects exactly 0xD800..0xDFFF. */
2189 }else if ((wc>>11) == 27) {
2190 /* unpaired surrogate */
2191 return NKF_ICONV_INVALID_CODE_RANGE;
/* NOTE(review): both bounds below use strict '<', so 0xFFFF falls through
 * to the next branch and 0x10FFFF (the highest Unicode code point) is
 * reported as invalid.  '<=' was probably intended - confirm. */
2192 }else if (wc < 0xFFFF) {
2193 ret = w16e_conv(wc, &c2, &c1);
2194 if (ret) return ret;
2195 }else if (wc < 0x10FFFF) {
2197 c1 = nkf_char_unicode_new(wc);
2199 return NKF_ICONV_INVALID_CODE_RANGE;
/* Sentinel results meaning the converter needs more input bytes.  Each
 * cast expression is parenthesized so the macro expands to one operand
 * (for example, sizeof applied to the macro now measures the whole value). */
#define NKF_ICONV_NEED_ONE_MORE_BYTE ((size_t)-1)
#define NKF_ICONV_NEED_TWO_MORE_BYTES ((size_t)-2)
/* Combine a UTF-16 surrogate pair into a code point.
 * 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000. */
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
/*
 * Assemble a code point from UTF-16 bytes c1..c4 according to input_endian
 * and pass it to unicode_iconv.  When the first unit is a high surrogate
 * but no low surrogate follows, NKF_ICONV_NEED_TWO_MORE_BYTES is returned.
 */
2209 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/* Big-endian: c1 and c3 are the high bytes of the two 16-bit units. */
2218 if (input_endian == ENDIAN_BIG) {
2219 if (0xD8 <= c1 && c1 <= 0xDB) {
2220 if (0xDC <= c3 && c3 <= 0xDF) {
2221 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2222 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
/* Otherwise c2 and c4 are the high bytes. */
2227 if (0xD8 <= c2 && c2 <= 0xDB) {
2228 if (0xDC <= c4 && c4 <= 0xDF) {
2229 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2230 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2236 return (*unicode_iconv)(wc);
/* Converter entry for UTF-16 input.  Its return value is deliberately
 * distinct from that of w_iconv32. */
2240 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
2243 return 16; /* different from w_iconv32 */
/* Converter entry for UTF-32 input.  Its return value is deliberately
 * distinct from that of w_iconv16. */
2247 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
2250 return 32; /* different from w_iconv16 */
/*
 * Assemble a code point from UTF-32 bytes c1..c4 in the byte order given by
 * input_endian and pass it to unicode_iconv.  An unhandled byte order yields
 * NKF_ICONV_INVALID_CODE_RANGE.
 */
2254 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/* Each case combines three of the four bytes; which byte is left out
 * depends on the byte order. */
2263 switch(input_endian){
2265 wc = c2 << 16 | c3 << 8 | c4;
2268 wc = c3 << 16 | c2 << 8 | c1;
2271 wc = c1 << 16 | c4 << 8 | c3;
2274 wc = c4 << 16 | c1 << 8 | c2;
2277 return NKF_ICONV_INVALID_CODE_RANGE;
2280 return (*unicode_iconv)(wc);
/* When output_mode is neither ASCII nor ISO_8859_1, emit ascii_intro
 * through o_putc and set output_mode to the requested mode. */
2284 #define output_ascii_escape_sequence(mode) do { \
2285 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2288 (*o_putc)(ascii_intro); \
2289 output_mode = mode; \
/* Emit the escape sequence that switches output to the given mode.  The
 * current output_mode is compared with the requested one first. */
2294 output_escape_sequence(int mode)
2296 if (output_mode == mode)
2304 case JIS_X_0201_1976_K:
2312 (*o_putc)(kanji_intro);
/*
 * Output converter for a (c2, c1) pair, emitting escape sequences as the
 * character class changes.  Tagged Unicode values are first mapped through
 * w16e_conv.
 */
2337 j_oconv(nkf_char c2, nkf_char c1)
2339 #ifdef NUMCHAR_OPTION
2340 if (c2 == 0 && nkf_char_unicode_p(c1)){
2341 w16e_conv(c1, &c2, &c1);
/* Still a Unicode value after conversion: no table mapping was found. */
2342 if (c2 == 0 && nkf_char_unicode_p(c1)){
2343 c2 = c1 & VALUE_MASK;
/* Values 0xE000..0xE757 are spread over rows of 94 cells from 0x7F. */
2344 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2347 c2 = 0x7F + c1 / 94;
2348 c1 = 0x21 + c1 % 94;
/* Otherwise hand the value to the fallback handler, if one is set. */
2350 if (encode_fallback) (*encode_fallback)(c1);
2357 output_ascii_escape_sequence(ASCII);
2360 else if (c2 == EOF) {
2361 output_ascii_escape_sequence(ASCII);
2364 else if (c2 == ISO_8859_1) {
2365 output_ascii_escape_sequence(ISO_8859_1);
2368 else if (c2 == JIS_X_0201_1976_K) {
2369 output_escape_sequence(JIS_X_0201_1976_K);
/* G3-tagged rows select JIS X 0213 plane 2 or JIS X 0212 by x0213_f. */
2372 } else if (is_eucg3(c2)){
2373 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2374 (*o_putc)(c2 & 0x7f);
/* Out-of-range pairs are dropped; the row limit is 0x92 in one
 * configuration and 0x7e in the other. */
2379 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2380 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2381 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
/*
 * Output converter for a (c2, c1) pair.  Two-byte characters are written
 * with the high bit set on both bytes.  Tagged Unicode values are first
 * mapped through w16e_conv.
 */
2388 e_oconv(nkf_char c2, nkf_char c1)
2390 if (c2 == 0 && nkf_char_unicode_p(c1)){
2391 w16e_conv(c1, &c2, &c1);
/* Still a Unicode value after conversion: no table mapping was found. */
2392 if (c2 == 0 && nkf_char_unicode_p(c1)){
2393 c2 = c1 & VALUE_MASK;
2394 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* The first ten rows are offset by 0x75, later rows by 0x8FEB. */
2398 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2399 c1 = 0x21 + c1 % 94;
2402 (*o_putc)((c2 & 0x7f) | 0x080);
2403 (*o_putc)(c1 | 0x080);
2405 (*o_putc)((c2 & 0x7f) | 0x080);
2406 (*o_putc)(c1 | 0x080);
/* Otherwise hand the value to the fallback handler, if one is set. */
2410 if (encode_fallback) (*encode_fallback)(c1);
2418 } else if (c2 == 0) {
2419 output_mode = ASCII;
/* JIS X 0201 kana: SS2 followed by the byte with its high bit set. */
2421 } else if (c2 == JIS_X_0201_1976_K) {
2422 output_mode = EUC_JP;
2423 (*o_putc)(SS2); (*o_putc)(c1|0x80);
2424 } else if (c2 == ISO_8859_1) {
2425 output_mode = ISO_8859_1;
2426 (*o_putc)(c1 | 0x080);
2428 } else if (is_eucg3(c2)){
2429 output_mode = EUC_JP;
2430 #ifdef SHIFTJIS_CP932
/* Pass the pair through e2s_conv and then s2e_conv. */
2433 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2434 s2e_conv(s2, s1, &c2, &c1);
2439 output_mode = ASCII;
2441 }else if (is_eucg3(c2)){
2444 (*o_putc)((c2 & 0x7f) | 0x080);
2445 (*o_putc)(c1 | 0x080);
2448 (*o_putc)((c2 & 0x7f) | 0x080);
2449 (*o_putc)(c1 | 0x080);
/* A non-graphic byte here means the converter choice was wrong; reset it. */
2453 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2454 set_iconv(FALSE, 0);
2455 return; /* too late to rescue this char */
2457 output_mode = EUC_JP;
2458 (*o_putc)(c2 | 0x080);
2459 (*o_putc)(c1 | 0x080);
/*
 * Output converter for a (c2, c1) pair; two-byte characters are converted
 * with e2s_conv.  Tagged Unicode values are first mapped through w16e_conv.
 */
2464 s_oconv(nkf_char c2, nkf_char c1)
2466 #ifdef NUMCHAR_OPTION
2467 if (c2 == 0 && nkf_char_unicode_p(c1)){
2468 w16e_conv(c1, &c2, &c1);
/* Still a Unicode value after conversion: no table mapping was found. */
2469 if (c2 == 0 && nkf_char_unicode_p(c1)){
2470 c2 = c1 & VALUE_MASK;
2471 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* 188 cells per lead byte; the base lead byte depends on cp932inv_f. */
2474 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
/* Trail bytes start at 0x40; cells above 0x3e are bumped by one. */
2476 c1 += 0x40 + (c1 > 0x3e);
/* Otherwise hand the value to the fallback handler, if one is set. */
2481 if(encode_fallback)(*encode_fallback)(c1);
2490 } else if (c2 == 0) {
2491 output_mode = ASCII;
2493 } else if (c2 == JIS_X_0201_1976_K) {
2494 output_mode = SHIFT_JIS;
2496 } else if (c2 == ISO_8859_1) {
2497 output_mode = ISO_8859_1;
2498 (*o_putc)(c1 | 0x080);
2500 } else if (is_eucg3(c2)){
2501 output_mode = SHIFT_JIS;
2502 if (e2s_conv(c2, c1, &c2, &c1) == 0){
/* A non-printable byte here means the converter choice was wrong; reset it. */
2508 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2509 set_iconv(FALSE, 0);
2510 return; /* too late to rescue this char */
2512 output_mode = SHIFT_JIS;
2513 e2s_conv(c2, c1, &c2, &c1);
2515 #ifdef SHIFTJIS_CP932
/* Lead bytes inside the cp932inv table range are looked up in cp932inv. */
2517 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2518 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2524 #endif /* SHIFTJIS_CP932 */
/* A nonzero prefix_table entry for the trail byte is emitted first. */
2527 if (prefix_table[(unsigned char)c1]){
2528 (*o_putc)(prefix_table[(unsigned char)c1]);
2534 #ifdef UTF8_OUTPUT_ENABLE
/*
 * UTF-8 output converter for a (c2, c1) pair.  The code point comes either
 * from a tagged Unicode value or from e2w_conv, and is then written as
 * UTF-8 bytes.
 */
2536 w_oconv(nkf_char c2, nkf_char c1)
/* The BOM flag is cleared so the BOM is handled only once. */
2542 output_bom_f = FALSE;
2553 if (c2 == 0 && nkf_char_unicode_p(c1)){
2554 val = c1 & VALUE_MASK;
2555 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
/* Second and later bytes are written only when nonzero. */
2557 if (c2) (*o_putc)(c2);
2558 if (c3) (*o_putc)(c3);
2559 if (c4) (*o_putc)(c4);
2566 val = e2w_conv(c2, c1);
2568 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2570 if (c2) (*o_putc)(c2);
2571 if (c3) (*o_putc)(c3);
2572 if (c4) (*o_putc)(c4);
/*
 * UTF-16 output converter for a (c2, c1) pair.  Byte order follows
 * output_endian.  Values outside the BMP are written as a surrogate pair.
 */
2578 w_oconv16(nkf_char c2, nkf_char c1)
2581 output_bom_f = FALSE;
2582 if (output_endian == ENDIAN_LITTLE){
2596 if (c2 == 0 && nkf_char_unicode_p(c1)) {
2597 if (nkf_char_unicode_bmp_p(c1)) {
2598 c2 = (c1 >> 8) & 0xff;
2602 if (c1 <= UNICODE_MAX) {
/* 0xD7C0 == 0xD800 - (0x10000 >> 10), which folds the 0x10000 offset
 * into the high surrogate. */
2603 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2604 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
/* Little-endian writes the low byte of each unit first. */
2605 if (output_endian == ENDIAN_LITTLE){
2606 (*o_putc)(c2 & 0xff);
2607 (*o_putc)((c2 >> 8) & 0xff);
2608 (*o_putc)(c1 & 0xff);
2609 (*o_putc)((c1 >> 8) & 0xff);
2611 (*o_putc)((c2 >> 8) & 0xff);
2612 (*o_putc)(c2 & 0xff);
2613 (*o_putc)((c1 >> 8) & 0xff);
2614 (*o_putc)(c1 & 0xff);
/* Characters that are not tagged Unicode values are mapped by e2w_conv. */
2620 nkf_char val = e2w_conv(c2, c1);
2621 c2 = (val >> 8) & 0xff;
2626 if (output_endian == ENDIAN_LITTLE){
/*
 * UTF-32 output converter for a (c2, c1) pair.  Byte order follows
 * output_endian.
 */
2636 w_oconv32(nkf_char c2, nkf_char c1)
2639 output_bom_f = FALSE;
2640 if (output_endian == ENDIAN_LITTLE){
2658 if (c2 == ISO_8859_1) {
2660 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
/* Characters that are not tagged Unicode values are mapped by e2w_conv. */
2663 c1 = e2w_conv(c2, c1);
/* Little-endian writes the least significant byte first. */
2666 if (output_endian == ENDIAN_LITTLE){
2667 (*o_putc)( c1 & 0xFF);
2668 (*o_putc)((c1 >> 8) & 0xFF);
2669 (*o_putc)((c1 >> 16) & 0xFF);
2673 (*o_putc)((c1 >> 16) & 0xFF);
2674 (*o_putc)((c1 >> 8) & 0xFF);
2675 (*o_putc)( c1 & 0xFF);
/* Score bit flags.  Each flag is the previous one shifted left by one, so
 * the values are 1, 2, 4, 8, 16, 32, 64 and 128 in the order listed. */
2680 #define SCORE_L2 (1) /* Kanji Level 2 */
2681 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2682 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */
2683 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2684 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2685 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* Undefined Characters */
2686 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2687 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
/* Initial score: only the MIME bit is set. */
2689 #define SCORE_INIT (SCORE_iMIME)
2691 static const nkf_char score_table_A0[] = {
2694 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2695 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
2698 static const nkf_char score_table_F0[] = {
2699 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2700 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
2701 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2702 SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/* OR the given score bits into ptr->score. */
2706 set_code_score(struct input_code *ptr, nkf_char score)
2709 ptr->score |= score;
/* Clear the given score bits from ptr->score. */
2714 clr_code_score(struct input_code *ptr, nkf_char score)
2717 ptr->score &= ~score;
/*
 * Add score bits to ptr according to the character held in ptr->buf.
 * The first matching condition in the chain decides which bit is set.
 */
2722 code_score(struct input_code *ptr)
2724 nkf_char c2 = ptr->buf[0];
2725 #ifdef UTF8_OUTPUT_ENABLE
2726 nkf_char c1 = ptr->buf[1];
2729 set_code_score(ptr, SCORE_ERROR);
2730 }else if (c2 == SS2){
2731 set_code_score(ptr, SCORE_KANA);
2732 }else if (c2 == 0x8f){
2733 set_code_score(ptr, SCORE_X0212);
2734 #ifdef UTF8_OUTPUT_ENABLE
/* A zero result from e2w_conv is scored as an undefined character. */
2735 }else if (!e2w_conv(c2, c1)){
2736 set_code_score(ptr, SCORE_NO_EXIST);
/* Rows 0x2x and 0x7x are scored from tables indexed by the low nibble. */
2738 }else if ((c2 & 0x70) == 0x20){
2739 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2740 }else if ((c2 & 0x70) == 0x70){
2741 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2742 }else if ((c2 & 0x70) >= 0x50){
2743 set_code_score(ptr, SCORE_L2);
/* Disable the candidate ptr.  If its converter is the active one, the
 * active converter is reset through set_iconv(FALSE, 0). */
2748 status_disable(struct input_code *ptr)
2753 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
/* Append c to ptr->buf and advance ptr->index.
 * NOTE(review): no bounds check is visible here; confirm callers never push
 * more bytes than buf can hold. */
2757 status_push_ch(struct input_code *ptr, nkf_char c)
2759 ptr->buf[ptr->index++] = c;
2763 status_clear(struct input_code *ptr)
/* Reset the candidate ptr; its score returns to SCORE_INIT. */
2770 status_reset(struct input_code *ptr)
2773 ptr->score = SCORE_INIT;
2777 status_reinit(struct input_code *ptr)
2780 ptr->_file_stat = 0;
/* Act on the candidate ptr when c is at most DEL and the input code is
 * already established. */
2784 status_check(struct input_code *ptr, nkf_char c)
2786 if (c <= DEL && estab_f){
/*
 * Feed one input byte c to the detection state of the candidate ptr.
 * Lead bytes are classified by range and pushed.  Trail bytes in
 * 0x40..0x7e or 0x80..0xfc complete a character, which is converted with
 * s2e_conv.  Other bytes disable the candidate.
 */
2792 s_status(struct input_code *ptr, nkf_char c)
2796 status_check(ptr, c);
2801 }else if (nkf_char_unicode_p(c)){
/* Single-byte kana: recorded as SS2 followed by the byte. */
2803 }else if (0xa1 <= c && c <= 0xdf){
2804 status_push_ch(ptr, SS2);
2805 status_push_ch(ptr, c);
/* Lead bytes 0x81..0x9f and 0xe0..0xea. */
2808 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2810 status_push_ch(ptr, c);
/* Lead bytes 0xed..0xee. */
2811 }else if (0xed <= c && c <= 0xee){
2813 status_push_ch(ptr, c);
2814 #ifdef SHIFTJIS_CP932
2815 }else if (is_ibmext_in_sjis(c)){
2817 status_push_ch(ptr, c);
2818 #endif /* SHIFTJIS_CP932 */
2820 }else if (0xf0 <= c && c <= 0xfc){
2822 status_push_ch(ptr, c);
2823 #endif /* X0212_ENABLE */
2825 status_disable(ptr);
/* Trail byte of an ordinary two-byte character. */
2829 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2830 status_push_ch(ptr, c);
2831 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2835 status_disable(ptr);
2839 #ifdef SHIFTJIS_CP932
/* Here a successful s2e_conv also adds SCORE_CP932. */
2840 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2841 status_push_ch(ptr, c);
2842 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2843 set_code_score(ptr, SCORE_CP932);
2848 #endif /* SHIFTJIS_CP932 */
2849 status_disable(ptr);
/* Here SCORE_CP932 is added regardless of the s2e_conv result. */
2852 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2853 status_push_ch(ptr, c);
2854 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2855 set_code_score(ptr, SCORE_CP932);
2858 status_disable(ptr);
/*
 * Feed one input byte c to the detection state of the candidate ptr.
 * Accepted lead bytes are SS2, 0xa1..0xfe and 0x8f.  Trail bytes must be in
 * 0xa1..0xfe.  Other bytes disable the candidate.
 */
2865 e_status(struct input_code *ptr, nkf_char c)
2869 status_check(ptr, c);
2874 }else if (nkf_char_unicode_p(c)){
2876 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2878 status_push_ch(ptr, c);
/* 0x8f introduces a three-byte character. */
2880 }else if (0x8f == c){
2882 status_push_ch(ptr, c);
2883 #endif /* X0212_ENABLE */
2885 status_disable(ptr);
2889 if (0xa1 <= c && c <= 0xfe){
2890 status_push_ch(ptr, c);
2894 status_disable(ptr);
2899 if (0xa1 <= c && c <= 0xfe){
2901 status_push_ch(ptr, c);
2903 status_disable(ptr);
2905 #endif /* X0212_ENABLE */
2909 #ifdef UTF8_INPUT_ENABLE
/*
 * Feed one input byte c to the detection state of the candidate ptr.
 * Lead bytes 0xc0..0xdf, 0xe0..0xef and 0xf0..0xf4 are accepted.  Trail
 * bytes must be in 0x80..0xbf.  Other bytes disable the candidate.
 */
2911 w_status(struct input_code *ptr, nkf_char c)
2915 status_check(ptr, c);
2920 }else if (nkf_char_unicode_p(c)){
2922 }else if (0xc0 <= c && c <= 0xdf){
2924 status_push_ch(ptr, c);
2925 }else if (0xe0 <= c && c <= 0xef){
2927 status_push_ch(ptr, c);
2928 }else if (0xf0 <= c && c <= 0xf4){
2930 status_push_ch(ptr, c);
2932 status_disable(ptr);
2937 if (0x80 <= c && c <= 0xbf){
2938 status_push_ch(ptr, c);
/* Once enough bytes are buffered, note whether they form the BOM
 * (EF BB BF) and convert them with w2e_conv. */
2939 if (ptr->index > ptr->stat){
2940 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2941 && ptr->buf[2] == 0xbf);
2942 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2943 &ptr->buf[0], &ptr->buf[1]);
2950 status_disable(ptr);
2954 if (0x80 <= c && c <= 0xbf){
/* Trail bytes are stored only while index is below stat. */
2955 if (ptr->index < ptr->stat){
2956 status_push_ch(ptr, c);
2961 status_disable(ptr);
/*
 * Feed the input byte c to every candidate in input_code_list that has a
 * status_func.  If a candidate is selected and the code is not yet
 * established, its converter is installed with set_iconv.
 */
2969 code_status(nkf_char c)
2971 int action_flag = 1;
2972 struct input_code *result = 0;
2973 struct input_code *p = input_code_list;
2975 if (!p->status_func) {
2979 if (!p->status_func)
2981 (p->status_func)(p, c);
2984 }else if(p->stat == 0){
2995 if (result && !estab_f){
2996 set_iconv(TRUE, result->iconv_func);
/* Otherwise, for bytes up to DEL, the candidate list is walked again. */
2997 }else if (c <= DEL){
2998 struct input_code *ptr = input_code_list;
3008 nkf_buf_t *std_gc_buf;
3009 nkf_char broken_state;
3010 nkf_buf_t *broken_buf;
3011 nkf_char mimeout_state;
3015 static nkf_state_t *nkf_state = NULL;
3017 #define STD_GC_BUFSIZE (256)
/*
 * Prepare the global nkf_state.  One path clears the three existing
 * buffers; the other allocates the state and creates the buffers.  The two
 * state fields are then zeroed.
 */
3020 nkf_state_init(void)
3023 nkf_buf_clear(nkf_state->std_gc_buf);
3024 nkf_buf_clear(nkf_state->broken_buf);
3025 nkf_buf_clear(nkf_state->nfc_buf);
/* Allocation path; buffer capacities are STD_GC_BUFSIZE, 3 and 9. */
3028 nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3029 nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3030 nkf_state->broken_buf = nkf_buf_new(3);
3031 nkf_state->nfc_buf = nkf_buf_new(9);
3033 nkf_state->broken_state = 0;
3034 nkf_state->mimeout_state = 0;
3041 if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3042 return nkf_buf_pop(nkf_state->std_gc_buf);
/* Push c back by storing it in nkf_state->std_gc_buf. */
3049 std_ungetc(nkf_char c, FILE *f)
3051 nkf_buf_push(nkf_state->std_gc_buf, c);
3057 std_putc(nkf_char c)
3064 static nkf_char hold_buf[HOLD_SIZE*2];
3065 static int hold_count = 0;
/*
 * Append c2 to hold_buf, whose capacity is HOLD_SIZE*2 entries.
 * Returns EOF when the buffer is full after the push, otherwise the new
 * element count.
 */
3067 push_hold_buf(nkf_char c2)
/* Guard against pushing into an already full buffer. */
3069 if (hold_count >= HOLD_SIZE*2)
3071 hold_buf[hold_count++] = c2;
3072 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
/*
 * Hold input bytes in hold_buf while the input code is undecided, then
 * replay the held bytes through the selected converter.
 */
3076 h_conv(FILE *f, nkf_char c1, nkf_char c2)
3082 /** it must NOT be in the kanji shift sequence */
3083 /** it must NOT be written in JIS7 */
3084 /** and it must be after 2 byte 8bit code */
/* Read and hold bytes until EOF, the buffer fills, or the code is established. */
3090 while ((c2 = (*i_getc)(f)) != EOF) {
3096 if (push_hold_buf(c2) == EOF || estab_f) {
/* Choose the candidate with the lowest score among those that have a
 * status_func. */
3102 struct input_code *p = input_code_list;
3103 struct input_code *result = p;
3108 if (p->status_func && p->score < result->score) {
3113 set_iconv(TRUE, result->iconv_func);
3118 ** 1) EOF is detected, or
3119 ** 2) Code is established, or
3120 ** 3) Buffer is FULL (but last word is pushed)
3122 ** in 1) and 3) cases, we continue to use
3123 ** Kanji codes by oconv and leave estab_f unchanged.
/* Replay the held bytes. */
3128 while (hold_index < hold_count){
3129 c1 = hold_buf[hold_index++];
3130 if (nkf_char_unicode_p(c1)) {
3134 else if (c1 <= DEL){
/* Under s_iconv, 0xa1..0xdf is single-byte kana. */
3137 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3138 (*iconv)(JIS_X_0201_1976_K, c1, 0);
3141 if (hold_index < hold_count){
3142 c2 = hold_buf[hold_index++];
/* The converter result selects whether further bytes (c3, c4) are needed;
 * they come from hold_buf first and from the input stream otherwise. */
3152 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3155 if (hold_index < hold_count){
3156 c3 = hold_buf[hold_index++];
3157 } else if ((c3 = (*i_getc)(f)) == EOF) {
3162 if (hold_index < hold_count){
3163 c4 = hold_buf[hold_index++];
3164 } else if ((c4 = (*i_getc)(f)) == EOF) {
/* The third and fourth bytes are packed into the last argument. */
3169 (*iconv)(c1, c2, (c3<<8)|c4);
3172 /* 3 bytes EUC or UTF-8 */
3173 if (hold_index < hold_count){
3174 c3 = hold_buf[hold_index++];
3175 } else if ((c3 = (*i_getc)(f)) == EOF) {
3181 (*iconv)(c1, c2, c3);
3184 if (c3 == EOF) break;
3190 * Check and Ignore BOM
3196 switch(c2 = (*i_getc)(f)){
3198 if((c2 = (*i_getc)(f)) == 0x00){
3199 if((c2 = (*i_getc)(f)) == 0xFE){
3200 if((c2 = (*i_getc)(f)) == 0xFF){
3201 if(!input_encoding){
3202 set_iconv(TRUE, w_iconv32);
3204 if (iconv == w_iconv32) {
3205 input_endian = ENDIAN_BIG;
3208 (*i_ungetc)(0xFF,f);
3209 }else (*i_ungetc)(c2,f);
3210 (*i_ungetc)(0xFE,f);
3211 }else if(c2 == 0xFF){
3212 if((c2 = (*i_getc)(f)) == 0xFE){
3213 if(!input_encoding){
3214 set_iconv(TRUE, w_iconv32);
3216 if (iconv == w_iconv32) {
3217 input_endian = ENDIAN_2143;
3220 (*i_ungetc)(0xFF,f);
3221 }else (*i_ungetc)(c2,f);
3222 (*i_ungetc)(0xFF,f);
3223 }else (*i_ungetc)(c2,f);
3224 (*i_ungetc)(0x00,f);
3225 }else (*i_ungetc)(c2,f);
3226 (*i_ungetc)(0x00,f);
3229 if((c2 = (*i_getc)(f)) == 0xBB){
3230 if((c2 = (*i_getc)(f)) == 0xBF){
3231 if(!input_encoding){
3232 set_iconv(TRUE, w_iconv);
3234 if (iconv == w_iconv) {
3237 (*i_ungetc)(0xBF,f);
3238 }else (*i_ungetc)(c2,f);
3239 (*i_ungetc)(0xBB,f);
3240 }else (*i_ungetc)(c2,f);
3241 (*i_ungetc)(0xEF,f);
3244 if((c2 = (*i_getc)(f)) == 0xFF){
3245 if((c2 = (*i_getc)(f)) == 0x00){
3246 if((c2 = (*i_getc)(f)) == 0x00){
3247 if(!input_encoding){
3248 set_iconv(TRUE, w_iconv32);
3250 if (iconv == w_iconv32) {
3251 input_endian = ENDIAN_3412;
3254 (*i_ungetc)(0x00,f);
3255 }else (*i_ungetc)(c2,f);
3256 (*i_ungetc)(0x00,f);
3257 }else (*i_ungetc)(c2,f);
3258 if(!input_encoding){
3259 set_iconv(TRUE, w_iconv16);
3261 if (iconv == w_iconv16) {
3262 input_endian = ENDIAN_BIG;
3265 (*i_ungetc)(0xFF,f);
3266 }else (*i_ungetc)(c2,f);
3267 (*i_ungetc)(0xFE,f);
3270 if((c2 = (*i_getc)(f)) == 0xFE){
3271 if((c2 = (*i_getc)(f)) == 0x00){
3272 if((c2 = (*i_getc)(f)) == 0x00){
3273 if(!input_encoding){
3274 set_iconv(TRUE, w_iconv32);
3276 if (iconv == w_iconv32) {
3277 input_endian = ENDIAN_LITTLE;
3280 (*i_ungetc)(0x00,f);
3281 }else (*i_ungetc)(c2,f);
3282 (*i_ungetc)(0x00,f);
3283 }else (*i_ungetc)(c2,f);
3284 if(!input_encoding){
3285 set_iconv(TRUE, w_iconv16);
3287 if (iconv == w_iconv16) {
3288 input_endian = ENDIAN_LITTLE;
3291 (*i_ungetc)(0xFE,f);
3292 }else (*i_ungetc)(c2,f);
3293 (*i_ungetc)(0xFF,f);
/*
 * Read one character, returning characters queued in
 * nkf_state->broken_buf first.  A '$' followed by '@' or 'B', or a '('
 * followed by 'J' or 'B', is recognized when the previous state is not ESC
 * and input_mode matches.
 * NOTE(review): presumably this restores escape sequences whose ESC byte
 * was lost - confirm.
 */
3302 broken_getc(FILE *f)
3306 if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3307 return nkf_buf_pop(nkf_state->broken_buf);
3310 if (c=='$' && nkf_state->broken_state != ESC
3311 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3313 nkf_state->broken_state = 0;
/* Queue the two characters so later reads return them. */
3314 if (c1=='@'|| c1=='B') {
3315 nkf_buf_push(nkf_state->broken_buf, c1);
3316 nkf_buf_push(nkf_state->broken_buf, c);
3322 } else if (c=='(' && nkf_state->broken_state != ESC
3323 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3325 nkf_state->broken_state = 0;
3326 if (c1=='J'|| c1=='B') {
3327 nkf_buf_push(nkf_state->broken_buf, c1);
3328 nkf_buf_push(nkf_state->broken_buf, c);
/* Remember the character for the ESC test on the next call. */
3335 nkf_state->broken_state = c;
/* Push c back into nkf_state->broken_buf.  The push is skipped when the
 * buffer already holds two or more characters. */
3341 broken_ungetc(nkf_char c, FILE *f)
3343 if (nkf_buf_length(nkf_state->broken_buf) < 2)
3344 nkf_buf_push(nkf_state->broken_buf, c);
/*
 * End-of-line filter for a (c2, c1) pair.  With guess_f it records the
 * input line-ending style in input_eol, where EOF marks a mixture of
 * styles.  Line endings are then re-emitted according to eolmode_f, and
 * other characters pass to o_eol_conv.
 */
3349 eol_conv(nkf_char c2, nkf_char c1)
3351 if (guess_f && input_eol != EOF) {
/* An LF is CRLF if a CR came just before it, plain LF otherwise. */
3352 if (c2 == 0 && c1 == LF) {
3353 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3354 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3355 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3357 else if (!input_eol) input_eol = CR;
3358 else if (input_eol != CR) input_eol = EOF;
/* Emit the configured line ending: CR unless eolmode_f is LF, and LF
 * unless eolmode_f is CR. */
3360 if (prev_cr || (c2 == 0 && c1 == LF)) {
3362 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3363 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
/* A CR is held back in prev_cr; anything other than LF is passed through. */
3365 if (c2 == 0 && c1 == CR) prev_cr = CR;
3366 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3370 put_newline(void (*func)(nkf_char))
3372 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3387 oconv_newline(void (*func)(nkf_char, nkf_char))
3389 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3404 Return value of fold_conv()
3406 LF add newline and output char
3407 CR add newline and output nothing
3410 1 (or else) normal output
3412 fold state in prev (previous character)
3414 >0x80 Japanese (X0208/X0201)
3419 This fold algorithm does not preserve heading space in a line.
3420 This is the main difference from fmt.
/* Column width of a character: 2 when c2 is nonzero, otherwise 1.  c1 is
 * accepted for symmetry with callers but not used.  The argument is
 * parenthesized so an expression such as an assignment is evaluated as a
 * whole before the test. */
#define char_size(c2,c1) ((c2)?2:1)
3426 fold_conv(nkf_char c2, nkf_char c1)
3429 nkf_char fold_state;
3431 if (c1== CR && !fold_preserve_f) {
3432 fold_state=0; /* ignore cr */
3433 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3435 fold_state=0; /* ignore cr */
3436 } else if (c1== BS) {
3437 if (f_line>0) f_line--;
3439 } else if (c2==EOF && f_line != 0) { /* close open last line */
3441 } else if ((c1==LF && !fold_preserve_f)
3442 || ((c1==CR||(c1==LF&&f_prev!=CR))
3443 && fold_preserve_f)) {
3445 if (fold_preserve_f) {
3449 } else if ((f_prev == c1 && !fold_preserve_f)
3450 || (f_prev == LF && fold_preserve_f)
3451 ) { /* duplicate newline */
3454 fold_state = LF; /* output two newline */
3460 if (f_prev&0x80) { /* Japanese? */
3462 fold_state = 0; /* ignore given single newline */
3463 } else if (f_prev==SP) {
3467 if (++f_line<=fold_len)
3471 fold_state = CR; /* fold and output nothing */
3475 } else if (c1=='\f') {
3478 fold_state = LF; /* output newline and clear */
3479 } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
3480 /* X0208 kankaku or ascii space */
3482 fold_state = 0; /* remove duplicate spaces */
3485 if (++f_line<=fold_len)
3486 fold_state = SP; /* output ASCII space only */
3488 f_prev = SP; f_line = 0;
3489 fold_state = CR; /* fold and output nothing */
3493 prev0 = f_prev; /* we still need this one... , but almost done */
3495 if (c2 || c2 == JIS_X_0201_1976_K)
3496 f_prev |= 0x80; /* this is Japanese */
3497 f_line += char_size(c2,c1);
3498 if (f_line<=fold_len) { /* normal case */
3501 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3502 f_line = char_size(c2,c1);
3503 fold_state = LF; /* We can't wait, do fold now */
3504 } else if (c2 == JIS_X_0201_1976_K) {
3505 /* simple kinsoku rules return 1 means no folding */
3506 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3507 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3508 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3509 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3510 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3511 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3512 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3514 fold_state = LF;/* add one new f_line before this character */
3517 fold_state = LF;/* add one new f_line before this character */
3520 /* kinsoku point in ASCII */
3521 if ( c1==')'|| /* { [ ( */
3532 /* just after special */
3533 } else if (!is_alnum(prev0)) {
3534 f_line = char_size(c2,c1);
3536 } else if ((prev0==SP) || /* ignored new f_line */
3537 (prev0==LF)|| /* ignored new f_line */
3538 (prev0&0x80)) { /* X0208 - ASCII */
3539 f_line = char_size(c2,c1);
3540 fold_state = LF;/* add one new f_line before this character */
3542 fold_state = 1; /* default no fold in ASCII */
3546 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3547 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3548 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3549 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3550 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3551 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3552 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3553 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3554 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3555 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3556 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3557 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3558 /* default no fold in kinsoku */
3561 f_line = char_size(c2,c1);
3562 /* add one new f_line before this character */
3565 f_line = char_size(c2,c1);
3567 /* add one new f_line before this character */
3572 /* terminator process */
3573 switch(fold_state) {
3575 oconv_newline(o_fconv);
3581 oconv_newline(o_fconv);
/* Previous character held back by z_conv while it waits to see whether a
 * voiced / semi-voiced sound mark follows. */
3592 static nkf_char z_prev2=0,z_prev1=0;
/*
 * z_conv: width / kana conversion filter; results go to o_zconv.
 * Sections visible here:
 *   - JIS X 0201 kana input: a kana that has a voiced (dv) or semi-voiced
 *     (ev) form is held in z_prev1 until the next character arrives, then
 *     emitted through the dv, ev or plain cv table.
 *   - alpha_f bit 1: JIS X 0208 alphabet / symbol handling.
 *   - alpha_f bit 8: the characters > < " & in single-byte input are
 *     written out as HTML character entities, one byte at a time.
 *   - JIS X 0208 katakana row (c2 == 0x25) to JIS X 0201 kana through the
 *     fullwidth_to_halfwidth table.
 */
3595 z_conv(nkf_char c2, nkf_char c1)
3598 /* if (c2) c1 &= 0x7f; assertion */
3600 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3606 if (z_prev2 == JIS_X_0201_1976_K) {
3607 if (c2 == JIS_X_0201_1976_K) {
3608 if (c1 == (0xde&0x7f)) { /* dakuten (voiced sound mark) */
3610 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3612 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* handakuten (semi-voiced sound mark) */
3614 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3619 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3621 if (c2 == JIS_X_0201_1976_K) {
3622 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3623 /* wait for dakuten or handakuten */
3628 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3639 if (alpha_f&1 && c2 == 0x23) {
3640 /* JISX0208 Alphabet */
3642 } else if (c2 == 0x21) {
3643 /* JISX0208 Kigou */
3648 } else if (alpha_f&4) {
3653 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3659 if (alpha_f&8 && c2 == 0) {
3661 const char *entity = 0;
/* Each special character is replaced by its HTML entity; the literal must
 * be the entity text, not the character itself. */
3663 case '>': entity = "&gt;"; break;
3664 case '<': entity = "&lt;"; break;
3665 case '\"': entity = "&quot;"; break;
3666 case '&': entity = "&amp;"; break;
3669 while (*entity) (*o_zconv)(0, *entity++);
3675 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3680 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3684 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3688 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3692 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3696 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3700 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3704 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3708 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3713 (*o_zconv)(JIS_X_0201_1976_K, c);
3716 } else if (c2 == 0x25) {
3717 /* JISX0208 Katakana */
/* Indexed by c1-0x20. The high byte is the halfwidth kana code; a non-zero
 * low byte (0x5E or 0x5F, i.e. 0xDE&0x7F / 0xDF&0x7F) is the sound mark
 * emitted after it. A zero entry means no halfwidth form. */
3718 static const int fullwidth_to_halfwidth[] =
3720 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3721 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3722 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3723 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3724 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3725 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3726 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3727 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3728 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3729 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3730 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3731 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3733 if (fullwidth_to_halfwidth[c1-0x20]){
3734 c2 = fullwidth_to_halfwidth[c1-0x20];
3735 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3737 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
/*
 * rot13(c) / rot47(c): rotation ciphers written as conditional chains.
 * The visible arms of rot13 add 13 up to 'M' / 'm' and subtract 13 up to
 * 'Z' / 'z'; rot47 adds 47 up to 'O' and subtracts 47 up to '~'.
 * The lower-bound tests and the fall-through arm are not in the visible
 * lines.
 *
 * rot_conv: output filter that forwards (c2, c1) to o_rot_conv. The
 * single-byte case (c2 is 0, JIS_X_0201_1976_K or ISO_8859_1) is tested
 * separately from the two-byte case.
 */
3747 #define rot13(c) ( \
3749 (c <= 'M') ? (c + 13): \
3750 (c <= 'Z') ? (c - 13): \
3752 (c <= 'm') ? (c + 13): \
3753 (c <= 'z') ? (c - 13): \
3757 #define rot47(c) ( \
3759 ( c <= 'O') ? (c + 47) : \
3760 ( c <= '~') ? (c - 47) : \
3765 rot_conv(nkf_char c2, nkf_char c1)
3767 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3773 (*o_rot_conv)(c2,c1);
/*
 * hira_conv: hiragana / katakana conversion filter; output goes to
 * o_hira_conv. Two groups of tests are visible:
 *   - second bytes 0x21..0x73, second byte 0x74 (replaced by U+3094 when
 *     the output encoding is Unicode), and row 0x21 cells 0x33 / 0x34;
 *   - the opposite direction: U+3094, row 0x24 second bytes 0x21..0x73,
 *     and row 0x21 cells 0x35 / 0x36.
 * NOTE(review): the flag tests selecting a direction and the assignments
 * that rewrite c2 / c1 are not in the visible lines.
 */
3777 hira_conv(nkf_char c2, nkf_char c1)
3781 if (0x20 < c1 && c1 < 0x74) {
3783 (*o_hira_conv)(c2,c1);
3785 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3787 c1 = nkf_char_unicode_new(0x3094);
3788 (*o_hira_conv)(c2,c1);
3791 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3793 (*o_hira_conv)(c2,c1);
3798 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3801 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3803 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3807 (*o_hira_conv)(c2,c1);
/*
 * iso2022jp_check_conv: output filter that screens (c2, c1) before passing
 * it to o_iso2022jp_check_conv. The visible tests cover:
 *   - c2 in 0x00..0x20 with c1 in 0x7f..0xff,
 *   - c2 in rows 0x29..0x2f or 0x75..0x7e,
 *   - a scan of the range[] table (RANGE_NUM_MAX start/end pairs) for a
 *     pair that contains c.
 * NOTE(review): what is substituted when a test matches, and how c is
 * formed, are not in the visible lines.
 */
3812 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
3814 #define RANGE_NUM_MAX 18
3815 static const nkf_char range[RANGE_NUM_MAX][2] = {
3836 nkf_char start, end, c;
3838 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3842 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3847 for (i = 0; i < RANGE_NUM_MAX; i++) {
3848 start = range[i][0];
3851 if (c >= start && c <= end) {
3856 (*o_iso2022jp_check_conv)(c2,c1);
/*
 * MIME encoded-word tables. The four arrays are parallel: the same index
 * selects the prefix text (mime_pattern), the input converter given
 * priority when that prefix is seen (mime_priority_func), the output mode
 * the prefix stands for (mime_encode) and the transfer encoding letter
 * (mime_encode_method). "\075" is the '=' character.
 */
3860 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3862 static const unsigned char *mime_pattern[] = {
3863 (const unsigned char *)"\075?EUC-JP?B?",
3864 (const unsigned char *)"\075?SHIFT_JIS?B?",
3865 (const unsigned char *)"\075?ISO-8859-1?Q?",
3866 (const unsigned char *)"\075?ISO-8859-1?B?",
3867 (const unsigned char *)"\075?ISO-2022-JP?B?",
3868 (const unsigned char *)"\075?ISO-2022-JP?B?",
3869 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3870 #if defined(UTF8_INPUT_ENABLE)
3871 (const unsigned char *)"\075?UTF-8?B?",
3872 (const unsigned char *)"\075?UTF-8?Q?",
3874 (const unsigned char *)"\075?US-ASCII?Q?",
3879 /* marker used to raise the priority of the matching encoding */
3880 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
3881 e_iconv, s_iconv, 0, 0, 0, 0,
3882 #if defined(UTF8_INPUT_ENABLE)
3888 static const nkf_char mime_encode[] = {
3889 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
3890 #if defined(UTF8_INPUT_ENABLE)
3897 static const nkf_char mime_encode_method[] = {
3898 'B', 'B','Q', 'B', 'B', 'B', 'Q',
3899 #if defined(UTF8_INPUT_ENABLE)
/*
 * Input-side MIME ring buffer. Indices are free-running unsigned counters;
 * mime_input_buf(n) masks them with MIME_BUF_MASK, so MIME_BUF_SIZE must
 * stay a power of two.
 */
3907 /* MIME preprocessor fifo */
3909 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3910 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
3911 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
3913 unsigned char buf[MIME_BUF_SIZE];
3915 unsigned int last; /* decoded */
3916 unsigned int input; /* undecoded */
/* Converter that was active before a MIME word switched it; put back by
 * unswitch_mime_getc. */
3918 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
/* Size of the recovery buffer in mime_begin_strict and the scan limit in
 * the non-strict preamble reader. */
3920 #define MAXRECOVER 20
/* mime_input_buf_unshift: push one byte back onto the front of the ring
 * buffer by stepping the top index backwards. */
3923 mime_input_buf_unshift(nkf_char c)
3925 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
/* mime_ungetc: ungetc counterpart of mime_getc; the byte goes back into
 * the ring buffer, and the FILE argument is not used in the visible line. */
3929 mime_ungetc(nkf_char c, FILE *f)
3931 mime_input_buf_unshift(c);
/* mime_ungetc_buf: push back one undecoded byte, either through the saved
 * i_mungetc_buf hook or by stepping the ring buffer's input index
 * backwards. NOTE(review): the test choosing between the two is not in the
 * visible lines -- presumably mimebuf_f, as in mime_getc_buf. */
3936 mime_ungetc_buf(nkf_char c, FILE *f)
3939 (*i_mungetc_buf)(c,f);
3941 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
/* mime_getc_buf: fetch the next undecoded byte -- from the saved
 * i_mgetc_buf hook when mimebuf_f is set, otherwise from the ring buffer at
 * the input index. */
3946 mime_getc_buf(FILE *f)
3948 /* we don't keep eof of mime_input_buf, because it contains ?= as
3949 a terminator. It was checked in mime_integrity. */
3950 return ((mimebuf_f)?
3951 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
/* switch_mime_getc: install mime_getc / mime_ungetc as the active input
 * hooks, saving the previous pair in i_mgetc / i_mungetc. In STRICT_MIME
 * mode a second layer is stacked: the saved pair moves to the *_buf hooks
 * and the ring-buffer readers take their place. Does nothing when mime_getc
 * is already installed. */
3955 switch_mime_getc(void)
3957 if (i_getc!=mime_getc) {
3958 i_mgetc = i_getc; i_getc = mime_getc;
3959 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3960 if(mime_f==STRICT_MIME) {
3961 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3962 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
/* unswitch_mime_getc: undo switch_mime_getc -- unstack the STRICT_MIME
 * layer, put the saved unget hook back, and restore the converter saved in
 * mime_iconv_back (if any) before clearing it. */
3968 unswitch_mime_getc(void)
3970 if(mime_f==STRICT_MIME) {
3971 i_mgetc = i_mgetc_buf;
3972 i_mungetc = i_mungetc_buf;
3975 i_ungetc = i_mungetc;
3976 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3977 mime_iconv_back = NULL;
/*
 * mime_integrity: pre-read an encoded word into the ring buffer to check
 * that it is properly terminated before decoding starts.
 * The header text p is copied into the buffer first. Input is then read
 * until EOF, the buffer is full, or the closing "?=" is seen. On "?=" the
 * input index is rewound to q (just after the header) so decoding starts
 * there. A character outside the Base64 / terminator set ends the scan.
 * If the word turns out to be incomplete, mime_decode_mode is set to 1 so
 * the buffered bytes are replayed without decoding.
 */
3981 mime_integrity(FILE *f, const unsigned char *p)
3985 /* In buffered mode, read until =? or NL or buffer full
3987 mime_input_state.input = mime_input_state.top;
3988 mime_input_state.last = mime_input_state.top;
3990 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
3992 q = mime_input_state.input;
3993 while((c=(*i_getc)(f))!=EOF) {
3994 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3995 break; /* buffer full */
3997 if (c=='=' && d=='?') {
3998 /* checked. skip header, start decode */
3999 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4000 /* mime_last_input = mime_input_state.input; */
4001 mime_input_state.input = q;
4005 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4007 /* Should we check length mod 4? */
4008 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4011 /* In case of Incomplete MIME, no MIME decode */
4012 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4013 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
4014 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
4015 switch_mime_getc(); /* anyway we need buffered getc */
/*
 * mime_begin_strict: called after "=?" has been read; matches the following
 * input against mime_pattern[] without regard to case (nkf_toupper).
 * When a character mismatches, later patterns sharing the prefix matched so
 * far are tried. Characters read are kept in r[] so they can be replayed if
 * every pattern fails.
 * On a match, mime_decode_mode takes the letter two positions before the
 * end of the pattern ('B' or 'Q'). The current converter is saved in
 * mime_iconv_back, the pattern's priority converter is installed, and the
 * result of mime_integrity() is returned.
 */
4020 mime_begin_strict(FILE *f)
4024 const unsigned char *p,*q;
4025 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4027 mime_decode_mode = FALSE;
4028 /* =? has been checked */
4030 p = mime_pattern[j];
4033 for(i=2;p[i]>SP;i++) { /* start at =? */
4034 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
4035 /* pattern fails, try next one */
4037 while (mime_pattern[++j]) {
4038 p = mime_pattern[j];
4039 for(k=2;k<i;k++) /* assume length(p) > i */
4040 if (p[k]!=q[k]) break;
4041 if (k==i && nkf_toupper(c1)==p[k]) break;
4043 p = mime_pattern[j];
4044 if (p) continue; /* found next one, continue */
4045 /* all fails, output from recovery buffer */
4053 mime_decode_mode = p[i-2];
4055 mime_iconv_back = iconv;
4056 set_iconv(FALSE, mime_priority_func[j]);
4057 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4059 if (mime_decode_mode=='B') {
4060 mimebuf_f = unbuf_f;
4062 /* do MIME integrity check */
4063 return mime_integrity(f,mime_pattern[j]);
/*
 * Non-strict MIME preamble reader (function header not in the visible
 * lines). Everything read is also stored in the ring buffer starting at
 * index k, so that a false preamble can be replayed as ordinary input.
 * Up to MAXRECOVER characters of charset name are accepted, then a 'B'/'b'
 * or 'Q'/'q' encoding letter sets mime_decode_mode. On success the buffered
 * preamble is discarded by resetting last to k.
 */
4077 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4078 /* re-read and convert again from mime_buffer. */
4080 /* =? has been checked */
4081 k = mime_input_state.last;
4082 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
4083 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4084 /* We accept any character type even if it is broken by newlines */
4085 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4086 if (c1==LF||c1==SP||c1==CR||
4087 c1=='-'||c1=='_'||is_alnum(c1)) continue;
4089 /* Failed. But this could be another MIME preamble */
4091 mime_input_state.last--;
4097 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4098 if (!(++i<MAXRECOVER) || c1==EOF) break;
4099 if (c1=='b'||c1=='B') {
4100 mime_decode_mode = 'B';
4101 } else if (c1=='q'||c1=='Q') {
4102 mime_decode_mode = 'Q';
4106 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4107 if (!(++i<MAXRECOVER) || c1==EOF) break;
4109 mime_decode_mode = FALSE;
4115 if (!mime_decode_mode) {
4116 /* false MIME preamble, restart from mime_buffer */
4117 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4118 /* Since we are in MIME mode until buffer becomes empty, */
4119 /* we never go into mime_begin again for a while. */
4122 /* discard mime preamble, and goto MIME mode */
4123 mime_input_state.last = k;
4124 /* do no MIME integrity check */
4125 return c1; /* used only for checking EOF */
/* debug: write str and a newline to stderr; a null pointer is printed as
 * the text "NULL". */
4136 debug(const char *str)
4139 fprintf(stderr, "%s\n", str ? str : "NULL");
/* set_input_codename: record the detected input encoding name. The first
 * call stores it; a later call with a different name replaces it with the
 * empty string, which get_guessed_code reports as "BINARY". */
4145 set_input_codename(const char *codename)
4147 if (!input_codename) {
4148 input_codename = codename;
4149 } else if (strcmp(codename, input_codename) != 0) {
4150 input_codename = "";
/*
 * get_guessed_code: finalise and return the guessed input encoding name.
 *   - empty name (conflicting detections) -> "BINARY"
 *   - no name at all                      -> "ASCII"
 *   - Shift_JIS / EUC-JP / ISO-2022-JP are refined to a vendor variant
 *     (CP932, EUCJP-MS, CP51932, CP50221, CP50220) according to the score
 *     bits of the active converter's input_code entry.
 * The refined name is stored back in input_codename.
 */
4155 get_guessed_code(void)
4157 if (input_codename && !*input_codename) {
4158 input_codename = "BINARY";
4160 struct input_code *p = find_inputcode_byfunc(iconv);
4161 if (!input_codename) {
4162 input_codename = "ASCII";
4163 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4164 if (p->score & (SCORE_DEPEND|SCORE_CP932))
4165 input_codename = "CP932";
4166 } else if (strcmp(input_codename, "EUC-JP") == 0) {
4167 if (p->score & (SCORE_X0212))
4168 input_codename = "EUCJP-MS";
4169 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4170 input_codename = "CP51932";
4171 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4172 if (p->score & (SCORE_KANA))
4173 input_codename = "CP50221";
4174 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4175 input_codename = "CP50220";
4178 return input_codename;
/* print_guessed_code: print the guessed encoding to stdout, prefixed with
 * "filename: " when a filename is given. The visible conditional chain
 * picks an end-of-line suffix from input_eol: (CR), (LF), (CRLF) or
 * (MIXED NL). Compiled only outside the Perl XS and Win32 DLL builds. */
4181 #if !defined(PERL_XS) && !defined(WIN32DLL)
4183 print_guessed_code(char *filename)
4185 if (filename != NULL) printf("%s: ", filename);
4186 if (input_codename && !*input_codename) {
4189 input_codename = get_guessed_code();
4191 printf("%s\n", input_codename);
4195 input_eol == CR ? " (CR)" :
4196 input_eol == LF ? " (LF)" :
4197 input_eol == CRLF ? " (CRLF)" :
4198 input_eol == EOF ? " (MIXED NL)" :
/*
 * hex_getc: shared reader for escapes made of an introducer and two hex
 * digits. ch is the introducer character, g / u the underlying get / unget
 * hooks. When both following characters are hex digits the decoded byte
 * (high nibble first) is returned.
 * NOTE(review): the push-back done when a digit test fails is not in the
 * visible lines.
 */
4208 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4210 nkf_char c1, c2, c3;
4216 if (!nkf_isxdigit(c2)){
4221 if (!nkf_isxdigit(c3)){
4226 return (hex2bin(c2) << 4) | hex2bin(c3);
/* ':'-introduced hex escapes: the reader delegates to hex_getc over the
 * i_cgetc / i_cungetc hooks (its header is not in the visible lines), and
 * cap_ungetc forwards to the saved unget hook. */
4232 return hex_getc(':', f, i_cgetc, i_cungetc);
4236 cap_ungetc(nkf_char c, FILE *f)
4238 return (*i_cungetc)(c, f);
/* '%'-introduced hex escapes: the reader delegates to hex_getc over the
 * i_ugetc / i_uungetc hooks (its header is not in the visible lines), and
 * url_ungetc forwards to the saved unget hook. */
4244 return hex_getc('%', f, i_ugetc, i_uungetc);
4248 url_ungetc(nkf_char c, FILE *f)
4250 return (*i_uungetc)(c, f);
/*
 * numchar_getc: input hook that decodes numeric character references read
 * through i_ngetc / i_nungetc. After an 'x' or 'X' up to 7 hexadecimal
 * digits are accumulated; otherwise up to 8 decimal digits. The resulting
 * code point is returned wrapped by nkf_char_unicode_new().
 * NOTE(review): recognition of the reference's opening and closing
 * characters and the failure push-back are not in the visible lines.
 */
4254 #ifdef NUMCHAR_OPTION
4256 numchar_getc(FILE *f)
4258 nkf_char (*g)(FILE *) = i_ngetc;
4259 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4270 if (buf[i] == 'x' || buf[i] == 'X'){
4271 for (j = 0; j < 7; j++){
4273 if (!nkf_isxdigit(buf[i])){
4280 c |= hex2bin(buf[i]);
4283 for (j = 0; j < 8; j++){
4287 if (!nkf_isdigit(buf[i])){
4294 c += hex2bin(buf[i]);
4300 return nkf_char_unicode_new(c);
/* numchar_ungetc: forwards to the saved unget hook. */
4310 numchar_ungetc(nkf_char c, FILE *f)
4312 return (*i_nungetc)(c, f);
/*
 * Input hook that recomposes decomposed (NFD) byte sequences into their
 * composed (NFC) form, reading through i_nfc_getc / i_nfc_ungetc.
 * EOF, values above 0xFF and bytes of the form 10xxxxxx are returned
 * untouched. Otherwise the byte is pushed on nfc_buf and normalization_table
 * is binary-searched on the .nfd byte strings, pulling further input bytes
 * as the comparison needs them. On a match the table's .nfc bytes are
 * pushed instead. Finally all buffered bytes but one are given back through
 * the unget hook and the remaining one is returned.
 */
4316 #ifdef UNICODE_NORMALIZATION
4321 nkf_char (*g)(FILE *f) = i_nfc_getc;
4322 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4323 nkf_buf_t *buf = nkf_state->nfc_buf;
4324 const unsigned char *array;
4325 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4326 nkf_char c = (*g)(f);
4328 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4330 nkf_buf_push(buf, c);
4332 while (lower <= upper) {
4333 int mid = (lower+upper) / 2;
4335 array = normalization_table[mid].nfd;
4336 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4337 if (len >= nkf_buf_length(buf)) {
/* forces the enclosing search loop to terminate */
4341 lower = 1, upper = 0;
4344 nkf_buf_push(buf, c);
4346 if (array[len] != nkf_buf_at(buf, len)) {
4347 if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4348 else upper = mid - 1;
4355 array = normalization_table[mid].nfc;
4357 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4358 nkf_buf_push(buf, array[i]);
4362 } while (lower <= upper);
4364 while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4365 c = nkf_buf_pop(buf);
/* nfc_ungetc: forwards to the saved unget hook. */
4371 nfc_ungetc(nkf_char c, FILE *f)
4373 return (*i_nfc_ungetc)(c, f);
4375 #endif /* UNICODE_NORMALIZATION */
/*
 * base64decode: map one Base64 alphabet character to its 6-bit value,
 * using character arithmetic instead of a table ('?' is 63, '>' is 62,
 * '4' is 52, 'G' is 'a'-26). Besides the standard alphabet, '-' is accepted
 * for 62 and '_' for 63.
 * NOTE(review): the range tests guarding the first and third arms are not
 * in the visible lines.
 */
4379 base64decode(nkf_char c)
4384 i = c - 'A'; /* A..Z 0-25 */
4385 } else if (c == '_') {
4386 i = '?' /* 63 */ ; /* _ 63 */
4388 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4390 } else if (c > '/') {
4391 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4392 } else if (c == '+' || c == '-') {
4393 i = '>' /* 62 */ ; /* + and - 62 */
4395 i = '?' /* 63 */ ; /* / 63 */
/*
 * MIME-decoding input hook (function header not in the visible lines).
 * Order of work:
 *   1. If decoded bytes are waiting in the ring buffer (top != last),
 *      return the next one.
 *   2. If mime_decode_mode is 1 or FALSE, leave MIME mode
 *      (unswitch_mime_getc) and read straight from i_getc.
 *   3. 'Q' mode: quoted-printable. '_' stands for a space outside
 *      FIXED_MIME; "?=" ends the word; '=' followed by a control character
 *      is a soft line break; otherwise '=' introduces two hex digits.
 *   4. 'B' mode: four Base64 characters are read (c1..c4), decoded with
 *      base64decode(), and the resulting bytes appended to the ring buffer,
 *      from which the first is returned.
 * After a closing "?=" the following white space is collected in lwsp_buf
 * so it can be pushed back unless another encoded word follows.
 */
4403 nkf_char c1, c2, c3, c4, cc;
4404 nkf_char t1, t2, t3, t4, mode, exit_mode;
4405 nkf_char lwsp_count;
4408 nkf_char lwsp_size = 128;
4410 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4411 return mime_input_buf(mime_input_state.top++);
4413 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4414 mime_decode_mode=FALSE;
4415 unswitch_mime_getc();
4416 return (*i_getc)(f);
4419 if (mimebuf_f == FIXED_MIME)
4420 exit_mode = mime_decode_mode;
4423 if (mime_decode_mode == 'Q') {
4424 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4426 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4427 if (c1<=SP || DEL<=c1) {
4428 mime_decode_mode = exit_mode; /* prepare for quit */
4431 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4435 mime_decode_mode = exit_mode; /* prepare for quit */
4436 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4437 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4438 /* end Q encoding */
4439 input_mode = exit_mode;
4441 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4442 while ((c1=(*i_getc)(f))!=EOF) {
4447 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4455 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4456 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4471 lwsp_buf[lwsp_count] = (unsigned char)c1;
4472 if (lwsp_count++>lwsp_size){
4474 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4475 lwsp_buf = lwsp_buf_new;
4481 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4483 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4484 i_ungetc(lwsp_buf[lwsp_count],f);
4487 nkf_xfree(lwsp_buf);
4490 if (c1=='='&&c2<SP) { /* this is soft wrap */
4491 while((c1 = (*i_mgetc)(f)) <=SP) {
4492 if (c1 == EOF) return (EOF);
4494 mime_decode_mode = 'Q'; /* still in MIME */
4495 goto restart_mime_q;
4498 mime_decode_mode = 'Q'; /* still in MIME */
4502 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4503 if (c2<=SP) return c2;
4504 mime_decode_mode = 'Q'; /* still in MIME */
4505 return ((hex2bin(c2)<<4) + hex2bin(c3));
4508 if (mime_decode_mode != 'B') {
4509 mime_decode_mode = FALSE;
4510 return (*i_mgetc)(f);
4514 /* Base64 encoding */
4516 MIME allows line break in the middle of
4517 Base64, but we are very pessimistic in decoding
4518 in unbuf mode because MIME encoded code may broken by
4519 less or editor's control sequence (such as ESC-[-K in unbuffered
4520 mode. ignore incomplete MIME.
4522 mode = mime_decode_mode;
4523 mime_decode_mode = exit_mode; /* prepare for quit */
4525 while ((c1 = (*i_mgetc)(f))<=SP) {
4530 if ((c2 = (*i_mgetc)(f))<=SP) {
4533 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4534 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4537 if ((c1 == '?') && (c2 == '=')) {
4540 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4541 while ((c1=(*i_getc)(f))!=EOF) {
4546 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4554 if ((c1=(*i_getc)(f))!=EOF) {
4558 } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4573 lwsp_buf[lwsp_count] = (unsigned char)c1;
4574 if (lwsp_count++>lwsp_size){
4576 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4577 lwsp_buf = lwsp_buf_new;
4583 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4585 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4586 i_ungetc(lwsp_buf[lwsp_count],f);
4589 nkf_xfree(lwsp_buf);
4593 if ((c3 = (*i_mgetc)(f))<=SP) {
4596 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4597 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4601 if ((c4 = (*i_mgetc)(f))<=SP) {
4604 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4605 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4609 mime_decode_mode = mode; /* still in MIME sigh... */
4611 /* BASE 64 decoding */
4613 t1 = 0x3f & base64decode(c1);
4614 t2 = 0x3f & base64decode(c2);
4615 t3 = 0x3f & base64decode(c3);
4616 t4 = 0x3f & base64decode(c4);
4617 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4619 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4620 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4622 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4623 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4625 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4630 return mime_input_buf(mime_input_state.top++);
/* Base64 output alphabet, indexed by 6-bit value. */
4633 static const char basis_64[] =
4634 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Capacity of the output-side look-ahead buffer; buf below has one spare
 * byte beyond it. */
4636 #define MIMEOUT_BUF_LENGTH 74
4638 unsigned char buf[MIMEOUT_BUF_LENGTH+1];
4642 /*nkf_char mime_lastchar2, mime_lastchar1;*/
/*
 * open_mime: start an encoded word for output mode `mode`.
 * The pattern whose mime_encode[] entry equals mode is chosen
 * (mime_pattern[0] when none matches) and mimeout_mode is set from
 * mime_encode_method[]. A line break is inserted first when base64_count is
 * past 45. White space at the front of mimeout_state.buf is written
 * through o_mputc, and the remaining buffered bytes are fed back through
 * mime_putc.
 */
4645 open_mime(nkf_char mode)
4647 const unsigned char *p;
4650 p = mime_pattern[0];
4651 for(i=0;mime_pattern[i];i++) {
4652 if (mode == mime_encode[i]) {
4653 p = mime_pattern[i];
4657 mimeout_mode = mime_encode_method[i];
4659 if (base64_count>45) {
4660 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4661 (*o_mputc)(mimeout_state.buf[i]);
4664 put_newline(o_mputc);
4667 if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
4671 for (;i<mimeout_state.count;i++) {
4672 if (nkf_isspace(mimeout_state.buf[i])) {
4673 (*o_mputc)(mimeout_state.buf[i]);
4683 j = mimeout_state.count;
4684 mimeout_state.count = 0;
4686 mime_putc(mimeout_state.buf[i]);
/*
 * mime_prechar: called before each character is handed to the MIME encoder
 * to decide whether the output line must be broken first.
 * The projected width is base64_count plus the encoded size of the pending
 * buffer (count/3*4). Three limits are visible (73, 66 and 60 columns).
 * When a limit is exceeded the current word is flushed with (EOF,0), a
 * newline is written and a space starts the continuation line.
 * NOTE(review): the conditions separating the three cases are only partly
 * visible.
 */
4691 mime_prechar(nkf_char c2, nkf_char c1)
4693 if (mimeout_mode > 0){
4695 if (base64_count + mimeout_state.count/3*4> 73){
4696 (*o_base64conv)(EOF,0);
4697 oconv_newline(o_base64conv);
4698 (*o_base64conv)(0,SP);
4702 if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
4703 (*o_base64conv)(EOF,0);
4704 oconv_newline(o_base64conv);
4705 (*o_base64conv)(0,SP);
4711 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4712 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4713 open_mime(output_mode);
4714 (*o_base64conv)(EOF,0);
4715 oconv_newline(o_base64conv);
4716 (*o_base64conv)(0,SP);
/*
 * Fragment of a routine whose header is not in the visible lines --
 * presumably the one that finishes an encoded word; confirm against the
 * full file. The switch writes the final Base64 character from the bits
 * still held in nkf_state->mimeout_state: the low 2 bits shifted left by 4,
 * or the low 4 bits shifted left by 2.
 */
4735 switch(mimeout_mode) {
4740 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
4746 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
4751 if (mimeout_mode > 0) {
4752 if (mimeout_f!=FIXED_MIME) {
4754 } else if (mimeout_mode != 'Q')
/*
 * mimeout_addchar: encode one byte in the current mimeout_mode and write
 * the result through o_mputc.
 * In the quoted-printable arm a non-alphanumeric byte is written as two
 * hex digits (high nibble first). In the Base64 arms the bits left over
 * from the previous byte are kept in nkf_state->mimeout_state and combined
 * with the next byte: first byte -> 1 character, second -> 1 character,
 * third -> 2 characters.
 */
4760 mimeout_addchar(nkf_char c)
4762 switch(mimeout_mode) {
4767 } else if(!nkf_isalnum(c)) {
4769 (*o_mputc)(bin2hex(((c>>4)&0xf)));
4770 (*o_mputc)(bin2hex((c&0xf)));
4778 nkf_state->mimeout_state=c;
4779 (*o_mputc)(basis_64[c>>2]);
4784 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4785 nkf_state->mimeout_state=c;
4790 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4791 (*o_mputc)(basis_64[c & 0x3F]);
/*
 * mime_putc: byte sink of the MIME output encoder.
 * With mimeout_f == FIXED_MIME the whole stream is encoded and the only
 * decision is where to break lines (base64_count > 71).
 * Otherwise bytes are held in mimeout_state.buf (at most
 * MIMEOUT_BUF_LENGTH) while the function decides, from the character class
 * and the output mode, whether they can go out verbatim through o_mputc or
 * must go into an encoded word (open_mime / mimeout_addchar).
 * c == EOF flushes what is pending. mimeout_mode <= 0 means no encoded word
 * is open; 'Q' and 'B' (and the Base64 sub-states) mean one is.
 */
4803 mime_putc(nkf_char c)
4808 if (mimeout_f == FIXED_MIME){
4809 if (mimeout_mode == 'Q'){
4810 if (base64_count > 71){
4811 if (c!=CR && c!=LF) {
4813 put_newline(o_mputc);
4818 if (base64_count > 71){
4820 put_newline(o_mputc);
4823 if (c == EOF) { /* c==EOF */
4827 if (c != EOF) { /* c!=EOF */
4833 /* mimeout_f != FIXED_MIME */
4835 if (c == EOF) { /* c==EOF */
4836 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4837 j = mimeout_state.count;
4838 mimeout_state.count = 0;
4840 if (mimeout_mode > 0) {
4841 if (!nkf_isblank(mimeout_state.buf[j-1])) {
4843 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4846 mimeout_addchar(mimeout_state.buf[i]);
4850 mimeout_addchar(mimeout_state.buf[i]);
4854 mimeout_addchar(mimeout_state.buf[i]);
4860 mimeout_addchar(mimeout_state.buf[i]);
4866 if (mimeout_state.count > 0){
4867 lastchar = mimeout_state.buf[mimeout_state.count - 1];
4872 if (mimeout_mode=='Q') {
4873 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4874 if (c == CR || c == LF) {
4879 } else if (c <= SP) {
4881 if (base64_count > 70) {
4882 put_newline(o_mputc);
4885 if (!nkf_isblank(c)) {
4890 if (base64_count > 70) {
4892 put_newline(o_mputc);
4895 open_mime(output_mode);
4897 if (!nkf_noescape_mime(c)) {
4910 if (mimeout_mode <= 0) {
4911 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
4912 output_mode == UTF_8)) {
4913 if (nkf_isspace(c)) {
4915 if (mimeout_mode == -1) {
4918 if (c==CR || c==LF) {
4920 open_mime(output_mode);
4926 for (i=0;i<mimeout_state.count;i++) {
4927 (*o_mputc)(mimeout_state.buf[i]);
4928 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4939 mimeout_state.buf[0] = (char)c;
4940 mimeout_state.count = 1;
4942 if (base64_count > 1
4943 && base64_count + mimeout_state.count > 76
4944 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
/* Look for a boundary=" parameter in the pending text so that the line is
 * broken before it; len is the length of str. */
4945 static const char *str = "boundary=\"";
4946 static int len = 10;
4949 for (; i < mimeout_state.count - len; ++i) {
4950 if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
4956 if (i == 0 || i == mimeout_state.count - len) {
4957 put_newline(o_mputc);
4959 if (!nkf_isspace(mimeout_state.buf[0])){
4966 for (j = 0; j <= i; ++j) {
4967 (*o_mputc)(mimeout_state.buf[j]);
4969 put_newline(o_mputc);
4971 for (; j <= mimeout_state.count; ++j) {
4972 mimeout_state.buf[j - i] = mimeout_state.buf[j];
4974 mimeout_state.count -= i;
4977 mimeout_state.buf[mimeout_state.count++] = (char)c;
4978 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4979 open_mime(output_mode);
4984 if (lastchar==CR || lastchar == LF){
4985 for (i=0;i<mimeout_state.count;i++) {
4986 (*o_mputc)(mimeout_state.buf[i]);
4989 mimeout_state.count = 0;
4992 for (i=0;i<mimeout_state.count-1;i++) {
4993 (*o_mputc)(mimeout_state.buf[i]);
4996 mimeout_state.buf[0] = SP;
4997 mimeout_state.count = 1;
4999 open_mime(output_mode);
5002 /* mimeout_mode == 'B', 1, 2 */
5003 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5004 output_mode == UTF_8)) {
5005 if (lastchar == CR || lastchar == LF){
5006 if (nkf_isblank(c)) {
5007 for (i=0;i<mimeout_state.count;i++) {
5008 mimeout_addchar(mimeout_state.buf[i]);
5010 mimeout_state.count = 0;
5013 for (i=0;i<mimeout_state.count;i++) {
5014 (*o_mputc)(mimeout_state.buf[i]);
5017 mimeout_state.count = 0;
5019 mimeout_state.buf[mimeout_state.count++] = (char)c;
5022 if (nkf_isspace(c)) {
5023 for (i=0;i<mimeout_state.count;i++) {
5024 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5026 for (i=0;i<mimeout_state.count;i++) {
5027 (*o_mputc)(mimeout_state.buf[i]);
5030 mimeout_state.count = 0;
5033 mimeout_state.buf[mimeout_state.count++] = (char)c;
5034 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5036 for (i=0;i<mimeout_state.count;i++) {
5037 (*o_mputc)(mimeout_state.buf[i]);
5040 mimeout_state.count = 0;
5044 if (mimeout_state.count>0 && SP<c && c!='=') {
5045 mimeout_state.buf[mimeout_state.count++] = (char)c;
5046 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5047 j = mimeout_state.count;
5048 mimeout_state.count = 0;
5050 mimeout_addchar(mimeout_state.buf[i]);
5057 if (mimeout_state.count>0) {
5058 j = mimeout_state.count;
5059 mimeout_state.count = 0;
5061 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5063 mimeout_addchar(mimeout_state.buf[i]);
5069 (*o_mputc)(mimeout_state.buf[i]);
5071 open_mime(output_mode);
/* base64_conv: output filter for MIME encoding -- lets mime_prechar make
 * its line-break decision, then forwards the character to o_base64conv. */
5078 base64_conv(nkf_char c2, nkf_char c1)
5080 mime_prechar(c2, c1);
5081 (*o_base64conv)(c2,c1);
/* nkf_iconv_t: state of one iconv-based conversion -- the descriptor (cd,
 * used by the functions below) plus an input and an output work buffer,
 * each with its size in bytes. */
5085 typedef struct nkf_iconv_t {
5088 size_t input_buffer_size;
5089 char *output_buffer;
5090 size_t output_buffer_size;
/*
 * nkf_iconv_new: set up a converter from `fromcode` to `tocode`.
 * Allocates an input buffer of IOBUF_SIZE bytes and an output buffer twice
 * that size, then opens the iconv descriptor. When iconv_open() fails a
 * diagnostic is written to stderr.
 *
 * `converter` is a struct object, not a pointer, so its members are reached
 * with '.'. The unsupported-conversion message is printed with fprintf():
 * perror() takes a string, not the int that fprintf() returns, and
 * fprintf() needs a stream as its first argument.
 * NOTE(review): the return statements are not in the visible lines; check
 * that they return `converter` by value, matching the declaration below.
 */
5094 nkf_iconv_new(char *tocode, char *fromcode)
5096 nkf_iconv_t converter;
5098 converter.input_buffer_size = IOBUF_SIZE;
5099 converter.input_buffer = nkf_xmalloc(converter.input_buffer_size);
5100 converter.output_buffer_size = IOBUF_SIZE * 2;
5101 converter.output_buffer = nkf_xmalloc(converter.output_buffer_size);
5102 converter.cd = iconv_open(tocode, fromcode);
5103 if (converter.cd == (iconv_t)-1)
5107 fprintf(stderr, "iconv doesn't support %s to %s conversion.\n", fromcode, tocode);
5110 perror("can't iconv_open");
/*
 * nkf_iconv_convert: read bytes from `input` through the i_getc hook, run
 * them through iconv and write the result with o_putc.
 *   converter  conversion state created by nkf_iconv_new
 *   input      stream handed to the i_getc hook
 *
 * Fixes relative to the previous text:
 *   - the read loop used an undeclared `f`; it now reads from `input`;
 *   - the loop left after the first byte (`<`), and now stops only when the
 *     input buffer is full (`>=`);
 *   - the output buffer is grown through the existing member
 *     `output_buffer` (there is no `outbuf` member).
 * NOTE(review): the flush loop indexes from
 * output_buffer_size-output_length while counting output_length down. That
 * does not obviously emit the bytes iconv() produced -- verify before
 * enabling this code.
 * NOTE(review): elsewhere in this file `iconv` is also used as a
 * function-pointer variable (mime_iconv_back = iconv); confirm which entity
 * the call below resolves to.
 */
5116 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5118 size_t invalid = (size_t)0;
5119 char *input_buffer = converter->input_buffer;
5120 size_t input_length = (size_t)0;
5121 char *output_buffer = converter->output_buffer;
5122 size_t output_length = converter->output_buffer_size;
5127 while ((c = (*i_getc)(input)) != EOF) {
5128 input_buffer[input_length++] = c;
5129 if (input_length >= converter->input_buffer_size) break;
5133 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5134 while (output_length-- > 0) {
5135 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5137 if (ret == (size_t) - 1) {
/* unconverted tail: move it to the front of the buffer for the next round */
5140 if (input_buffer != converter->input_buffer)
5141 memmove(converter->input_buffer, input_buffer, input_length);
5144 converter->output_buffer_size *= 2;
5145 output_buffer = realloc(converter->output_buffer, converter->output_buffer_size);
5146 if (output_buffer == NULL) {
5147 perror("can't realloc");
5150 converter->output_buffer = output_buffer;
5153 perror("can't iconv");
/*
 * nkf_iconv_close: release a converter -- free both work buffers and close
 * the iconv descriptor.
 * The parameter is named `convert`; the body previously referred to an
 * undeclared `converter` and to members `inbuf` / `outbuf` that the struct
 * does not have. It now uses the parameter and the members
 * `input_buffer` / `output_buffer` that nkf_iconv_new fills in.
 */
5166 nkf_iconv_close(nkf_iconv_t *convert)
5168 nkf_xfree(convert->input_buffer);
5169 nkf_xfree(convert->output_buffer);
5170 iconv_close(convert->cd);
/*
 * Body of the state-reset routine (function header not in the visible
 * lines). Puts option flags back to their defaults, clears the prefix
 * table and the MIME output buffer, points every output-filter hook at
 * no_connection and the input unget hooks at std_ungetc, and forgets any
 * input / output encoding chosen earlier.
 */
5179 struct input_code *p = input_code_list;
5191 mime_f = MIME_DECODE_DEFAULT;
5192 mime_decode_f = FALSE;
5197 x0201_f = NKF_UNSPECIFIED;
5198 iso2022jp_f = FALSE;
5199 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5200 ms_ucs_map_f = UCS_MAP_ASCII;
5202 #ifdef UTF8_INPUT_ENABLE
5203 no_cp932ext_f = FALSE;
5204 no_best_fit_chars_f = FALSE;
5205 encode_fallback = NULL;
5206 unicode_subchar = '?';
5207 input_endian = ENDIAN_BIG;
5209 #ifdef UTF8_OUTPUT_ENABLE
5210 output_bom_f = FALSE;
5211 output_endian = ENDIAN_BIG;
5213 #ifdef UNICODE_NORMALIZATION
5229 #ifdef SHIFTJIS_CP932
5239 for (i = 0; i < 256; i++){
5240 prefix_table[i] = 0;
5244 mimeout_state.count = 0;
5249 fold_preserve_f = FALSE;
5252 kanji_intro = DEFAULT_J;
5253 ascii_intro = DEFAULT_R;
5254 fold_margin = FOLD_MARGIN;
/* no output filter is chained until module_connection() runs */
5255 o_zconv = no_connection;
5256 o_fconv = no_connection;
5257 o_eol_conv = no_connection;
5258 o_rot_conv = no_connection;
5259 o_hira_conv = no_connection;
5260 o_base64conv = no_connection;
5261 o_iso2022jp_check_conv = no_connection;
5264 i_ungetc = std_ungetc;
5266 i_bungetc = std_ungetc;
5269 i_mungetc = std_ungetc;
5270 i_mgetc_buf = std_getc;
5271 i_mungetc_buf = std_ungetc;
5272 output_mode = ASCII;
5274 mime_decode_mode = FALSE;
5280 z_prev2=0,z_prev1=0;
5282 iconv_for_check = 0;
5284 input_codename = NULL;
5285 input_encoding = NULL;
5286 output_encoding = NULL;
5294 module_connection(void)
5296 if (input_encoding) set_input_encoding(input_encoding);
5297 if (!output_encoding) {
5298 output_encoding = nkf_default_encoding();
5300 if (!output_encoding) {
5301 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5304 set_output_encoding(output_encoding);
5305 oconv = nkf_enc_to_oconv(output_encoding);
5307 if (nkf_enc_unicode_p(output_encoding))
5308 output_mode = UTF_8;
5310 if (x0201_f == NKF_UNSPECIFIED) {
5311 x0201_f = X0201_DEFAULT;
5314 /* replace continuation module, from output side */
5316 /* output redirection */
5318 if (noout_f || guess_f){
5325 if (mimeout_f == TRUE) {
5326 o_base64conv = oconv; oconv = base64_conv;
5328 /* base64_count = 0; */
5331 if (eolmode_f || guess_f) {
5332 o_eol_conv = oconv; oconv = eol_conv;
5335 o_rot_conv = oconv; oconv = rot_conv;
5338 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5341 o_hira_conv = oconv; oconv = hira_conv;
5344 o_fconv = oconv; oconv = fold_conv;
5347 if (alpha_f || x0201_f) {
5348 o_zconv = oconv; oconv = z_conv;
5352 i_ungetc = std_ungetc;
5353 /* input redirection */
5356 i_cgetc = i_getc; i_getc = cap_getc;
5357 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5360 i_ugetc = i_getc; i_getc = url_getc;
5361 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5364 #ifdef NUMCHAR_OPTION
5366 i_ngetc = i_getc; i_getc = numchar_getc;
5367 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5370 #ifdef UNICODE_NORMALIZATION
5372 i_nfc_getc = i_getc; i_getc = nfc_getc;
5373 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5376 if (mime_f && mimebuf_f==FIXED_MIME) {
5377 i_mgetc = i_getc; i_getc = mime_getc;
5378 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5381 i_bgetc = i_getc; i_getc = broken_getc;
5382 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5384 if (input_encoding) {
5385 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5387 set_iconv(FALSE, e_iconv);
5391 struct input_code *p = input_code_list;
5400 Conversion main loop. Code detection only.
5403 #if !defined(PERL_XS) && !defined(WIN32DLL)
5410 module_connection();
5411 while ((c = (*i_getc)(f)) != EOF)
/* Control-flow shorthands. NEXT, SKIP, MORE and LAST expand to a bare
   continue/break, so they only make sense inside a loop body. SKIP and
   MORE expand to TWO statements (an assignment, then continue): used as
   the sole body of an unbraced if/else, only the assignment would be
   conditional, so they must always sit inside a braced block. They cannot
   be wrapped in do { } while (0), because continue would then apply to
   that wrapper loop instead of the enclosing one. */
5418 #define NEXT continue /* no output, get next */
5419 #define SKIP c2=0;continue /* no output, reset c2 to 0, get next */
5420 #define MORE c2=c1;continue /* save c1 into c2; need one more byte */
5421 #define SEND (void)0 /* output c1 and c2, get next (no-op: control simply falls through) */
5422 #define LAST break /* end of loop, go closing */
5423 #define set_input_mode(mode) do { \
5424 input_mode = mode; \
5426 set_input_codename("ISO-2022-JP"); \
5427 debug("ISO-2022-JP"); \
5431 kanji_convert(FILE *f)
5433 nkf_char c1=0, c2=0, c3=0, c4=0;
5434 int shift_mode = 0; /* 0, 1, 2, 3 */
5436 int is_8bit = FALSE;
5438 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5443 output_mode = ASCII;
5445 if (module_connection() < 0) {
5446 #if !defined(PERL_XS) && !defined(WIN32DLL)
5447 fprintf(stderr, "no output encoding given\n");
5453 #ifdef UTF8_INPUT_ENABLE
5454 if(iconv == w_iconv32){
5455 while ((c1 = (*i_getc)(f)) != EOF &&
5456 (c2 = (*i_getc)(f)) != EOF &&
5457 (c3 = (*i_getc)(f)) != EOF &&
5458 (c4 = (*i_getc)(f)) != EOF) {
5459 nkf_iconv_utf_32(c1, c2, c3, c4);
5463 else if (iconv == w_iconv16) {
5464 while ((c1 = (*i_getc)(f)) != EOF &&
5465 (c2 = (*i_getc)(f)) != EOF) {
5466 if (nkf_iconv_utf_16(c1, c2, 0, 0) == NKF_ICONV_NEED_TWO_MORE_BYTES &&
5467 (c3 = (*i_getc)(f)) != EOF &&
5468 (c4 = (*i_getc)(f)) != EOF) {
5469 nkf_iconv_utf_16(c1, c2, c3, c4);
5476 while ((c1 = (*i_getc)(f)) != EOF) {
5477 #ifdef INPUT_CODE_FIX
5478 if (!input_encoding)
5484 /* in case of 8th bit is on */
5485 if (!estab_f&&!mime_decode_mode) {
5486 /* in case of not established yet */
5487 /* It is still ambiguous */
5488 if (h_conv(f, c2, c1)==EOF) {
5496 /* in case of already established */
5498 /* ignore bogus code */
5506 /* 2nd byte of 7 bit code or SJIS */
5510 else if (nkf_char_unicode_p(c1)) {
5516 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5519 }else if (input_codename && input_codename[0] == 'I' &&
5520 0xA1 <= c1 && c1 <= 0xDF) {
5521 /* JIS X 0201 Katakana in 8bit JIS */
5522 c2 = JIS_X_0201_1976_K;
5525 } else if (c1 > DEL) {
5527 if (!estab_f && !iso8859_f) {
5528 /* not established yet */
5530 } else { /* estab_f==TRUE */
5536 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5537 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5539 c2 = JIS_X_0201_1976_K;
5544 /* already established */
5548 } else if (SP < c1 && c1 < DEL) {
5549 /* in case of Roman characters */
5551 /* output 1 shifted byte */
5555 } else if (nkf_byte_jisx0201_katakana_p(c1)){
5556 /* output 1 shifted byte */
5557 c2 = JIS_X_0201_1976_K;
5560 /* look like bogus code */
5563 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5564 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5565 /* in case of Kanji shifted */
5567 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5568 /* Check MIME code */
5569 if ((c1 = (*i_getc)(f)) == EOF) {
5572 } else if (c1 == '?') {
5573 /* =? is mime conversion start sequence */
5574 if(mime_f == STRICT_MIME) {
5575 /* check in real detail */
5576 if (mime_begin_strict(f) == EOF)
5579 } else if (mime_begin(f) == EOF)
5588 /* normal ASCII code */
5591 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5594 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
5597 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
5598 if ((c1 = (*i_getc)(f)) == EOF) {
5602 else if (c1 == '&') {
5604 if ((c1 = (*i_getc)(f)) == EOF) {
5610 else if (c1 == '$') {
5612 if ((c1 = (*i_getc)(f)) == EOF) {
5613 /* don't send bogus code
5615 (*oconv)(0, '$'); */
5617 } else if (c1 == '@' || c1 == 'B') {
5619 set_input_mode(JIS_X_0208);
5621 } else if (c1 == '(') {
5623 if ((c1 = (*i_getc)(f)) == EOF) {
5624 /* don't send bogus code
5630 } else if (c1 == '@'|| c1 == 'B') {
5632 set_input_mode(JIS_X_0208);
5635 } else if (c1 == 'D'){
5636 set_input_mode(JIS_X_0212);
5638 #endif /* X0212_ENABLE */
5639 } else if (c1 == 'O' || c1 == 'Q'){
5640 set_input_mode(JIS_X_0213_1);
5642 } else if (c1 == 'P'){
5643 set_input_mode(JIS_X_0213_2);
5646 /* could be some special code */
5653 } else if (broken_f&0x2) {
5654 /* accept any ESC-(-x as broken code ... */
5655 input_mode = JIS_X_0208;
5664 } else if (c1 == '(') {
5666 if ((c1 = (*i_getc)(f)) == EOF) {
5667 /* don't send bogus code
5669 (*oconv)(0, '('); */
5672 else if (c1 == 'I') {
5673 /* JIS X 0201 Katakana */
5674 set_input_mode(JIS_X_0201_1976_K);
5677 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5678 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5679 set_input_mode(ASCII);
5682 else if (broken_f&0x2) {
5683 set_input_mode(ASCII);
5692 else if (c1 == '.') {
5694 if ((c1 = (*i_getc)(f)) == EOF) {
5697 else if (c1 == 'A') {
5708 else if (c1 == 'N') {
5711 if (g2 == ISO_8859_1) {
5726 } else if (c1 == ESC && iconv == s_iconv) {
5727 /* ESC in Shift_JIS */
5728 if ((c1 = (*i_getc)(f)) == EOF) {
5731 } else if (c1 == '$') {
5733 if ((c1 = (*i_getc)(f)) == EOF) {
5735 } else if (('E' <= c1 && c1 <= 'G') ||
5736 ('O' <= c1 && c1 <= 'Q')) {
5744 static const nkf_char jphone_emoji_first_table[7] =
5745 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5746 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5747 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5748 while (SP <= c1 && c1 <= 'z') {
5749 (*oconv)(0, c1 + c3);
5750 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5765 } else if (c1 == LF || c1 == CR) {
5767 input_mode = ASCII; set_iconv(FALSE, 0);
5769 } else if (mime_decode_f && !mime_decode_mode){
5771 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5779 } else { /* if (c1 == CR)*/
5780 if ((c1=(*i_getc)(f))!=EOF) {
5784 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
5804 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5807 if ((c3 = (*i_getc)(f)) != EOF) {
5810 if ((c4 = (*i_getc)(f)) != EOF) {
5812 (*iconv)(c2, c1, c3|c4);
5817 /* 3 bytes EUC or UTF-8 */
5818 if ((c3 = (*i_getc)(f)) != EOF) {
5820 (*iconv)(c2, c1, c3);
5828 0x7F <= c2 && c2 <= 0x92 &&
5829 0x21 <= c1 && c1 <= 0x7E) {
5831 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5834 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5838 (*oconv)(PREFIX_EUCG3 | c2, c1);
5840 #endif /* X0212_ENABLE */
5842 (*oconv)(PREFIX_EUCG3 | c2, c1);
5845 (*oconv)(input_mode, c1); /* other special case */
5851 /* goto next_word */
5856 (*iconv)(EOF, 0, 0);
5857 if (!input_codename)
5860 struct input_code *p = input_code_list;
5861 struct input_code *result = p;
5863 if (p->score < result->score) result = p;
5866 set_input_codename(result->name);
5868 debug(result->name);
5876 * int options(unsigned char *cp)
5883 options(unsigned char *cp)
5887 unsigned char *cp_back = NULL;
5892 while(*cp && *cp++!='-');
5893 while (*cp || cp_back) {
5901 case '-': /* literal options */
5902 if (!*cp || *cp == SP) { /* ignore the rest of arguments */
5906 for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
5907 p = (unsigned char *)long_option[i].name;
5908 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5909 if (*p == cp[j] || cp[j] == SP){
5916 #if !defined(PERL_XS) && !defined(WIN32DLL)
5917 fprintf(stderr, "unknown long option: --%s\n", cp);
5921 while(*cp && *cp != SP && cp++);
5922 if (long_option[i].alias[0]){
5924 cp = (unsigned char *)long_option[i].alias;
5927 if (strcmp(long_option[i].name, "help") == 0){
5932 if (strcmp(long_option[i].name, "ic=") == 0){
5933 enc = nkf_enc_find((char *)p);
5935 input_encoding = enc;
5938 if (strcmp(long_option[i].name, "oc=") == 0){
5939 enc = nkf_enc_find((char *)p);
5940 /* if (enc <= 0) continue; */
5942 output_encoding = enc;
5945 if (strcmp(long_option[i].name, "guess=") == 0){
5946 if (p[0] == '0' || p[0] == '1') {
5954 if (strcmp(long_option[i].name, "overwrite") == 0){
5957 preserve_time_f = TRUE;
5960 if (strcmp(long_option[i].name, "overwrite=") == 0){
5963 preserve_time_f = TRUE;
5965 backup_suffix = (char *)p;
5968 if (strcmp(long_option[i].name, "in-place") == 0){
5971 preserve_time_f = FALSE;
5974 if (strcmp(long_option[i].name, "in-place=") == 0){
5977 preserve_time_f = FALSE;
5979 backup_suffix = (char *)p;
5984 if (strcmp(long_option[i].name, "cap-input") == 0){
5988 if (strcmp(long_option[i].name, "url-input") == 0){
5993 #ifdef NUMCHAR_OPTION
5994 if (strcmp(long_option[i].name, "numchar-input") == 0){
6000 if (strcmp(long_option[i].name, "no-output") == 0){
6004 if (strcmp(long_option[i].name, "debug") == 0){
6009 if (strcmp(long_option[i].name, "cp932") == 0){
6010 #ifdef SHIFTJIS_CP932
6014 #ifdef UTF8_OUTPUT_ENABLE
6015 ms_ucs_map_f = UCS_MAP_CP932;
6019 if (strcmp(long_option[i].name, "no-cp932") == 0){
6020 #ifdef SHIFTJIS_CP932
6024 #ifdef UTF8_OUTPUT_ENABLE
6025 ms_ucs_map_f = UCS_MAP_ASCII;
6029 #ifdef SHIFTJIS_CP932
6030 if (strcmp(long_option[i].name, "cp932inv") == 0){
6037 if (strcmp(long_option[i].name, "x0212") == 0){
6044 if (strcmp(long_option[i].name, "exec-in") == 0){
6048 if (strcmp(long_option[i].name, "exec-out") == 0){
6053 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6054 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6055 no_cp932ext_f = TRUE;
6058 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6059 no_best_fit_chars_f = TRUE;
6062 if (strcmp(long_option[i].name, "fb-skip") == 0){
6063 encode_fallback = NULL;
6066 if (strcmp(long_option[i].name, "fb-html") == 0){
6067 encode_fallback = encode_fallback_html;
6070 if (strcmp(long_option[i].name, "fb-xml") == 0){
6071 encode_fallback = encode_fallback_xml;
6074 if (strcmp(long_option[i].name, "fb-java") == 0){
6075 encode_fallback = encode_fallback_java;
6078 if (strcmp(long_option[i].name, "fb-perl") == 0){
6079 encode_fallback = encode_fallback_perl;
6082 if (strcmp(long_option[i].name, "fb-subchar") == 0){
6083 encode_fallback = encode_fallback_subchar;
6086 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6087 encode_fallback = encode_fallback_subchar;
6088 unicode_subchar = 0;
6090 /* decimal number */
6091 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6092 unicode_subchar *= 10;
6093 unicode_subchar += hex2bin(p[i]);
6095 }else if(p[1] == 'x' || p[1] == 'X'){
6096 /* hexadecimal number */
6097 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6098 unicode_subchar <<= 4;
6099 unicode_subchar |= hex2bin(p[i]);
6103 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6104 unicode_subchar *= 8;
6105 unicode_subchar += hex2bin(p[i]);
6108 w16e_conv(unicode_subchar, &i, &j);
6109 unicode_subchar = i<<8 | j;
6113 #ifdef UTF8_OUTPUT_ENABLE
6114 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6115 ms_ucs_map_f = UCS_MAP_MS;
6119 #ifdef UNICODE_NORMALIZATION
6120 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6125 if (strcmp(long_option[i].name, "prefix=") == 0){
6126 if (nkf_isgraph(p[0])){
6127 for (i = 1; nkf_isgraph(p[i]); i++){
6128 prefix_table[p[i]] = p[0];
6133 #if !defined(PERL_XS) && !defined(WIN32DLL)
6134 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6139 case 'b': /* buffered mode */
6142 case 'u': /* non-buffered mode */
6145 case 't': /* transparent mode */
6150 } else if (*cp=='2') {
6154 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6162 case 'j': /* JIS output */
6164 output_encoding = nkf_enc_from_index(ISO_2022_JP);
6166 case 'e': /* AT&T EUC output */
6167 output_encoding = nkf_enc_from_index(EUCJP_NKF);
6169 case 's': /* SJIS output */
6170 output_encoding = nkf_enc_from_index(SHIFT_JIS);
6172 case 'l': /* ISO8859 Latin-1 support, no conversion */
6173 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6174 input_encoding = nkf_enc_from_index(ISO_8859_1);
6176 case 'i': /* Kanji IN ESC-$-@/B */
6177 if (*cp=='@'||*cp=='B')
6178 kanji_intro = *cp++;
6180 case 'o': /* ASCII IN ESC-(-J/B/H */
6181 /* ESC ( H was used in initial JUNET messages */
6182 if (*cp=='J'||*cp=='B'||*cp=='H')
6183 ascii_intro = *cp++;
6187 bit:1 katakana->hiragana
6188 bit:2 hiragana->katakana
6190 if ('9'>= *cp && *cp>='0')
6191 hira_f |= (*cp++ -'0');
6198 #if defined(MSDOS) || defined(__OS2__)
6205 show_configuration();
6213 #ifdef UTF8_OUTPUT_ENABLE
6214 case 'w': /* UTF-{8,16,32} output */
6219 output_encoding = nkf_enc_from_index(UTF_8N);
6221 output_bom_f = TRUE;
6222 output_encoding = nkf_enc_from_index(UTF_8_BOM);
6226 if ('1'== cp[0] && '6'==cp[1]) {
6229 } else if ('3'== cp[0] && '2'==cp[1]) {
6233 output_encoding = nkf_enc_from_index(UTF_8);
6238 output_endian = ENDIAN_LITTLE;
6239 output_bom_f = TRUE;
6240 } else if (cp[0] == 'B') {
6242 output_bom_f = TRUE;
6245 output_bom_f = FALSE;
6247 enc_idx = enc_idx == UTF_16
6248 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6249 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6251 enc_idx = enc_idx == UTF_16
6252 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6253 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6255 output_encoding = nkf_enc_from_index(enc_idx);
6259 #ifdef UTF8_INPUT_ENABLE
6260 case 'W': /* UTF input */
6263 input_encoding = nkf_enc_from_index(UTF_8);
6266 if ('1'== cp[0] && '6'==cp[1]) {
6268 input_endian = ENDIAN_BIG;
6270 } else if ('3'== cp[0] && '2'==cp[1]) {
6272 input_endian = ENDIAN_BIG;
6275 input_encoding = nkf_enc_from_index(UTF_8);
6280 input_endian = ENDIAN_LITTLE;
6281 } else if (cp[0] == 'B') {
6283 input_endian = ENDIAN_BIG;
6285 enc_idx = (enc_idx == UTF_16
6286 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6287 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6288 input_encoding = nkf_enc_from_index(enc_idx);
6292 /* Input code assumption */
6293 case 'J': /* ISO-2022-JP input */
6294 input_encoding = nkf_enc_from_index(ISO_2022_JP);
6296 case 'E': /* EUC-JP input */
6297 input_encoding = nkf_enc_from_index(EUCJP_NKF);
6299 case 'S': /* Shift_JIS input */
6300 input_encoding = nkf_enc_from_index(SHIFT_JIS);
6302 case 'Z': /* Convert X0208 alphabet to ASCII */
6304 bit:0 Convert JIS X 0208 Alphabet to ASCII
6305 bit:1 Convert Kankaku to one space
6306 bit:2 Convert Kankaku to two spaces
6307 bit:3 Convert HTML Entity
6308 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6310 while ('0'<= *cp && *cp <='4') {
6311 alpha_f |= 1 << (*cp++ - '0');
6315 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6316 x0201_f = FALSE; /* No X0201->X0208 conversion */
6318 ESC-(-I in JIS, EUC, MS Kanji
6319 SI/SO in JIS, EUC, MS Kanji
6320 SS2 in EUC, JIS, not in MS Kanji
6321 MS Kanji (0xa0-0xdf)
6323 ESC-(-I in JIS (0x20-0x5f)
6324 SS2 in EUC (0xa0-0xdf)
6325 0xa0-0xd in MS Kanji (0xa0-0xdf)
6328 case 'X': /* Convert X0201 kana to X0208 */
6331 case 'F': /* preserve new lines */
6332 fold_preserve_f = TRUE;
6333 case 'f': /* folding -f60 or -f */
6336 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6338 fold_len += *cp++ - '0';
6340 if (!(0<fold_len && fold_len<BUFSIZ))
6341 fold_len = DEFAULT_FOLD;
6345 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6347 fold_margin += *cp++ - '0';
6351 case 'm': /* MIME support */
6352 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6353 if (*cp=='B'||*cp=='Q') {
6354 mime_decode_mode = *cp++;
6355 mimebuf_f = FIXED_MIME;
6356 } else if (*cp=='N') {
6357 mime_f = TRUE; cp++;
6358 } else if (*cp=='S') {
6359 mime_f = STRICT_MIME; cp++;
6360 } else if (*cp=='0') {
6361 mime_decode_f = FALSE;
6362 mime_f = FALSE; cp++;
6364 mime_f = STRICT_MIME;
6367 case 'M': /* MIME output */
6370 mimeout_f = FIXED_MIME; cp++;
6371 } else if (*cp=='Q') {
6373 mimeout_f = FIXED_MIME; cp++;
6378 case 'B': /* Broken JIS support */
6380 bit:1 allow any x on ESC-(-x or ESC-$-x
6381 bit:2 reset to ascii on NL
6383 if ('9'>= *cp && *cp>='0')
6384 broken_f |= 1<<(*cp++ -'0');
6389 case 'O':/* for Output file */
6393 case 'c':/* add cr code */
6396 case 'd':/* delete cr code */
6399 case 'I': /* ISO-2022-JP output */
6402 case 'L': /* line mode */
6403 if (*cp=='u') { /* unix */
6404 eolmode_f = LF; cp++;
6405 } else if (*cp=='m') { /* mac */
6406 eolmode_f = CR; cp++;
6407 } else if (*cp=='w') { /* windows */
6408 eolmode_f = CRLF; cp++;
6409 } else if (*cp=='0') { /* no conversion */
6410 eolmode_f = 0; cp++;
6415 if ('2' <= *cp && *cp <= '9') {
6418 } else if (*cp == '0' || *cp == '1') {
6427 /* multiple options in a string are allowed for the Perl module */
6428 while(*cp && *cp++!='-');
6431 #if !defined(PERL_XS) && !defined(WIN32DLL)
6432 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6434 /* bogus option but ignored */
6442 #include "nkf32dll.c"
6443 #elif defined(PERL_XS)
6444 #else /* WIN32DLL */
6446 main(int argc, char **argv)
6451 char *outfname = NULL;
6454 #ifdef EASYWIN /*Easy Win */
6455 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6457 #ifdef DEFAULT_CODE_LOCALE
6458 setlocale(LC_CTYPE, "");
6462 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6463 cp = (unsigned char *)*argv;
6468 if (pipe(fds) < 0 || (pid = fork()) < 0){
6479 execvp(argv[1], &argv[1]);
6496 int debug_f_back = debug_f;
6499 int exec_f_back = exec_f;
6502 int x0212_f_back = x0212_f;
6504 int x0213_f_back = x0213_f;
6505 int guess_f_back = guess_f;
6507 guess_f = guess_f_back;
6510 debug_f = debug_f_back;
6513 exec_f = exec_f_back;
6515 x0212_f = x0212_f_back;
6516 x0213_f = x0213_f_back;
6519 if (binmode_f == TRUE)
6520 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6521 if (freopen("","wb",stdout) == NULL)
6528 setbuf(stdout, (char *) NULL);
6530 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
6533 if (binmode_f == TRUE)
6534 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6535 if (freopen("","rb",stdin) == NULL) return (-1);
6539 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
6543 kanji_convert(stdin);
6544 if (guess_f) print_guessed_code(NULL);
6548 int is_argument_error = FALSE;
6550 input_codename = NULL;
6553 iconv_for_check = 0;
6555 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6557 is_argument_error = TRUE;
6565 /* reopen file for stdout */
6566 if (file_out_f == TRUE) {
6569 outfname = nkf_xmalloc(strlen(origfname)
6570 + strlen(".nkftmpXXXXXX")
6572 strcpy(outfname, origfname);
6576 for (i = strlen(outfname); i; --i){
6577 if (outfname[i - 1] == '/'
6578 || outfname[i - 1] == '\\'){
6584 strcat(outfname, "ntXXXXXX");
6586 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6587 S_IREAD | S_IWRITE);
6589 strcat(outfname, ".nkftmpXXXXXX");
6590 fd = mkstemp(outfname);
6593 || (fd_backup = dup(fileno(stdout))) < 0
6594 || dup2(fd, fileno(stdout)) < 0
6605 outfname = "nkf.out";
6608 if(freopen(outfname, "w", stdout) == NULL) {
6612 if (binmode_f == TRUE) {
6613 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6614 if (freopen("","wb",stdout) == NULL)
6621 if (binmode_f == TRUE)
6622 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6623 if (freopen("","rb",fin) == NULL)
6628 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
6632 char *filename = NULL;
6634 if (nfiles > 1) filename = origfname;
6635 if (guess_f) print_guessed_code(filename);
6641 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6649 if (dup2(fd_backup, fileno(stdout)) < 0){
6652 if (stat(origfname, &sb)) {
6653 fprintf(stderr, "Can't stat %s\n", origfname);
6655 /* restore permissions */
6656 if (chmod(outfname, sb.st_mode)) {
6657 fprintf(stderr, "Can't set permission %s\n", outfname);
6660 /* restore timestamp */
6661 if(preserve_time_f){
6662 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6663 tb[0] = tb[1] = sb.st_mtime;
6664 if (utime(outfname, tb)) {
6665 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6668 tb.actime = sb.st_atime;
6669 tb.modtime = sb.st_mtime;
6670 if (utime(outfname, &tb)) {
6671 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6676 char *backup_filename = get_backup_filename(backup_suffix, origfname);
6678 unlink(backup_filename);
6680 if (rename(origfname, backup_filename)) {
6681 perror(backup_filename);
6682 fprintf(stderr, "Can't rename %s to %s\n",
6683 origfname, backup_filename);
6685 nkf_xfree(backup_filename);
6688 if (unlink(origfname)){
6693 if (rename(outfname, origfname)) {
6695 fprintf(stderr, "Can't rename %s to %s\n",
6696 outfname, origfname);
6698 nkf_xfree(outfname);
6703 if (is_argument_error)
6706 #ifdef EASYWIN /*Easy Win */
6707 if (file_out_f == FALSE)
6708 scanf("%d",&end_check);
6711 #else /* for Other OS */
6712 if (file_out_f == TRUE)
6714 #endif /*Easy Win */
6717 #endif /* WIN32DLL */