1 /** Network Kanji Filter. (PDS Version)
2 ** -*- coding: ISO-2022-JP -*-
3 ************************************************************************
4 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
5 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
6 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
7 ** Copyright (C) 1996,1998
9 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
10 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
11 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
12 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
14 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
15 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
16 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
17 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
18 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
19 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
21 ** Everyone is permitted to do anything on this program
22 ** including copying, modifying, improving,
23 ** as long as you don't try to pretend that you wrote it.
24 ** i.e., the above copyright notice has to appear in all copies.
25 ** Binary distribution requires original version messages.
26 ** You don't have to ask before copying, redistribution or publishing.
27 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
28 ***********************************************************************/
30 /***********************************************************************
31 *
\e$B8=:_!"
\e(Bnkf
\e$B$O
\e(B SourceForge
\e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#
\e(B
32 * http://sourceforge.jp/projects/nkf/
33 ***********************************************************************/
/*
 * Version identification strings.
 * NKF_IDENT is a revision-control "$Id$" keyword string; NKF_VERSION and
 * NKF_RELEASE_DATE are concatenated into the version banner and the
 * configuration summary printed by this program.
 */
34 #define NKF_IDENT "$Id: nkf.c,v 1.192 2008/11/09 23:09:22 naruse Exp $"
35 #define NKF_VERSION "2.0.8"
36 #define NKF_RELEASE_DATE "2008-11-10"
38 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
39 "Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
50 # define INCL_DOSERRORS
55 /* state of output_mode and input_mode
134 NKF_ENCODING_TABLE_SIZE,
135 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
136 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
137 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
138 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
139 JIS_X_0208 = 0x1168, /* @B */
140 JIS_X_0212 = 0x1159, /* D */
141 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
142 JIS_X_0213_2 = 0x1229, /* P */
143 JIS_X_0213_1 = 0x1233, /* Q */
/*
 * Forward declarations of the per-encoding converters.
 * Every *_iconv function takes three nkf_char arguments (c2, c1, c0) and
 * returns an nkf_char; every *_oconv function takes (c2, c1) and returns
 * nothing.  These are the functions stored in the NkfEncoding* objects.
 */
146 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
147 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
148 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
149 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
150 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
151 static void j_oconv(nkf_char c2, nkf_char c1);
152 static void s_oconv(nkf_char c2, nkf_char c1);
153 static void e_oconv(nkf_char c2, nkf_char c1);
154 static void w_oconv(nkf_char c2, nkf_char c1);
155 static void w_oconv16(nkf_char c2, nkf_char c1);
156 static void w_oconv32(nkf_char c2, nkf_char c1);
160 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
161 void (*oconv)(nkf_char c2, nkf_char c1);
162 } nkf_native_encoding;
/*
 * Base ("native") encodings: a name string followed by the input (iconv)
 * and output (oconv) converter for that encoding.
 * ASCII and ISO-2022-JP both use e_iconv for input, the same input
 * converter as EUC-JP; ISO-2022-JP differs only on the output side
 * (j_oconv).
 */
164 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
165 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
166 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
167 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
168 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
169 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
170 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
175 const nkf_native_encoding *base_encoding;
178 nkf_encoding nkf_encoding_table[] = {
179 {ASCII, "US-ASCII", &NkfEncodingASCII},
180 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
181 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
182 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
183 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
184 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
185 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
186 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
187 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
188 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
189 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
190 {CP10001, "CP10001", &NkfEncodingShift_JIS},
191 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
192 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
193 {CP51932, "CP51932", &NkfEncodingEUC_JP},
194 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
195 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
196 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
197 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
198 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
199 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
200 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
201 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
202 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
203 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
204 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
205 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
206 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
207 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
208 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
209 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
210 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
211 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
212 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
213 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
214 {BINARY, "BINARY", &NkfEncodingASCII},
221 } encoding_name_to_id_table[] = {
224 {"ISO-2022-JP", ISO_2022_JP},
225 {"ISO2022JP-CP932", CP50220},
226 {"CP50220", CP50220},
227 {"CP50221", CP50221},
228 {"CSISO2022JP", CP50221},
229 {"CP50222", CP50222},
230 {"ISO-2022-JP-1", ISO_2022_JP_1},
231 {"ISO-2022-JP-3", ISO_2022_JP_3},
232 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
233 {"SHIFT_JIS", SHIFT_JIS},
235 {"WINDOWS-31J", WINDOWS_31J},
236 {"CSWINDOWS31J", WINDOWS_31J},
237 {"CP932", WINDOWS_31J},
238 {"MS932", WINDOWS_31J},
239 {"CP10001", CP10001},
242 {"EUCJP-NKF", EUCJP_NKF},
243 {"CP51932", CP51932},
244 {"EUC-JP-MS", EUCJP_MS},
245 {"EUCJP-MS", EUCJP_MS},
246 {"EUCJPMS", EUCJP_MS},
247 {"EUC-JP-ASCII", EUCJP_ASCII},
248 {"EUCJP-ASCII", EUCJP_ASCII},
249 {"SHIFT_JISX0213", SHIFT_JISX0213},
250 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
251 {"EUC-JISX0213", EUC_JISX0213},
252 {"EUC-JIS-2004", EUC_JIS_2004},
255 {"UTF-8-BOM", UTF_8_BOM},
256 {"UTF8-MAC", UTF8_MAC},
257 {"UTF-8-MAC", UTF8_MAC},
259 {"UTF-16BE", UTF_16BE},
260 {"UTF-16BE-BOM", UTF_16BE_BOM},
261 {"UTF-16LE", UTF_16LE},
262 {"UTF-16LE-BOM", UTF_16LE_BOM},
264 {"UTF-32BE", UTF_32BE},
265 {"UTF-32BE-BOM", UTF_32BE_BOM},
266 {"UTF-32LE", UTF_32LE},
267 {"UTF-32LE-BOM", UTF_32LE_BOM},
272 #if defined(DEFAULT_CODE_JIS)
273 #define DEFAULT_ENCIDX ISO_2022_JP
274 #elif defined(DEFAULT_CODE_SJIS)
275 #define DEFAULT_ENCIDX SHIFT_JIS
276 #elif defined(DEFAULT_CODE_WINDOWS_31J)
277 #define DEFAULT_ENCIDX WINDOWS_31J
278 #elif defined(DEFAULT_CODE_EUC)
279 #define DEFAULT_ENCIDX EUC_JP
280 #elif defined(DEFAULT_CODE_UTF8)
281 #define DEFAULT_ENCIDX UTF_8
/*
 * Locale-independent character classification and conversion helpers.
 *
 * Every macro parameter is parenthesized in the expansion so that an
 * argument containing a low-precedence operator (for example `a | b` or
 * `x ? y : z`) is treated as a single value.
 *
 * The argument is still evaluated more than once, so never pass an
 * expression with side effects (`*p++`, a function call that reads input,
 * and so on).
 *
 * SP, TAB, CR, LF, DEL, SS3, CP932_TABLE_BEGIN and CP932_TABLE_END are
 * defined elsewhere in this file.
 */

/* True for ASCII letters and decimal digits. */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c)  ((c) == SP || (c) == TAB)
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c)  (SP<=(c) && (c)<='~')
#define nkf_isgraph(c)  ('!'<=(c) && (c)<='~')

/* Value of a hexadecimal digit character; any other character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)

/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])

/* True when the second-lowest byte of c2 (taken as unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)

/*
 * True for CR, LF, and for characters strictly between SP and DEL other
 * than  ? = _ ( ) . and the double quote (0x22).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

/* True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END]. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)

/* True when SP <= c < 0x60 (0xE0 with the high bit cleared). */
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) < (0xE0&0x7F))
311 #define HOLD_SIZE 1024
312 #if defined(INT_IS_SHORT)
313 #define IOBUF_SIZE 2048
315 #define IOBUF_SIZE 16384
318 #define DEFAULT_J 'B'
319 #define DEFAULT_R 'B'
326 /* MIME preprocessor */
328 #ifdef EASYWIN /*Easy Win */
329 extern POINT _BufferSize;
338 void (*status_func)(struct input_code *, nkf_char);
339 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
343 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
344 static nkf_encoding *input_encoding = NULL;
345 static nkf_encoding *output_encoding = NULL;
347 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
349 * 0: Shift_JIS, eucJP-ascii
354 #define UCS_MAP_ASCII 0
356 #define UCS_MAP_CP932 2
357 #define UCS_MAP_CP10001 3
358 static int ms_ucs_map_f = UCS_MAP_ASCII;
360 #ifdef UTF8_INPUT_ENABLE
361 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
362 static int no_cp932ext_f = FALSE;
363 /* ignore ZERO WIDTH NO-BREAK SPACE */
364 static int no_best_fit_chars_f = FALSE;
365 static int input_endian = ENDIAN_BIG;
366 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
367 static void (*encode_fallback)(nkf_char c) = NULL;
368 static void w_status(struct input_code *, nkf_char);
370 #ifdef UTF8_OUTPUT_ENABLE
371 static int output_bom_f = FALSE;
372 static int output_endian = ENDIAN_BIG;
375 static void std_putc(nkf_char c);
376 static nkf_char std_getc(FILE *f);
377 static nkf_char std_ungetc(nkf_char c,FILE *f);
379 static nkf_char broken_getc(FILE *f);
380 static nkf_char broken_ungetc(nkf_char c,FILE *f);
382 static nkf_char mime_getc(FILE *f);
384 static void mime_putc(nkf_char c);
388 #if !defined(PERL_XS) && !defined(WIN32DLL)
389 static unsigned char stdibuf[IOBUF_SIZE];
390 static unsigned char stdobuf[IOBUF_SIZE];
/* File-scope mode flags; the initializers are the start-up defaults. */
394 static int unbuf_f = FALSE;
395 static int estab_f = FALSE;
396 static int nop_f = FALSE;
397 static int binmode_f = TRUE; /* binary mode */
398 static int rot_f = FALSE; /* ROT13/47 mode */
399 static int hira_f = FALSE; /* hira/kata henkan (hiragana/katakana conversion) */
400 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
401 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
402 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
403 static int mimebuf_f = FALSE; /* MIME buffered input */
404 static int broken_f = FALSE; /* convert ESC-less broken JIS */
405 static int iso8859_f = FALSE; /* ISO8859 through */
406 static int mimeout_f = FALSE; /* base64 mode */
407 static int x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
408 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
410 #ifdef UNICODE_NORMALIZATION
411 static int nfc_f = FALSE;
412 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
413 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
417 static int cap_f = FALSE;
418 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
419 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
421 static int url_f = FALSE;
422 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
423 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
/*
 * Tagging scheme for nkf_char values.
 *
 * The top eight bits (CLASS_MASK) hold a class tag and the low 24 bits
 * (VALUE_MASK) hold the value.  CLASS_UNICODE is the tag tested by
 * nkf_char_unicode_p() and applied by nkf_char_unicode_new().
 * PREFIX_EUCG3 (0x8F00) is OR-ed into a value by nkf_char_euc3_new().
 *
 * Macro parameters are parenthesized so that an argument such as `a | b`
 * is masked as a whole.  NKF_INT32_C is defined elsewhere in this file.
 */
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)

/* Constructors: OR the corresponding prefix/tag into c. */
#define nkf_char_euc3_new(c)    ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)

/* True when the class bits of c equal CLASS_UNICODE. */
#define nkf_char_unicode_p(c)       (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the value bits of c do not exceed U+FFFF. */
#define nkf_char_unicode_bmp_p(c)   (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
/* True when the value bits of c do not exceed U+10FFFF. */
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
438 #ifdef NUMCHAR_OPTION
439 static int numchar_f = FALSE;
440 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
441 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
445 static int noout_f = FALSE;
446 static void no_putc(nkf_char c);
447 static int debug_f = FALSE;
448 static void debug(const char *str);
449 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
452 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
453 static void set_input_codename(const char *codename);
456 static int exec_f = 0;
459 #ifdef SHIFTJIS_CP932
460 /* invert IBM extended characters to others */
461 static int cp51932_f = FALSE;
463 /* invert NEC-selected IBM extended characters to IBM extended characters */
464 static int cp932inv_f = TRUE;
466 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
467 #endif /* SHIFTJIS_CP932 */
469 static int x0212_f = FALSE;
470 static int x0213_f = FALSE;
472 static unsigned char prefix_table[256];
474 static void e_status(struct input_code *, nkf_char);
475 static void s_status(struct input_code *, nkf_char);
477 struct input_code input_code_list[] = {
478 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
479 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
480 #ifdef UTF8_INPUT_ENABLE
481 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
486 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
487 static int base64_count = 0;
489 /* X0208 -> ASCII converter */
492 static int f_line = 0; /* chars in line */
493 static int f_prev = 0;
494 static int fold_preserve_f = FALSE; /* preserve new lines */
495 static int fold_f = FALSE;
496 static int fold_len = 0;
499 static unsigned char kanji_intro = DEFAULT_J;
500 static unsigned char ascii_intro = DEFAULT_R;
504 #define FOLD_MARGIN 10
505 #define DEFAULT_FOLD 60
507 static int fold_margin = FOLD_MARGIN;
509 /* process default */
512 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
514 fprintf(stderr,"nkf internal module connection failure.\n");
520 no_connection(nkf_char c2, nkf_char c1)
522 no_connection2(c2,c1,0);
/*
 * Conversion hooks.  iconv is the input converter (c2, c1, c0) and the
 * others are output-side stages taking (c2, c1).  Each starts out pointing
 * at a no_connection stub, so calling a hook that has not been assigned
 * prints "nkf internal module connection failure." to stderr.
 */
525 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
526 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
528 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
532 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
533 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
534 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
536 /* static redirections */
/*
 * Character I/O function pointers.  Every getter/ungetter pair defaults to
 * std_getc/std_ungetc and every putter to std_putc.
 */
538 static void (*o_putc)(nkf_char c) = std_putc;
540 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
541 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
/* NOTE(review): the comment below repeats the one on i_mgetc; presumably
 * this pair is the input of broken_getc -- confirm against its definition. */
543 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
544 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
546 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
548 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
549 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
551 /* for strict mime */
552 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
553 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
556 static int output_mode = ASCII; /* output kanji mode */
557 static int input_mode = ASCII; /* input kanji mode */
558 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
560 /* X0201 / X0208 conversion tables */
562 /* X0201 kana conversion table */
564 static const unsigned char cv[]= {
565 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
584 /* X0201 kana conversion table for daguten */
586 static const unsigned char dv[]= {
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 /* X0201 kana conversion table for han-daguten */
607 static const unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 /* X0208 kigou conversion table */
628 /* 0x8140 - 0x819e */
629 static const unsigned char fv[] = {
631 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
632 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
633 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
635 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
636 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
637 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
638 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
639 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
647 static int option_mode = 0;
648 static int file_out_f = FALSE;
650 static int overwrite_f = FALSE;
651 static int preserve_time_f = FALSE;
652 static int backup_f = FALSE;
653 static char *backup_suffix = "";
656 static int eolmode_f = 0; /* CR, LF, CRLF */
657 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
658 static nkf_char prev_cr = 0; /* CR or 0 */
659 #ifdef EASYWIN /*Easy Win */
660 static int end_check;
663 #define STD_GC_BUFSIZE (256)
664 nkf_char std_gc_buf[STD_GC_BUFSIZE];
668 nkf_malloc(size_t size)
672 if (size == 0) size = 1;
676 perror("can't malloc");
684 nkf_realloc(void *ptr, size_t size)
686 if (size == 0) size = 1;
688 ptr = realloc(ptr, size);
690 perror("can't realloc");
697 #define nkf_free(ptr) free(ptr)
700 nkf_str_caseeql(const char *src, const char *target)
703 for (i = 0; src[i] && target[i]; i++) {
704 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
706 if (src[i] || target[i]) return FALSE;
711 nkf_enc_from_index(int idx)
713 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
716 return &nkf_encoding_table[idx];
720 nkf_enc_find_index(const char *name)
723 if (name[0] == 'X' && *(name+1) == '-') name += 2;
724 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
725 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
726 return encoding_name_to_id_table[i].id;
733 nkf_enc_find(const char *name)
736 idx = nkf_enc_find_index(name);
737 if (idx < 0) return 0;
738 return nkf_enc_from_index(idx);
/*
 * Accessors for an nkf_encoding pointer `enc`.
 * nkf_enc_name / nkf_enc_to_index / nkf_enc_to_base_encoding read the
 * name, id and base_encoding members; nkf_enc_to_iconv / nkf_enc_to_oconv
 * fetch the converter functions from the base encoding.
 */
741 #define nkf_enc_name(enc) (enc)->name
742 #define nkf_enc_to_index(enc) (enc)->id
743 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
744 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
745 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
746 #define nkf_enc_asciicompat(enc) (\
747 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
748 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is one of the UTF-8 / UTF-16 / UTF-32 objects. */
749 #define nkf_enc_unicode_p(enc) (\
750 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
751 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
752 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
753 #define nkf_enc_cp5022x_p(enc) (\
754 nkf_enc_to_index(enc) == CP50220 ||\
755 nkf_enc_to_index(enc) == CP50221 ||\
756 nkf_enc_to_index(enc) == CP50222)
758 #ifdef DEFAULT_CODE_LOCALE
762 #ifdef HAVE_LANGINFO_H
763 return nl_langinfo(CODESET);
764 #elif defined(__WIN32__)
766 sprintf(buf, "CP%d", GetACP());
768 #elif defined(__OS2__)
769 # if defined(INT_IS_SHORT)
775 ULONG ulCP[1], ulncp;
776 DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
777 if (ulCP[0] == 932 || ulCP[0] == 943)
778 strcpy(buf, "Shift_JIS");
780 sprintf(buf, "CP%lu", ulCP[0]);
788 nkf_locale_encoding()
790 nkf_encoding *enc = 0;
791 const char *encname = nkf_locale_charmap();
793 enc = nkf_enc_find(encname);
796 #endif /* DEFAULT_CODE_LOCALE */
801 return &nkf_encoding_table[UTF_8];
805 nkf_default_encoding()
807 nkf_encoding *enc = 0;
808 #ifdef DEFAULT_CODE_LOCALE
809 enc = nkf_locale_encoding();
810 #elif defined(DEFAULT_ENCIDX)
811 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
813 if (!enc) enc = nkf_utf8_encoding();
819 #define fprintf dllprintf
825 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
832 "USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
834 "b,u Output is buffered (DEFAULT),Output is unbuffered\n"
835 "j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
836 #ifdef UTF8_OUTPUT_ENABLE
837 " After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
839 "J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
840 #ifdef UTF8_INPUT_ENABLE
841 " After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
844 "i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
845 "o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"
846 "r {de/en}crypt ROT13/47\n"
847 "h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
848 "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
849 "M[BQ] MIME encode [B:base64 Q:quoted]\n"
850 "l ISO8859-1 (Latin-1) support\n"
851 "f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
852 "Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
853 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
854 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
855 "X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
856 "B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
858 "T Text mode output\n"
860 "O Output to File (DEFAULT 'nkf.out')\n"
861 "I Convert non ISO-2022-JP charactor to GETA\n"
862 "d,c Convert line breaks -d: LF -c: CRLF\n"
863 "-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
864 "v, V Show this usage. V: show configuration\n"
866 "Long name options\n"
867 " --ic=<input codeset> --oc=<output codeset>\n"
868 " Specify the input or output codeset\n"
869 " --fj --unix --mac --windows\n"
870 " --jis --euc --sjis --utf8 --utf16 --mime --base64\n"
871 " Convert for the system or code\n"
872 " --hiragana --katakana --katakana-hiragana\n"
873 " To Hiragana/Katakana Conversion\n"
874 " --prefix= Insert escape before troublesome characters of Shift_JIS\n"
876 " --cap-input, --url-input Convert hex after ':' or '%%'\n"
878 #ifdef NUMCHAR_OPTION
879 " --numchar-input Convert Unicode Character Reference\n"
881 #ifdef UTF8_INPUT_ENABLE
882 " --fb-{skip, html, xml, perl, java, subchar}\n"
883 " Specify how nkf handles unassigned characters\n"
886 " --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"
887 " Overwrite original listed files by filtered result\n"
888 " --overwrite preserves timestamp of original files\n"
890 " -g --guess Guess the input code\n"
891 " --help --version Show this help/the version\n"
892 " For more information, see also man nkf\n"
898 show_configuration(void)
901 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
904 " Compile-time options:\n"
905 " Compiled at: " __DATE__ " " __TIME__ "\n"
908 " Default output encoding: "
909 #ifdef DEFAULT_CODE_LOCALE
910 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
911 #elif defined(DEFAULT_ENCIDX)
912 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
918 " Default output end of line: "
919 #if DEFAULT_NEWLINE == CR
921 #elif DEFAULT_NEWLINE == CRLF
927 " Decode MIME encoded string: "
928 #if MIME_DECODE_DEFAULT
934 " Convert JIS X 0201 Katakana: "
941 " --help, --version output: "
942 #if HELP_OUTPUT_HELP_OUTPUT
953 get_backup_filename(const char *suffix, const char *filename)
955 char *backup_filename;
956 int asterisk_count = 0;
958 int filename_length = strlen(filename);
960 for(i = 0; suffix[i]; i++){
961 if(suffix[i] == '*') asterisk_count++;
965 backup_filename = nkf_malloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
966 for(i = 0, j = 0; suffix[i];){
967 if(suffix[i] == '*'){
968 backup_filename[j] = '\0';
969 strncat(backup_filename, filename, filename_length);
971 j += filename_length;
973 backup_filename[j++] = suffix[i++];
976 backup_filename[j] = '\0';
978 j = filename_length + strlen(suffix);
979 backup_filename = nkf_malloc(j + 1);
980 strcpy(backup_filename, filename);
981 strcat(backup_filename, suffix);
982 backup_filename[j] = '\0';
984 return backup_filename;
988 #ifdef UTF8_INPUT_ENABLE
990 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
997 (*f)(0, bin2hex(c>>shift));
1008 encode_fallback_html(nkf_char c)
1013 if(c >= NKF_INT32_C(1000000))
1014 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1015 if(c >= NKF_INT32_C(100000))
1016 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1018 (*oconv)(0, 0x30+(c/10000 )%10);
1020 (*oconv)(0, 0x30+(c/1000 )%10);
1022 (*oconv)(0, 0x30+(c/100 )%10);
1024 (*oconv)(0, 0x30+(c/10 )%10);
1026 (*oconv)(0, 0x30+ c %10);
1032 encode_fallback_xml(nkf_char c)
1037 nkf_each_char_to_hex(oconv, c);
1043 encode_fallback_java(nkf_char c)
1047 if(!nkf_char_unicode_bmp_p(c)){
1051 (*oconv)(0, bin2hex(c>>20));
1052 (*oconv)(0, bin2hex(c>>16));
1056 (*oconv)(0, bin2hex(c>>12));
1057 (*oconv)(0, bin2hex(c>> 8));
1058 (*oconv)(0, bin2hex(c>> 4));
1059 (*oconv)(0, bin2hex(c ));
1064 encode_fallback_perl(nkf_char c)
1069 nkf_each_char_to_hex(oconv, c);
1075 encode_fallback_subchar(nkf_char c)
1077 c = unicode_subchar;
1078 (*oconv)((c>>8)&0xFF, c&0xFF);
1083 static const struct {
1107 {"katakana-hiragana","h3"},
1115 #ifdef UTF8_OUTPUT_ENABLE
1125 {"fb-subchar=", ""},
1127 #ifdef UTF8_INPUT_ENABLE
1128 {"utf8-input", "W"},
1129 {"utf16-input", "W16"},
1130 {"no-cp932ext", ""},
1131 {"no-best-fit-chars",""},
1133 #ifdef UNICODE_NORMALIZATION
1134 {"utf8mac-input", ""},
1146 #ifdef NUMCHAR_OPTION
1147 {"numchar-input", ""},
1153 #ifdef SHIFTJIS_CP932
1164 set_input_encoding(nkf_encoding *enc)
1166 switch (nkf_enc_to_index(enc)) {
1173 #ifdef SHIFTJIS_CP932
1176 #ifdef UTF8_OUTPUT_ENABLE
1177 ms_ucs_map_f = UCS_MAP_CP932;
1187 case ISO_2022_JP_2004:
1194 #ifdef SHIFTJIS_CP932
1197 #ifdef UTF8_OUTPUT_ENABLE
1198 ms_ucs_map_f = UCS_MAP_CP932;
1203 #ifdef SHIFTJIS_CP932
1206 #ifdef UTF8_OUTPUT_ENABLE
1207 ms_ucs_map_f = UCS_MAP_CP10001;
1215 #ifdef SHIFTJIS_CP932
1218 #ifdef UTF8_OUTPUT_ENABLE
1219 ms_ucs_map_f = UCS_MAP_CP932;
1223 #ifdef SHIFTJIS_CP932
1226 #ifdef UTF8_OUTPUT_ENABLE
1227 ms_ucs_map_f = UCS_MAP_MS;
1231 #ifdef SHIFTJIS_CP932
1234 #ifdef UTF8_OUTPUT_ENABLE
1235 ms_ucs_map_f = UCS_MAP_ASCII;
1238 case SHIFT_JISX0213:
1239 case SHIFT_JIS_2004:
1241 #ifdef SHIFTJIS_CP932
1248 #ifdef SHIFTJIS_CP932
1252 #ifdef UTF8_INPUT_ENABLE
1253 #ifdef UNICODE_NORMALIZATION
1261 input_endian = ENDIAN_BIG;
1265 input_endian = ENDIAN_LITTLE;
1270 input_endian = ENDIAN_BIG;
1274 input_endian = ENDIAN_LITTLE;
1281 set_output_encoding(nkf_encoding *enc)
1283 switch (nkf_enc_to_index(enc)) {
1286 #ifdef SHIFTJIS_CP932
1287 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1289 #ifdef UTF8_OUTPUT_ENABLE
1290 ms_ucs_map_f = UCS_MAP_CP932;
1294 #ifdef SHIFTJIS_CP932
1295 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1297 #ifdef UTF8_OUTPUT_ENABLE
1298 ms_ucs_map_f = UCS_MAP_CP932;
1303 #ifdef SHIFTJIS_CP932
1304 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1310 #ifdef SHIFTJIS_CP932
1311 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1317 #ifdef UTF8_OUTPUT_ENABLE
1318 ms_ucs_map_f = UCS_MAP_CP932;
1322 #ifdef UTF8_OUTPUT_ENABLE
1323 ms_ucs_map_f = UCS_MAP_CP10001;
1328 #ifdef SHIFTJIS_CP932
1329 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1331 #ifdef UTF8_OUTPUT_ENABLE
1332 ms_ucs_map_f = UCS_MAP_ASCII;
1337 #ifdef SHIFTJIS_CP932
1338 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1340 #ifdef UTF8_OUTPUT_ENABLE
1341 ms_ucs_map_f = UCS_MAP_ASCII;
1345 #ifdef SHIFTJIS_CP932
1346 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1348 #ifdef UTF8_OUTPUT_ENABLE
1349 ms_ucs_map_f = UCS_MAP_CP932;
1354 #ifdef UTF8_OUTPUT_ENABLE
1355 ms_ucs_map_f = UCS_MAP_MS;
1360 #ifdef UTF8_OUTPUT_ENABLE
1361 ms_ucs_map_f = UCS_MAP_ASCII;
1364 case SHIFT_JISX0213:
1365 case SHIFT_JIS_2004:
1367 #ifdef SHIFTJIS_CP932
1368 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1375 #ifdef SHIFTJIS_CP932
1376 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1379 #ifdef UTF8_OUTPUT_ENABLE
1381 output_bom_f = TRUE;
1385 output_bom_f = TRUE;
1388 output_endian = ENDIAN_LITTLE;
1389 output_bom_f = FALSE;
1392 output_endian = ENDIAN_LITTLE;
1393 output_bom_f = TRUE;
1396 output_bom_f = TRUE;
1399 output_endian = ENDIAN_LITTLE;
1400 output_bom_f = FALSE;
1403 output_endian = ENDIAN_LITTLE;
1404 output_bom_f = TRUE;
1410 static struct input_code*
1411 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1414 struct input_code *p = input_code_list;
1416 if (iconv_func == p->iconv_func){
1426 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1428 #ifdef INPUT_CODE_FIX
1429 if (f || !input_encoding)
1436 #ifdef INPUT_CODE_FIX
1437 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1443 if (estab_f && iconv_for_check != iconv){
1444 struct input_code *p = find_inputcode_byfunc(iconv);
1446 set_input_codename(p->name);
1449 iconv_for_check = iconv;
1456 x0212_shift(nkf_char c)
1461 if (0x75 <= c && c <= 0x7f){
1462 ret = c + (0x109 - 0x75);
1465 if (0x75 <= c && c <= 0x7f){
1466 ret = c + (0x113 - 0x75);
1474 x0212_unshift(nkf_char c)
1477 if (0x7f <= c && c <= 0x88){
1478 ret = c + (0x75 - 0x7f);
1479 }else if (0x89 <= c && c <= 0x92){
1480 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1484 #endif /* X0212_ENABLE */
1487 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1493 if((0x21 <= ndx && ndx <= 0x2F)){
1494 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1495 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1497 }else if(0x6E <= ndx && ndx <= 0x7E){
1498 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1499 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1505 else if(nkf_isgraph(ndx)){
1507 const unsigned short *ptr;
1508 ptr = x0212_shiftjis[ndx - 0x21];
1510 val = ptr[(c1 & 0x7f) - 0x21];
1519 c2 = x0212_shift(c2);
1521 #endif /* X0212_ENABLE */
1523 if(0x7F < c2) return 1;
1524 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1525 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1530 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1532 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1535 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1536 if (0xFC < c1) return 1;
1537 #ifdef SHIFTJIS_CP932
1538 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1539 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1546 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1547 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1553 #endif /* SHIFTJIS_CP932 */
1555 if (!x0213_f && is_ibmext_in_sjis(c2)){
1556 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1559 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1572 if(x0213_f && c2 >= 0xF0){
1573 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1574 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1575 }else{ /* 78<=k<=94 */
1576 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1577 if (0x9E < c1) c2++;
1580 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1581 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1582 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1583 if (0x9E < c1) c2++;
1586 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1593 c2 = x0212_unshift(c2);
1600 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/*
 * Encode the code point `val` as UTF-8, storing the bytes through p1..p4.
 * The branches visible here build a 2-byte form for val < 0x800, a 3-byte
 * form when nkf_char_unicode_bmp_p(val) holds, and a 4-byte form when
 * nkf_char_unicode_value_p(val) holds.
 */
1602 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1610 }else if (val < 0x800){
/* 2 bytes: lead 110xxxxx carries bits 6 and up, trail 10xxxxxx bits 0-5 */
1611 *p1 = 0xc0 | (val >> 6);
1612 *p2 = 0x80 | (val & 0x3f);
1615 } else if (nkf_char_unicode_bmp_p(val)) {
/* 3 bytes: lead 1110xxxx carries bits 12 and up, then two 6-bit trail bytes */
1616 *p1 = 0xe0 | (val >> 12);
1617 *p2 = 0x80 | ((val >> 6) & 0x3f);
1618 *p3 = 0x80 | ( val & 0x3f);
1620 } else if (nkf_char_unicode_value_p(val)) {
/*
 * 4 bytes: three 6-bit trail bytes carry bits 0-17.
 * NOTE(review): the lead byte below reuses the 3-byte marker 0xe0 and
 * shifts by 16, so bits 16-17 are emitted both here and in *p2.  A 4-byte
 * UTF-8 sequence uses the lead pattern 11110xxx holding bits 18-20, i.e.
 * 0xf0 | (val >> 18) -- confirm and fix.
 */
1621 *p1 = 0xe0 | (val >> 16);
1622 *p2 = 0x80 | ((val >> 12) & 0x3f);
1623 *p3 = 0x80 | ((val >> 6) & 0x3f);
1624 *p4 = 0x80 | ( val & 0x3f);
/*
 * Decode one UTF-8 sequence, passed as up to four bytes c1..c4, by
 * accumulating the payload bits of each byte into wc.  The branch taken is
 * selected from the lead byte.
 */
1634 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/*
 * NOTE(review): 0xC2 and 0xC3 are valid UTF-8 lead bytes (U+0080..U+00FF),
 * yet this threshold sends them to the "trail byte or invalid" branch --
 * confirm that this is intended.
 */
1641 else if (c1 <= 0xC3) {
1642 /* trail byte or invalid */
1645 else if (c1 <= 0xDF) {
/* 2-byte lead: 5 payload bits placed at bits 6-10 */
1647 wc = (c1 & 0x1F) << 6;
1650 else if (c1 <= 0xEF) {
/* 3-byte lead: 4 payload bits at bits 12-15, then 6 bits from c2 */
1652 wc = (c1 & 0x0F) << 12;
1653 wc |= (c2 & 0x3F) << 6;
/*
 * NOTE(review): every other branch selects on c1, but this one tests c2
 * while its body again takes the high bits from c1.  Presumably
 * `c1 <= 0xF4` was meant -- confirm.
 */
1656 else if (c2 <= 0xF4) {
/* 4-byte lead: high bits at bit 18 and up, then 6 bits each from c2 and c3 */
1658 wc = (c1 & 0x0F) << 18;
1659 wc |= (c2 & 0x3F) << 12;
1660 wc |= (c3 & 0x3F) << 6;
1670 #ifdef UTF8_INPUT_ENABLE
1672 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1673 const unsigned short *const *pp, nkf_char psize,
1674 nkf_char *p2, nkf_char *p1)
1677 const unsigned short *p;
1680 if (pp == 0) return 1;
1683 if (c1 < 0 || psize <= c1) return 1;
1685 if (p == 0) return 1;
1688 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1690 if (val == 0) return 1;
1691 if (no_cp932ext_f && (
1692 (val>>8) == 0x2D || /* NEC special characters */
1693 val > NKF_INT32_C(0xF300) /* IBM extended characters */
1701 if (c2 == SO) c2 = JIS_X_0201_1976_K;
1709 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1711 const unsigned short *const *pp;
1712 const unsigned short *const *const *ppp;
1713 static const char no_best_fit_chars_table_C2[] =
1714 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1715 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1716 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1717 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1718 static const char no_best_fit_chars_table_C2_ms[] =
1719 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1720 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1721 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1722 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1723 static const char no_best_fit_chars_table_932_C2[] =
1724 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1725 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1726 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1727 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1728 static const char no_best_fit_chars_table_932_C3[] =
1729 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1730 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1731 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1732 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1738 }else if(c2 < 0xe0){
1739 if(no_best_fit_chars_f){
1740 if(ms_ucs_map_f == UCS_MAP_CP932){
1743 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1746 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1749 }else if(!cp932inv_f){
1752 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1755 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1758 }else if(ms_ucs_map_f == UCS_MAP_MS){
1759 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1760 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1778 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1779 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1780 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1782 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1783 }else if(c0 < 0xF0){
1784 if(no_best_fit_chars_f){
1785 if(ms_ucs_map_f == UCS_MAP_CP932){
1786 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1787 }else if(ms_ucs_map_f == UCS_MAP_MS){
1792 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1795 if(c0 == 0x92) return 1;
1800 if(c1 == 0x80 || c0 == 0x9C) return 1;
1803 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1808 if(c0 == 0x94) return 1;
1811 if(c0 == 0xBB) return 1;
1821 if(c0 == 0x95) return 1;
1824 if(c0 == 0xA5) return 1;
1831 if(c0 == 0x8D) return 1;
1834 if(c0 == 0x9E && !cp932inv_f) return 1;
1837 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1845 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1846 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1847 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1849 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1851 #ifdef SHIFTJIS_CP932
1852 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1854 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1855 s2e_conv(s2, s1, p2, p1);
1864 #ifdef UTF8_OUTPUT_ENABLE
1866 e2w_conv(nkf_char c2, nkf_char c1)
1868 const unsigned short *p;
1870 if (c2 == JIS_X_0201_1976_K) {
1871 if (ms_ucs_map_f == UCS_MAP_CP10001) {
1879 p = euc_to_utf8_1byte;
1881 } else if (is_eucg3(c2)){
1882 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1885 c2 = (c2&0x7f) - 0x21;
1886 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1887 p = x0212_to_utf8_2bytes[c2];
1893 c2 = (c2&0x7f) - 0x21;
1894 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1896 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
1897 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
1898 euc_to_utf8_2bytes_ms[c2];
1903 c1 = (c1 & 0x7f) - 0x21;
1904 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
1911 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1918 }else if (0xc0 <= c2 && c2 <= 0xef) {
1919 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1920 #ifdef NUMCHAR_OPTION
1923 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1931 #ifdef UTF8_INPUT_ENABLE
1933 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
1935 nkf_char c1, c2, c3, c4;
1942 else if (nkf_char_unicode_bmp_p(val)){
1943 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
1944 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
1947 *p1 = nkf_char_unicode_new(val);
1953 *p1 = nkf_char_unicode_new(val);
1960 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
1962 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
1963 if (iso2022jp_f && !x0201_f) {
1964 c2 = GETA1; c1 = GETA2;
1966 c2 = JIS_X_0201_1976_K;
1970 }else if (c2 == 0x8f){
1974 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
1975 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
1976 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
1979 c2 = (c2 << 8) | (c1 & 0x7f);
1981 #ifdef SHIFTJIS_CP932
1984 if (e2s_conv(c2, c1, &s2, &s1) == 0){
1985 s2e_conv(s2, s1, &c2, &c1);
1992 #endif /* SHIFTJIS_CP932 */
1994 #endif /* X0212_ENABLE */
1995 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
1998 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
1999 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2000 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2005 #ifdef SHIFTJIS_CP932
2006 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2008 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2009 s2e_conv(s2, s1, &c2, &c1);
2016 #endif /* SHIFTJIS_CP932 */
2024 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2026 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2027 if (iso2022jp_f && !x0201_f) {
2028 c2 = GETA1; c1 = GETA2;
2032 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2034 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2036 if(c1 == 0x7F) return 0;
2037 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2040 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2041 if (ret) return ret;
2048 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2050 nkf_char ret = 0, c4 = 0;
2051 static const char w_iconv_utf8_1st_byte[] =
2053 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2054 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2055 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2056 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2063 if (c1 < 0 || 0xff < c1) {
2064 }else if (c1 == 0) { /* 0 : 1 byte*/
2066 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2069 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2071 if (c2 < 0x80 || 0xBF < c2) return 0;
2074 if (c3 == 0) return -1;
2075 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2080 if (c3 == 0) return -1;
2081 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2085 if (c3 == 0) return -1;
2086 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2090 if (c3 == 0) return -2;
2091 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2095 if (c3 == 0) return -2;
2096 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2100 if (c3 == 0) return -2;
2101 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2109 if (c1 == 0 || c1 == EOF){
2110 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2111 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2114 ret = w2e_conv(c1, c2, c3, &c1, &c2);
2122 #define NKF_ICONV_INVALID_CODE_RANGE -13
2124 unicode_iconv(nkf_char wc)
2132 }else if ((wc>>11) == 27) {
2133 /* unpaired surrogate */
2134 return NKF_ICONV_INVALID_CODE_RANGE;
2135 }else if (wc < 0xFFFF) {
2136 ret = w16e_conv(wc, &c2, &c1);
2137 if (ret) return ret;
2138 }else if (wc < 0x10FFFF) {
2140 c1 = nkf_char_unicode_new(wc);
2142 return NKF_ICONV_INVALID_CODE_RANGE;
/*
 * Negative status codes.  nkf_iconv_utf_16() returns
 * NKF_ICONV_NEED_TWO_MORE_BYTES when the first unit is a high surrogate
 * (0xD8..0xDB in its high byte) but the following unit is not a low
 * surrogate (0xDC..0xDF).
 */
2148 #define NKF_ICONV_NEED_ONE_MORE_BYTE -1
2149 #define NKF_ICONV_NEED_TWO_MORE_BYTES -2
/*
 * Combine a UTF-16 surrogate pair into one code point.  The subtracted
 * constant folds the three fixed terms of the usual formula
 *   ((lead - 0xD800) << 10) + (trail - 0xDC00) + 0x10000
 * into one: (0xD800 << 10) + 0xDC00 - 0x10000 == 0x35FDC00.
 */
2150 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
2152 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2161 if (input_endian == ENDIAN_BIG) {
2162 if (0xD8 <= c1 && c1 <= 0xDB) {
2163 if (0xDC <= c3 && c3 <= 0xDF) {
2164 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2165 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2170 if (0xD8 <= c2 && c2 <= 0xDB) {
2171 if (0xDC <= c4 && c4 <= 0xDF) {
2172 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2173 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2179 return (*unicode_iconv)(wc);
2183 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
2189 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
2195 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2204 switch(input_endian){
2206 wc = c2 << 16 | c3 << 8 | c4;
2209 wc = c3 << 16 | c2 << 8 | c1;
2212 wc = c1 << 16 | c4 << 8 | c3;
2215 wc = c4 << 16 | c1 << 8 | c2;
2218 return NKF_ICONV_INVALID_CODE_RANGE;
2221 return (*unicode_iconv)(wc);
2225 #define output_ascii_escape_sequence(mode) do { \
2226 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2229 (*o_putc)(ascii_intro); \
2230 output_mode = mode; \
2235 output_escape_sequence(int mode)
2237 if (output_mode == mode)
2245 case JIS_X_0201_1976_K:
2253 (*o_putc)(kanji_intro);
2278 j_oconv(nkf_char c2, nkf_char c1)
2280 #ifdef NUMCHAR_OPTION
2281 if (c2 == 0 && nkf_char_unicode_p(c1)){
2282 w16e_conv(c1, &c2, &c1);
2283 if (c2 == 0 && nkf_char_unicode_p(c1)){
2284 c2 = c1 & VALUE_MASK;
2285 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2288 c2 = 0x7F + c1 / 94;
2289 c1 = 0x21 + c1 % 94;
2291 if (encode_fallback) (*encode_fallback)(c1);
2298 output_ascii_escape_sequence(ASCII);
2301 else if (c2 == EOF) {
2302 output_ascii_escape_sequence(ASCII);
2305 else if (c2 == ISO_8859_1) {
2306 output_ascii_escape_sequence(ISO_8859_1);
2309 else if (c2 == JIS_X_0201_1976_K) {
2310 output_escape_sequence(JIS_X_0201_1976_K);
2313 } else if (is_eucg3(c2)){
2314 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2315 (*o_putc)(c2 & 0x7f);
2320 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2321 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2322 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
2329 e_oconv(nkf_char c2, nkf_char c1)
2331 if (c2 == 0 && nkf_char_unicode_p(c1)){
2332 w16e_conv(c1, &c2, &c1);
2333 if (c2 == 0 && nkf_char_unicode_p(c1)){
2334 c2 = c1 & VALUE_MASK;
2335 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2339 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2340 c1 = 0x21 + c1 % 94;
2343 (*o_putc)((c2 & 0x7f) | 0x080);
2344 (*o_putc)(c1 | 0x080);
2346 (*o_putc)((c2 & 0x7f) | 0x080);
2347 (*o_putc)(c1 | 0x080);
2351 if (encode_fallback) (*encode_fallback)(c1);
2359 } else if (c2 == 0) {
2360 output_mode = ASCII;
2362 } else if (c2 == JIS_X_0201_1976_K) {
2363 output_mode = EUC_JP;
2364 (*o_putc)(SS2); (*o_putc)(c1|0x80);
2365 } else if (c2 == ISO_8859_1) {
2366 output_mode = ISO_8859_1;
2367 (*o_putc)(c1 | 0x080);
2369 } else if (is_eucg3(c2)){
2370 output_mode = EUC_JP;
2371 #ifdef SHIFTJIS_CP932
2374 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2375 s2e_conv(s2, s1, &c2, &c1);
2380 output_mode = ASCII;
2382 }else if (is_eucg3(c2)){
2385 (*o_putc)((c2 & 0x7f) | 0x080);
2386 (*o_putc)(c1 | 0x080);
2389 (*o_putc)((c2 & 0x7f) | 0x080);
2390 (*o_putc)(c1 | 0x080);
2394 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2395 set_iconv(FALSE, 0);
2396 return; /* too late to rescue this char */
2398 output_mode = EUC_JP;
2399 (*o_putc)(c2 | 0x080);
2400 (*o_putc)(c1 | 0x080);
2405 s_oconv(nkf_char c2, nkf_char c1)
2407 #ifdef NUMCHAR_OPTION
2408 if (c2 == 0 && nkf_char_unicode_p(c1)){
2409 w16e_conv(c1, &c2, &c1);
2410 if (c2 == 0 && nkf_char_unicode_p(c1)){
2411 c2 = c1 & VALUE_MASK;
2412 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2415 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2417 c1 += 0x40 + (c1 > 0x3e);
2422 if(encode_fallback)(*encode_fallback)(c1);
2431 } else if (c2 == 0) {
2432 output_mode = ASCII;
2434 } else if (c2 == JIS_X_0201_1976_K) {
2435 output_mode = SHIFT_JIS;
2437 } else if (c2 == ISO_8859_1) {
2438 output_mode = ISO_8859_1;
2439 (*o_putc)(c1 | 0x080);
2441 } else if (is_eucg3(c2)){
2442 output_mode = SHIFT_JIS;
2443 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2449 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2450 set_iconv(FALSE, 0);
2451 return; /* too late to rescue this char */
2453 output_mode = SHIFT_JIS;
2454 e2s_conv(c2, c1, &c2, &c1);
2456 #ifdef SHIFTJIS_CP932
2458 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2459 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2465 #endif /* SHIFTJIS_CP932 */
2468 if (prefix_table[(unsigned char)c1]){
2469 (*o_putc)(prefix_table[(unsigned char)c1]);
2475 #ifdef UTF8_OUTPUT_ENABLE
2477 w_oconv(nkf_char c2, nkf_char c1)
2483 output_bom_f = FALSE;
2494 if (c2 == 0 && nkf_char_unicode_p(c1)){
2495 val = c1 & VALUE_MASK;
2496 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2498 if (c2) (*o_putc)(c2);
2499 if (c3) (*o_putc)(c3);
2500 if (c4) (*o_putc)(c4);
2507 val = e2w_conv(c2, c1);
2509 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2511 if (c2) (*o_putc)(c2);
2512 if (c3) (*o_putc)(c3);
2513 if (c4) (*o_putc)(c4);
2519 w_oconv16(nkf_char c2, nkf_char c1)
2522 output_bom_f = FALSE;
2523 if (output_endian == ENDIAN_LITTLE){
2537 if (c2 == 0 && nkf_char_unicode_p(c1)) {
2538 if (nkf_char_unicode_bmp_p(c1)) {
2539 c2 = (c1 >> 8) & 0xff;
2543 if (c1 <= UNICODE_MAX) {
2544 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2545 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2546 if (output_endian == ENDIAN_LITTLE){
2547 (*o_putc)(c2 & 0xff);
2548 (*o_putc)((c2 >> 8) & 0xff);
2549 (*o_putc)(c1 & 0xff);
2550 (*o_putc)((c1 >> 8) & 0xff);
2552 (*o_putc)((c2 >> 8) & 0xff);
2553 (*o_putc)(c2 & 0xff);
2554 (*o_putc)((c1 >> 8) & 0xff);
2555 (*o_putc)(c1 & 0xff);
2561 nkf_char val = e2w_conv(c2, c1);
2562 c2 = (val >> 8) & 0xff;
2567 if (output_endian == ENDIAN_LITTLE){
2577 w_oconv32(nkf_char c2, nkf_char c1)
2580 output_bom_f = FALSE;
2581 if (output_endian == ENDIAN_LITTLE){
2599 if (c2 == ISO_8859_1) {
2601 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2604 c1 = e2w_conv(c2, c1);
2607 if (output_endian == ENDIAN_LITTLE){
2608 (*o_putc)( c1 & 0xFF);
2609 (*o_putc)((c1 >> 8) & 0xFF);
2610 (*o_putc)((c1 >> 16) & 0xFF);
2614 (*o_putc)((c1 >> 16) & 0xFF);
2615 (*o_putc)((c1 >> 8) & 0xFF);
2616 (*o_putc)( c1 & 0xFF);
/*
 * Score flags for input-encoding candidates.  Each flag is the previous one
 * shifted left by one bit; set_code_score() ORs a flag into ptr->score,
 * clr_code_score() masks it out, and status_reset() starts the score at
 * SCORE_INIT.
 */
2621 #define SCORE_L2 (1) /* JIS level-2 kanji */
2622 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
2623 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
2624 #define SCORE_CP932 (SCORE_DEPEND << 1) /* re-mapping by CP932 (IBM extended characters) */
2625 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2626 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* nonexistent characters */
2627 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
2628 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
2630 #define SCORE_INIT (SCORE_iMIME)
2632 static const char score_table_A0[] = {
2635 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2636 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
2639 static const char score_table_F0[] = {
2640 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2641 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
2642 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2643 SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
2647 set_code_score(struct input_code *ptr, nkf_char score)
2650 ptr->score |= score;
2655 clr_code_score(struct input_code *ptr, nkf_char score)
2658 ptr->score &= ~score;
2663 code_score(struct input_code *ptr)
2665 nkf_char c2 = ptr->buf[0];
2666 #ifdef UTF8_OUTPUT_ENABLE
2667 nkf_char c1 = ptr->buf[1];
2670 set_code_score(ptr, SCORE_ERROR);
2671 }else if (c2 == SS2){
2672 set_code_score(ptr, SCORE_KANA);
2673 }else if (c2 == 0x8f){
2674 set_code_score(ptr, SCORE_X0212);
2675 #ifdef UTF8_OUTPUT_ENABLE
2676 }else if (!e2w_conv(c2, c1)){
2677 set_code_score(ptr, SCORE_NO_EXIST);
2679 }else if ((c2 & 0x70) == 0x20){
2680 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2681 }else if ((c2 & 0x70) == 0x70){
2682 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2683 }else if ((c2 & 0x70) >= 0x50){
2684 set_code_score(ptr, SCORE_L2);
2689 status_disable(struct input_code *ptr)
2694 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2698 status_push_ch(struct input_code *ptr, nkf_char c)
2700 ptr->buf[ptr->index++] = c;
2704 status_clear(struct input_code *ptr)
2711 status_reset(struct input_code *ptr)
2714 ptr->score = SCORE_INIT;
2718 status_reinit(struct input_code *ptr)
2721 ptr->_file_stat = 0;
2725 status_check(struct input_code *ptr, nkf_char c)
2727 if (c <= DEL && estab_f){
2733 s_status(struct input_code *ptr, nkf_char c)
2737 status_check(ptr, c);
2742 }else if (nkf_char_unicode_p(c)){
2744 }else if (0xa1 <= c && c <= 0xdf){
2745 status_push_ch(ptr, SS2);
2746 status_push_ch(ptr, c);
2749 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2751 status_push_ch(ptr, c);
2752 }else if (0xed <= c && c <= 0xee){
2754 status_push_ch(ptr, c);
2755 #ifdef SHIFTJIS_CP932
2756 }else if (is_ibmext_in_sjis(c)){
2758 status_push_ch(ptr, c);
2759 #endif /* SHIFTJIS_CP932 */
2761 }else if (0xf0 <= c && c <= 0xfc){
2763 status_push_ch(ptr, c);
2764 #endif /* X0212_ENABLE */
2766 status_disable(ptr);
2770 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2771 status_push_ch(ptr, c);
2772 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2776 status_disable(ptr);
2780 #ifdef SHIFTJIS_CP932
2781 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2782 status_push_ch(ptr, c);
2783 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2784 set_code_score(ptr, SCORE_CP932);
2789 #endif /* SHIFTJIS_CP932 */
2790 status_disable(ptr);
2793 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2794 status_push_ch(ptr, c);
2795 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2796 set_code_score(ptr, SCORE_CP932);
2799 status_disable(ptr);
2806 e_status(struct input_code *ptr, nkf_char c)
2810 status_check(ptr, c);
2815 }else if (nkf_char_unicode_p(c)){
2817 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2819 status_push_ch(ptr, c);
2821 }else if (0x8f == c){
2823 status_push_ch(ptr, c);
2824 #endif /* X0212_ENABLE */
2826 status_disable(ptr);
2830 if (0xa1 <= c && c <= 0xfe){
2831 status_push_ch(ptr, c);
2835 status_disable(ptr);
2840 if (0xa1 <= c && c <= 0xfe){
2842 status_push_ch(ptr, c);
2844 status_disable(ptr);
2846 #endif /* X0212_ENABLE */
2850 #ifdef UTF8_INPUT_ENABLE
2852 w_status(struct input_code *ptr, nkf_char c)
2856 status_check(ptr, c);
2861 }else if (nkf_char_unicode_p(c)){
2863 }else if (0xc0 <= c && c <= 0xdf){
2865 status_push_ch(ptr, c);
2866 }else if (0xe0 <= c && c <= 0xef){
2868 status_push_ch(ptr, c);
2869 }else if (0xf0 <= c && c <= 0xf4){
2871 status_push_ch(ptr, c);
2873 status_disable(ptr);
2878 if (0x80 <= c && c <= 0xbf){
2879 status_push_ch(ptr, c);
2880 if (ptr->index > ptr->stat){
2881 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2882 && ptr->buf[2] == 0xbf);
2883 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2884 &ptr->buf[0], &ptr->buf[1]);
2891 status_disable(ptr);
2895 if (0x80 <= c && c <= 0xbf){
2896 if (ptr->index < ptr->stat){
2897 status_push_ch(ptr, c);
2902 status_disable(ptr);
2910 code_status(nkf_char c)
2912 int action_flag = 1;
2913 struct input_code *result = 0;
2914 struct input_code *p = input_code_list;
2916 if (!p->status_func) {
2920 if (!p->status_func)
2922 (p->status_func)(p, c);
2925 }else if(p->stat == 0){
2936 if (result && !estab_f){
2937 set_iconv(TRUE, result->iconv_func);
2938 }else if (c <= DEL){
2939 struct input_code *ptr = input_code_list;
2953 return std_gc_buf[--std_gc_ndx];
2960 std_ungetc(nkf_char c, FILE *f)
2962 if (std_gc_ndx == STD_GC_BUFSIZE){
2965 std_gc_buf[std_gc_ndx++] = c;
2971 std_putc(nkf_char c)
2978 static unsigned char hold_buf[HOLD_SIZE*2];
2979 static int hold_count = 0;
2981 push_hold_buf(nkf_char c2)
2983 if (hold_count >= HOLD_SIZE*2)
2985 hold_buf[hold_count++] = (unsigned char)c2;
2986 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2990 h_conv(FILE *f, int c1, int c2)
2996 /** it must NOT be in the kanji shift sequence */
2997 /** it must NOT be written in JIS7 */
2998 /** and it must be after 2 byte 8bit code */
3004 while ((c2 = (*i_getc)(f)) != EOF) {
3010 if (push_hold_buf(c2) == EOF || estab_f) {
3016 struct input_code *p = input_code_list;
3017 struct input_code *result = p;
3022 if (p->status_func && p->score < result->score) {
3027 set_iconv(TRUE, result->iconv_func);
3032 ** 1) EOF is detected, or
3033 ** 2) Code is established, or
3034 ** 3) Buffer is FULL (but last word is pushed)
3036 ** in 1) and 3) cases, we continue to use
3037 ** Kanji codes by oconv and leave estab_f unchanged.
3042 while (hold_index < hold_count){
3043 c1 = hold_buf[hold_index++];
3047 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3048 (*iconv)(JIS_X_0201_1976_K, c1, 0);
3051 if (hold_index < hold_count){
3052 c2 = hold_buf[hold_index++];
3062 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3065 if (hold_index < hold_count){
3066 c3 = hold_buf[hold_index++];
3067 } else if ((c3 = (*i_getc)(f)) == EOF) {
3072 if (hold_index < hold_count){
3073 c4 = hold_buf[hold_index++];
3074 } else if ((c4 = (*i_getc)(f)) == EOF) {
3079 (*iconv)(c1, c2, (c3<<8)|c4);
3084 /* 3 bytes EUC or UTF-8 */
3085 if (hold_index < hold_count){
3086 c3 = hold_buf[hold_index++];
3087 } else if ((c3 = (*i_getc)(f)) == EOF) {
3093 (*iconv)(c1, c2, c3);
3096 if (c3 == EOF) break;
3102 * Check and Ignore BOM
3108 switch(c2 = (*i_getc)(f)){
3110 if((c2 = (*i_getc)(f)) == 0x00){
3111 if((c2 = (*i_getc)(f)) == 0xFE){
3112 if((c2 = (*i_getc)(f)) == 0xFF){
3113 if(!input_encoding){
3114 set_iconv(TRUE, w_iconv32);
3116 if (iconv == w_iconv32) {
3117 input_endian = ENDIAN_BIG;
3120 (*i_ungetc)(0xFF,f);
3121 }else (*i_ungetc)(c2,f);
3122 (*i_ungetc)(0xFE,f);
3123 }else if(c2 == 0xFF){
3124 if((c2 = (*i_getc)(f)) == 0xFE){
3125 if(!input_encoding){
3126 set_iconv(TRUE, w_iconv32);
3128 if (iconv == w_iconv32) {
3129 input_endian = ENDIAN_2143;
3132 (*i_ungetc)(0xFF,f);
3133 }else (*i_ungetc)(c2,f);
3134 (*i_ungetc)(0xFF,f);
3135 }else (*i_ungetc)(c2,f);
3136 (*i_ungetc)(0x00,f);
3137 }else (*i_ungetc)(c2,f);
3138 (*i_ungetc)(0x00,f);
3141 if((c2 = (*i_getc)(f)) == 0xBB){
3142 if((c2 = (*i_getc)(f)) == 0xBF){
3143 if(!input_encoding){
3144 set_iconv(TRUE, w_iconv);
3146 if (iconv == w_iconv) {
3149 (*i_ungetc)(0xBF,f);
3150 }else (*i_ungetc)(c2,f);
3151 (*i_ungetc)(0xBB,f);
3152 }else (*i_ungetc)(c2,f);
3153 (*i_ungetc)(0xEF,f);
3156 if((c2 = (*i_getc)(f)) == 0xFF){
3157 if((c2 = (*i_getc)(f)) == 0x00){
3158 if((c2 = (*i_getc)(f)) == 0x00){
3159 if(!input_encoding){
3160 set_iconv(TRUE, w_iconv32);
3162 if (iconv == w_iconv32) {
3163 input_endian = ENDIAN_3412;
3166 (*i_ungetc)(0x00,f);
3167 }else (*i_ungetc)(c2,f);
3168 (*i_ungetc)(0x00,f);
3169 }else (*i_ungetc)(c2,f);
3170 if(!input_encoding){
3171 set_iconv(TRUE, w_iconv16);
3173 if (iconv == w_iconv16) {
3174 input_endian = ENDIAN_BIG;
3177 (*i_ungetc)(0xFF,f);
3178 }else (*i_ungetc)(c2,f);
3179 (*i_ungetc)(0xFE,f);
3182 if((c2 = (*i_getc)(f)) == 0xFE){
3183 if((c2 = (*i_getc)(f)) == 0x00){
3184 if((c2 = (*i_getc)(f)) == 0x00){
3185 if(!input_encoding){
3186 set_iconv(TRUE, w_iconv32);
3188 if (iconv == w_iconv32) {
3189 input_endian = ENDIAN_LITTLE;
3192 (*i_ungetc)(0x00,f);
3193 }else (*i_ungetc)(c2,f);
3194 (*i_ungetc)(0x00,f);
3195 }else (*i_ungetc)(c2,f);
3196 if(!input_encoding){
3197 set_iconv(TRUE, w_iconv16);
3199 if (iconv == w_iconv16) {
3200 input_endian = ENDIAN_LITTLE;
3203 (*i_ungetc)(0xFE,f);
3204 }else (*i_ungetc)(c2,f);
3205 (*i_ungetc)(0xFF,f);
3220 init_broken_state(void)
3222 memset(&broken_state, 0, sizeof(broken_state));
3228 broken_state.buf[broken_state.count++] = c;
3232 pop_broken_buf(void)
3234 return broken_state.buf[--broken_state.count];
3238 broken_getc(FILE *f)
3242 if (broken_state.count > 0) {
3243 return pop_broken_buf();
3246 if (c=='$' && broken_state.status != ESC
3247 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3249 broken_state.status = 0;
3250 if (c1=='@'|| c1=='B') {
3251 push_broken_buf(c1);
3258 } else if (c=='(' && broken_state.status != ESC
3259 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3261 broken_state.status = 0;
3262 if (c1=='J'|| c1=='B') {
3263 push_broken_buf(c1);
3271 broken_state.status = c;
3277 broken_ungetc(nkf_char c, FILE *f)
3279 if (broken_state.count < 2)
3285 eol_conv(nkf_char c2, nkf_char c1)
3287 if (guess_f && input_eol != EOF) {
3288 if (c2 == 0 && c1 == LF) {
3289 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3290 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3291 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3293 else if (!input_eol) input_eol = CR;
3294 else if (input_eol != CR) input_eol = EOF;
3296 if (prev_cr || (c2 == 0 && c1 == LF)) {
3298 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3299 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3301 if (c2 == 0 && c1 == CR) prev_cr = CR;
3302 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3306 Return value of fold_conv()
3308 LF add newline and output char
3309 CR add newline and output nothing
3312 1 (or else) normal output
3314 fold state in prev (previous character)
3316 >0x80 Japanese (X0208/X0201)
3321 This fold algorithm does not preserve leading spaces in a line.
3322 This is the main difference from fmt.
/*
 * Size counted for one character when advancing f_line: 2 when c2 is
 * nonzero, otherwise 1.  c1 is accepted but not used.  The arguments are
 * not parenthesized in the expansion, so pass plain identifiers only.
 */
3325 #define char_size(c2,c1) (c2?2:1)
3328 fold_conv(nkf_char c2, nkf_char c1)
3331 nkf_char fold_state;
3333 if (c1== CR && !fold_preserve_f) {
3334 fold_state=0; /* ignore cr */
3335 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3337 fold_state=0; /* ignore cr */
3338 } else if (c1== BS) {
3339 if (f_line>0) f_line--;
3341 } else if (c2==EOF && f_line != 0) { /* close open last line */
3343 } else if ((c1==LF && !fold_preserve_f)
3344 || ((c1==CR||(c1==LF&&f_prev!=CR))
3345 && fold_preserve_f)) {
3347 if (fold_preserve_f) {
3351 } else if ((f_prev == c1 && !fold_preserve_f)
3352 || (f_prev == LF && fold_preserve_f)
3353 ) { /* duplicate newline */
3356 fold_state = LF; /* output two newline */
3362 if (f_prev&0x80) { /* Japanese? */
3364 fold_state = 0; /* ignore given single newline */
3365 } else if (f_prev==SP) {
3369 if (++f_line<=fold_len)
3373 fold_state = CR; /* fold and output nothing */
3377 } else if (c1=='\f') {
3380 fold_state = LF; /* output newline and clear */
3381 } else if ( (c2==0 && c1==SP)||
3382 (c2==0 && c1==TAB)||
3383 (c2=='!'&& c1=='!')) {
3384 /* X0208 kankaku or ascii space */
3386 fold_state = 0; /* remove duplicate spaces */
3389 if (++f_line<=fold_len)
3390 fold_state = SP; /* output ASCII space only */
3392 f_prev = SP; f_line = 0;
3393 fold_state = CR; /* fold and output nothing */
3397 prev0 = f_prev; /* we still need this one... , but almost done */
3399 if (c2 || c2 == JIS_X_0201_1976_K)
3400 f_prev |= 0x80; /* this is Japanese */
3401 f_line += char_size(c2,c1);
3402 if (f_line<=fold_len) { /* normal case */
3405 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3406 f_line = char_size(c2,c1);
3407 fold_state = LF; /* We can't wait, do fold now */
3408 } else if (c2 == JIS_X_0201_1976_K) {
3409 /* simple kinsoku rules return 1 means no folding */
/*
 * Each test compares c1 with an 8-bit kana code masked to 7 bits; a match
 * sets fold_state to 1.  The mark named on each line is a translation of
 * the original (Japanese) comment.
 * NOTE(review): by the JIS X 0201 katakana layout 0xA4 is the half-width
 * ideographic comma, 0xA3 the half-width closing corner bracket and 0xA1
 * the half-width ideographic full stop, so the original comments on those
 * three lines look shuffled -- confirm against the code chart.
 */
3410 if (c1==(0xde&0x7f)) fold_state = 1; /* voiced sound mark (dakuten) */
3411 else if (c1==(0xdf&0x7f)) fold_state = 1; /* semi-voiced sound mark (handakuten) */
3412 else if (c1==(0xa4&0x7f)) fold_state = 1; /* ideographic full stop */
3413 else if (c1==(0xa3&0x7f)) fold_state = 1; /* comma */
3414 else if (c1==(0xa1&0x7f)) fold_state = 1; /* closing corner bracket */
3415 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3416 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3418 fold_state = LF;/* add one new f_line before this character */
3421 fold_state = LF;/* add one new f_line before this character */
3424 /* kinsoku point in ASCII */
3425 if ( c1==')'|| /* { [ ( */
3436 /* just after special */
3437 } else if (!is_alnum(prev0)) {
3438 f_line = char_size(c2,c1);
3440 } else if ((prev0==SP) || /* ignored new f_line */
3441 (prev0==LF)|| /* ignored new f_line */
3442 (prev0&0x80)) { /* X0208 - ASCII */
3443 f_line = char_size(c2,c1);
3444 fold_state = LF;/* add one new f_line before this character */
3446 fold_state = 1; /* default no fold in ASCII */
3450 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3451 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3452 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3453 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3454 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3455 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3456 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3457 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3458 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3459 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3460 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3461 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3462 /* default no fold in kinsoku */
3465 f_line = char_size(c2,c1);
3466 /* add one new f_line before this character */
3469 f_line = char_size(c2,c1);
3471 /* add one new f_line before this character */
3476 /* terminator process */
3477 switch(fold_state) {
3479 OCONV_NEWLINE((*o_fconv));
3485 OCONV_NEWLINE((*o_fconv));
3496 static nkf_char z_prev2=0,z_prev1=0;
3499 z_conv(nkf_char c2, nkf_char c1)
3502 /* if (c2) c1 &= 0x7f; assertion */
3504 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3510 if (z_prev2 == JIS_X_0201_1976_K) {
3511 if (c2 == JIS_X_0201_1976_K) {
3512 if (c1 == (0xde&0x7f)) { /*
\e$BByE@
\e(B */
3514 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3516 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /*
\e$BH>ByE@
\e(B */
3518 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3523 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3525 if (c2 == JIS_X_0201_1976_K) {
3526 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3527 /* wait for dakuten (voiced sound mark) or handakuten (semi-voiced sound mark) */
3532 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3543 if (alpha_f&1 && c2 == 0x23) {
3544 /* JISX0208 Alphabet */
3546 } else if (c2 == 0x21) {
3547 /* JISX0208 Kigou */
3552 } else if (alpha_f&4) {
3557 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3563 if (alpha_f&8 && c2 == 0) {
3565 const char *entity = 0;
3567 case '>': entity = ">"; break;
3568 case '<': entity = "<"; break;
3569 case '\"': entity = """; break;
3570 case '&': entity = "&"; break;
3573 while (*entity) (*o_zconv)(0, *entity++);
3579 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3584 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3588 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3592 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3596 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3600 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3604 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3608 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3612 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3617 (*o_zconv)(JIS_X_0201_1976_K, c);
3620 } else if (c2 == 0x25) {
3621 /* JISX0208 Katakana */
3622 static const int fullwidth_to_halfwidth[] =
3624 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3625 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3626 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3627 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3628 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3629 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3630 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3631 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3632 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3633 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3634 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3635 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3637 if (fullwidth_to_halfwidth[c1-0x20]){
3638 c2 = fullwidth_to_halfwidth[c1-0x20];
3639 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3641 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
3651 #define rot13(c) ( \
3653 (c <= 'M') ? (c + 13): \
3654 (c <= 'Z') ? (c - 13): \
3656 (c <= 'm') ? (c + 13): \
3657 (c <= 'z') ? (c - 13): \
3661 #define rot47(c) ( \
3663 ( c <= 'O') ? (c + 47) : \
3664 ( c <= '~') ? (c - 47) : \
3669 rot_conv(nkf_char c2, nkf_char c1)
3671 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3677 (*o_rot_conv)(c2,c1);
3681 hira_conv(nkf_char c2, nkf_char c1)
3685 if (0x20 < c1 && c1 < 0x74) {
3687 (*o_hira_conv)(c2,c1);
3689 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3691 c1 = nkf_char_unicode_new(0x3094);
3692 (*o_hira_conv)(c2,c1);
3695 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3697 (*o_hira_conv)(c2,c1);
3702 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3705 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3707 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3711 (*o_hira_conv)(c2,c1);
3716 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
3718 #define RANGE_NUM_MAX 18
3719 static const nkf_char range[RANGE_NUM_MAX][2] = {
3740 nkf_char start, end, c;
3742 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3746 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3751 for (i = 0; i < RANGE_NUM_MAX; i++) {
3752 start = range[i][0];
3755 if (c >= start && c <= end) {
3760 (*o_iso2022jp_check_conv)(c2,c1);
3764 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3766 static const unsigned char *mime_pattern[] = {
3767 (const unsigned char *)"\075?EUC-JP?B?",
3768 (const unsigned char *)"\075?SHIFT_JIS?B?",
3769 (const unsigned char *)"\075?ISO-8859-1?Q?",
3770 (const unsigned char *)"\075?ISO-8859-1?B?",
3771 (const unsigned char *)"\075?ISO-2022-JP?B?",
3772 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3773 #if defined(UTF8_INPUT_ENABLE)
3774 (const unsigned char *)"\075?UTF-8?B?",
3775 (const unsigned char *)"\075?UTF-8?Q?",
3777 (const unsigned char *)"\075?US-ASCII?Q?",
3782 /* Marker used to raise the priority of the matching input code. */
3783 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
3784 e_iconv, s_iconv, 0, 0, 0, 0,
3785 #if defined(UTF8_INPUT_ENABLE)
3791 static const nkf_char mime_encode[] = {
3792 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K,
3793 #if defined(UTF8_INPUT_ENABLE)
3800 static const nkf_char mime_encode_method[] = {
3801 'B', 'B','Q', 'B', 'B', 'Q',
3802 #if defined(UTF8_INPUT_ENABLE)
3810 /* MIME preprocessor fifo */
/*
 * The FIFO is a ring buffer: mime_input_buf(n) wraps the index n with
 * MIME_BUF_MASK, which is only correct because MIME_BUF_SIZE is a power of
 * two. Index arguments are masked on every access, so the counters passed in
 * do not have to be reduced modulo the buffer size beforehand.
 */
3812 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3813 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
3814 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
3816 unsigned char buf[MIME_BUF_SIZE];
3818 unsigned int last; /* decoded */
3819 unsigned int input; /* undecoded */
3821 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
3823 #define MAXRECOVER 20
3826 mime_input_buf_unshift(nkf_char c)
3828 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
3832 mime_ungetc(nkf_char c, FILE *f)
3834 mime_input_buf_unshift(c);
3839 mime_ungetc_buf(nkf_char c, FILE *f)
3842 (*i_mungetc_buf)(c,f);
3844 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
3849 mime_getc_buf(FILE *f)
3851 /* we don't keep eof of mime_input_buf, because it contains ?= as
3852 a terminator. It was checked in mime_integrity. */
3853 return ((mimebuf_f)?
3854 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
3858 switch_mime_getc(void)
3860 if (i_getc!=mime_getc) {
3861 i_mgetc = i_getc; i_getc = mime_getc;
3862 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3863 if(mime_f==STRICT_MIME) {
3864 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3865 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3871 unswitch_mime_getc(void)
3873 if(mime_f==STRICT_MIME) {
3874 i_mgetc = i_mgetc_buf;
3875 i_mungetc = i_mungetc_buf;
3878 i_ungetc = i_mungetc;
3879 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3880 mime_iconv_back = NULL;
3884 mime_integrity(FILE *f, const unsigned char *p)
3888 /* In buffered mode, read until =? or NL or buffer full
3890 mime_input_state.input = mime_input_state.top;
3891 mime_input_state.last = mime_input_state.top;
3893 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
3895 q = mime_input_state.input;
3896 while((c=(*i_getc)(f))!=EOF) {
3897 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3898 break; /* buffer full */
3900 if (c=='=' && d=='?') {
3901 /* checked. skip header, start decode */
3902 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3903 /* mime_last_input = mime_input_state.input; */
3904 mime_input_state.input = q;
3908 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3910 /* Should we check length mod 4? */
3911 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3914 /* In case of Incomplete MIME, no MIME decode */
3915 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3916 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
3917 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
3918 switch_mime_getc(); /* anyway we need buffered getc */
3923 mime_begin_strict(FILE *f)
3927 const unsigned char *p,*q;
3928 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
3930 mime_decode_mode = FALSE;
3931 /* =? has been checked */
3933 p = mime_pattern[j];
3936 for(i=2;p[i]>SP;i++) { /* start at =? */
3937 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
3938 /* pattern fails, try next one */
3940 while (mime_pattern[++j]) {
3941 p = mime_pattern[j];
3942 for(k=2;k<i;k++) /* assume length(p) > i */
3943 if (p[k]!=q[k]) break;
3944 if (k==i && nkf_toupper(c1)==p[k]) break;
3946 p = mime_pattern[j];
3947 if (p) continue; /* found next one, continue */
3948 /* all fails, output from recovery buffer */
3956 mime_decode_mode = p[i-2];
3958 mime_iconv_back = iconv;
3959 set_iconv(FALSE, mime_priority_func[j]);
3960 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3962 if (mime_decode_mode=='B') {
3963 mimebuf_f = unbuf_f;
3965 /* do MIME integrity check */
3966 return mime_integrity(f,mime_pattern[j]);
3980 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3981 /* re-read and convert again from mime_buffer. */
3983 /* =? has been checked */
3984 k = mime_input_state.last;
3985 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
3986 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3987 /* We accept any character type even if it is broken by new lines */
3988 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
3989 if (c1==LF||c1==SP||c1==CR||
3990 c1=='-'||c1=='_'||is_alnum(c1)) continue;
3992 /* Failed. But this could be another MIME preamble */
3994 mime_input_state.last--;
4000 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4001 if (!(++i<MAXRECOVER) || c1==EOF) break;
4002 if (c1=='b'||c1=='B') {
4003 mime_decode_mode = 'B';
4004 } else if (c1=='q'||c1=='Q') {
4005 mime_decode_mode = 'Q';
4009 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4010 if (!(++i<MAXRECOVER) || c1==EOF) break;
4012 mime_decode_mode = FALSE;
4018 if (!mime_decode_mode) {
4019 /* false MIME preamble, restart from mime_buffer */
4020 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4021 /* Since we are in MIME mode until buffer becomes empty, */
4022 /* we never go into mime_begin again for a while. */
4025 /* discard mime preamble, and goto MIME mode */
4026 mime_input_state.last = k;
4027 /* do no MIME integrity check */
4028 return c1; /* used only for checking EOF */
4039 debug(const char *str)
4042 fprintf(stderr, "%s\n", str ? str : "NULL");
4048 set_input_codename(const char *codename)
4050 if (!input_codename) {
4051 input_codename = codename;
4052 } else if (strcmp(codename, input_codename) != 0) {
4053 input_codename = "";
4058 get_guessed_code(void)
4060 if (input_codename && !*input_codename) {
4061 input_codename = "BINARY";
4063 struct input_code *p = find_inputcode_byfunc(iconv);
4064 if (!input_codename) {
4065 input_codename = "ASCII";
4066 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4067 if (p->score & (SCORE_DEPEND|SCORE_CP932))
4068 input_codename = "CP932";
4069 } else if (strcmp(input_codename, "EUC-JP") == 0) {
4070 if (p->score & (SCORE_X0212))
4071 input_codename = "EUCJP-MS";
4072 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4073 input_codename = "CP51932";
4074 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4075 if (p->score & (SCORE_KANA))
4076 input_codename = "CP50221";
4077 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4078 input_codename = "CP50220";
4081 return input_codename;
4084 #if !defined(PERL_XS) && !defined(WIN32DLL)
4086 print_guessed_code(char *filename)
4088 if (filename != NULL) printf("%s: ", filename);
4089 if (input_codename && !*input_codename) {
4092 input_codename = get_guessed_code();
4094 printf("%s\n", input_codename);
4098 input_eol == CR ? " (CR)" :
4099 input_eol == LF ? " (LF)" :
4100 input_eol == CRLF ? " (CRLF)" :
4101 input_eol == EOF ? " (MIXED NL)" :
4111 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4113 nkf_char c1, c2, c3;
4119 if (!nkf_isxdigit(c2)){
4124 if (!nkf_isxdigit(c3)){
4129 return (hex2bin(c2) << 4) | hex2bin(c3);
4135 return hex_getc(':', f, i_cgetc, i_cungetc);
4139 cap_ungetc(nkf_char c, FILE *f)
4141 return (*i_cungetc)(c, f);
4147 return hex_getc('%', f, i_ugetc, i_uungetc);
4151 url_ungetc(nkf_char c, FILE *f)
4153 return (*i_uungetc)(c, f);
4157 #ifdef NUMCHAR_OPTION
4159 numchar_getc(FILE *f)
4161 nkf_char (*g)(FILE *) = i_ngetc;
4162 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4173 if (buf[i] == 'x' || buf[i] == 'X'){
4174 for (j = 0; j < 7; j++){
4176 if (!nkf_isxdigit(buf[i])){
4183 c |= hex2bin(buf[i]);
4186 for (j = 0; j < 8; j++){
4190 if (!nkf_isdigit(buf[i])){
4197 c += hex2bin(buf[i]);
4203 return nkf_char_unicode_new(c);
4213 numchar_ungetc(nkf_char c, FILE *f)
4215 return (*i_nungetc)(c, f);
4219 #ifdef UNICODE_NORMALIZATION
4228 nkf_ary_new(int length)
4230 nkf_ary *ary = nkf_malloc(sizeof(nkf_ary));
4231 ary->ary = nkf_malloc(length);
4232 ary->max_length = length;
4238 nkf_ary_dispose(nkf_ary *ary)
/* Element-count helpers for nkf_ary; both read the `count` field directly. */
4244 #define nkf_ary_length(ary) ((ary)->count)
4245 #define nkf_ary_empty_p(ary) ((ary)->count == 0)
/*
 * Returns the byte stored at position `index` of `ary`.
 *
 * NOTE(review): the assertion accepts index == ary->count, which addresses
 * the slot one past the last pushed element. An exclusive bound
 * (index < ary->count) looks like the intent -- confirm against callers.
 */
4247 static unsigned char
4248 nkf_ary_at(nkf_ary *ary, int index)
4250 assert(index <= ary->count);
4251 return ary->ary[index];
4255 nkf_ary_clear(nkf_ary *ary)
4260 static unsigned char
4261 nkf_ary_push(nkf_ary *ary, nkf_char c)
4263 assert(ary->max_length > ary->count);
4264 ary->ary[ary->count++] = c;
4268 static unsigned char
4269 nkf_ary_pop(nkf_ary *ary)
4271 assert(0 < ary->count);
4272 return ary->ary[--ary->count];
4275 /* Normalization Form C */
4279 nkf_char (*g)(FILE *f) = i_nfc_getc;
4280 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4281 nkf_ary *buf = nkf_ary_new(9);
4282 const unsigned char *array;
4283 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4284 nkf_char c = (*g)(f);
4286 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4288 nkf_ary_push(buf, (unsigned char)c);
4290 while (lower <= upper) {
4291 int mid = (lower+upper) / 2;
4293 array = normalization_table[mid].nfd;
4294 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4295 if (len >= nkf_ary_length(buf)) {
4299 lower = 1, upper = 0;
4302 nkf_ary_push(buf, c);
4304 if (array[len] != nkf_ary_at(buf, len)) {
4305 if (array[len] < nkf_ary_at(buf, len)) lower = mid + 1;
4306 else upper = mid - 1;
4313 array = normalization_table[mid].nfc;
4315 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4316 nkf_ary_push(buf, array[i]);
4320 } while (lower <= upper);
4322 while (nkf_ary_length(buf) > 1) (*u)(nkf_ary_pop(buf), f);
4323 c = nkf_ary_pop(buf);
4324 nkf_ary_dispose(buf);
4330 nfc_ungetc(nkf_char c, FILE *f)
4332 return (*i_nfc_ungetc)(c, f);
4334 #endif /* UNICODE_NORMALIZATION */
4338 base64decode(nkf_char c)
4343 i = c - 'A'; /* A..Z 0-25 */
4344 } else if (c == '_') {
4345 i = '?' /* 63 */ ; /* _ 63 */
4347 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4349 } else if (c > '/') {
4350 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4351 } else if (c == '+' || c == '-') {
4352 i = '>' /* 62 */ ; /* + and - 62 */
4354 i = '?' /* 63 */ ; /* / 63 */
4362 nkf_char c1, c2, c3, c4, cc;
4363 nkf_char t1, t2, t3, t4, mode, exit_mode;
4364 nkf_char lwsp_count;
4367 nkf_char lwsp_size = 128;
4369 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4370 return mime_input_buf(mime_input_state.top++);
4372 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4373 mime_decode_mode=FALSE;
4374 unswitch_mime_getc();
4375 return (*i_getc)(f);
4378 if (mimebuf_f == FIXED_MIME)
4379 exit_mode = mime_decode_mode;
4382 if (mime_decode_mode == 'Q') {
4383 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4385 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4386 if (c1<=SP || DEL<=c1) {
4387 mime_decode_mode = exit_mode; /* prepare for quit */
4390 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4394 mime_decode_mode = exit_mode; /* prepare for quit */
4395 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4396 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4397 /* end Q encoding */
4398 input_mode = exit_mode;
4400 lwsp_buf = nkf_malloc((lwsp_size+5)*sizeof(char));
4401 while ((c1=(*i_getc)(f))!=EOF) {
4406 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4414 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4415 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4430 lwsp_buf[lwsp_count] = (unsigned char)c1;
4431 if (lwsp_count++>lwsp_size){
4433 lwsp_buf_new = nkf_realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4434 lwsp_buf = lwsp_buf_new;
4440 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4442 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4443 i_ungetc(lwsp_buf[lwsp_count],f);
4449 if (c1=='='&&c2<SP) { /* this is soft wrap */
4450 while((c1 = (*i_mgetc)(f)) <=SP) {
4451 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4453 mime_decode_mode = 'Q'; /* still in MIME */
4454 goto restart_mime_q;
4457 mime_decode_mode = 'Q'; /* still in MIME */
4461 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4462 if (c2<=SP) return c2;
4463 mime_decode_mode = 'Q'; /* still in MIME */
4464 return ((hex2bin(c2)<<4) + hex2bin(c3));
4467 if (mime_decode_mode != 'B') {
4468 mime_decode_mode = FALSE;
4469 return (*i_mgetc)(f);
4473 /* Base64 encoding */
4475 MIME allows line breaks in the middle of
4476 Base64, but we are very pessimistic in decoding
4477 in unbuf mode because MIME encoded code may be broken by
4478 less or an editor's control sequence (such as ESC-[-K) in unbuffered
4479 mode. Ignore incomplete MIME.
4481 mode = mime_decode_mode;
4482 mime_decode_mode = exit_mode; /* prepare for quit */
4484 while ((c1 = (*i_mgetc)(f))<=SP) {
4489 if ((c2 = (*i_mgetc)(f))<=SP) {
4492 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4493 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4496 if ((c1 == '?') && (c2 == '=')) {
4499 lwsp_buf = nkf_malloc((lwsp_size+5)*sizeof(char));
4500 while ((c1=(*i_getc)(f))!=EOF) {
4505 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4513 if ((c1=(*i_getc)(f))!=EOF) {
4517 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4532 lwsp_buf[lwsp_count] = (unsigned char)c1;
4533 if (lwsp_count++>lwsp_size){
4535 lwsp_buf_new = nkf_realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4536 lwsp_buf = lwsp_buf_new;
4542 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4544 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4545 i_ungetc(lwsp_buf[lwsp_count],f);
4552 if ((c3 = (*i_mgetc)(f))<=SP) {
4555 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4556 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4560 if ((c4 = (*i_mgetc)(f))<=SP) {
4563 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4564 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4568 mime_decode_mode = mode; /* still in MIME sigh... */
4570 /* BASE 64 decoding */
4572 t1 = 0x3f & base64decode(c1);
4573 t2 = 0x3f & base64decode(c2);
4574 t3 = 0x3f & base64decode(c3);
4575 t4 = 0x3f & base64decode(c4);
4576 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4578 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4579 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4581 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4582 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4584 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4589 return mime_input_buf(mime_input_state.top++);
/* Base64 alphabet: basis_64[v] is the output character for the 6-bit value v (0..63). */
4592 static const char basis_64[] =
4593 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4595 #define MIMEOUT_BUF_LENGTH (60)
4597 char buf[MIMEOUT_BUF_LENGTH+1];
4602 /*nkf_char mime_lastchar2, mime_lastchar1;*/
4605 open_mime(nkf_char mode)
4607 const unsigned char *p;
4610 p = mime_pattern[0];
4611 for(i=0;mime_pattern[i];i++) {
4612 if (mode == mime_encode[i]) {
4613 p = mime_pattern[i];
4617 mimeout_mode = mime_encode_method[i];
4619 if (base64_count>45) {
4620 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4621 (*o_mputc)(mimeout_state.buf[i]);
4624 PUT_NEWLINE((*o_mputc));
4627 if (mimeout_state.count>0
4628 && (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4629 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)) {
4633 for (;i<mimeout_state.count;i++) {
4634 if (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4635 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF) {
4636 (*o_mputc)(mimeout_state.buf[i]);
4646 j = mimeout_state.count;
4647 mimeout_state.count = 0;
4649 mime_putc(mimeout_state.buf[i]);
4654 mime_prechar(nkf_char c2, nkf_char c1)
4656 if (mimeout_mode > 0){
4658 if (base64_count + mimeout_state.count/3*4> 73){
4659 (*o_base64conv)(EOF,0);
4660 OCONV_NEWLINE((*o_base64conv));
4661 (*o_base64conv)(0,SP);
4665 if (base64_count + mimeout_state.count/3*4> 66) {
4666 (*o_base64conv)(EOF,0);
4667 OCONV_NEWLINE((*o_base64conv));
4668 (*o_base64conv)(0,SP);
4674 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4675 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4676 open_mime(output_mode);
4677 (*o_base64conv)(EOF,0);
4678 OCONV_NEWLINE((*o_base64conv));
4679 (*o_base64conv)(0,SP);
4698 switch(mimeout_mode) {
4703 (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4)]);
4709 (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2)]);
4714 if (mimeout_mode > 0) {
4715 if (mimeout_f!=FIXED_MIME) {
4717 } else if (mimeout_mode != 'Q')
4723 mimeout_addchar(nkf_char c)
4725 switch(mimeout_mode) {
4730 } else if(!nkf_isalnum(c)) {
4732 (*o_mputc)(bin2hex(((c>>4)&0xf)));
4733 (*o_mputc)(bin2hex((c&0xf)));
4741 mimeout_state.state=c;
4742 (*o_mputc)(basis_64[c>>2]);
4747 (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4748 mimeout_state.state=c;
4753 (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4754 (*o_mputc)(basis_64[c & 0x3F]);
4766 mime_putc(nkf_char c)
4771 if (mimeout_f == FIXED_MIME){
4772 if (mimeout_mode == 'Q'){
4773 if (base64_count > 71){
4774 if (c!=CR && c!=LF) {
4776 PUT_NEWLINE((*o_mputc));
4781 if (base64_count > 71){
4783 PUT_NEWLINE((*o_mputc));
4786 if (c == EOF) { /* c==EOF */
4790 if (c != EOF) { /* c==EOF */
4796 /* mimeout_f != FIXED_MIME */
4798 if (c == EOF) { /* c==EOF */
4799 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4800 j = mimeout_state.count;
4801 mimeout_state.count = 0;
4803 if (mimeout_mode > 0) {
4804 if (!nkf_isblank(mimeout_state.buf[j-1])) {
4806 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4809 mimeout_addchar(mimeout_state.buf[i]);
4813 mimeout_addchar(mimeout_state.buf[i]);
4817 mimeout_addchar(mimeout_state.buf[i]);
4823 mimeout_addchar(mimeout_state.buf[i]);
4829 if (mimeout_state.count > 0){
4830 lastchar = mimeout_state.buf[mimeout_state.count - 1];
4835 if (mimeout_mode=='Q') {
4836 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4837 if (c == CR || c == LF) {
4842 } else if (c <= SP) {
4844 if (base64_count > 70) {
4845 PUT_NEWLINE((*o_mputc));
4848 if (!nkf_isblank(c)) {
4853 if (base64_count > 70) {
4855 PUT_NEWLINE((*o_mputc));
4858 open_mime(output_mode);
4860 if (!nkf_noescape_mime(c)) {
4871 if (mimeout_mode <= 0) {
4872 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4873 if (nkf_isspace(c)) {
4875 if (mimeout_mode == -1) {
4878 if (c==CR || c==LF) {
4880 open_mime(output_mode);
4886 for (i=0;i<mimeout_state.count;i++) {
4887 (*o_mputc)(mimeout_state.buf[i]);
4888 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4899 mimeout_state.buf[0] = (char)c;
4900 mimeout_state.count = 1;
4902 if (base64_count > 1
4903 && base64_count + mimeout_state.count > 76
4904 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
4905 PUT_NEWLINE((*o_mputc));
4907 if (!nkf_isspace(mimeout_state.buf[0])){
4912 mimeout_state.buf[mimeout_state.count++] = (char)c;
4913 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4914 open_mime(output_mode);
4919 if (lastchar==CR || lastchar == LF){
4920 for (i=0;i<mimeout_state.count;i++) {
4921 (*o_mputc)(mimeout_state.buf[i]);
4924 mimeout_state.count = 0;
4927 for (i=0;i<mimeout_state.count-1;i++) {
4928 (*o_mputc)(mimeout_state.buf[i]);
4931 mimeout_state.buf[0] = SP;
4932 mimeout_state.count = 1;
4934 open_mime(output_mode);
4937 /* mimeout_mode == 'B', 1, 2 */
4938 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4939 if (lastchar == CR || lastchar == LF){
4940 if (nkf_isblank(c)) {
4941 for (i=0;i<mimeout_state.count;i++) {
4942 mimeout_addchar(mimeout_state.buf[i]);
4944 mimeout_state.count = 0;
4945 } else if (SP<c && c<DEL) {
4947 for (i=0;i<mimeout_state.count;i++) {
4948 (*o_mputc)(mimeout_state.buf[i]);
4951 mimeout_state.count = 0;
4953 mimeout_state.buf[mimeout_state.count++] = (char)c;
4956 if (c==SP || c==TAB || c==CR || c==LF) {
4957 for (i=0;i<mimeout_state.count;i++) {
4958 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
4960 for (i=0;i<mimeout_state.count;i++) {
4961 (*o_mputc)(mimeout_state.buf[i]);
4964 mimeout_state.count = 0;
4967 mimeout_state.buf[mimeout_state.count++] = (char)c;
4968 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4970 for (i=0;i<mimeout_state.count;i++) {
4971 (*o_mputc)(mimeout_state.buf[i]);
4974 mimeout_state.count = 0;
4978 if (mimeout_state.count>0 && SP<c && c!='=') {
4979 mimeout_state.buf[mimeout_state.count++] = (char)c;
4980 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4981 j = mimeout_state.count;
4982 mimeout_state.count = 0;
4984 mimeout_addchar(mimeout_state.buf[i]);
4991 if (mimeout_state.count>0) {
4992 j = mimeout_state.count;
4993 mimeout_state.count = 0;
4995 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
4997 mimeout_addchar(mimeout_state.buf[i]);
5003 (*o_mputc)(mimeout_state.buf[i]);
5005 open_mime(output_mode);
5012 base64_conv(nkf_char c2, nkf_char c1)
5014 mime_prechar(c2, c1);
5015 (*o_base64conv)(c2,c1);
5019 typedef struct nkf_iconv_t {
5022 size_t input_buffer_size;
5023 char *output_buffer;
5024 size_t output_buffer_size;
5028 nkf_iconv_new(char *tocode, char *fromcode)
5030 nkf_iconv_t converter;
5032 converter->input_buffer_size = IOBUF_SIZE;
5033 converter->input_buffer = nkf_malloc(converter->input_buffer_size);
5034 converter->output_buffer_size = IOBUF_SIZE * 2;
5035 converter->output_buffer = nkf_malloc(converter->output_buffer_size);
5036 converter->cd = iconv_open(tocode, fromcode);
5037 if (converter->cd == (iconv_t)-1)
5041 perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5044 perror("can't iconv_open");
5050 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5052 size_t invalid = (size_t)0;
5053 char *input_buffer = converter->input_buffer;
5054 size_t input_length = (size_t)0;
5055 char *output_buffer = converter->output_buffer;
5056 size_t output_length = converter->output_buffer_size;
5061 while ((c = (*i_getc)(f)) != EOF) {
5062 input_buffer[input_length++] = c;
5063 if (input_length < converter->input_buffer_size) break;
5067 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5068 while (output_length-- > 0) {
5069 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5071 if (ret == (size_t) - 1) {
5074 if (input_buffer != converter->input_buffer)
5075 memmove(converter->input_buffer, input_buffer, input_length);
5078 converter->output_buffer_size *= 2;
5079 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5080 if (output_buffer == NULL) {
5081 perror("can't realloc");
5084 converter->output_buffer = output_buffer;
5087 perror("can't iconv");
5100 nkf_iconv_close(nkf_iconv_t *convert)
5102 nkf_free(converter->inbuf);
5103 nkf_free(converter->outbuf);
5104 iconv_close(converter->cd);
5113 struct input_code *p = input_code_list;
5125 mime_f = MIME_DECODE_DEFAULT;
5126 mime_decode_f = FALSE;
5131 x0201_f = X0201_DEFAULT;
5132 iso2022jp_f = FALSE;
5133 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5134 ms_ucs_map_f = UCS_MAP_ASCII;
5136 #ifdef UTF8_INPUT_ENABLE
5137 no_cp932ext_f = FALSE;
5138 no_best_fit_chars_f = FALSE;
5139 encode_fallback = NULL;
5140 unicode_subchar = '?';
5141 input_endian = ENDIAN_BIG;
5143 #ifdef UTF8_OUTPUT_ENABLE
5144 output_bom_f = FALSE;
5145 output_endian = ENDIAN_BIG;
5147 #ifdef UNICODE_NORMALIZATION
5163 #ifdef SHIFTJIS_CP932
5173 for (i = 0; i < 256; i++){
5174 prefix_table[i] = 0;
5178 mimeout_state.count = 0;
5183 fold_preserve_f = FALSE;
5186 kanji_intro = DEFAULT_J;
5187 ascii_intro = DEFAULT_R;
5188 fold_margin = FOLD_MARGIN;
5189 o_zconv = no_connection;
5190 o_fconv = no_connection;
5191 o_eol_conv = no_connection;
5192 o_rot_conv = no_connection;
5193 o_hira_conv = no_connection;
5194 o_base64conv = no_connection;
5195 o_iso2022jp_check_conv = no_connection;
5198 i_ungetc = std_ungetc;
5200 i_bungetc = std_ungetc;
5203 i_mungetc = std_ungetc;
5204 i_mgetc_buf = std_getc;
5205 i_mungetc_buf = std_ungetc;
5206 output_mode = ASCII;
5208 mime_decode_mode = FALSE;
5214 init_broken_state();
5215 z_prev2=0,z_prev1=0;
5217 iconv_for_check = 0;
5219 input_codename = NULL;
5220 input_encoding = NULL;
5221 output_encoding = NULL;
5228 module_connection(void)
5230 if (input_encoding) set_input_encoding(input_encoding);
5231 if (!output_encoding) {
5232 output_encoding = nkf_default_encoding();
5234 if (!output_encoding) {
5235 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5238 set_output_encoding(output_encoding);
5239 oconv = nkf_enc_to_oconv(output_encoding);
5242 /* replace continuation module, from output side */
5244 /* output redirection */
5246 if (noout_f || guess_f){
5253 if (mimeout_f == TRUE) {
5254 o_base64conv = oconv; oconv = base64_conv;
5256 /* base64_count = 0; */
5259 if (eolmode_f || guess_f) {
5260 o_eol_conv = oconv; oconv = eol_conv;
5263 o_rot_conv = oconv; oconv = rot_conv;
5266 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5269 o_hira_conv = oconv; oconv = hira_conv;
5272 o_fconv = oconv; oconv = fold_conv;
5275 if (alpha_f || x0201_f) {
5276 o_zconv = oconv; oconv = z_conv;
5280 i_ungetc = std_ungetc;
5281 /* input redicrection */
5284 i_cgetc = i_getc; i_getc = cap_getc;
5285 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5288 i_ugetc = i_getc; i_getc = url_getc;
5289 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5292 #ifdef NUMCHAR_OPTION
5294 i_ngetc = i_getc; i_getc = numchar_getc;
5295 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5298 #ifdef UNICODE_NORMALIZATION
5300 i_nfc_getc = i_getc; i_getc = nfc_getc;
5301 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5304 if (mime_f && mimebuf_f==FIXED_MIME) {
5305 i_mgetc = i_getc; i_getc = mime_getc;
5306 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5309 i_bgetc = i_getc; i_getc = broken_getc;
5310 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5312 if (input_encoding) {
5313 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5315 set_iconv(FALSE, e_iconv);
5319 struct input_code *p = input_code_list;
5328 Conversion main loop. Code detection only.
5331 #if !defined(PERL_XS) && !defined(WIN32DLL)
/* Set up the getc/oconv chains, then drain the input one unit at a time
 * through the i_getc hook until it reports EOF. */
5338 module_connection();
5339 while ((c = (*i_getc)(f)) != EOF)
/*
 * Control-flow shorthands for the byte loop of kanji_convert().  They expand
 * to bare `continue` / `break` statements and to assignments to the loop's
 * c1/c2 variables, so they are only meaningful inside that loop.
 * NOTE: SKIP and MORE expand to two statements each; use them only inside a
 * braced block, never as the sole body of an unbraced `if`.
 */
5346 #define NEXT continue /* no output, get next */
5347 #define SKIP c2=0;continue /* clear the pending c2, no output, get next */
5348 #define MORE c2=c1;continue /* keep c1 as c2, need one more byte */
5349 #define SEND ; /* output c1 and c2, get next */
5350 #define LAST break /* end of loop, go closing */
/* set_input_mode(mode): store `mode` in input_mode and report "ISO-2022-JP"
 * through set_input_codename() and debug(). */
5351 #define set_input_mode(mode) do { \
5352 input_mode = mode; \
5354 set_input_codename("ISO-2022-JP"); \
5355 debug("ISO-2022-JP"); \
/*
 * Conversion main loop: read the stream `f` through the i_getc chain and hand
 * the decoded characters to the iconv / oconv chains.
 *
 * UTF-32 and UTF-16 input (iconv == w_iconv32 / w_iconv16) is consumed in
 * fixed-size groups of bytes.  Every other input goes through the byte-wise
 * loop, where c1 is the byte just read and c2 a pending earlier byte (see the
 * MORE / SKIP macros).
 */
5359 kanji_convert(FILE *f)
5361 nkf_char c1=0, c2=0, c3=0, c4=0;
5362 int shift_mode = 0; /* 0, 1, 2, 3 */
5364 int is_8bit = FALSE;
/* The input encoding is known and is not ASCII compatible. */
5366 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5371 output_mode = ASCII;
/* A negative result means no output encoding could be determined. */
5373 if (module_connection() < 0) {
5374 #if !defined(PERL_XS) && !defined(WIN32DLL)
5375 fprintf(stderr, "no output encoding given\n");
5381 #ifdef UTF8_INPUT_ENABLE
/* UTF-32 input: four bytes are read per call to nkf_iconv_utf_32(). */
5382 if(iconv == w_iconv32){
5383 while ((c1 = (*i_getc)(f)) != EOF &&
5384 (c2 = (*i_getc)(f)) != EOF &&
5385 (c3 = (*i_getc)(f)) != EOF &&
5386 (c4 = (*i_getc)(f)) != EOF) {
5387 nkf_iconv_utf_32(c1, c2, c3, c4);
/* Push EOF back onto the input. */
5389 (*i_ungetc)(EOF, f);
/* UTF-16 input: two bytes per call; when nkf_iconv_utf_16() returns -2 two
 * more bytes are read and all four are passed again.
 * NOTE(review): -2 presumably marks the first half of a surrogate pair --
 * confirm against nkf_iconv_utf_16(). */
5391 else if (iconv == w_iconv16) {
5392 while ((c1 = (*i_getc)(f)) != EOF &&
5393 (c2 = (*i_getc)(f)) != EOF) {
5394 if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
5395 (c3 = (*i_getc)(f)) != EOF &&
5396 (c4 = (*i_getc)(f)) != EOF) {
5397 nkf_iconv_utf_16(c1, c2, c3, c4);
5400 (*i_ungetc)(EOF, f);
/* Byte-wise loop for every other input encoding. */
5404 while ((c1 = (*i_getc)(f)) != EOF) {
5405 #ifdef INPUT_CODE_FIX
5406 if (!input_encoding)
5412 /* in case of 8th bit is on */
5413 if (!estab_f&&!mime_decode_mode) {
5414 /* in case of not established yet */
5415 /* It is still ambiguous */
5416 if (h_conv(f, c2, c1)==EOF) {
5424 /* in case of already established */
5426 /* ignore bogus code */
5434 /* 2nd byte of 7 bit code or SJIS */
5438 else if (nkf_char_unicode_p(c1)) {
5444 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5447 } else if (c1 > DEL) {
5449 if (!estab_f && !iso8859_f) {
5450 /* not established yet */
5452 } else { /* estab_f==TRUE */
5458 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5459 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5461 c2 = JIS_X_0201_1976_K;
5466 /* already established */
5470 } else if (SP < c1 && c1 < DEL) {
5471 /* in case of Roman characters */
5473 /* output 1 shifted byte */
5477 } else if (nkf_byte_jisx0201_katakana_p(c1)){
5478 /* output 1 shifted byte */
5479 c2 = JIS_X_0201_1976_K;
5482 /* look like bogus code */
5485 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5486 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5487 /* in case of Kanji shifted */
5489 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5490 /* Check MIME code */
5491 if ((c1 = (*i_getc)(f)) == EOF) {
5494 } else if (c1 == '?') {
5495 /* =? is mime conversion start sequence */
5496 if(mime_f == STRICT_MIME) {
5497 /* check in real detail */
5498 if (mime_begin_strict(f) == EOF)
5501 } else if (mime_begin(f) == EOF)
5510 /* normal ASCII code */
5513 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5516 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
/* ESC is interpreted as the start of an escape sequence only while is_8bit
 * is unset or while decoding MIME. */
5519 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
5520 if ((c1 = (*i_getc)(f)) == EOF) {
5521 /* (*oconv)(0, ESC); don't send bogus code */
5524 else if (c1 == '&') {
5526 if ((c1 = (*i_getc)(f)) == EOF) {
/* ESC $ ... : multi-byte character set designations */
5532 else if (c1 == '$') {
5534 if ((c1 = (*i_getc)(f)) == EOF) {
5535 /* don't send bogus code
5537 (*oconv)(0, '$'); */
5539 } else if (c1 == '@' || c1 == 'B') {
5541 set_input_mode(JIS_X_0208);
5543 } else if (c1 == '(') {
5545 if ((c1 = (*i_getc)(f)) == EOF) {
5546 /* don't send bogus code
5552 } else if (c1 == '@'|| c1 == 'B') {
5554 set_input_mode(JIS_X_0208);
5557 } else if (c1 == 'D'){
5558 set_input_mode(JIS_X_0212);
5560 #endif /* X0212_ENABLE */
5561 } else if (c1 == 'O' || c1 == 'Q'){
5562 set_input_mode(JIS_X_0213_1);
5564 } else if (c1 == 'P'){
5565 set_input_mode(JIS_X_0213_2);
5568 /* could be some special code */
5575 } else if (broken_f&0x2) {
5576 /* accept any ESC-(-x as broken code ... */
5577 input_mode = JIS_X_0208;
5586 } else if (c1 == '(') {
5588 if ((c1 = (*i_getc)(f)) == EOF) {
5589 /* don't send bogus code
5591 (*oconv)(0, '('); */
5594 else if (c1 == 'I') {
5595 /* JIS X 0201 Katakana */
5596 set_input_mode(JIS_X_0201_1976_K);
5599 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5600 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5601 set_input_mode(ASCII);
5604 else if (broken_f&0x2) {
5605 set_input_mode(ASCII);
5614 else if (c1 == '.') {
5616 if ((c1 = (*i_getc)(f)) == EOF) {
5619 else if (c1 == 'A') {
5630 else if (c1 == 'N') {
5633 if (g2 == ISO_8859_1) {
5648 } else if (c1 == ESC && iconv == s_iconv) {
5649 /* ESC in Shift_JIS */
5650 if ((c1 = (*i_getc)(f)) == EOF) {
5651 /* (*oconv)(0, ESC); don't send bogus code */
5653 } else if (c1 == '$') {
5655 if ((c1 = (*i_getc)(f)) == EOF) {
5657 } else if (('E' <= c1 && c1 <= 'G') ||
5658 ('O' <= c1 && c1 <= 'Q')) {
5666 static const nkf_char jphone_emoji_first_table[7] =
5667 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
/* c1 is one of 'E'..'G' or 'O'..'Q' here; c1 % 7 selects the base value. */
5668 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5669 if ((c1 = (*i_getc)(f)) == EOF) LAST;
/* Emit every following byte in SP..'z' offset by the base value c3. */
5670 while (SP <= c1 && c1 <= 'z') {
5671 (*oconv)(0, c1 + c3);
5672 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5687 } else if (c1 == LF || c1 == CR) {
5689 input_mode = ASCII; set_iconv(FALSE, 0);
5691 } else if (mime_decode_f && !mime_decode_mode){
5693 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5701 } else { /* if (c1 == CR)*/
5702 if ((c1=(*i_getc)(f))!=EOF) {
5706 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
5726 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5729 if ((c3 = (*i_getc)(f)) != EOF) {
5732 if ((c4 = (*i_getc)(f)) != EOF) {
5734 (*iconv)(c2, c1, c3|c4);
5739 /* 3 bytes EUC or UTF-8 */
5740 if ((c3 = (*i_getc)(f)) != EOF) {
5742 (*iconv)(c2, c1, c3);
5750 0x7F <= c2 && c2 <= 0x92 &&
5751 0x21 <= c1 && c1 <= 0x7E) {
/* Map the (c2, c1) pair to the code point 0xE000 + (c2 - 0x7F) * 94 + (c1 - 0x21). */
5753 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5756 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5760 (*oconv)(PREFIX_EUCG3 | c2, c1);
5762 #endif /* X0212_ENABLE */
5764 (*oconv)(PREFIX_EUCG3 | c2, c1);
5767 (*oconv)(input_mode, c1); /* other special case */
5773 /* goto next_word */
/* Pass EOF to the input converter. */
5777 (*iconv)(EOF, 0, 0);
/* No input code name was recorded: report the input_code_list entry with the
 * lowest score. */
5778 if (!input_codename)
5781 struct input_code *p = input_code_list;
5782 struct input_code *result = p;
5784 if (p->score < result->score) result = p;
5787 set_input_codename(result->name);
5789 debug(result->name);
5797 * int options(unsigned char *cp)
/*
 * Parse one option string and set the corresponding global flags.
 *
 * cp points at the option text; everything up to and including the first '-'
 * is skipped before parsing starts.  A second '-' introduces a long option,
 * which is looked up in long_option[]: an entry with a non-empty alias makes
 * cp continue inside that alias string, the other entries are handled by the
 * strcmp() chain below.  Single-letter options are handled by the switch
 * cases further down.
 */
5804 options(unsigned char *cp)
5808 unsigned char *cp_back = NULL;
5813 while(*cp && *cp++!='-');
/* Continue while option text remains or cp_back is set. */
5814 while (*cp || cp_back) {
5822 case '-': /* literal options */
5823 if (!*cp || *cp == SP) { /* ignore the rest of arguments */
/* Compare each entry name with cp, stopping at '=' or the first difference. */
5827 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
5828 p = (unsigned char *)long_option[i].name;
5829 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5830 if (*p == cp[j] || cp[j] == SP){
5837 #if !defined(PERL_XS) && !defined(WIN32DLL)
5838 fprintf(stderr, "unknown long option: --%s\n", cp);
/* Advance cp to the end of this option word (NUL or SP). */
5842 while(*cp && *cp != SP && cp++);
5843 if (long_option[i].alias[0]){
5845 cp = (unsigned char *)long_option[i].alias;
/* NOTE(review): in the handlers below p is presumably the text after '=' --
 * the assignment is not visible here, confirm. */
5847 if (strcmp(long_option[i].name, "ic=") == 0){
5848 enc = nkf_enc_find((char *)p);
5850 input_encoding = enc;
5853 if (strcmp(long_option[i].name, "oc=") == 0){
5854 enc = nkf_enc_find((char *)p);
5855 /* if (enc <= 0) continue; */
5857 output_encoding = enc;
5860 if (strcmp(long_option[i].name, "guess=") == 0){
5861 if (p[0] == '0' || p[0] == '1') {
5869 if (strcmp(long_option[i].name, "overwrite") == 0){
5872 preserve_time_f = TRUE;
5875 if (strcmp(long_option[i].name, "overwrite=") == 0){
5878 preserve_time_f = TRUE;
5880 backup_suffix = (char *)p;
5883 if (strcmp(long_option[i].name, "in-place") == 0){
5886 preserve_time_f = FALSE;
5889 if (strcmp(long_option[i].name, "in-place=") == 0){
5892 preserve_time_f = FALSE;
5894 backup_suffix = (char *)p;
5899 if (strcmp(long_option[i].name, "cap-input") == 0){
5903 if (strcmp(long_option[i].name, "url-input") == 0){
5908 #ifdef NUMCHAR_OPTION
5909 if (strcmp(long_option[i].name, "numchar-input") == 0){
5915 if (strcmp(long_option[i].name, "no-output") == 0){
5919 if (strcmp(long_option[i].name, "debug") == 0){
5924 if (strcmp(long_option[i].name, "cp932") == 0){
5925 #ifdef SHIFTJIS_CP932
5929 #ifdef UTF8_OUTPUT_ENABLE
5930 ms_ucs_map_f = UCS_MAP_CP932;
5934 if (strcmp(long_option[i].name, "no-cp932") == 0){
5935 #ifdef SHIFTJIS_CP932
5939 #ifdef UTF8_OUTPUT_ENABLE
5940 ms_ucs_map_f = UCS_MAP_ASCII;
5944 #ifdef SHIFTJIS_CP932
5945 if (strcmp(long_option[i].name, "cp932inv") == 0){
5952 if (strcmp(long_option[i].name, "x0212") == 0){
5959 if (strcmp(long_option[i].name, "exec-in") == 0){
5963 if (strcmp(long_option[i].name, "exec-out") == 0){
5968 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
5969 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
5970 no_cp932ext_f = TRUE;
5973 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
5974 no_best_fit_chars_f = TRUE;
/* --fb-*: choose the encode_fallback handler; NULL is stored for fb-skip. */
5977 if (strcmp(long_option[i].name, "fb-skip") == 0){
5978 encode_fallback = NULL;
5981 if (strcmp(long_option[i].name, "fb-html") == 0){
5982 encode_fallback = encode_fallback_html;
5985 if (strcmp(long_option[i].name, "fb-xml") == 0){
5986 encode_fallback = encode_fallback_xml;
5989 if (strcmp(long_option[i].name, "fb-java") == 0){
5990 encode_fallback = encode_fallback_java;
5993 if (strcmp(long_option[i].name, "fb-perl") == 0){
5994 encode_fallback = encode_fallback_perl;
5997 if (strcmp(long_option[i].name, "fb-subchar") == 0){
5998 encode_fallback = encode_fallback_subchar;
/* --fb-subchar=N: N is parsed as a decimal, hexadecimal ("0x"/"0X") or octal
 * number.  NOTE(review): the parsing loops reuse `i`, which also indexes
 * long_option[] in this scope -- confirm nothing reads long_option[i] after
 * this point. */
6001 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6002 encode_fallback = encode_fallback_subchar;
6003 unicode_subchar = 0;
6005 /* decimal number */
6006 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6007 unicode_subchar *= 10;
6008 unicode_subchar += hex2bin(p[i]);
6010 }else if(p[1] == 'x' || p[1] == 'X'){
6011 /* hexadecimal number */
6012 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6013 unicode_subchar <<= 4;
6014 unicode_subchar |= hex2bin(p[i]);
6018 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6019 unicode_subchar *= 8;
6020 unicode_subchar += hex2bin(p[i]);
/* Convert through w16e_conv() and pack the two results as (i << 8) | j. */
6023 w16e_conv(unicode_subchar, &i, &j);
6024 unicode_subchar = i<<8 | j;
6028 #ifdef UTF8_OUTPUT_ENABLE
6029 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6030 ms_ucs_map_f = UCS_MAP_MS;
6034 #ifdef UNICODE_NORMALIZATION
6035 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
/* --prefix=XYZ...: p[0] is stored in prefix_table for every following
 * graphic character of the value. */
6040 if (strcmp(long_option[i].name, "prefix=") == 0){
6041 if (nkf_isgraph(p[0])){
6042 for (i = 1; nkf_isgraph(p[i]); i++){
6043 prefix_table[p[i]] = p[0];
6048 #if !defined(PERL_XS) && !defined(WIN32DLL)
6049 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6054 case 'b': /* buffered mode */
6057 case 'u': /* non buffered mode */
6060 case 't': /* transparent mode */
6065 } else if (*cp=='2') {
6069 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6077 case 'j': /* JIS output */
6079 output_encoding = nkf_enc_from_index(ISO_2022_JP);
6081 case 'e': /* AT&T EUC output */
6082 output_encoding = nkf_enc_from_index(EUCJP_NKF);
6084 case 's': /* SJIS output */
6085 output_encoding = nkf_enc_from_index(WINDOWS_31J);
6087 case 'l': /* ISO8859 Latin-1 support, no conversion */
6088 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6089 input_encoding = nkf_enc_from_index(ISO_8859_1);
6091 case 'i': /* Kanji IN ESC-$-@/B */
6092 if (*cp=='@'||*cp=='B')
6093 kanji_intro = *cp++;
6095 case 'o': /* ASCII IN ESC-(-J/B */
6096 if (*cp=='J'||*cp=='B'||*cp=='H')
6097 ascii_intro = *cp++;
6101 bit:1 katakana->hiragana
6102 bit:2 hiragana->katakana
6104 if ('9'>= *cp && *cp>='0')
6105 hira_f |= (*cp++ -'0');
6112 #if defined(MSDOS) || defined(__OS2__)
6119 show_configuration();
6127 #ifdef UTF8_OUTPUT_ENABLE
6128 case 'w': /* UTF-8 output */
6133 output_encoding = nkf_enc_from_index(UTF_8N);
6135 output_bom_f = TRUE;
6136 output_encoding = nkf_enc_from_index(UTF_8_BOM);
/* "16" or "32" after -w selects a UTF-16 / UTF-32 output variant. */
6140 if ('1'== cp[0] && '6'==cp[1]) {
6143 } else if ('3'== cp[0] && '2'==cp[1]) {
6147 output_encoding = nkf_enc_from_index(UTF_8);
6152 output_endian = ENDIAN_LITTLE;
6153 } else if (cp[0] == 'B') {
6156 output_encoding = nkf_enc_from_index(enc_idx);
/* Resolve the generic UTF-16/UTF-32 index to the endian-specific encoding. */
6161 enc_idx = enc_idx == UTF_16
6162 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6163 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6165 output_bom_f = TRUE;
6166 enc_idx = enc_idx == UTF_16
6167 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6168 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6170 output_encoding = nkf_enc_from_index(enc_idx);
6174 #ifdef UTF8_INPUT_ENABLE
6175 case 'W': /* UTF input */
6178 input_encoding = nkf_enc_from_index(UTF_8);
6181 if ('1'== cp[0] && '6'==cp[1]) {
6183 input_endian = ENDIAN_BIG;
6185 } else if ('3'== cp[0] && '2'==cp[1]) {
6187 input_endian = ENDIAN_BIG;
6190 input_encoding = nkf_enc_from_index(UTF_8);
6195 input_endian = ENDIAN_LITTLE;
6196 } else if (cp[0] == 'B') {
6198 input_endian = ENDIAN_BIG;
6200 enc_idx = (enc_idx == UTF_16
6201 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6202 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6203 input_encoding = nkf_enc_from_index(enc_idx);
6207 /* Input code assumption */
6208 case 'J': /* ISO-2022-JP input */
6209 input_encoding = nkf_enc_from_index(ISO_2022_JP);
6211 case 'E': /* EUC-JP input */
6212 input_encoding = nkf_enc_from_index(EUCJP_NKF);
6214 case 'S': /* Windows-31J input */
6215 input_encoding = nkf_enc_from_index(WINDOWS_31J);
6217 case 'Z': /* Convert X0208 alphabet to ascii */
6219 bit:0 Convert JIS X 0208 Alphabet to ASCII
6220 bit:1 Convert Kankaku to one space
6221 bit:2 Convert Kankaku to two spaces
6222 bit:3 Convert HTML Entity
6223 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6225 while ('0'<= *cp && *cp <='9') {
6226 alpha_f |= 1 << (*cp++ - '0');
/* No bit selected by digits: default to bit 0. */
6228 if (!alpha_f) alpha_f = 1;
6230 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6231 x0201_f = FALSE; /* No X0201->X0208 conversion */
6233 ESC-(-I in JIS, EUC, MS Kanji
6234 SI/SO in JIS, EUC, MS Kanji
6235 SS2 in EUC, JIS, not in MS Kanji
6236 MS Kanji (0xa0-0xdf)
6238 ESC-(-I in JIS (0x20-0x5f)
6239 SS2 in EUC (0xa0-0xdf)
6240 0xa0-0xd in MS Kanji (0xa0-0xdf)
6243 case 'X': /* Convert X0201 kana to X0208 */
6246 case 'F': /* preserve new lines */
6247 fold_preserve_f = TRUE;
/* fall through: -F also performs the -f folding setup */
6248 case 'f': /* folding -f60 or -f */
6251 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6253 fold_len += *cp++ - '0';
/* A width outside 1..BUFSIZ-1 falls back to DEFAULT_FOLD. */
6255 if (!(0<fold_len && fold_len<BUFSIZ))
6256 fold_len = DEFAULT_FOLD;
6260 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6262 fold_margin += *cp++ - '0';
6266 case 'm': /* MIME support */
6267 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6268 if (*cp=='B'||*cp=='Q') {
6269 mime_decode_mode = *cp++;
6270 mimebuf_f = FIXED_MIME;
6271 } else if (*cp=='N') {
6272 mime_f = TRUE; cp++;
6273 } else if (*cp=='S') {
6274 mime_f = STRICT_MIME; cp++;
6275 } else if (*cp=='0') {
6276 mime_decode_f = FALSE;
6277 mime_f = FALSE; cp++;
6279 mime_f = STRICT_MIME;
6282 case 'M': /* MIME output */
6285 mimeout_f = FIXED_MIME; cp++;
6286 } else if (*cp=='Q') {
6288 mimeout_f = FIXED_MIME; cp++;
6293 case 'B': /* Broken JIS support */
6295 bit:1 allow any x on ESC-(-x or ESC-$-x
6296 bit:2 reset to ascii on NL
6298 if ('9'>= *cp && *cp>='0')
6299 broken_f |= 1<<(*cp++ -'0');
6304 case 'O':/* for Output file */
6308 case 'c':/* add cr code */
6311 case 'd':/* delete cr code */
6314 case 'I': /* ISO-2022-JP output */
6317 case 'L': /* line mode */
6318 if (*cp=='u') { /* unix */
6319 eolmode_f = LF; cp++;
6320 } else if (*cp=='m') { /* mac */
6321 eolmode_f = CR; cp++;
6322 } else if (*cp=='w') { /* windows */
6323 eolmode_f = CRLF; cp++;
6324 } else if (*cp=='0') { /* no conversion */
6325 eolmode_f = 0; cp++;
6330 if ('2' <= *cp && *cp <= '9') {
6333 } else if (*cp == '0' || *cp == '1') {
6342 /* multiple options in a string are allowed for Perl module */
6343 while(*cp && *cp++!='-');
6346 #if !defined(PERL_XS) && !defined(WIN32DLL)
6347 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6349 /* bogus option but ignored */
6357 #include "nkf32dll.c"
6358 #elif defined(PERL_XS)
6359 #else /* WIN32DLL */
/*
 * Command-line entry point.
 *
 * Leading arguments that start with '-' are consumed as options.  Input is
 * then converted with kanji_convert(), either from stdin or from each file
 * opened with fopen() from the remaining arguments.  When file_out_f is set
 * the output of a file is redirected into a temporary file that finally
 * replaces the original (optionally after keeping a backup).
 */
6361 main(int argc, char **argv)
6366 char *outfname = NULL;
6369 #ifdef EASYWIN /*Easy Win */
6370 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6372 #ifdef DEFAULT_CODE_LOCALE
6373 setlocale(LC_CTYPE, "");
/* Consume every leading argument that begins with '-'. */
6375 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6376 cp = (unsigned char *)*argv;
/* Create a pipe and a child process; execvp() runs the command given by the
 * arguments that follow. */
6381 if (pipe(fds) < 0 || (pid = fork()) < 0){
6392 execvp(argv[1], &argv[1]);
/* These flags are saved here and written back below. */
6409 int debug_f_back = debug_f;
6412 int exec_f_back = exec_f;
6415 int x0212_f_back = x0212_f;
6417 int x0213_f_back = x0213_f;
6418 int guess_f_back = guess_f;
6420 guess_f = guess_f_back;
6423 debug_f = debug_f_back;
6426 exec_f = exec_f_back;
6428 x0212_f = x0212_f_back;
6429 x0213_f = x0213_f_back;
6432 if (binmode_f == TRUE)
6433 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6434 if (freopen("","wb",stdout) == NULL)
6441 setbuf(stdout, (char *) NULL);
6443 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
6446 if (binmode_f == TRUE)
6447 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6448 if (freopen("","rb",stdin) == NULL) return (-1);
6452 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
/* Convert standard input; in guess mode print the detected code without a
 * file name. */
6456 kanji_convert(stdin);
6457 if (guess_f) print_guessed_code(NULL);
6461 int is_argument_error = FALSE;
6463 input_codename = NULL;
6466 iconv_for_check = 0;
/* Open the next file argument; a failure is remembered in is_argument_error. */
6468 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6470 is_argument_error = TRUE;
6478 /* reopen file for stdout */
6479 if (file_out_f == TRUE) {
/* Build the temporary output name from the original file name. */
6482 outfname = nkf_malloc(strlen(origfname)
6483 + strlen(".nkftmpXXXXXX")
6485 strcpy(outfname, origfname);
/* Scan backwards for the last '/' or '\\' path separator. */
6489 for (i = strlen(outfname); i; --i){
6490 if (outfname[i - 1] == '/'
6491 || outfname[i - 1] == '\\'){
6497 strcat(outfname, "ntXXXXXX");
6499 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6500 S_IREAD | S_IWRITE);
6502 strcat(outfname, ".nkftmpXXXXXX");
6503 fd = mkstemp(outfname);
/* Keep a duplicate of stdout in fd_backup, then point stdout at the new file. */
6506 || (fd_backup = dup(fileno(stdout))) < 0
6507 || dup2(fd, fileno(stdout)) < 0
6518 outfname = "nkf.out";
6521 if(freopen(outfname, "w", stdout) == NULL) {
6525 if (binmode_f == TRUE) {
6526 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6527 if (freopen("","wb",stdout) == NULL)
6534 if (binmode_f == TRUE)
6535 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6536 if (freopen("","rb",fin) == NULL)
6541 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
/* In guess mode the file name is passed on only when nfiles > 1. */
6545 char *filename = NULL;
6547 if (nfiles > 1) filename = origfname;
6548 if (guess_f) print_guessed_code(filename);
6554 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
/* Point stdout back at the descriptor saved in fd_backup. */
6562 if (dup2(fd_backup, fileno(stdout)) < 0){
6565 if (stat(origfname, &sb)) {
6566 fprintf(stderr, "Can't stat %s\n", origfname);
6568 /* restore the permissions: copy the original file's mode to the new file */
6569 if (chmod(outfname, sb.st_mode)) {
6570 fprintf(stderr, "Can't set permission %s\n", outfname);
6573 /* restore the timestamp from the original file when requested */
6574 if(preserve_time_f){
6575 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6576 tb[0] = tb[1] = sb.st_mtime;
6577 if (utime(outfname, tb)) {
6578 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6581 tb.actime = sb.st_atime;
6582 tb.modtime = sb.st_mtime;
6583 if (utime(outfname, &tb)) {
6584 fprintf(stderr, "Can't set timestamp %s\n", outfname);
/* Move the original out of the way under its backup name. */
6589 char *backup_filename = get_backup_filename(backup_suffix, origfname);
6591 unlink(backup_filename);
6593 if (rename(origfname, backup_filename)) {
6594 perror(backup_filename);
6595 fprintf(stderr, "Can't rename %s to %s\n",
6596 origfname, backup_filename);
6598 nkf_free(backup_filename);
6601 if (unlink(origfname)){
/* Put the converted file in place of the original. */
6606 if (rename(outfname, origfname)) {
6608 fprintf(stderr, "Can't rename %s to %s\n",
6609 outfname, origfname);
6616 if (is_argument_error)
6619 #ifdef EASYWIN /*Easy Win */
6620 if (file_out_f == FALSE)
6621 scanf("%d",&end_check);
6624 #else /* for Other OS */
6625 if (file_out_f == TRUE)
6627 #endif /*Easy Win */
6630 #endif /* WIN32DLL */