1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.131 2007/09/12 04:56:53 naruse Exp $ */
43 #define NKF_VERSION "2.0.8"
44 #define NKF_RELEASE_DATE "2007-09-12"
49 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
50 "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
57 ** USAGE: nkf [flags] [file]
60 ** b Output is buffered (DEFAULT)
61 ** u Output is unbuffered
65 ** j Output code is JIS 7 bit (DEFAULT SELECT)
66 ** s Output code is MS Kanji (DEFAULT SELECT)
67 ** e Output code is AT&T JIS (DEFAULT SELECT)
68 ** w Output code is UTF-8 (DEFAULT SELECT)
69 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
71 ** m MIME conversion for ISO-2022-JP
72 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
73 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
74 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
75 ** M MIME output conversion
77 ** r {de/en}crypt ROT13/47
81 ** T Text mode output (for MS-DOS)
83 ** x Do not convert X0201 kana into X0208
84 ** Z Convert X0208 alphabet to ASCII
89 ** B try to fix broken JIS, missing Escape
90 ** B[1-9] broken level
92 ** O Output to 'nkf.out' file or last file name
93 ** d Delete \r in line feed
94 ** c Add \r in line feed
95 ** -- other long option
96 ** -- ignore following option (don't use with -O )
100 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
102 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
118 #if defined(MSDOS) || defined(__OS2__)
121 #if defined(_MSC_VER) || defined(__WATCOMC__)
122 #define mktemp _mktemp
128 #define setbinmode(fp) fsetbin(fp)
129 #elif defined(__DJGPP__)
130 #include <libc/dosio.h>
131 #define setbinmode(fp) djgpp_setbinmode(fp)
132 #else /* Microsoft C, Turbo C */
133 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
136 #define setbinmode(fp)
139 #if defined(__DJGPP__)
140 void djgpp_setbinmode(FILE *fp)
142 /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
145 m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
146 __file_handle_set(fd, m);
150 #ifdef _IOFBF /* SysV and MSDOS, Windows */
151 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
153 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
156 /*Borland C++ 4.5 EasyWin*/
157 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
166 /* added by satoru@isoternet.org */
168 #include <sys/types.h>
170 #include <sys/stat.h>
171 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
173 #if defined(__WATCOMC__)
174 #include <sys/utime.h>
178 #else /* defined(MSDOS) */
180 #ifdef __BORLANDC__ /* BCC32 */
182 #else /* !defined(__BORLANDC__) */
183 #include <sys/utime.h>
184 #endif /* (__BORLANDC__) */
185 #else /* !defined(__WIN32__) */
186 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__) /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
187 #include <sys/utime.h>
188 #elif defined(__TURBOC__) /* BCC */
190 #elif defined(LSI_C) /* LSI C */
191 #endif /* (__WIN32__) */
199 /* state of output_mode and input_mode
216 #define X0213_1 0x284F
217 #define X0213_2 0x2850
219 /* Input Assumption */
224 #define LATIN1_INPUT 6
226 #define STRICT_MIME 8
231 #define JAPANESE_EUC 10
235 #define UTF8_INPUT 13
236 #define UTF16_INPUT 1015
237 #define UTF32_INPUT 1017
241 #define ENDIAN_BIG 1234
242 #define ENDIAN_LITTLE 4321
243 #define ENDIAN_2143 2143
244 #define ENDIAN_3412 3412
264 #define is_alnum(c) \
265 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
267 /* I don't trust portability of toupper. The ASCII-only helpers below expand their argument unparenthesized and more than once, so pass only simple, side-effect-free expressions. */
268 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
269 #define nkf_isoctal(c) ('0'<=c && c<='7')
270 #define nkf_isdigit(c) ('0'<=c && c<='9')
271 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
272 #define nkf_isblank(c) (c == SPACE || c == TAB)
273 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == NL)
274 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
275 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
276 #define nkf_isprint(c) (' '<=c && c<='~')
277 #define nkf_isgraph(c) ('!'<=c && c<='~')
278 #define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \
279 ('A'<=c&&c<='F') ? (c-'A'+10) : \
280 ('a'<=c&&c<='f') ? (c-'a'+10) : 0 ) /* value of one hex digit; yields 0 for any non-hex character */
281 #define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3) /* true when bits 8-15 of c2 equal SS3 */
283 #define CP932_TABLE_BEGIN 0xFA
284 #define CP932_TABLE_END 0xFC
285 #define CP932INV_TABLE_BEGIN 0xED
286 #define CP932INV_TABLE_END 0xEE
287 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
289 #define HOLD_SIZE 1024
290 #if defined(INT_IS_SHORT)
291 #define IOBUF_SIZE 2048
293 #define IOBUF_SIZE 16384
296 #define DEFAULT_J 'B'
297 #define DEFAULT_R 'B'
299 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
300 #define SJ6394 0x0161 /* 63 - 94 ku offset */
302 #define RANGE_NUM_MAX 18
307 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
308 #define sizeof_euc_to_utf8_1byte 94
309 #define sizeof_euc_to_utf8_2bytes 94
310 #define sizeof_utf8_to_euc_C2 64
311 #define sizeof_utf8_to_euc_E5B8 64
312 #define sizeof_utf8_to_euc_2bytes 112
313 #define sizeof_utf8_to_euc_3bytes 16
316 /* MIME preprocessor */
318 #ifdef EASYWIN /*Easy Win */
319 extern POINT _BufferSize;
328 void (*status_func)(struct input_code *, nkf_char);
329 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
333 static char *input_codename = "";
336 static const char *CopyRight = COPY_RIGHT;
338 #if !defined(PERL_XS) && !defined(WIN32DLL)
339 static nkf_char noconvert(FILE *f);
341 static void module_connection(void);
342 static nkf_char kanji_convert(FILE *f);
343 static nkf_char h_conv(FILE *f,nkf_char c2,nkf_char c1);
344 static nkf_char push_hold_buf(nkf_char c2);
345 static void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
346 static nkf_char s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
347 static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
348 static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
349 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
351 * 0: Shift_JIS, eucJP-ascii
356 #define UCS_MAP_ASCII 0
358 #define UCS_MAP_CP932 2
359 #define UCS_MAP_CP10001 3
360 static int ms_ucs_map_f = UCS_MAP_ASCII;
362 #ifdef UTF8_INPUT_ENABLE
363 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
364 static int no_cp932ext_f = FALSE;
365 /* ignore ZERO WIDTH NO-BREAK SPACE */
366 static int no_best_fit_chars_f = FALSE;
367 static int input_endian = ENDIAN_BIG;
368 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
369 static void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
370 static void encode_fallback_html(nkf_char c);
371 static void encode_fallback_xml(nkf_char c);
372 static void encode_fallback_java(nkf_char c);
373 static void encode_fallback_perl(nkf_char c);
374 static void encode_fallback_subchar(nkf_char c);
375 static void (*encode_fallback)(nkf_char c) = NULL;
376 static nkf_char w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
377 static nkf_char w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
378 static nkf_char w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
379 static nkf_char w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
380 static nkf_char unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
381 static nkf_char w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
382 static void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
383 static nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
384 static nkf_char w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
385 static void w_status(struct input_code *, nkf_char);
387 #ifdef UTF8_OUTPUT_ENABLE
388 static int output_bom_f = FALSE;
389 static int output_endian = ENDIAN_BIG;
390 static nkf_char e2w_conv(nkf_char c2,nkf_char c1);
391 static void w_oconv(nkf_char c2,nkf_char c1);
392 static void w_oconv16(nkf_char c2,nkf_char c1);
393 static void w_oconv32(nkf_char c2,nkf_char c1);
395 static void e_oconv(nkf_char c2,nkf_char c1);
396 static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
397 static void s_oconv(nkf_char c2,nkf_char c1);
398 static void j_oconv(nkf_char c2,nkf_char c1);
399 static void fold_conv(nkf_char c2,nkf_char c1);
400 static void cr_conv(nkf_char c2,nkf_char c1);
401 static void z_conv(nkf_char c2,nkf_char c1);
402 static void rot_conv(nkf_char c2,nkf_char c1);
403 static void hira_conv(nkf_char c2,nkf_char c1);
404 static void base64_conv(nkf_char c2,nkf_char c1);
405 static void iso2022jp_check_conv(nkf_char c2,nkf_char c1);
406 static void no_connection(nkf_char c2,nkf_char c1);
407 static nkf_char no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
409 static void code_score(struct input_code *ptr);
410 static void code_status(nkf_char c);
412 static void std_putc(nkf_char c);
413 static nkf_char std_getc(FILE *f);
414 static nkf_char std_ungetc(nkf_char c,FILE *f);
416 static nkf_char broken_getc(FILE *f);
417 static nkf_char broken_ungetc(nkf_char c,FILE *f);
419 static nkf_char mime_begin(FILE *f);
420 static nkf_char mime_getc(FILE *f);
421 static nkf_char mime_ungetc(nkf_char c,FILE *f);
423 static void switch_mime_getc(void);
424 static void unswitch_mime_getc(void);
425 static nkf_char mime_begin_strict(FILE *f);
426 static nkf_char mime_getc_buf(FILE *f);
427 static nkf_char mime_ungetc_buf(nkf_char c,FILE *f);
428 static nkf_char mime_integrity(FILE *f,const unsigned char *p);
430 static nkf_char base64decode(nkf_char c);
431 static void mime_prechar(nkf_char c2, nkf_char c1);
432 static void mime_putc(nkf_char c);
433 static void open_mime(nkf_char c);
434 static void close_mime(void);
435 static void eof_mime(void);
436 static void mimeout_addchar(nkf_char c);
438 static void usage(void);
439 static void version(void);
441 static void options(unsigned char *c);
442 #if defined(PERL_XS) || defined(WIN32DLL)
443 static void reinit(void);
448 #if !defined(PERL_XS) && !defined(WIN32DLL)
449 static unsigned char stdibuf[IOBUF_SIZE];
450 static unsigned char stdobuf[IOBUF_SIZE];
452 static unsigned char hold_buf[HOLD_SIZE*2];
453 static int hold_count = 0;
455 /* MIME preprocessor fifo: a fixed-size byte buffer addressed through Fifo(), which wraps every index into range */
457 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer; must stay a power of two because Fifo() wraps indices by masking with MIME_BUF_MASK */
458 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
459 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] /* buffer slot for index n, wrapped modulo MIME_BUF_SIZE */
460 static unsigned char mime_buf[MIME_BUF_SIZE];
461 static unsigned int mime_top = 0;
462 static unsigned int mime_last = 0; /* decoded */
463 static unsigned int mime_input = 0; /* undecoded */
464 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
467 static int unbuf_f = FALSE;
468 static int estab_f = FALSE;
469 static int nop_f = FALSE;
470 static int binmode_f = TRUE; /* binary mode */
471 static int rot_f = FALSE; /* rot13/47 mode */
472 static int hira_f = FALSE; /* hira/kata henkan (hiragana/katakana conversion) */
473 static int input_f = FALSE; /* non fixed input code */
474 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
475 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
476 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
477 static int mimebuf_f = FALSE; /* MIME buffered input */
478 static int broken_f = FALSE; /* convert ESC-less broken JIS */
479 static int iso8859_f = FALSE; /* ISO8859 through */
480 static int mimeout_f = FALSE; /* base64 mode */
481 #if defined(MSDOS) || defined(__OS2__)
482 static int x0201_f = TRUE; /* Assume JISX0201 kana */
484 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
486 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
488 #ifdef UNICODE_NORMALIZATION
489 static int nfc_f = FALSE;
490 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
491 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
492 static nkf_char nfc_getc(FILE *f);
493 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
497 static int cap_f = FALSE;
498 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
499 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
500 static nkf_char cap_getc(FILE *f);
501 static nkf_char cap_ungetc(nkf_char c,FILE *f);
503 static int url_f = FALSE;
504 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
505 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
506 static nkf_char url_getc(FILE *f);
507 static nkf_char url_ungetc(nkf_char c,FILE *f);
510 #if defined(INT_IS_SHORT)
511 #define NKF_INT32_C(n) (n##L)
513 #define NKF_INT32_C(n) (n)
515 #define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
516 #define CLASS_MASK NKF_INT32_C(0xFF000000) /* top 8 bits: class tag */
517 #define CLASS_UNICODE NKF_INT32_C(0x01000000) /* class tag tested by is_unicode_capsule() */
518 #define VALUE_MASK NKF_INT32_C(0x00FFFFFF) /* low 24 bits: value part */
519 #define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
520 #define is_unicode_capsule(c) ((c & CLASS_MASK) == CLASS_UNICODE) /* true when the top byte of c is the CLASS_UNICODE tag */
521 #define is_unicode_bmp(c) ((c & VALUE_MASK) <= NKF_INT32_C(0xFFFF)) /* true when the low 24 bits of c do not exceed 0xFFFF */
523 #ifdef NUMCHAR_OPTION
524 static int numchar_f = FALSE;
525 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
526 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
527 static nkf_char numchar_getc(FILE *f);
528 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
532 static int noout_f = FALSE;
533 static void no_putc(nkf_char c);
534 static nkf_char debug_f = FALSE;
535 static void debug(const char *str);
536 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
539 static int guess_f = FALSE;
541 static void print_guessed_code(char *filename);
543 static void set_input_codename(char *codename);
544 static int is_inputcode_mixed = FALSE;
545 static int is_inputcode_set = FALSE;
548 static int exec_f = 0;
551 #ifdef SHIFTJIS_CP932
552 /* invert IBM extended characters to others */
553 static int cp51932_f = FALSE;
555 /* invert NEC-selected IBM extended characters to IBM extended characters */
556 static int cp932inv_f = TRUE;
558 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
559 #endif /* SHIFTJIS_CP932 */
562 static int x0212_f = FALSE;
563 static nkf_char x0212_shift(nkf_char c);
564 static nkf_char x0212_unshift(nkf_char c);
566 static int x0213_f = FALSE;
568 static unsigned char prefix_table[256];
570 static void set_code_score(struct input_code *ptr, nkf_char score);
571 static void clr_code_score(struct input_code *ptr, nkf_char score);
572 static void status_disable(struct input_code *ptr);
573 static void status_push_ch(struct input_code *ptr, nkf_char c);
574 static void status_clear(struct input_code *ptr);
575 static void status_reset(struct input_code *ptr);
576 static void status_reinit(struct input_code *ptr);
577 static void status_check(struct input_code *ptr, nkf_char c);
578 static void e_status(struct input_code *, nkf_char);
579 static void s_status(struct input_code *, nkf_char);
581 struct input_code input_code_list[] = {
582 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
583 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
584 #ifdef UTF8_INPUT_ENABLE
585 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
586 {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
587 {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
592 static int mimeout_mode = 0;
593 static int base64_count = 0;
595 /* X0208 -> ASCII converter */
598 static int f_line = 0; /* chars in line */
599 static int f_prev = 0;
600 static int fold_preserve_f = FALSE; /* preserve new lines */
601 static int fold_f = FALSE;
602 static int fold_len = 0;
605 static unsigned char kanji_intro = DEFAULT_J;
606 static unsigned char ascii_intro = DEFAULT_R;
610 #define FOLD_MARGIN 10
611 #define DEFAULT_FOLD 60
613 static int fold_margin = FOLD_MARGIN;
617 #ifdef DEFAULT_CODE_JIS
618 # define DEFAULT_CONV j_oconv
620 #ifdef DEFAULT_CODE_SJIS
621 # define DEFAULT_CONV s_oconv
623 #ifdef DEFAULT_CODE_EUC
624 # define DEFAULT_CONV e_oconv
626 #ifdef DEFAULT_CODE_UTF8
627 # define DEFAULT_CONV w_oconv
630 /* process default */
631 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
633 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
634 /* s_iconv or oconv */
635 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
637 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
638 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
639 static void (*o_crconv)(nkf_char c2,nkf_char c1) = no_connection;
640 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
641 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
642 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
643 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
645 /* static redirections */
647 static void (*o_putc)(nkf_char c) = std_putc;
649 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
650 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
652 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
653 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
655 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
657 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
658 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
660 /* for strict mime */
661 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
662 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
665 static int output_mode = ASCII, /* output kanji mode */
666 input_mode = ASCII, /* input kanji mode */
667 shift_mode = FALSE; /* TRUE shift out, or X0201 */
668 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
670 /* X0201 / X0208 conversion tables */
672 /* X0201 kana conversion table */
675 unsigned char cv[]= {
676 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
677 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
678 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
679 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
680 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
681 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
682 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
683 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
684 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
685 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
686 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
687 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
688 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
689 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
690 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
691 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
695 /* X0201 kana conversion table for dakuten */
698 unsigned char dv[]= {
699 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
700 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
701 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
702 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
703 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
704 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
705 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
706 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
707 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
708 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
709 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
710 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
711 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
712 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
713 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
714 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
717 /* X0201 kana conversion table for handakuten */
720 unsigned char ev[]= {
721 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
724 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
725 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
726 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
729 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
730 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
731 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
732 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
733 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
734 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
735 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
736 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
740 /* X0208 kigou conversion table */
741 /* 0x8140 - 0x819e */
743 unsigned char fv[] = {
745 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
746 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
747 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
748 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
749 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
750 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
751 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
752 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
753 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
754 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
755 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
756 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
762 static int file_out_f = FALSE;
764 static int overwrite_f = FALSE;
765 static int preserve_time_f = FALSE;
766 static int backup_f = FALSE;
767 static char *backup_suffix = "";
768 static char *get_backup_filename(const char *suffix, const char *filename);
771 static int crmode_f = 0; /* CR, NL, CRLF */
772 static nkf_char prev_cr = 0;
773 #ifdef EASYWIN /*Easy Win */
774 static int end_check;
777 #define STD_GC_BUFSIZE (256)
778 nkf_char std_gc_buf[STD_GC_BUFSIZE];
782 #include "nkf32dll.c"
783 #elif defined(PERL_XS)
785 int main(int argc, char **argv)
790 char *outfname = NULL;
793 #ifdef EASYWIN /*Easy Win */
794 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
797 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
798 cp = (unsigned char *)*argv;
803 if (pipe(fds) < 0 || (pid = fork()) < 0){
814 execvp(argv[1], &argv[1]);
828 if(x0201_f == WISH_TRUE)
829 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
831 if (binmode_f == TRUE)
832 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
833 if (freopen("","wb",stdout) == NULL)
840 setbuf(stdout, (char *) NULL);
842 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
845 if (binmode_f == TRUE)
846 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
847 if (freopen("","rb",stdin) == NULL) return (-1);
851 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
855 kanji_convert(stdin);
856 if (guess_f) print_guessed_code(NULL);
860 int is_argument_error = FALSE;
862 is_inputcode_mixed = FALSE;
863 is_inputcode_set = FALSE;
868 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
871 is_argument_error = TRUE;
879 /* reopen file for stdout */
880 if (file_out_f == TRUE) {
883 outfname = malloc(strlen(origfname)
884 + strlen(".nkftmpXXXXXX")
890 strcpy(outfname, origfname);
894 for (i = strlen(outfname); i; --i){
895 if (outfname[i - 1] == '/'
896 || outfname[i - 1] == '\\'){
902 strcat(outfname, "ntXXXXXX");
904 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
907 strcat(outfname, ".nkftmpXXXXXX");
908 fd = mkstemp(outfname);
911 || (fd_backup = dup(fileno(stdout))) < 0
912 || dup2(fd, fileno(stdout)) < 0
923 outfname = "nkf.out";
926 if(freopen(outfname, "w", stdout) == NULL) {
930 if (binmode_f == TRUE) {
931 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
932 if (freopen("","wb",stdout) == NULL)
939 if (binmode_f == TRUE)
940 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
941 if (freopen("","rb",fin) == NULL)
946 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
950 char *filename = NULL;
952 if (nfiles > 1) filename = origfname;
953 if (guess_f) print_guessed_code(filename);
959 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
967 if (dup2(fd_backup, fileno(stdout)) < 0){
970 if (stat(origfname, &sb)) {
971 fprintf(stderr, "Can't stat %s\n", origfname);
973 /*
\e$B%Q!<%_%C%7%g%s$rI|85
\e(B */
974 if (chmod(outfname, sb.st_mode)) {
975 fprintf(stderr, "Can't set permission %s\n", outfname);
978 /*
\e$B%?%$%`%9%?%s%W$rI|85
\e(B */
980 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
981 tb[0] = tb[1] = sb.st_mtime;
982 if (utime(outfname, tb)) {
983 fprintf(stderr, "Can't set timestamp %s\n", outfname);
986 tb.actime = sb.st_atime;
987 tb.modtime = sb.st_mtime;
988 if (utime(outfname, &tb)) {
989 fprintf(stderr, "Can't set timestamp %s\n", outfname);
994 char *backup_filename = get_backup_filename(backup_suffix, origfname);
996 unlink(backup_filename);
998 if (rename(origfname, backup_filename)) {
999 perror(backup_filename);
1000 fprintf(stderr, "Can't rename %s to %s\n",
1001 origfname, backup_filename);
1005 if (unlink(origfname)){
1010 if (rename(outfname, origfname)) {
1012 fprintf(stderr, "Can't rename %s to %s\n",
1013 outfname, origfname);
1020 if (is_argument_error)
1023 #ifdef EASYWIN /*Easy Win */
1024 if (file_out_f == FALSE)
1025 scanf("%d",&end_check);
1028 #else /* for Other OS */
1029 if (file_out_f == TRUE)
1031 #endif /*Easy Win */
1034 #endif /* WIN32DLL */
1037 char *get_backup_filename(const char *suffix, const char *filename)
1039 char *backup_filename;
1040 int asterisk_count = 0;
1042 int filename_length = strlen(filename);
1044 for(i = 0; suffix[i]; i++){
1045 if(suffix[i] == '*') asterisk_count++;
1049 backup_filename = malloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1050 if (!backup_filename){
1051 perror("Can't malloc backup filename.");
1055 for(i = 0, j = 0; suffix[i];){
1056 if(suffix[i] == '*'){
1057 backup_filename[j] = '\0';
1058 strncat(backup_filename, filename, filename_length);
1060 j += filename_length;
1062 backup_filename[j++] = suffix[i++];
1065 backup_filename[j] = '\0';
1067 j = strlen(suffix) + filename_length; /* length of filename followed by suffix, excluding the terminator */
1068 backup_filename = malloc(j + 1); /* was malloc( + 1), i.e. a 1-byte buffer that the strcpy/strcat below overflowed; j + 1 holds filename + suffix + NUL */
1069 strcpy(backup_filename, filename);
1070 strcat(backup_filename, suffix);
1071 backup_filename[j] = '\0';
1073 return backup_filename;
1102 {"katakana-hiragana","h3"},
1109 #ifdef UTF8_OUTPUT_ENABLE
1119 {"fb-subchar=", ""},
1121 #ifdef UTF8_INPUT_ENABLE
1122 {"utf8-input", "W"},
1123 {"utf16-input", "W16"},
1124 {"no-cp932ext", ""},
1125 {"no-best-fit-chars",""},
1127 #ifdef UNICODE_NORMALIZATION
1128 {"utf8mac-input", ""},
1140 #ifdef NUMCHAR_OPTION
1141 {"numchar-input", ""},
1147 #ifdef SHIFTJIS_CP932
1157 static int option_mode = 0;
1159 void options(unsigned char *cp)
1163 unsigned char *cp_back = NULL;
1168 while(*cp && *cp++!='-');
1169 while (*cp || cp_back) {
1177 case '-': /* literal options */
1178 if (!*cp || *cp == SPACE) { /* ignore the rest of arguments */
1182 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1183 p = (unsigned char *)long_option[i].name;
1184 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1185 if (*p == cp[j] || cp[j] == ' '){
1192 while(*cp && *cp != SPACE && cp++);
1193 if (long_option[i].alias[0]){
1195 cp = (unsigned char *)long_option[i].alias;
1197 if (strcmp(long_option[i].name, "ic=") == 0){
1198 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1199 codeset[i] = nkf_toupper(p[i]);
1202 if(strcmp(codeset, "ISO-2022-JP") == 0){
1203 input_f = JIS_INPUT;
1204 }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1205 strcmp(codeset, "CP50220") == 0 ||
1206 strcmp(codeset, "CP50221") == 0 ||
1207 strcmp(codeset, "CP50222") == 0){
1208 input_f = JIS_INPUT;
1209 #ifdef SHIFTJIS_CP932
1212 #ifdef UTF8_OUTPUT_ENABLE
1213 ms_ucs_map_f = UCS_MAP_CP932;
1215 }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1216 input_f = JIS_INPUT;
1220 }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1221 input_f = JIS_INPUT;
1226 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1227 input_f = SJIS_INPUT;
1228 }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1229 strcmp(codeset, "CSWINDOWS31J") == 0 ||
1230 strcmp(codeset, "CP932") == 0 ||
1231 strcmp(codeset, "MS932") == 0){
1232 input_f = SJIS_INPUT;
1233 #ifdef SHIFTJIS_CP932
1236 #ifdef UTF8_OUTPUT_ENABLE
1237 ms_ucs_map_f = UCS_MAP_CP932;
1239 }else if(strcmp(codeset, "CP10001") == 0){
1240 input_f = SJIS_INPUT;
1241 #ifdef SHIFTJIS_CP932
1244 #ifdef UTF8_OUTPUT_ENABLE
1245 ms_ucs_map_f = UCS_MAP_CP10001;
1247 }else if(strcmp(codeset, "EUCJP") == 0 ||
1248 strcmp(codeset, "EUC-JP") == 0){
1249 input_f = EUC_INPUT;
1250 }else if(strcmp(codeset, "CP51932") == 0){
1251 input_f = EUC_INPUT;
1252 #ifdef SHIFTJIS_CP932
1255 #ifdef UTF8_OUTPUT_ENABLE
1256 ms_ucs_map_f = UCS_MAP_CP932;
1258 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1259 strcmp(codeset, "EUCJP-MS") == 0 ||
1260 strcmp(codeset, "EUCJPMS") == 0){
1261 input_f = EUC_INPUT;
1262 #ifdef SHIFTJIS_CP932
1265 #ifdef UTF8_OUTPUT_ENABLE
1266 ms_ucs_map_f = UCS_MAP_MS;
1268 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1269 strcmp(codeset, "EUCJP-ASCII") == 0){
1270 input_f = EUC_INPUT;
1271 #ifdef SHIFTJIS_CP932
1274 #ifdef UTF8_OUTPUT_ENABLE
1275 ms_ucs_map_f = UCS_MAP_ASCII;
1277 }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1278 strcmp(codeset, "SHIFT_JIS-2004") == 0){
1279 input_f = SJIS_INPUT;
1281 #ifdef SHIFTJIS_CP932
1284 }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1285 strcmp(codeset, "EUC-JIS-2004") == 0){
1286 input_f = EUC_INPUT;
1288 #ifdef SHIFTJIS_CP932
1291 #ifdef UTF8_INPUT_ENABLE
1292 }else if(strcmp(codeset, "UTF-8") == 0 ||
1293 strcmp(codeset, "UTF-8N") == 0 ||
1294 strcmp(codeset, "UTF-8-BOM") == 0){
1295 input_f = UTF8_INPUT;
1296 #ifdef UNICODE_NORMALIZATION
1297 }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1298 strcmp(codeset, "UTF-8-MAC") == 0){
1299 input_f = UTF8_INPUT;
1302 }else if(strcmp(codeset, "UTF-16") == 0 ||
1303 strcmp(codeset, "UTF-16BE") == 0 ||
1304 strcmp(codeset, "UTF-16BE-BOM") == 0){
1305 input_f = UTF16_INPUT;
1306 input_endian = ENDIAN_BIG;
1307 }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1308 strcmp(codeset, "UTF-16LE-BOM") == 0){
1309 input_f = UTF16_INPUT;
1310 input_endian = ENDIAN_LITTLE;
1311 }else if(strcmp(codeset, "UTF-32") == 0 ||
1312 strcmp(codeset, "UTF-32BE") == 0 ||
1313 strcmp(codeset, "UTF-32BE-BOM") == 0){
1314 input_f = UTF32_INPUT;
1315 input_endian = ENDIAN_BIG;
1316 }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1317 strcmp(codeset, "UTF-32LE-BOM") == 0){
1318 input_f = UTF32_INPUT;
1319 input_endian = ENDIAN_LITTLE;
1324 if (strcmp(long_option[i].name, "oc=") == 0){
1326 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1327 codeset[i] = nkf_toupper(p[i]);
1330 if(strcmp(codeset, "ISO-2022-JP") == 0){
1331 output_conv = j_oconv;
1332 }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1333 output_conv = j_oconv;
1334 no_cp932ext_f = TRUE;
1335 #ifdef SHIFTJIS_CP932
1338 #ifdef UTF8_OUTPUT_ENABLE
1339 ms_ucs_map_f = UCS_MAP_CP932;
1341 }else if(strcmp(codeset, "CP50220") == 0){
1342 output_conv = j_oconv;
1344 #ifdef SHIFTJIS_CP932
1347 #ifdef UTF8_OUTPUT_ENABLE
1348 ms_ucs_map_f = UCS_MAP_CP932;
1350 }else if(strcmp(codeset, "CP50221") == 0){
1351 output_conv = j_oconv;
1352 #ifdef SHIFTJIS_CP932
1355 #ifdef UTF8_OUTPUT_ENABLE
1356 ms_ucs_map_f = UCS_MAP_CP932;
1358 }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1359 output_conv = j_oconv;
1363 #ifdef SHIFTJIS_CP932
1366 }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1367 output_conv = j_oconv;
1372 #ifdef SHIFTJIS_CP932
1375 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1376 output_conv = s_oconv;
1377 }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1378 strcmp(codeset, "CSWINDOWS31J") == 0 ||
1379 strcmp(codeset, "CP932") == 0 ||
1380 strcmp(codeset, "MS932") == 0){
1381 output_conv = s_oconv;
1382 #ifdef UTF8_OUTPUT_ENABLE
1383 ms_ucs_map_f = UCS_MAP_CP932;
1385 }else if(strcmp(codeset, "CP10001") == 0){
1386 output_conv = s_oconv;
1387 #ifdef UTF8_OUTPUT_ENABLE
1388 ms_ucs_map_f = UCS_MAP_CP10001;
1390 }else if(strcmp(codeset, "EUCJP") == 0 ||
1391 strcmp(codeset, "EUC-JP") == 0){
1392 output_conv = e_oconv;
1393 }else if(strcmp(codeset, "CP51932") == 0){
1394 output_conv = e_oconv;
1395 #ifdef SHIFTJIS_CP932
1398 #ifdef UTF8_OUTPUT_ENABLE
1399 ms_ucs_map_f = UCS_MAP_CP932;
1401 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1402 strcmp(codeset, "EUCJP-MS") == 0 ||
1403 strcmp(codeset, "EUCJPMS") == 0){
1404 output_conv = e_oconv;
1408 #ifdef UTF8_OUTPUT_ENABLE
1409 ms_ucs_map_f = UCS_MAP_MS;
1411 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1412 strcmp(codeset, "EUCJP-ASCII") == 0){
1413 output_conv = e_oconv;
1417 #ifdef UTF8_OUTPUT_ENABLE
1418 ms_ucs_map_f = UCS_MAP_ASCII;
1420 }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1421 strcmp(codeset, "SHIFT_JIS-2004") == 0){
1422 output_conv = s_oconv;
1424 #ifdef SHIFTJIS_CP932
1427 }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1428 strcmp(codeset, "EUC-JIS-2004") == 0){
1429 output_conv = e_oconv;
1434 #ifdef SHIFTJIS_CP932
1437 #ifdef UTF8_OUTPUT_ENABLE
1438 }else if(strcmp(codeset, "UTF-8") == 0){
1439 output_conv = w_oconv;
1440 }else if(strcmp(codeset, "UTF-8N") == 0){
1441 output_conv = w_oconv;
1442 }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1443 output_conv = w_oconv;
1444 output_bom_f = TRUE;
1445 }else if(strcmp(codeset, "UTF-16BE") == 0){
1446 output_conv = w_oconv16;
1447 }else if(strcmp(codeset, "UTF-16") == 0 ||
1448 strcmp(codeset, "UTF-16BE-BOM") == 0){
1449 output_conv = w_oconv16;
1450 output_bom_f = TRUE;
1451 }else if(strcmp(codeset, "UTF-16LE") == 0){
1452 output_conv = w_oconv16;
1453 output_endian = ENDIAN_LITTLE;
1454 }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1455 output_conv = w_oconv16;
1456 output_endian = ENDIAN_LITTLE;
1457 output_bom_f = TRUE;
1458 }else if(strcmp(codeset, "UTF-32") == 0 ||
1459 strcmp(codeset, "UTF-32BE") == 0){
1460 output_conv = w_oconv32;
1461 }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1462 output_conv = w_oconv32;
1463 output_bom_f = TRUE;
1464 }else if(strcmp(codeset, "UTF-32LE") == 0){
1465 output_conv = w_oconv32;
1466 output_endian = ENDIAN_LITTLE;
1467 }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1468 output_conv = w_oconv32;
1469 output_endian = ENDIAN_LITTLE;
1470 output_bom_f = TRUE;
1476 if (strcmp(long_option[i].name, "overwrite") == 0){
1479 preserve_time_f = TRUE;
1482 if (strcmp(long_option[i].name, "overwrite=") == 0){
1485 preserve_time_f = TRUE;
1487 backup_suffix = malloc(strlen((char *) p) + 1);
1488 strcpy(backup_suffix, (char *) p);
1491 if (strcmp(long_option[i].name, "in-place") == 0){
1494 preserve_time_f = FALSE;
1497 if (strcmp(long_option[i].name, "in-place=") == 0){
1500 preserve_time_f = FALSE;
1502 backup_suffix = malloc(strlen((char *) p) + 1);
1503 strcpy(backup_suffix, (char *) p);
1508 if (strcmp(long_option[i].name, "cap-input") == 0){
1512 if (strcmp(long_option[i].name, "url-input") == 0){
1517 #ifdef NUMCHAR_OPTION
1518 if (strcmp(long_option[i].name, "numchar-input") == 0){
1524 if (strcmp(long_option[i].name, "no-output") == 0){
1528 if (strcmp(long_option[i].name, "debug") == 0){
1533 if (strcmp(long_option[i].name, "cp932") == 0){
1534 #ifdef SHIFTJIS_CP932
1538 #ifdef UTF8_OUTPUT_ENABLE
1539 ms_ucs_map_f = UCS_MAP_CP932;
1543 if (strcmp(long_option[i].name, "no-cp932") == 0){
1544 #ifdef SHIFTJIS_CP932
1548 #ifdef UTF8_OUTPUT_ENABLE
1549 ms_ucs_map_f = UCS_MAP_ASCII;
1553 #ifdef SHIFTJIS_CP932
1554 if (strcmp(long_option[i].name, "cp932inv") == 0){
1561 if (strcmp(long_option[i].name, "x0212") == 0){
1568 if (strcmp(long_option[i].name, "exec-in") == 0){
1572 if (strcmp(long_option[i].name, "exec-out") == 0){
1577 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1578 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1579 no_cp932ext_f = TRUE;
1582 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1583 no_best_fit_chars_f = TRUE;
1586 if (strcmp(long_option[i].name, "fb-skip") == 0){
1587 encode_fallback = NULL;
1590 if (strcmp(long_option[i].name, "fb-html") == 0){
1591 encode_fallback = encode_fallback_html;
1594 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1595 encode_fallback = encode_fallback_xml;
1598 if (strcmp(long_option[i].name, "fb-java") == 0){
1599 encode_fallback = encode_fallback_java;
1602 if (strcmp(long_option[i].name, "fb-perl") == 0){
1603 encode_fallback = encode_fallback_perl;
1606 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1607 encode_fallback = encode_fallback_subchar;
1610 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1611 encode_fallback = encode_fallback_subchar;
1612 unicode_subchar = 0;
1614 /* decimal number */
1615 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1616 unicode_subchar *= 10;
1617 unicode_subchar += hex2bin(p[i]);
1619 }else if(p[1] == 'x' || p[1] == 'X'){
1620 /* hexadecimal number */
1621 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1622 unicode_subchar <<= 4;
1623 unicode_subchar |= hex2bin(p[i]);
1627 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1628 unicode_subchar *= 8;
1629 unicode_subchar += hex2bin(p[i]);
1632 w16e_conv(unicode_subchar, &i, &j);
1633 unicode_subchar = i<<8 | j;
1637 #ifdef UTF8_OUTPUT_ENABLE
1638 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1639 ms_ucs_map_f = UCS_MAP_MS;
1643 #ifdef UNICODE_NORMALIZATION
1644 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1645 input_f = UTF8_INPUT;
1650 if (strcmp(long_option[i].name, "prefix=") == 0){
1651 if (nkf_isgraph(p[0])){
1652 for (i = 1; nkf_isgraph(p[i]); i++){
1653 prefix_table[p[i]] = p[0];
1660 case 'b': /* buffered mode */
1663 case 'u': /* non bufferd mode */
1666 case 't': /* transparent mode */
1671 } else if (*cp=='2') {
1675 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1683 case 'j': /* JIS output */
1685 output_conv = j_oconv;
1687 case 'e': /* AT&T EUC output */
1688 output_conv = e_oconv;
1691 case 's': /* SJIS output */
1692 output_conv = s_oconv;
1694 case 'l': /* ISO8859 Latin-1 support, no conversion */
1695 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1696 input_f = LATIN1_INPUT;
1698 case 'i': /* Kanji IN ESC-$-@/B */
1699 if (*cp=='@'||*cp=='B')
1700 kanji_intro = *cp++;
1702 case 'o': /* ASCII IN ESC-(-J/B */
1703 if (*cp=='J'||*cp=='B'||*cp=='H')
1704 ascii_intro = *cp++;
1708 bit:1 katakana->hiragana
1709 bit:2 hiragana->katakana
1711 if ('9'>= *cp && *cp>='0')
1712 hira_f |= (*cp++ -'0');
1719 #if defined(MSDOS) || defined(__OS2__)
1734 #ifdef UTF8_OUTPUT_ENABLE
1735 case 'w': /* UTF-8 output */
1737 output_conv = w_oconv; cp++;
1741 output_bom_f = TRUE;
1744 if ('1'== cp[0] && '6'==cp[1]) {
1745 output_conv = w_oconv16; cp+=2;
1746 } else if ('3'== cp[0] && '2'==cp[1]) {
1747 output_conv = w_oconv32; cp+=2;
1749 output_conv = w_oconv;
1754 output_endian = ENDIAN_LITTLE;
1755 } else if (cp[0] == 'B') {
1763 output_bom_f = TRUE;
1768 #ifdef UTF8_INPUT_ENABLE
1769 case 'W': /* UTF input */
1772 input_f = UTF8_INPUT;
1774 if ('1'== cp[0] && '6'==cp[1]) {
1776 input_f = UTF16_INPUT;
1777 input_endian = ENDIAN_BIG;
1778 } else if ('3'== cp[0] && '2'==cp[1]) {
1780 input_f = UTF32_INPUT;
1781 input_endian = ENDIAN_BIG;
1783 input_f = UTF8_INPUT;
1788 input_endian = ENDIAN_LITTLE;
1789 } else if (cp[0] == 'B') {
1795 /* Input code assumption */
1796 case 'J': /* JIS input */
1797 input_f = JIS_INPUT;
1799 case 'E': /* AT&T EUC input */
1800 input_f = EUC_INPUT;
1802 case 'S': /* MS Kanji input */
1803 input_f = SJIS_INPUT;
1804 if (x0201_f==NO_X0201) x0201_f=TRUE;
1806 case 'Z': /* Convert X0208 alphabet to asii */
1808 bit:0 Convert JIS X 0208 Alphabet to ASCII
1809 bit:1 Convert Kankaku to one space
1810 bit:2 Convert Kankaku to two spaces
1811 bit:3 Convert HTML Entity
1812 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1814 while ('0'<= *cp && *cp <='9') {
1815 alpha_f |= 1 << (*cp++ - '0');
1817 if (!alpha_f) alpha_f = 1;
1819 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1820 x0201_f = FALSE; /* No X0201->X0208 conversion */
1822 ESC-(-I in JIS, EUC, MS Kanji
1823 SI/SO in JIS, EUC, MS Kanji
1824 SSO in EUC, JIS, not in MS Kanji
1825 MS Kanji (0xa0-0xdf)
1827 ESC-(-I in JIS (0x20-0x5f)
1828 SSO in EUC (0xa0-0xdf)
1829 0xa0-0xd in MS Kanji (0xa0-0xdf)
1832 case 'X': /* Assume X0201 kana */
1833 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1836 case 'F': /* prserve new lines */
1837 fold_preserve_f = TRUE;
1838 case 'f': /* folding -f60 or -f */
1841 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1843 fold_len += *cp++ - '0';
1845 if (!(0<fold_len && fold_len<BUFSIZ))
1846 fold_len = DEFAULT_FOLD;
1850 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1852 fold_margin += *cp++ - '0';
1856 case 'm': /* MIME support */
1857 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1858 if (*cp=='B'||*cp=='Q') {
1859 mime_decode_mode = *cp++;
1860 mimebuf_f = FIXED_MIME;
1861 } else if (*cp=='N') {
1862 mime_f = TRUE; cp++;
1863 } else if (*cp=='S') {
1864 mime_f = STRICT_MIME; cp++;
1865 } else if (*cp=='0') {
1866 mime_decode_f = FALSE;
1867 mime_f = FALSE; cp++;
1870 case 'M': /* MIME output */
1873 mimeout_f = FIXED_MIME; cp++;
1874 } else if (*cp=='Q') {
1876 mimeout_f = FIXED_MIME; cp++;
1881 case 'B': /* Broken JIS support */
1883 bit:1 allow any x on ESC-(-x or ESC-$-x
1884 bit:2 reset to ascii on NL
1886 if ('9'>= *cp && *cp>='0')
1887 broken_f |= 1<<(*cp++ -'0');
1892 case 'O':/* for Output file */
1896 case 'c':/* add cr code */
1899 case 'd':/* delete cr code */
1902 case 'I': /* ISO-2022-JP output */
1905 case 'L': /* line mode */
1906 if (*cp=='u') { /* unix */
1907 crmode_f = NL; cp++;
1908 } else if (*cp=='m') { /* mac */
1909 crmode_f = CR; cp++;
1910 } else if (*cp=='w') { /* windows */
1911 crmode_f = CRLF; cp++;
1912 } else if (*cp=='0') { /* no conversion */
1922 /* module: multiple options in a string are allowed for the Perl module */
1923 while(*cp && *cp++!='-');
1926 /* bogus option but ignored */
/*
 * Look up the input_code_list entry that belongs to a conversion function.
 * The search starts at the head of input_code_list; an entry matches when
 * its iconv_func member equals the iconv_func argument.
 * NOTE(review): presumably returns the matching entry, or a null pointer
 * when nothing matches -- confirm against the full definition.
 */
1932 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1935 struct input_code *p = input_code_list;
1937 if (iconv_func == p->iconv_func){
/*
 * Select the input conversion routine and refresh the reported input
 * code name.
 *   f          - selection flag; under INPUT_CODE_FIX the value -TRUE
 *                means "FORCE" (see the condition below).
 *   iconv_func - candidate conversion function.
 * Once the encoding is established (estab_f) and the active converter
 * differs from the one last reported (iconv_for_check), the matching
 * input_code entry is looked up, its name is stored through
 * set_input_codename(), input_codename is passed to debug(), and
 * iconv_for_check is updated to the active converter.
 */
1946 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1948 #ifdef INPUT_CODE_FIX
1956 #ifdef INPUT_CODE_FIX
1957 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1963 if (estab_f && iconv_for_check != iconv){
1964 struct input_code *p = find_inputcode_byfunc(iconv);
1966 set_input_codename(p->name);
1967 debug(input_codename);
1969 iconv_for_check = iconv;
/*
 * Score bits for encoding auto-detection.  Each constant is the previous
 * one shifted left by one bit; set_code_score() ORs them into a
 * candidate's score, and the candidate with the smallest score is chosen.
 */
1974 #define SCORE_L2 (1) /* JIS level-2 kanji */
1975 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1976 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1977 #ifdef SHIFTJIS_CP932
1978 #define SCORE_CP932 (SCORE_DEPEND << 1) /* reinterpreted via CP932 */
1979 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1981 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1983 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1984 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1986 #define SCORE_INIT (SCORE_iMIME)
/*
 * Score lookup used by code_score() when (c2 & 0x70) == 0x20;
 * it is indexed with c2 & 0x0f.
 */
1988 const nkf_char score_table_A0[] = {
1991 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1992 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
/*
 * Score lookup used by code_score() when (c2 & 0x70) == 0x70;
 * it is indexed with c2 & 0x0f.
 */
1995 const nkf_char score_table_F0[] = {
1996 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1997 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1998 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1999 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/* OR the given SCORE_* bits into the score of candidate ptr. */
2002 void set_code_score(struct input_code *ptr, nkf_char score)
2005 ptr->score |= score;
/* Clear the given SCORE_* bits from the score of candidate ptr. */
2009 void clr_code_score(struct input_code *ptr, nkf_char score)
2012 ptr->score &= ~score;
/*
 * Update the detection score of candidate ptr from the character held in
 * ptr->buf (buf[0] is read as c2 and, with UTF8_OUTPUT_ENABLE, buf[1] as
 * c1).  Exactly one branch of the chain below adds score bits:
 *   - SCORE_ERROR on the first (error) branch,
 *   - SCORE_KANA when c2 is SSO,
 *   - SCORE_NO_EXIST when e2w_conv() yields 0 for the pair,
 *   - a table value selected by the high bits of c2 (0x20 -> score_table_A0,
 *     0x70 -> score_table_F0), indexed by the low nibble of c2,
 *   - SCORE_L2 when (c2 & 0x70) is 0x50 or above.
 */
2016 void code_score(struct input_code *ptr)
2018 nkf_char c2 = ptr->buf[0];
2019 #ifdef UTF8_OUTPUT_ENABLE
2020 nkf_char c1 = ptr->buf[1];
2023 set_code_score(ptr, SCORE_ERROR);
2024 }else if (c2 == SSO){
2025 set_code_score(ptr, SCORE_KANA);
2026 #ifdef UTF8_OUTPUT_ENABLE
2027 }else if (!e2w_conv(c2, c1)){
2028 set_code_score(ptr, SCORE_NO_EXIST);
2030 }else if ((c2 & 0x70) == 0x20){
2031 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2032 }else if ((c2 & 0x70) == 0x70){
2033 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2034 }else if ((c2 & 0x70) >= 0x50){
2035 set_code_score(ptr, SCORE_L2);
/*
 * Take candidate ptr out of the running.  If the currently active
 * converter is the one belonging to this candidate, the selection is
 * reset with set_iconv(FALSE, 0).
 * NOTE(review): presumably also marks the candidate itself as disabled --
 * confirm against the full definition.
 */
2039 void status_disable(struct input_code *ptr)
2044 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
/*
 * Append c to the candidate's byte buffer at ptr->index and advance the
 * index.  No bounds check is visible here; callers are expected to keep
 * the index within ptr->buf.
 */
2047 void status_push_ch(struct input_code *ptr, nkf_char c)
2049 ptr->buf[ptr->index++] = c;
2052 void status_clear(struct input_code *ptr)
/* Reset candidate ptr for a new detection pass; its score goes back to SCORE_INIT. */
2058 void status_reset(struct input_code *ptr)
2061 ptr->score = SCORE_INIT;
/* Reinitialise candidate ptr; the _file_stat member is zeroed. */
2064 void status_reinit(struct input_code *ptr)
2067 ptr->_file_stat = 0;
/*
 * Per-byte hook called by the s_status/e_status/w_status checkers.  It acts
 * only when c is a 7-bit value (c <= DEL) and the encoding is already
 * established (estab_f).
 * NOTE(review): the action taken inside the condition is not visible here.
 */
2070 void status_check(struct input_code *ptr, nkf_char c)
2072 if (c <= DEL && estab_f){
/*
 * Shift_JIS plausibility checker: feeds one input byte c into the state
 * kept in candidate ptr.  Bytes that fit the expected ranges are stored
 * with status_push_ch(); a byte that fits no branch disables the
 * candidate with status_disable().  Completed two-byte sequences are
 * converted in place with s2e_conv() (buf[0], buf[1] are overwritten).
 */
2077 void s_status(struct input_code *ptr, nkf_char c)
2081 status_check(ptr, c);
2086 #ifdef NUMCHAR_OPTION
2087 }else if (is_unicode_capsule(c)){
/* 0xa1..0xdf is recorded as the pair (SSO, c) */
2090 }else if (0xa1 <= c && c <= 0xdf){
2091 status_push_ch(ptr, SSO);
2092 status_push_ch(ptr, c);
/* 0x81..0x9f or 0xe0..0xef: stored as the first byte of a longer sequence */
2095 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2097 status_push_ch(ptr, c);
2098 #ifdef SHIFTJIS_CP932
2100 && is_ibmext_in_sjis(c)){
2102 status_push_ch(ptr, c);
2103 #endif /* SHIFTJIS_CP932 */
/* 0xf0..0xfc is accepted only when x0212_f is set */
2105 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2107 status_push_ch(ptr, c);
2108 #endif /* X0212_ENABLE */
2110 status_disable(ptr);
/* following byte must lie in 0x40..0x7e or 0x80..0xfc */
2114 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2115 status_push_ch(ptr, c);
2116 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2120 status_disable(ptr);
2124 #ifdef SHIFTJIS_CP932
2125 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2126 status_push_ch(ptr, c);
/* a successful conversion (return 0) on this path adds SCORE_CP932 */
2127 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2128 set_code_score(ptr, SCORE_CP932);
2133 #endif /* SHIFTJIS_CP932 */
2134 #ifndef X0212_ENABLE
2135 status_disable(ptr);
/*
 * EUC plausibility checker: feeds one input byte c into the state kept in
 * candidate ptr.  SSO or a byte in 0xa1..0xfe starts a sequence; 0x8f
 * starts the sequence handled under X0212_ENABLE.  Continuation bytes
 * must lie in 0xa1..0xfe; any byte that fits no branch disables the
 * candidate with status_disable().
 */
2141 void e_status(struct input_code *ptr, nkf_char c)
2145 status_check(ptr, c);
2150 #ifdef NUMCHAR_OPTION
2151 }else if (is_unicode_capsule(c)){
2154 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2156 status_push_ch(ptr, c);
2158 }else if (0x8f == c){
2160 status_push_ch(ptr, c);
2161 #endif /* X0212_ENABLE */
2163 status_disable(ptr);
2167 if (0xa1 <= c && c <= 0xfe){
2168 status_push_ch(ptr, c);
2172 status_disable(ptr);
2177 if (0xa1 <= c && c <= 0xfe){
2179 status_push_ch(ptr, c);
2181 status_disable(ptr);
2183 #endif /* X0212_ENABLE */
2187 #ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 plausibility checker: feeds one input byte c into the state kept
 * in candidate ptr.  Lead bytes are accepted in three ranges (0xc0..0xdf,
 * 0xe0..0xef, 0xf0..0xf4); continuation bytes must lie in 0x80..0xbf.
 * Any byte that fits no branch disables the candidate with
 * status_disable().  When a sequence completes, the bytes are checked
 * against the BOM pattern EF BB BF and converted in place with
 * w2e_conv() (buf[0], buf[1] are overwritten).
 */
2188 void w_status(struct input_code *ptr, nkf_char c)
2192 status_check(ptr, c);
2197 #ifdef NUMCHAR_OPTION
2198 }else if (is_unicode_capsule(c)){
2201 }else if (0xc0 <= c && c <= 0xdf){
2203 status_push_ch(ptr, c);
2204 }else if (0xe0 <= c && c <= 0xef){
2206 status_push_ch(ptr, c);
2207 }else if (0xf0 <= c && c <= 0xf4){
2209 status_push_ch(ptr, c);
2211 status_disable(ptr);
2216 if (0x80 <= c && c <= 0xbf){
2217 status_push_ch(ptr, c);
2218 if (ptr->index > ptr->stat){
/* bom is nonzero when the buffered bytes are exactly EF BB BF */
2219 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2220 && ptr->buf[2] == 0xbf);
2221 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2222 &ptr->buf[0], &ptr->buf[1]);
2229 status_disable(ptr);
2233 if (0x80 <= c && c <= 0xbf){
2234 if (ptr->index < ptr->stat){
2235 status_push_ch(ptr, c);
2240 status_disable(ptr);
/*
 * Feed one input byte c to the candidates in input_code_list by calling
 * each entry's status_func (entries without a status_func are treated
 * specially).  When a single result emerges and the encoding is not yet
 * established (estab_f is false), that candidate's converter is selected
 * with set_iconv(TRUE, ...).  Otherwise, for a 7-bit byte (c <= DEL), the
 * list is walked again from its head.
 * NOTE(review): how result and action_flag are updated inside the loop is
 * not visible here -- confirm against the full definition.
 */
2247 void code_status(nkf_char c)
2249 int action_flag = 1;
2250 struct input_code *result = 0;
2251 struct input_code *p = input_code_list;
2253 if (!p->status_func) {
2257 if (!p->status_func)
2259 (p->status_func)(p, c);
2262 }else if(p->stat == 0){
2273 if (result && !estab_f){
2274 set_iconv(TRUE, result->iconv_func);
2275 }else if (c <= DEL){
2276 struct input_code *ptr = input_code_list;
/*
 * Base input reader.  Characters stored by std_ungetc() are handed back
 * from std_gc_buf in last-in, first-out order (the index is decremented
 * before the read).
 * NOTE(review): presumably falls back to reading from f when nothing is
 * pushed back -- confirm against the full definition.
 */
2286 nkf_char std_getc(FILE *f)
2289 return std_gc_buf[--std_gc_ndx];
/*
 * Push c back so that a later std_getc() returns it.  The character is
 * stored at std_gc_buf[std_gc_ndx] and the index is advanced; a full
 * buffer (std_gc_ndx == STD_GC_BUFSIZE) is detected before the store.
 * NOTE(review): the full-buffer action and the return value are not
 * visible here.
 */
2295 nkf_char std_ungetc(nkf_char c, FILE *f)
2297 if (std_gc_ndx == STD_GC_BUFSIZE){
2300 std_gc_buf[std_gc_ndx++] = c;
2305 void std_putc(nkf_char c)
2312 #if !defined(PERL_XS) && !defined(WIN32DLL)
/*
 * Pass-through mode: sets up the I/O chain with module_connection() and
 * then reads f through i_getc until EOF.
 * NOTE(review): what is done with each character is not visible here.
 */
2313 nkf_char noconvert(FILE *f)
2318 module_connection();
2319 while ((c = (*i_getc)(f)) != EOF)
/*
 * Wire up the conversion pipeline from the current option flags.
 *
 * Output side: oconv starts as output_conv; each enabled filter saves the
 * current oconv in its own o_* hook and then installs itself as oconv, so
 * the filter installed last runs first.
 *
 * Input side: i_ungetc starts as std_ungetc; each enabled reader saves the
 * current i_getc/i_ungetc pair in its own hooks and installs itself.
 *
 * Finally the input converter is chosen: an explicit input_f forces the
 * matching converter with set_iconv(-TRUE, ...); otherwise e_iconv is set
 * non-forcibly with set_iconv(FALSE, e_iconv).
 */
2326 void module_connection(void)
2328 oconv = output_conv;
2331 /* replace continuation module, from output side */
2333 /* output redirection */
2335 if (noout_f || guess_f){
2342 if (mimeout_f == TRUE) {
2343 o_base64conv = oconv; oconv = base64_conv;
2345 /* base64_count = 0; */
2349 o_crconv = oconv; oconv = cr_conv;
2352 o_rot_conv = oconv; oconv = rot_conv;
2355 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2358 o_hira_conv = oconv; oconv = hira_conv;
2361 o_fconv = oconv; oconv = fold_conv;
2364 if (alpha_f || x0201_f) {
2365 o_zconv = oconv; oconv = z_conv;
2369 i_ungetc = std_ungetc;
2370 /* input redirection */
2373 i_cgetc = i_getc; i_getc = cap_getc;
2374 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2377 i_ugetc = i_getc; i_getc = url_getc;
2378 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2381 #ifdef NUMCHAR_OPTION
2383 i_ngetc = i_getc; i_getc = numchar_getc;
2384 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2387 #ifdef UNICODE_NORMALIZATION
2388 if (nfc_f && input_f == UTF8_INPUT){
2389 i_nfc_getc = i_getc; i_getc = nfc_getc;
2390 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2393 if (mime_f && mimebuf_f==FIXED_MIME) {
2394 i_mgetc = i_getc; i_getc = mime_getc;
2395 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2398 i_bgetc = i_getc; i_getc = broken_getc;
2399 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
/* JIS, EUC and Latin-1 input all go through e_iconv */
2401 if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2402 set_iconv(-TRUE, e_iconv);
2403 } else if (input_f == SJIS_INPUT) {
2404 set_iconv(-TRUE, s_iconv);
2405 #ifdef UTF8_INPUT_ENABLE
2406 } else if (input_f == UTF8_INPUT) {
2407 set_iconv(-TRUE, w_iconv);
2408 } else if (input_f == UTF16_INPUT) {
2409 set_iconv(-TRUE, w_iconv16);
2410 } else if (input_f == UTF32_INPUT) {
2411 set_iconv(-TRUE, w_iconv32);
2414 set_iconv(FALSE, e_iconv);
2418 struct input_code *p = input_code_list;
2426 * Check and Ignore BOM
/*
 * Inspect the first bytes of f for a Unicode byte order mark.
 * Bytes are read with i_getc; whenever a pattern fails to match, the
 * bytes read so far are pushed back with i_ungetc in reverse order so
 * the stream is left as it was.  On a match the corresponding converter
 * is selected with set_iconv(TRUE, ...) and, if that converter is the
 * active one, input_endian is set accordingly.
 * Patterns handled (first byte inferred from the push-back at the end of
 * each branch):
 *   00 00 FE FF  UTF-32, ENDIAN_BIG
 *   00 00 FF FE  UTF-32, ENDIAN_2143
 *   EF BB BF     UTF-8
 *   FE FF 00 00  UTF-32, ENDIAN_3412
 *   FE FF        UTF-16, ENDIAN_BIG
 *   FF FE 00 00  UTF-32, ENDIAN_LITTLE
 *   FF FE        UTF-16, ENDIAN_LITTLE
 */
2428 void check_bom(FILE *f)
2431 switch(c2 = (*i_getc)(f)){
/* first byte 0x00: candidates 00 00 FE FF and 00 00 FF FE */
2433 if((c2 = (*i_getc)(f)) == 0x00){
2434 if((c2 = (*i_getc)(f)) == 0xFE){
2435 if((c2 = (*i_getc)(f)) == 0xFF){
2437 set_iconv(TRUE, w_iconv32);
2439 if (iconv == w_iconv32) {
2440 input_endian = ENDIAN_BIG;
2443 (*i_ungetc)(0xFF,f);
2444 }else (*i_ungetc)(c2,f);
2445 (*i_ungetc)(0xFE,f);
2446 }else if(c2 == 0xFF){
2447 if((c2 = (*i_getc)(f)) == 0xFE){
2449 set_iconv(TRUE, w_iconv32);
2451 if (iconv == w_iconv32) {
2452 input_endian = ENDIAN_2143;
2455 (*i_ungetc)(0xFF,f);
2456 }else (*i_ungetc)(c2,f);
2457 (*i_ungetc)(0xFF,f);
2458 }else (*i_ungetc)(c2,f);
2459 (*i_ungetc)(0x00,f);
2460 }else (*i_ungetc)(c2,f);
2461 (*i_ungetc)(0x00,f);
/* first byte 0xEF: candidate EF BB BF */
2464 if((c2 = (*i_getc)(f)) == 0xBB){
2465 if((c2 = (*i_getc)(f)) == 0xBF){
2467 set_iconv(TRUE, w_iconv);
2469 if (iconv == w_iconv) {
2472 (*i_ungetc)(0xBF,f);
2473 }else (*i_ungetc)(c2,f);
2474 (*i_ungetc)(0xBB,f);
2475 }else (*i_ungetc)(c2,f);
2476 (*i_ungetc)(0xEF,f);
/* first byte 0xFE: candidates FE FF 00 00 and FE FF */
2479 if((c2 = (*i_getc)(f)) == 0xFF){
2480 if((c2 = (*i_getc)(f)) == 0x00){
2481 if((c2 = (*i_getc)(f)) == 0x00){
2483 set_iconv(TRUE, w_iconv32);
2485 if (iconv == w_iconv32) {
2486 input_endian = ENDIAN_3412;
2489 (*i_ungetc)(0x00,f);
2490 }else (*i_ungetc)(c2,f);
2491 (*i_ungetc)(0x00,f);
2492 }else (*i_ungetc)(c2,f);
2494 set_iconv(TRUE, w_iconv16);
2496 if (iconv == w_iconv16) {
2497 input_endian = ENDIAN_BIG;
2500 (*i_ungetc)(0xFF,f);
2501 }else (*i_ungetc)(c2,f);
2502 (*i_ungetc)(0xFE,f);
/* first byte 0xFF: candidates FF FE 00 00 and FF FE */
2505 if((c2 = (*i_getc)(f)) == 0xFE){
2506 if((c2 = (*i_getc)(f)) == 0x00){
2507 if((c2 = (*i_getc)(f)) == 0x00){
2509 set_iconv(TRUE, w_iconv32);
2511 if (iconv == w_iconv32) {
2512 input_endian = ENDIAN_LITTLE;
2515 (*i_ungetc)(0x00,f);
2516 }else (*i_ungetc)(c2,f);
2517 (*i_ungetc)(0x00,f);
2518 }else (*i_ungetc)(c2,f);
2520 set_iconv(TRUE, w_iconv16);
2522 if (iconv == w_iconv16) {
2523 input_endian = ENDIAN_LITTLE;
2526 (*i_ungetc)(0xFE,f);
2527 }else (*i_ungetc)(c2,f);
2528 (*i_ungetc)(0xFF,f);
2537 Conversion main loop. Code detection only.
/*
 * Main conversion loop for one input stream f.
 *
 * After module_connection() has wired up the I/O chain, characters are
 * read through i_getc until EOF.  c2 holds a pending first byte and c1
 * the byte just read.  Within the loop the local macros NEXT, SEND and
 * LAST stand for continue, an empty statement and break.
 *
 * Visible responsibilities:
 *   - while the encoding is not established and an 8-bit byte is pending,
 *     the pair is handed to h_conv() for buffered detection;
 *   - with w_iconv16 / w_iconv32 active, the extra bytes of a code unit
 *     are read and combined according to input_endian;
 *   - ESC sequences, SI/SO, MIME "=?" starts and NL/CR are recognised and
 *     update input_mode / shift_mode or start MIME decoding;
 *   - completed characters are passed to iconv or oconv.
 * After the loop, (*iconv)(EOF, 0, 0) is called and, when
 * is_inputcode_set is false, the input_code_list entry with the smallest
 * score supplies the reported input code name.
 */
2540 nkf_char kanji_convert(FILE *f)
2542 nkf_char c3, c2=0, c1, c0=0;
2543 int is_8bit = FALSE;
2545 if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2546 #ifdef UTF8_INPUT_ENABLE
2547 || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2554 output_mode = ASCII;
2557 #define NEXT continue /* no output, get next */
2558 #define SEND ; /* output c1 and c2, get next */
2559 #define LAST break /* end of loop, go closing */
2561 module_connection();
2564 while ((c1 = (*i_getc)(f)) != EOF) {
2565 #ifdef INPUT_CODE_FIX
2571 if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2572 /* in case of 8th bit is on */
2573 if (!estab_f&&!mime_decode_mode) {
2574 /* in case of not established yet */
2575 /* It is still ambiguous */
2576 if (h_conv(f, c2, c1)==EOF)
2582 /* in case of already established */
2584 /* ignore bogus code and not CP5022x UCD */
2592 /* second byte, 7 bit code */
2593 /* it might be kanji shifted */
2594 if ((c1 == DEL) || (c1 <= SPACE)) {
2595 /* ignore bogus first code */
2602 #ifdef UTF8_INPUT_ENABLE
2603 if (iconv == w_iconv16) {
2604 if (input_endian == ENDIAN_BIG) {
2606 if ((c1 = (*i_getc)(f)) != EOF) {
2607 if (0xD8 <= c2 && c2 <= 0xDB) {
2608 if ((c0 = (*i_getc)(f)) != EOF) {
2610 if ((c3 = (*i_getc)(f)) != EOF) {
2617 if ((c2 = (*i_getc)(f)) != EOF) {
2618 if (0xD8 <= c2 && c2 <= 0xDB) {
2619 if ((c3 = (*i_getc)(f)) != EOF) {
2620 if ((c0 = (*i_getc)(f)) != EOF) {
2629 } else if(iconv == w_iconv32){
2631 if((c2 = (*i_getc)(f)) != EOF &&
2632 (c1 = (*i_getc)(f)) != EOF &&
2633 (c0 = (*i_getc)(f)) != EOF){
2634 switch(input_endian){
2636 c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2639 c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2642 c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2645 c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2655 #ifdef NUMCHAR_OPTION
2656 if (is_unicode_capsule(c1)){
2660 if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2662 if (!estab_f && !iso8859_f) {
2663 /* not established yet */
2666 } else { /* estab_f==TRUE */
2671 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2672 /* SJIS X0201 Case... */
2673 if(iso2022jp_f && x0201_f==NO_X0201) {
2674 (*oconv)(GETA1, GETA2);
2681 } else if (c1==SSO && iconv != s_iconv) {
2682 /* EUC X0201 Case */
2683 c1 = (*i_getc)(f); /* skip SSO */
2685 if (SSP<=c1 && c1<0xe0) {
2686 if(iso2022jp_f && x0201_f==NO_X0201) {
2687 (*oconv)(GETA1, GETA2);
2694 } else { /* bogus code, skip SSO and one byte */
2697 } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2698 (c1 == 0xFD || c1 == 0xFE)) {
2704 /* already established */
2709 } else if ((c1 > SPACE) && (c1 != DEL)) {
2710 /* in case of Roman characters */
2712 /* output 1 shifted byte */
2716 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2717 /* output 1 shifted byte */
2718 if(iso2022jp_f && x0201_f==NO_X0201) {
2719 (*oconv)(GETA1, GETA2);
2726 /* look like bogus code */
2729 } else if (input_mode == X0208 || input_mode == X0212 ||
2730 input_mode == X0213_1 || input_mode == X0213_2) {
2731 /* in case of Kanji shifted */
2734 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2735 /* Check MIME code */
2736 if ((c1 = (*i_getc)(f)) == EOF) {
2739 } else if (c1 == '?') {
2740 /* =? is mime conversion start sequence */
2741 if(mime_f == STRICT_MIME) {
2742 /* check in real detail */
2743 if (mime_begin_strict(f) == EOF)
2747 } else if (mime_begin(f) == EOF)
2757 /* normal ASCII code */
2760 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
\r
2763 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
\r
2766 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
\r
2767 if ((c1 = (*i_getc)(f)) == EOF) {
2768 /* (*oconv)(0, ESC); don't send bogus code */
2770 } else if (c1 == '$') {
2771 if ((c1 = (*i_getc)(f)) == EOF) {
2773 (*oconv)(0, ESC); don't send bogus code
2774 (*oconv)(0, '$'); */
2776 } else if (c1 == '@'|| c1 == 'B') {
2777 /* This is kanji introduction */
2780 set_input_codename("ISO-2022-JP");
2782 debug(input_codename);
2785 } else if (c1 == '(') {
2786 if ((c1 = (*i_getc)(f)) == EOF) {
2787 /* don't send bogus code
2793 } else if (c1 == '@'|| c1 == 'B') {
2794 /* This is kanji introduction */
2799 } else if (c1 == 'D'){
2803 #endif /* X0212_ENABLE */
2804 } else if (c1 == (X0213_1&0x7F)){
2805 input_mode = X0213_1;
2808 } else if (c1 == (X0213_2&0x7F)){
2809 input_mode = X0213_2;
2813 /* could be some special code */
2820 } else if (broken_f&0x2) {
2821 /* accept any ESC-(-x as broken code ... */
2831 } else if (c1 == '(') {
2832 if ((c1 = (*i_getc)(f)) == EOF) {
2833 /* don't send bogus code
2835 (*oconv)(0, '('); */
2839 /* This is X0201 kana introduction */
2840 input_mode = X0201; shift_mode = X0201;
2842 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2843 /* This is X0208 kanji introduction */
2844 input_mode = ASCII; shift_mode = FALSE;
2846 } else if (broken_f&0x2) {
2847 input_mode = ASCII; shift_mode = FALSE;
2852 /* maintain various input_mode here */
2856 } else if ( c1 == 'N' || c1 == 'n' ){
2858 c3 = (*i_getc)(f); /* skip SS2 */
2859 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2874 } else if (c1 == ESC && iconv == s_iconv) {
2875 /* ESC in Shift_JIS */
2876 if ((c1 = (*i_getc)(f)) == EOF) {
2877 /* (*oconv)(0, ESC); don't send bogus code */
2879 } else if (c1 == '$') {
2881 if ((c1 = (*i_getc)(f)) == EOF) {
2883 (*oconv)(0, ESC); don't send bogus code
2884 (*oconv)(0, '$'); */
2887 if (('E' <= c1 && c1 <= 'G') ||
2888 ('O' <= c1 && c1 <= 'Q')) {
2896 static const int jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2897 c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SPACE + 0xE000 + CLASS_UNICODE;
2898 while ((c1 = (*i_getc)(f)) != EOF) {
2899 if (SPACE <= c1 && c1 <= 'z') {
2900 (*oconv)(0, c1 + c0);
2901 } else break; /* c1 == SO */
2905 if (c1 == EOF) LAST;
2912 } else if (c1 == NL || c1 == CR) {
2914 input_mode = ASCII; set_iconv(FALSE, 0);
2916 } else if (mime_decode_f && !mime_decode_mode){
2918 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2926 } else { /* if (c1 == CR)*/
2927 if ((c1=(*i_getc)(f))!=EOF) {
2931 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2946 if (prev_cr && c1 == NL) crmode_f = CRLF;
2949 } else if (c1 == DEL && input_mode == X0208 ) {
2959 switch ((*iconv)(c2, c1, c0)) { /* can be EUC / SJIS / UTF-8 / UTF-16 */
2962 if ((c0 = (*i_getc)(f)) != EOF) {
2965 if ((c3 = (*i_getc)(f)) != EOF) {
2967 (*iconv)(c2, c1, c0|c3);
2972 /* 3 bytes EUC or UTF-8 */
2973 if ((c0 = (*i_getc)(f)) != EOF) {
2975 (*iconv)(c2, c1, c0);
2983 0x7F <= c2 && c2 <= 0x92 &&
2984 0x21 <= c1 && c1 <= 0x7E) {
2986 if(c1 == 0x7F) return 0;
2987 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2990 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2994 (*oconv)(PREFIX_EUCG3 | c2, c1);
2996 #endif /* X0212_ENABLE */
2998 (*oconv)(PREFIX_EUCG3 | c2, c1);
3001 (*oconv)(input_mode, c1); /* other special case */
3007 /* goto next_word */
3011 (*iconv)(EOF, 0, 0);
3012 if (!is_inputcode_set)
3015 struct input_code *p = input_code_list;
3016 struct input_code *result = p;
3018 if (p->score < result->score) result = p;
3021 set_input_codename(result->name);
/*
 * Buffered detection for an ambiguous 8-bit sequence that starts with
 * the pair (c2, c1).
 *
 * Phase 1: further bytes are read with i_getc and stored through
 * push_hold_buf() until EOF, until the encoding becomes established
 * (estab_f), or until the hold buffer is full.  Among the
 * input_code_list entries that have a status_func, the one with the
 * smallest score is selected with set_iconv(TRUE, ...).
 *
 * Phase 2: the held bytes are replayed from hold_buf through iconv.
 * When a multi-byte character needs more bytes than the buffer still
 * holds, the missing bytes are read directly from f.
 */
3028 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3030 nkf_char ret, c3, c0;
3034 /** it must NOT be in the kanji shifted sequence */
3035 /** it must NOT be written in JIS7 */
3036 /** and it must be after 2 byte 8bit code */
3042 while ((c1 = (*i_getc)(f)) != EOF) {
3048 if (push_hold_buf(c1) == EOF || estab_f){
3054 struct input_code *p = input_code_list;
3055 struct input_code *result = p;
3060 if (p->status_func && p->score < result->score){
3065 set_iconv(TRUE, result->iconv_func);
3070 ** 1) EOF is detected, or
3071 ** 2) Code is established, or
3072 ** 3) Buffer is FULL (but last word is pushed)
3074 ** in 1) and 3) cases, we continue to use
3075 ** Kanji codes by oconv and leave estab_f unchanged.
3080 while (hold_index < hold_count){
3081 c2 = hold_buf[hold_index++];
3083 #ifdef NUMCHAR_OPTION
3084 || is_unicode_capsule(c2)
3089 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3090 (*iconv)(X0201, c2, 0);
3093 if (hold_index < hold_count){
3094 c1 = hold_buf[hold_index++];
3104 switch ((*iconv)(c2, c1, 0)) { /* can be EUC/SJIS/UTF-8 */
/* next byte comes from the hold buffer if available, else from f */
3107 if (hold_index < hold_count){
3108 c0 = hold_buf[hold_index++];
3109 } else if ((c0 = (*i_getc)(f)) == EOF) {
3115 if (hold_index < hold_count){
3116 c3 = hold_buf[hold_index++];
3117 } else if ((c3 = (*i_getc)(f)) == EOF) {
3122 (*iconv)(c2, c1, c0|c3);
3127 /* 3 bytes EUC or UTF-8 */
3128 if (hold_index < hold_count){
3129 c0 = hold_buf[hold_index++];
3130 } else if ((c0 = (*i_getc)(f)) == EOF) {
3136 (*iconv)(c2, c1, c0);
3139 if (c0 == EOF) break;
/*
 * Append one byte to the hold buffer used by h_conv().
 * c2 is truncated to unsigned char before it is stored.  The buffer
 * capacity is HOLD_SIZE*2 entries.
 * Returns EOF when the buffer is full after the store, otherwise the new
 * hold_count.
 * NOTE(review): a call made when the buffer is already full is detected
 * first; presumably it returns EOF without storing -- confirm.
 */
3144 nkf_char push_hold_buf(nkf_char c2)
3146 if (hold_count >= HOLD_SIZE*2)
3148 hold_buf[hold_count++] = (unsigned char)c2;
3149 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
/*
 * Convert a Shift_JIS byte pair (c2 = first byte, c1 = second byte) to
 * the internal EUC-style pair.
 * NOTE(review): presumably the result is written through p2/p1 and 0 is
 * returned on success (s_status and s_iconv test the return value
 * against 0) -- confirm against the full definition.
 *
 * Visible steps:
 *   - with SHIFTJIS_CP932, IBM extension rows are remapped through the
 *     shiftjis_cp932 table (unless cp932inv_f), and the inverse cp932inv
 *     table is consulted for rows in CP932INV_TABLE_BEGIN..END;
 *   - without x0213_f, IBM extension rows are mapped through
 *     shiftjis_x0212 and tagged with PREFIX_EUCG3;
 *   - with x0213_f and c2 >= 0xF0, the row is computed for the
 *     PREFIX_EUCG3 plane;
 *   - otherwise the usual arithmetic is applied: c2 is doubled and
 *     offset, incremented when c1 > 0x9E, and c1 is rebased.
 */
3152 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3154 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3157 static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3158 #ifdef SHIFTJIS_CP932
3159 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3160 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3167 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3168 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3174 #endif /* SHIFTJIS_CP932 */
3176 if (!x0213_f && is_ibmext_in_sjis(c2)){
3177 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3180 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3193 if(x0213_f && c2 >= 0xF0){
3194 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
/* table is indexed by lead byte and by whether c1 falls in the upper half (c1 > 0x9E) */
3195 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3196 }else{ /* 78<=k<=94 */
3197 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3198 if (0x9E < c1) c2++;
3201 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3202 if (0x9E < c1) c2++;
3205 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
3212 c2 = x0212_unshift(c2);
3219 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3223 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3225 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3227 if(c1 == 0x7F) return 0;
3228 c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3231 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3232 if (ret) return ret;
3238 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3243 }else if (c2 == 0x8f){
3247 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3248 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3249 c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3252 c2 = (c2 << 8) | (c1 & 0x7f);
3254 #ifdef SHIFTJIS_CP932
3257 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3258 s2e_conv(s2, s1, &c2, &c1);
3265 #endif /* SHIFTJIS_CP932 */
3267 #endif /* X0212_ENABLE */
3268 } else if (c2 == SSO){
3271 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3274 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3275 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3276 c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3281 #ifdef SHIFTJIS_CP932
3282 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3284 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3285 s2e_conv(s2, s1, &c2, &c1);
3292 #endif /* SHIFTJIS_CP932 */
3299 #ifdef UTF8_INPUT_ENABLE
3300 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3307 }else if (0xc0 <= c2 && c2 <= 0xef) {
3308 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
3309 #ifdef NUMCHAR_OPTION
3312 if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3320 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3323 static const int w_iconv_utf8_1st_byte[] =
3325 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3326 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3327 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3328 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3330 if (c2 < 0 || 0xff < c2) {
3331 }else if (c2 == 0) { /* 0 : 1 byte*/
3333 } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3336 switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3338 if (c1 < 0x80 || 0xBF < c1) return 0;
3341 if (c0 == 0) return -1;
3342 if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3347 if (c0 == 0) return -1;
3348 if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3352 if (c0 == 0) return -1;
3353 if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3357 if (c0 == 0) return -2;
3358 if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3362 if (c0 == 0) return -2;
3363 if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3367 if (c0 == 0) return -2;
3368 if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3376 if (c2 == 0 || c2 == EOF){
3377 } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3378 c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3381 ret = w2e_conv(c2, c1, c0, &c2, &c1);
3390 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/*
 * w16w_conv: split the code point val into UTF-8 bytes, written through
 * p2 (lead byte), p1 (second byte) and p0 (remaining byte or bytes).
 * In the 4-byte range p0 carries the third and fourth bytes packed as
 * (third << 8) | fourth.
 * NOTE(review): only part of this function is visible in this excerpt.
 */
3391 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
/* 2-byte sequence: 110xxxxx 10xxxxxx */
3398 }else if (val < 0x800){
3399 *p2 = 0xc0 | (val >> 6);
3400 *p1 = 0x80 | (val & 0x3f);
/* 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx */
3402 } else if (val <= NKF_INT32_C(0xFFFF)) {
3403 *p2 = 0xe0 | (val >> 12);
3404 *p1 = 0x80 | ((val >> 6) & 0x3f);
3405 *p0 = 0x80 | (val & 0x3f);
/* 4-byte sequence */
3406 } else if (val <= NKF_INT32_C(0x10FFFF)) {
/*
 * NOTE(review): a 4-byte UTF-8 lead byte is 0xF0 | (val >> 18); that is
 * what w_oconv emits and what ww16_conv decodes ((c2 & 0x0f) << 18).
 * The 0xe0 | (val >> 16) below looks like a bug -- confirm and fix.
 */
3407 *p2 = 0xe0 | (val >> 16);
3408 *p1 = 0x80 | ((val >> 12) & 0x3f);
/* third byte goes in bits 8..15, fourth byte in bits 0..7 */
3409 *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3418 #ifdef UTF8_INPUT_ENABLE
/*
 * ww16_conv: rebuild a code point in val from UTF-8 bytes, selecting the
 * layout from the lead byte c2.
 * NOTE(review): the first branch, the low-bit merges and the return are
 * not visible in this excerpt.
 */
3419 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3424 } else if (c2 >= 0xf0){
3425 /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3426 val = (c2 & 0x0f) << 18;
3427 val |= (c1 & 0x3f) << 12;
/* the third byte sits in bits 8..15 of c0; move its 6 payload bits to 6..11 */
3428 val |= (c0 & 0x3f00) >> 2;
/* 3-byte lead: 4 payload bits from c2, 6 from c1 */
3430 }else if (c2 >= 0xe0){
3431 val = (c2 & 0x0f) << 12;
3432 val |= (c1 & 0x3f) << 6;
/* 2-byte lead: 5 payload bits from c2 */
3434 }else if (c2 >= 0xc0){
3435 val = (c2 & 0x1f) << 6;
3443 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3445 nkf_char c2, c1, c0;
3452 w16w_conv(val, &c2, &c1, &c0);
3453 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
3454 #ifdef NUMCHAR_OPTION
3457 *p1 = CLASS_UNICODE | val;
3466 #ifdef UTF8_INPUT_ENABLE
3467 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3470 if ((c2==0 && c1 < 0x80) || c2==EOF) {
3473 }else if (0xD8 <= c2 && c2 <= 0xDB) {
3474 if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3476 c1 = CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3478 }else if ((c2>>3) == 27) { /* unpaired surrogate */
3483 }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3484 if (ret) return ret;
3489 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3493 if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3494 } else if (is_unicode_bmp(c1)) {
3495 ret = w16e_conv(c1, &c2, &c1);
3498 c1 = CLASS_UNICODE | c1;
3500 if (ret) return ret;
3505 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3507 const unsigned short *const *pp;
3508 const unsigned short *const *const *ppp;
3509 static const int no_best_fit_chars_table_C2[] =
3510 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3511 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3512 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3513 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3514 static const int no_best_fit_chars_table_C2_ms[] =
3515 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3516 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3517 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3518 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3519 static const int no_best_fit_chars_table_932_C2[] =
3520 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3521 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3522 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3523 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3524 static const int no_best_fit_chars_table_932_C3[] =
3525 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3526 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3527 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3528 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3534 }else if(c2 < 0xe0){
3535 if(no_best_fit_chars_f){
3536 if(ms_ucs_map_f == UCS_MAP_CP932){
3539 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3542 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3545 }else if(!cp932inv_f){
3548 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3551 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3554 }else if(ms_ucs_map_f == UCS_MAP_MS){
3555 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3556 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3574 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3575 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3576 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3578 ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3579 }else if(c0 < 0xF0){
3580 if(no_best_fit_chars_f){
3581 if(ms_ucs_map_f == UCS_MAP_CP932){
3582 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3583 }else if(ms_ucs_map_f == UCS_MAP_MS){
3588 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3591 if(c0 == 0x92) return 1;
3596 if(c1 == 0x80 || c0 == 0x9C) return 1;
3599 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3604 if(c0 == 0x94) return 1;
3607 if(c0 == 0xBB) return 1;
3617 if(c0 == 0x95) return 1;
3620 if(c0 == 0xA5) return 1;
3627 if(c0 == 0x8D) return 1;
3630 if(c0 == 0x9E && !cp932inv_f) return 1;
3633 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3641 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3642 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3643 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3645 ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3647 #ifdef SHIFTJIS_CP932
3648 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3650 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3651 s2e_conv(s2, s1, p2, p1);
3660 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3663 const unsigned short *p;
3666 if (pp == 0) return 1;
3669 if (c1 < 0 || psize <= c1) return 1;
3671 if (p == 0) return 1;
3674 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3676 if (val == 0) return 1;
3677 if (no_cp932ext_f && (
3678 (val>>8) == 0x2D || /* NEC special characters */
3679 val > NKF_INT32_C(0xF300) /* IBM extended characters */
3687 if (c2 == SO) c2 = X0201;
3694 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3696 const char *hex = "0123456789ABCDEF";
3702 (*f)(0, hex[(c>>shift)&0xF]);
3712 void encode_fallback_html(nkf_char c)
3717 if(c >= NKF_INT32_C(1000000))
3718 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3719 if(c >= NKF_INT32_C(100000))
3720 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3722 (*oconv)(0, 0x30+(c/10000 )%10);
3724 (*oconv)(0, 0x30+(c/1000 )%10);
3726 (*oconv)(0, 0x30+(c/100 )%10);
3728 (*oconv)(0, 0x30+(c/10 )%10);
3730 (*oconv)(0, 0x30+ c %10);
3735 void encode_fallback_xml(nkf_char c)
3740 nkf_each_char_to_hex(oconv, c);
3745 void encode_fallback_java(nkf_char c)
3747 const char *hex = "0123456789ABCDEF";
3750 if(!is_unicode_bmp(c)){
3754 (*oconv)(0, hex[(c>>20)&0xF]);
3755 (*oconv)(0, hex[(c>>16)&0xF]);
3759 (*oconv)(0, hex[(c>>12)&0xF]);
3760 (*oconv)(0, hex[(c>> 8)&0xF]);
3761 (*oconv)(0, hex[(c>> 4)&0xF]);
3762 (*oconv)(0, hex[ c &0xF]);
3766 void encode_fallback_perl(nkf_char c)
3771 nkf_each_char_to_hex(oconv, c);
/*
 * encode_fallback_subchar: fallback that discards the incoming character
 * and emits unicode_subchar through oconv instead.
 */
3776 void encode_fallback_subchar(nkf_char c)
/* the argument is overwritten; only the substitute character is output */
3778 c = unicode_subchar;
/* pass bits 8..15 as the first oconv argument and bits 0..7 as the second */
3779 (*oconv)((c>>8)&0xFF, c&0xFF);
3784 #ifdef UTF8_OUTPUT_ENABLE
3785 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3787 const unsigned short *p;
3790 if (ms_ucs_map_f == UCS_MAP_CP10001) {
3798 p = euc_to_utf8_1byte;
3800 } else if (is_eucg3(c2)){
3801 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3804 c2 = (c2&0x7f) - 0x21;
3805 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3806 p = x0212_to_utf8_2bytes[c2];
3812 c2 = (c2&0x7f) - 0x21;
3813 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3815 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3816 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3817 euc_to_utf8_2bytes_ms[c2];
3822 c1 = (c1 & 0x7f) - 0x21;
3823 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3828 void w_oconv(nkf_char c2, nkf_char c1)
3834 output_bom_f = FALSE;
3845 #ifdef NUMCHAR_OPTION
3846 if (c2 == 0 && is_unicode_capsule(c1)){
3847 val = c1 & VALUE_MASK;
3850 }else if (val < 0x800){
3851 (*o_putc)(0xC0 | (val >> 6));
3852 (*o_putc)(0x80 | (val & 0x3f));
3853 } else if (val <= NKF_INT32_C(0xFFFF)) {
3854 (*o_putc)(0xE0 | (val >> 12));
3855 (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3856 (*o_putc)(0x80 | (val & 0x3f));
3857 } else if (val <= NKF_INT32_C(0x10FFFF)) {
3858 (*o_putc)(0xF0 | ( val>>18));
3859 (*o_putc)(0x80 | ((val>>12) & 0x3f));
3860 (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3861 (*o_putc)(0x80 | ( val & 0x3f));
3868 output_mode = ASCII;
3870 } else if (c2 == ISO8859_1) {
3871 output_mode = ISO8859_1;
3872 (*o_putc)(c1 | 0x080);
3875 val = e2w_conv(c2, c1);
3877 w16w_conv(val, &c2, &c1, &c0);
3881 if (c0) (*o_putc)(c0);
3887 void w_oconv16(nkf_char c2, nkf_char c1)
3890 output_bom_f = FALSE;
3891 if (output_endian == ENDIAN_LITTLE){
3892 (*o_putc)((unsigned char)'\377');
3896 (*o_putc)((unsigned char)'\377');
3905 if (c2 == ISO8859_1) {
3908 #ifdef NUMCHAR_OPTION
3909 } else if (c2 == 0 && is_unicode_capsule(c1)) {
3910 if (is_unicode_bmp(c1)) {
3911 c2 = (c1 >> 8) & 0xff;
3915 if (c1 <= UNICODE_MAX) {
3916 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
3917 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3918 if (output_endian == ENDIAN_LITTLE){
3919 (*o_putc)(c2 & 0xff);
3920 (*o_putc)((c2 >> 8) & 0xff);
3921 (*o_putc)(c1 & 0xff);
3922 (*o_putc)((c1 >> 8) & 0xff);
3924 (*o_putc)((c2 >> 8) & 0xff);
3925 (*o_putc)(c2 & 0xff);
3926 (*o_putc)((c1 >> 8) & 0xff);
3927 (*o_putc)(c1 & 0xff);
3934 nkf_char val = e2w_conv(c2, c1);
3935 c2 = (val >> 8) & 0xff;
3939 if (output_endian == ENDIAN_LITTLE){
3948 void w_oconv32(nkf_char c2, nkf_char c1)
3951 output_bom_f = FALSE;
3952 if (output_endian == ENDIAN_LITTLE){
3953 (*o_putc)((unsigned char)'\377');
3961 (*o_putc)((unsigned char)'\377');
3970 if (c2 == ISO8859_1) {
3972 #ifdef NUMCHAR_OPTION
3973 } else if (c2 == 0 && is_unicode_capsule(c1)) {
3977 c1 = e2w_conv(c2, c1);
3980 if (output_endian == ENDIAN_LITTLE){
3981 (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3982 (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >> 8);
3983 (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3987 (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3988 (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >> 8);
3989 (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3994 void e_oconv(nkf_char c2, nkf_char c1)
3996 #ifdef NUMCHAR_OPTION
3997 if (c2 == 0 && is_unicode_capsule(c1)){
3998 w16e_conv(c1, &c2, &c1);
3999 if (c2 == 0 && is_unicode_capsule(c1)){
4000 c2 = c1 & VALUE_MASK;
4001 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
4005 c2 += c2 < 10 ? 0x75 : 0x8FEB;
4006 c1 = 0x21 + c1 % 94;
4009 (*o_putc)((c2 & 0x7f) | 0x080);
4010 (*o_putc)(c1 | 0x080);
4012 (*o_putc)((c2 & 0x7f) | 0x080);
4013 (*o_putc)(c1 | 0x080);
4017 if (encode_fallback) (*encode_fallback)(c1);
4026 } else if (c2 == 0) {
4027 output_mode = ASCII;
4029 } else if (c2 == X0201) {
4030 output_mode = JAPANESE_EUC;
4031 (*o_putc)(SSO); (*o_putc)(c1|0x80);
4032 } else if (c2 == ISO8859_1) {
4033 output_mode = ISO8859_1;
4034 (*o_putc)(c1 | 0x080);
4036 } else if (is_eucg3(c2)){
4037 output_mode = JAPANESE_EUC;
4038 #ifdef SHIFTJIS_CP932
4041 if (e2s_conv(c2, c1, &s2, &s1) == 0){
4042 s2e_conv(s2, s1, &c2, &c1);
4047 output_mode = ASCII;
4049 }else if (is_eucg3(c2)){
4052 (*o_putc)((c2 & 0x7f) | 0x080);
4053 (*o_putc)(c1 | 0x080);
4056 (*o_putc)((c2 & 0x7f) | 0x080);
4057 (*o_putc)(c1 | 0x080);
4061 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4062 set_iconv(FALSE, 0);
4063 return; /* too late to rescue this char */
4065 output_mode = JAPANESE_EUC;
4066 (*o_putc)(c2 | 0x080);
4067 (*o_putc)(c1 | 0x080);
4072 nkf_char x0212_shift(nkf_char c)
4077 if (0x75 <= c && c <= 0x7f){
4078 ret = c + (0x109 - 0x75);
4081 if (0x75 <= c && c <= 0x7f){
4082 ret = c + (0x113 - 0x75);
4089 nkf_char x0212_unshift(nkf_char c)
4092 if (0x7f <= c && c <= 0x88){
4093 ret = c + (0x75 - 0x7f);
4094 }else if (0x89 <= c && c <= 0x92){
4095 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4099 #endif /* X0212_ENABLE */
4101 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4107 if((0x21 <= ndx && ndx <= 0x2F)){
4108 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4109 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4111 }else if(0x6E <= ndx && ndx <= 0x7E){
4112 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4113 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4119 else if(nkf_isgraph(ndx)){
4121 const unsigned short *ptr;
4122 ptr = x0212_shiftjis[ndx - 0x21];
4124 val = ptr[(c1 & 0x7f) - 0x21];
4133 c2 = x0212_shift(c2);
4135 #endif /* X0212_ENABLE */
4137 if(0x7F < c2) return 1;
4138 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4139 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4143 void s_oconv(nkf_char c2, nkf_char c1)
4145 #ifdef NUMCHAR_OPTION
4146 if (c2 == 0 && is_unicode_capsule(c1)){
4147 w16e_conv(c1, &c2, &c1);
4148 if (c2 == 0 && is_unicode_capsule(c1)){
4149 c2 = c1 & VALUE_MASK;
4150 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4153 c2 = c1 / 188 + 0xF0;
4155 c1 += 0x40 + (c1 > 0x3e);
4160 if(encode_fallback)(*encode_fallback)(c1);
4169 } else if (c2 == 0) {
4170 output_mode = ASCII;
4172 } else if (c2 == X0201) {
4173 output_mode = SHIFT_JIS;
4175 } else if (c2 == ISO8859_1) {
4176 output_mode = ISO8859_1;
4177 (*o_putc)(c1 | 0x080);
4179 } else if (is_eucg3(c2)){
4180 output_mode = SHIFT_JIS;
4181 if (e2s_conv(c2, c1, &c2, &c1) == 0){
4187 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4188 set_iconv(FALSE, 0);
4189 return; /* too late to rescue this char */
4191 output_mode = SHIFT_JIS;
4192 e2s_conv(c2, c1, &c2, &c1);
4194 #ifdef SHIFTJIS_CP932
4196 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4197 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
4203 #endif /* SHIFTJIS_CP932 */
4206 if (prefix_table[(unsigned char)c1]){
4207 (*o_putc)(prefix_table[(unsigned char)c1]);
4213 void j_oconv(nkf_char c2, nkf_char c1)
4215 #ifdef NUMCHAR_OPTION
4216 if (c2 == 0 && is_unicode_capsule(c1)){
4217 w16e_conv(c1, &c2, &c1);
4218 if (c2 == 0 && is_unicode_capsule(c1)){
4219 c2 = c1 & VALUE_MASK;
4220 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
4223 c2 = 0x7F + c1 / 94;
4224 c1 = 0x21 + c1 % 94;
4226 if (encode_fallback) (*encode_fallback)(c1);
4233 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
4236 (*o_putc)(ascii_intro);
4237 output_mode = ASCII;
4241 } else if (is_eucg3(c2)){
4243 if(output_mode!=X0213_2){
4244 output_mode = X0213_2;
4248 (*o_putc)(X0213_2&0x7F);
4251 if(output_mode!=X0212){
4252 output_mode = X0212;
4256 (*o_putc)(X0212&0x7F);
4259 (*o_putc)(c2 & 0x7f);
4262 } else if (c2==X0201) {
4263 if (output_mode!=X0201) {
4264 output_mode = X0201;
4270 } else if (c2==ISO8859_1) {
4271 /* iso8859 introduction, or 8th bit on */
4272 /* Can we convert in 7bit form using ESC-'-'-A ?
4274 output_mode = ISO8859_1;
4276 } else if (c2 == 0) {
4277 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
4280 (*o_putc)(ascii_intro);
4281 output_mode = ASCII;
4286 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
4287 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
4289 if (output_mode!=X0213_1) {
4290 output_mode = X0213_1;
4294 (*o_putc)(X0213_1&0x7F);
4296 }else if (output_mode != X0208) {
4297 output_mode = X0208;
4300 (*o_putc)(kanji_intro);
/*
 * base64_conv: output filter stage that calls mime_prechar for the
 * character pair and then forwards the same pair to o_base64conv.
 */
4307 void base64_conv(nkf_char c2, nkf_char c1)
4309 mime_prechar(c2, c1);
4310 (*o_base64conv)(c2,c1);
4314 static nkf_char broken_buf[3];
4315 static int broken_counter = 0;
4316 static int broken_last = 0;
4317 nkf_char broken_getc(FILE *f)
4321 if (broken_counter>0) {
4322 return broken_buf[--broken_counter];
4325 if (c=='$' && broken_last != ESC
4326 && (input_mode==ASCII || input_mode==X0201)) {
4329 if (c1=='@'|| c1=='B') {
4330 broken_buf[0]=c1; broken_buf[1]=c;
4337 } else if (c=='(' && broken_last != ESC
4338 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
4341 if (c1=='J'|| c1=='B') {
4342 broken_buf[0]=c1; broken_buf[1]=c;
/*
 * broken_ungetc: push c back onto broken_buf, the same buffer broken_getc
 * pops from. The parameter f is not used in the visible lines.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
4355 nkf_char broken_ungetc(nkf_char c, FILE *f)
/* at most two characters are kept; a push beyond that is dropped here */
4357 if (broken_counter<2)
4358 broken_buf[broken_counter++]=c;
4362 void cr_conv(nkf_char c2, nkf_char c1)
4366 if (! (c2==0&&c1==NL) ) {
4372 } else if (c1=='\r') {
4374 } else if (c1=='\n') {
4375 if (crmode_f==CRLF) {
4376 (*o_crconv)(0,'\r');
4377 } else if (crmode_f==CR) {
4378 (*o_crconv)(0,'\r');
4382 } else if (c1!='\032' || crmode_f!=NL){
4388 Return value of fold_conv()
4390 \n add newline and output char
4391 \r add newline and output nothing
4394 1 (or else) normal output
4396 fold state in prev (previous character)
4398 >0x80 Japanese (X0208/X0201)
4403 This fold algorithm does not preserve leading spaces in a line.
4404 This is the main difference from fmt.
/*
 * char_size: column width used by fold_conv -- 2 when c2 is non-zero
 * (a two-byte character), 1 otherwise. c1 is accepted for call-site
 * symmetry but not used. The argument is parenthesized so that an
 * expression argument (for example a conditional) is evaluated as a whole.
 */
#define char_size(c2,c1) ((c2) ? 2 : 1)
4409 void fold_conv(nkf_char c2, nkf_char c1)
4412 nkf_char fold_state;
4414 if (c1== '\r' && !fold_preserve_f) {
4415 fold_state=0; /* ignore cr */
4416 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
4418 fold_state=0; /* ignore cr */
4419 } else if (c1== BS) {
4420 if (f_line>0) f_line--;
4422 } else if (c2==EOF && f_line != 0) { /* close open last line */
4424 } else if ((c1=='\n' && !fold_preserve_f)
4425 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
4426 && fold_preserve_f)) {
4428 if (fold_preserve_f) {
4432 } else if ((f_prev == c1 && !fold_preserve_f)
4433 || (f_prev == '\n' && fold_preserve_f)
4434 ) { /* duplicate newline */
4437 fold_state = '\n'; /* output two newline */
4443 if (f_prev&0x80) { /* Japanese? */
4445 fold_state = 0; /* ignore given single newline */
4446 } else if (f_prev==' ') {
4450 if (++f_line<=fold_len)
4454 fold_state = '\r'; /* fold and output nothing */
4458 } else if (c1=='\f') {
4461 fold_state = '\n'; /* output newline and clear */
4462 } else if ( (c2==0 && c1==' ')||
4463 (c2==0 && c1=='\t')||
4464 (c2=='!'&& c1=='!')) {
4465 /* X0208 kankaku or ascii space */
4466 if (f_prev == ' ') {
4467 fold_state = 0; /* remove duplicate spaces */
4470 if (++f_line<=fold_len)
4471 fold_state = ' '; /* output ASCII space only */
4473 f_prev = ' '; f_line = 0;
4474 fold_state = '\r'; /* fold and output nothing */
4478 prev0 = f_prev; /* we still need this one... , but almost done */
4480 if (c2 || c2==X0201)
4481 f_prev |= 0x80; /* this is Japanese */
4482 f_line += char_size(c2,c1);
4483 if (f_line<=fold_len) { /* normal case */
4486 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
4487 f_line = char_size(c2,c1);
4488 fold_state = '\n'; /* We can't wait, do fold now */
4489 } else if (c2==X0201) {
4490 /* simple kinsoku rules return 1 means no folding */
4491 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
4492 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
4493 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
4494 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
4495 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
4496 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
4497 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
4499 fold_state = '\n';/* add one new f_line before this character */
4502 fold_state = '\n';/* add one new f_line before this character */
4505 /* kinsoku point in ASCII */
4506 if ( c1==')'|| /* { [ ( */
4517 /* just after special */
4518 } else if (!is_alnum(prev0)) {
4519 f_line = char_size(c2,c1);
4521 } else if ((prev0==' ') || /* ignored new f_line */
4522 (prev0=='\n')|| /* ignored new f_line */
4523 (prev0&0x80)) { /* X0208 - ASCII */
4524 f_line = char_size(c2,c1);
4525 fold_state = '\n';/* add one new f_line before this character */
4527 fold_state = 1; /* default no fold in ASCII */
4531 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
4532 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
4533 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
4534 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
4535 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
4536 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
4537 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
4538 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
4539 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
4540 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
4541 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
4542 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
4543 /* default no fold in kinsoku */
4546 f_line = char_size(c2,c1);
4547 /* add one new f_line before this character */
4550 f_line = char_size(c2,c1);
4552 /* add one new f_line before this character */
4557 /* terminator process */
4558 switch(fold_state) {
4577 nkf_char z_prev2=0,z_prev1=0;
/*
 * z_conv: width-conversion output stage. Depending on x0201_f and the
 * alpha_f bits it rewrites the incoming pair (c2, c1) and forwards the
 * result to o_zconv. A pending JIS X 0201 kana is kept in z_prev2/z_prev1
 * so that a following voiced or semi-voiced mark can be combined with it
 * through the dv / ev tables (cv is used when no mark follows).
 * NOTE(review): many lines of this function are not visible in this
 * excerpt; the comments below describe the visible lines only.
 */
4579 void z_conv(nkf_char c2, nkf_char c1)
4582 /* if (c2) c1 &= 0x7f; assertion */
4584 if (c2 == X0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
4589 if (x0201_f && z_prev2==X0201) { /* X0201 */
4590 if (c1==(0xde&0x7f)) { /* dakuten (voiced sound mark) */
4592 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
4594 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /* handakuten (semi-voiced sound mark) */
4596 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
4600 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
4609 if (x0201_f && c2==X0201) {
4610 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
4611 /* wait for a following dakuten or handakuten */
4612 z_prev1 = c1; z_prev2 = c2;
4615 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
4620 if (alpha_f&1 && c2 == 0x23 ) {
4621 /* JISX0208 Alphabet */
4623 } else if (c2 == 0x21) {
4624 /* JISX0208 symbols (kigou) */
4629 } else if (alpha_f&4) {
4634 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4640 if (alpha_f&8 && c2 == 0) {
/*
 * NOTE(review): the four entity strings below look HTML-unescaped in this
 * copy. As written, the greater-than, less-than and ampersand cases map a
 * character to itself and the double-quote case is not valid C. The
 * intended literals are presumably the entities &gt; &lt; &quot; &amp;
 * -- confirm against a clean copy of the file and restore them.
 */
4644 case '>': entity = ">"; break;
4645 case '<': entity = "<"; break;
4646 case '\"': entity = """; break;
4647 case '&': entity = "&"; break;
4650 while (*entity) (*o_zconv)(0, *entity++);
4656 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
4661 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
4665 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
4669 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
4673 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
4677 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
4681 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
4685 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
4689 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
4694 (*o_zconv)(X0201, c);
4697 } else if (c2 == 0x25) {
4698 /* JISX0208 Katakana */
4699 static const int fullwidth_to_halfwidth[] =
4701 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4702 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4703 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4704 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4705 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4706 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4707 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4708 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4709 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4710 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4711 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
4712 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
4714 if (fullwidth_to_halfwidth[c1-0x20]){
4715 c2 = fullwidth_to_halfwidth[c1-0x20];
4716 (*o_zconv)(X0201, c2>>8);
4718 (*o_zconv)(X0201, c2&0xFF);
4728 #define rot13(c) ( \
4730 (c <= 'M') ? (c + 13): \
4731 (c <= 'Z') ? (c - 13): \
4733 (c <= 'm') ? (c + 13): \
4734 (c <= 'z') ? (c - 13): \
4738 #define rot47(c) ( \
4740 ( c <= 'O' ) ? (c + 47) : \
4741 ( c <= '~' ) ? (c - 47) : \
4745 void rot_conv(nkf_char c2, nkf_char c1)
4747 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
4753 (*o_rot_conv)(c2,c1);
4756 void hira_conv(nkf_char c2, nkf_char c1)
4760 if (0x20 < c1 && c1 < 0x74) {
4762 (*o_hira_conv)(c2,c1);
4764 } else if (c1 == 0x74 && (output_conv == w_oconv || output_conv == w_oconv16)) {
4766 c1 = CLASS_UNICODE | 0x3094;
4767 (*o_hira_conv)(c2,c1);
4770 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4772 (*o_hira_conv)(c2,c1);
4777 if (c2 == 0 && c1 == (CLASS_UNICODE | 0x3094)) {
4780 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4782 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4786 (*o_hira_conv)(c2,c1);
4790 void iso2022jp_check_conv(nkf_char c2, nkf_char c1)
4792 static const nkf_char range[RANGE_NUM_MAX][2] = {
4813 nkf_char start, end, c;
4815 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4819 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4824 for (i = 0; i < RANGE_NUM_MAX; i++) {
4825 start = range[i][0];
4828 if (c >= start && c <= end) {
4833 (*o_iso2022jp_check_conv)(c2,c1);
4837 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
4839 const unsigned char *mime_pattern[] = {
4840 (const unsigned char *)"\075?EUC-JP?B?",
4841 (const unsigned char *)"\075?SHIFT_JIS?B?",
4842 (const unsigned char *)"\075?ISO-8859-1?Q?",
4843 (const unsigned char *)"\075?ISO-8859-1?B?",
4844 (const unsigned char *)"\075?ISO-2022-JP?B?",
4845 (const unsigned char *)"\075?ISO-2022-JP?Q?",
4846 #if defined(UTF8_INPUT_ENABLE)
4847 (const unsigned char *)"\075?UTF-8?B?",
4848 (const unsigned char *)"\075?UTF-8?Q?",
4850 (const unsigned char *)"\075?US-ASCII?Q?",
4855 /* Marker used to raise the priority of the matching input code */
4856 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
4857 e_iconv, s_iconv, 0, 0, 0, 0,
4858 #if defined(UTF8_INPUT_ENABLE)
4864 const nkf_char mime_encode[] = {
4865 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
4866 #if defined(UTF8_INPUT_ENABLE)
4873 const nkf_char mime_encode_method[] = {
4874 'B', 'B','Q', 'B', 'B', 'Q',
4875 #if defined(UTF8_INPUT_ENABLE)
4883 #define MAXRECOVER 20
4885 void switch_mime_getc(void)
4887 if (i_getc!=mime_getc) {
4888 i_mgetc = i_getc; i_getc = mime_getc;
4889 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
4890 if(mime_f==STRICT_MIME) {
4891 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4892 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
4897 void unswitch_mime_getc(void)
4899 if(mime_f==STRICT_MIME) {
4900 i_mgetc = i_mgetc_buf;
4901 i_mungetc = i_mungetc_buf;
4904 i_ungetc = i_mungetc;
4905 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
4906 mime_iconv_back = NULL;
4909 nkf_char mime_begin_strict(FILE *f)
4913 const unsigned char *p,*q;
4914 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4916 mime_decode_mode = FALSE;
4917 /* =? has been checked */
4919 p = mime_pattern[j];
4922 for(i=2;p[i]>' ';i++) { /* start at =? */
4923 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
4924 /* pattern fails, try next one */
4926 while (mime_pattern[++j]) {
4927 p = mime_pattern[j];
4928 for(k=2;k<i;k++) /* assume length(p) > i */
4929 if (p[k]!=q[k]) break;
4930 if (k==i && nkf_toupper(c1)==p[k]) break;
4932 p = mime_pattern[j];
4933 if (p) continue; /* found next one, continue */
4934 /* all fails, output from recovery buffer */
4942 mime_decode_mode = p[i-2];
4944 mime_iconv_back = iconv;
4945 set_iconv(FALSE, mime_priority_func[j]);
4946 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4948 if (mime_decode_mode=='B') {
4949 mimebuf_f = unbuf_f;
4951 /* do MIME integrity check */
4952 return mime_integrity(f,mime_pattern[j]);
/*
 * mime_getc_buf: return the next input character, read through
 * i_mgetc_buf when mimebuf_f is set and otherwise taken from the Fifo at
 * mime_input (which is then advanced).
 */
4960 nkf_char mime_getc_buf(FILE *f)
4962 /* We do not keep an EOF marker in the Fifo, because it contains ?= as
4963 a terminator. It was checked in mime_integrity. */
4964 return ((mimebuf_f)?
4965 (*i_mgetc_buf)(f):Fifo(mime_input++));
4968 nkf_char mime_ungetc_buf(nkf_char c, FILE *f)
4971 (*i_mungetc_buf)(c,f);
4973 Fifo(--mime_input) = (unsigned char)c;
/*
 * mime_begin: non-strict handling of a MIME encoded-word start. The
 * characters read after =? are appended to the Fifo at mime_last while the
 * encoding letter (B or Q) is looked for; mime_decode_mode is set to that
 * letter, or to 1 when the preamble turns out to be false so that input is
 * replayed from the buffer without decoding. Returns the last character
 * read (c1), which the caller uses only to detect EOF.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt; the description covers the visible lines only.
 */
4977 nkf_char mime_begin(FILE *f)
4982 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4983 /* re-read and convert again from mime_buffer. */
4985 /* =? has been checked */
4987 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
4988 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4989 /* We accept any character type even if it is broken up by new lines */
4990 c1 = (*i_getc)(f); Fifo(mime_last++) = (unsigned char)c1;
4991 if (c1=='\n'||c1==' '||c1=='\r'||
4992 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
4994 /* Failed. But this could be another MIME preamble */
5002 c1 = (*i_getc)(f); Fifo(mime_last++) = (unsigned char)c1;
5003 if (!(++i<MAXRECOVER) || c1==EOF) break;
5004 if (c1=='b'||c1=='B') {
5005 mime_decode_mode = 'B';
5006 } else if (c1=='q'||c1=='Q') {
5007 mime_decode_mode = 'Q';
5011 c1 = (*i_getc)(f); Fifo(mime_last++) = (unsigned char)c1;
5012 if (!(++i<MAXRECOVER) || c1==EOF) break;
5014 mime_decode_mode = FALSE;
5020 if (!mime_decode_mode) {
5021 /* false MIME preamble, restart from mime_buffer */
5022 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
5023 /* Since we are in MIME mode until buffer becomes empty, */
5024 /* we never go into mime_begin again for a while. */
5027 /* discard the MIME preamble, and go to MIME mode */
5029 /* no MIME integrity check is done here */
5030 return c1; /* used only for checking EOF */
/* Output routine that takes one character and returns nothing.
   The body is not visible in this listing (presumably it discards c --
   TODO confirm). */
5034 void no_putc(nkf_char c)
/* Write str followed by a newline to stderr.
   Any guard around the fprintf call is on lines not shown here. */
5039 void debug(const char *str)
5042 fprintf(stderr, "%s\n", str);
/* Record codename as the name of the detected input encoding.
   is_inputcode_mixed is set to TRUE when codename is non-empty and differs
   from the current input_codename; the first part of that condition is on
   a line not shown here. Afterwards input_codename is pointed at codename
   (the pointer is stored, the string is not copied) and is_inputcode_set
   is set to TRUE. */
5047 void set_input_codename(char *codename)
5051 strcmp(codename, "") != 0 &&
5052 strcmp(codename, input_codename) != 0)
5054 is_inputcode_mixed = TRUE;
5056 input_codename = codename;
5057 is_inputcode_set = TRUE;
5060 #if !defined(PERL_XS) && !defined(WIN32DLL)
/* Print the guessed input encoding to stdout.
   filename: when not NULL it is printed first, followed by a colon.
   The code name starts as BINARY and is replaced by input_codename when the
   input was not mixed; the branch taken for an empty input_codename is on
   lines not shown here. When crmode_f is CR, NL or CRLF the matching
   line-ending name (CR, LF, CRLF) is appended in parentheses. */
5061 void print_guessed_code(char *filename)
5063 char *codename = "BINARY";
5064 char *str_crmode = NULL;
5065 if (!is_inputcode_mixed) {
5066 if (strcmp(input_codename, "") == 0) {
5069 codename = input_codename;
5071 if (crmode_f == CR) str_crmode = "CR";
5072 else if (crmode_f == NL) str_crmode = "LF";
5073 else if (crmode_f == CRLF) str_crmode = "CRLF";
5075 if (filename != NULL) printf("%s:", filename);
5076 if (str_crmode != NULL) printf("%s (%s)\n", codename, str_crmode);
5077 else printf("%s\n", codename);
/* Shared helper for escaped-hex input (used by cap_getc and url_getc).
   ch: the escape introducer passed by the caller.
   g, u: the getc and ungetc routines to read through.
   The visible lines test c2 and c3 with nkf_isxdigit and, when both are hex
   digits, return the byte (hex2bin(c2) << 4) | hex2bin(c3). How c1..c3 are
   read and what is returned for non-hex input is on lines not shown here. */
5083 nkf_char hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
5085 nkf_char c1, c2, c3;
5091 if (!nkf_isxdigit(c2)){
5096 if (!nkf_isxdigit(c3)){
5101 return (hex2bin(c2) << 4) | hex2bin(c3);
/* getc for CAP-style input: delegates to hex_getc with a colon as the
   escape introducer, reading through i_cgetc and i_cungetc. */
5104 nkf_char cap_getc(FILE *f)
5106 return hex_getc(':', f, i_cgetc, i_cungetc);
/* ungetc counterpart of cap_getc: forwards c to i_cungetc and returns its
   result. */
5109 nkf_char cap_ungetc(nkf_char c, FILE *f)
5111 return (*i_cungetc)(c, f);
/* getc for URL-encoded input: delegates to hex_getc with a percent sign as
   the escape introducer, reading through i_ugetc and i_uungetc. */
5114 nkf_char url_getc(FILE *f)
5116 return hex_getc('%', f, i_ugetc, i_uungetc);
/* ungetc counterpart of url_getc: forwards c to i_uungetc and returns its
   result. */
5119 nkf_char url_ungetc(nkf_char c, FILE *f)
5121 return (*i_uungetc)(c, f);
5125 #ifdef NUMCHAR_OPTION
/* getc that converts a numeric character reference into one character.
   Input is read through i_ngetc, with i_nungetc available for push-back.
   After an x or X the digits are parsed as hexadecimal (the loop runs at
   most 7 times); otherwise they are parsed as decimal (at most 8 times).
   On success the value is returned tagged with CLASS_UNICODE. The handling
   of the introducer, the terminator and malformed input is on lines not
   shown here. */
5126 nkf_char numchar_getc(FILE *f)
5128 nkf_char (*g)(FILE *) = i_ngetc;
5129 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
5140 if (buf[i] == 'x' || buf[i] == 'X'){
5141 for (j = 0; j < 7; j++){
5143 if (!nkf_isxdigit(buf[i])){
5150 c |= hex2bin(buf[i]);
5153 for (j = 0; j < 8; j++){
5157 if (!nkf_isdigit(buf[i])){
5164 c += hex2bin(buf[i]);
5170 return CLASS_UNICODE | c;
/* ungetc counterpart of numchar_getc: forwards c to i_nungetc and returns
   its result. */
5179 nkf_char numchar_ungetc(nkf_char c, FILE *f)
5181 return (*i_nungetc)(c, f);
5185 #ifdef UNICODE_NORMALIZATION
5187 /* Normalization Form C.
   getc that reads through i_nfc_getc (push-back through i_nfc_ungetc) and
   looks the buffered bytes up in normalization_table. Each table entry
   pairs an nfd sequence with an nfc sequence. How buf is filled and what
   is finally returned is on lines not shown here. */
5188 nkf_char nfc_getc(FILE *f)
5190 nkf_char (*g)(FILE *f) = i_nfc_getc;
5191 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
5192 int i=0, j, k=1, lower, upper;
5194 const nkf_nfchar *array;
5197 while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
/* Binary search over normalization_table: compare the nfd sequence of the
   middle entry j with buf and narrow [lower, upper] on the first
   differing element. */
5198 lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
5199 while (upper >= lower) {
5200 j = (lower+upper) / 2;
5201 array = normalization_table[j].nfd;
5202 for (k=0; k < NORMALIZATION_TABLE_NFD_LENGTH && array[k]; k++){
5203 if (array[k] != buf[k]){
5204 array[k] < buf[k] ? (lower = j + 1) : (upper = j - 1);
/* Copy the nfc sequence of entry j over the start of buf. */
5211 array = normalization_table[j].nfc;
5212 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
5213 buf[i] = (nkf_char)(array[i]);
/* ungetc counterpart of nfc_getc: forwards c to i_nfc_ungetc and returns
   its result. */
5224 nkf_char nfc_ungetc(nkf_char c, FILE *f)
5226 return (*i_nfc_ungetc)(c, f);
5228 #endif /* UNICODE_NORMALIZATION */
/* Body of the MIME decoding getc. The signature line is not visible in this
   listing; presumably this is mime_getc, which mime_integrity refers to.
   Order of work in the visible lines:
   1. If the Fifo holds pending bytes (mime_top != mime_last), return one.
   2. If mime_decode_mode is 1 or FALSE, leave MIME mode and read raw input.
   3. In Q mode, decode one quoted-printable unit: underscore becomes a
      space outside FIXED_MIME, =XX becomes the byte built with hex2bin,
      and ?= ends the encoded word.
   4. In B mode, read four base64 characters, decode them into up to three
      bytes appended to the Fifo, and return the first one.
   After an encoded word ends, following white space is collected in
   lwsp_buf and pushed back through i_ungetc under the visible condition. */
5234 nkf_char c1, c2, c3, c4, cc;
5235 nkf_char t1, t2, t3, t4, mode, exit_mode;
5236 nkf_char lwsp_count;
5239 nkf_char lwsp_size = 128;
5241 if (mime_top != mime_last) { /* Something is in FIFO */
5242 return Fifo(mime_top++);
/* Mode 1 (replay only) or FALSE: the buffer is drained, so switch the MIME
   getc off and read straight from the underlying input. */
5244 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
5245 mime_decode_mode=FALSE;
5246 unswitch_mime_getc();
5247 return (*i_getc)(f);
5250 if (mimebuf_f == FIXED_MIME)
5251 exit_mode = mime_decode_mode;
/* Q (quoted-printable) decoding. */
5254 if (mime_decode_mode == 'Q') {
5255 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
5257 if (c1=='_' && mimebuf_f != FIXED_MIME) return ' ';
5258 if (c1<=' ' || DEL<=c1) {
5259 mime_decode_mode = exit_mode; /* prepare for quit */
5262 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
5266 mime_decode_mode = exit_mode; /* prepare for quit */
5267 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
5268 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
5269 /* end Q encoding */
5270 input_mode = exit_mode;
5272 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
5273 if (lwsp_buf==NULL) {
5274 perror("can't malloc");
5277 while ((c1=(*i_getc)(f))!=EOF) {
5282 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
5290 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
5291 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
5306 lwsp_buf[lwsp_count] = (unsigned char)c1;
5307 if (lwsp_count++>lwsp_size){
5309 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
5310 if (lwsp_buf_new==NULL) {
5312 perror("can't realloc");
5315 lwsp_buf = lwsp_buf_new;
5321 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SPACE && lwsp_buf[lwsp_count-1] != TAB))) {
5323 for(lwsp_count--;lwsp_count>0;lwsp_count--)
5324 i_ungetc(lwsp_buf[lwsp_count],f);
5330 if (c1=='='&&c2<' ') { /* this is soft wrap */
5331 while((c1 = (*i_mgetc)(f)) <=' ') {
5332 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
5334 mime_decode_mode = 'Q'; /* still in MIME */
5335 goto restart_mime_q;
5338 mime_decode_mode = 'Q'; /* still in MIME */
5342 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
5343 if (c2<=' ') return c2;
5344 mime_decode_mode = 'Q'; /* still in MIME */
5345 return ((hex2bin(c2)<<4) + hex2bin(c3));
5348 if (mime_decode_mode != 'B') {
5349 mime_decode_mode = FALSE;
5350 return (*i_mgetc)(f);
5354 /* Base64 encoding */
5356 MIME allows line break in the middle of
5357 Base64, but we are very pessimistic in decoding
5358 in unbuf mode because MIME encoded code may broken by
5359 less or editor's control sequence (such as ESC-[-K in unbuffered
5360 mode. ignore incomplete MIME.
5362 mode = mime_decode_mode;
5363 mime_decode_mode = exit_mode; /* prepare for quit */
5365 while ((c1 = (*i_mgetc)(f))<=' ') {
5370 if ((c2 = (*i_mgetc)(f))<=' ') {
5373 if (mime_f != STRICT_MIME) goto mime_c2_retry;
5374 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
5377 if ((c1 == '?') && (c2 == '=')) {
5380 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
5381 if (lwsp_buf==NULL) {
5382 perror("can't malloc");
5385 while ((c1=(*i_getc)(f))!=EOF) {
5390 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
5398 if ((c1=(*i_getc)(f))!=EOF) {
5402 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
5417 lwsp_buf[lwsp_count] = (unsigned char)c1;
5418 if (lwsp_count++>lwsp_size){
5420 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
5421 if (lwsp_buf_new==NULL) {
5423 perror("can't realloc");
5426 lwsp_buf = lwsp_buf_new;
5432 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SPACE && lwsp_buf[lwsp_count-1] != TAB))) {
5434 for(lwsp_count--;lwsp_count>0;lwsp_count--)
5435 i_ungetc(lwsp_buf[lwsp_count],f);
5442 if ((c3 = (*i_mgetc)(f))<=' ') {
5445 if (mime_f != STRICT_MIME) goto mime_c3_retry;
5446 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
5450 if ((c4 = (*i_mgetc)(f))<=' ') {
5453 if (mime_f != STRICT_MIME) goto mime_c4_retry;
5454 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
5458 mime_decode_mode = mode; /* still in MIME sigh... */
5460 /* BASE 64 decoding */
5462 t1 = 0x3f & base64decode(c1);
5463 t2 = 0x3f & base64decode(c2);
5464 t3 = 0x3f & base64decode(c3);
5465 t4 = 0x3f & base64decode(c4);
5466 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
5468 Fifo(mime_last++) = (unsigned char)cc;
5469 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
5471 Fifo(mime_last++) = (unsigned char)cc;
5472 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
5474 Fifo(mime_last++) = (unsigned char)cc;
5479 return Fifo(mime_top++);
/* Push c back onto the decoded-output side of the Fifo by storing it at the
   decremented mime_top position. The return statement is on a line not
   shown here. */
5482 nkf_char mime_ungetc(nkf_char c, FILE *f)
5484 Fifo(--mime_top) = (unsigned char)c;
/* Pre-read a MIME encoded word into the Fifo and check that it is complete.
   p: the header pattern that was already matched; it is copied into the
   Fifo first.
   Characters are then read through i_getc until EOF or until the buffer is
   full ((mime_input - mime_top) & MIME_BUF_MASK becomes 0). Seeing = right
   after ? marks the end of the encoded word. Only plus, slash, equals,
   question mark and alphanumerics are accepted in the body.
   If the word is incomplete, the last character is stored, mime_last is set
   to mime_input, mime_decode_mode is set to 1 (replay without decoding) and
   switch_mime_getc is called. Return values are on lines not shown here. */
5488 nkf_char mime_integrity(FILE *f, const unsigned char *p)
5492 /* In buffered mode, read until =? or NL or buffer full
5494 mime_input = mime_top;
5495 mime_last = mime_top;
5497 while(*p) Fifo(mime_input++) = *p++;
5500 while((c=(*i_getc)(f))!=EOF) {
5501 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
5502 break; /* buffer full */
5504 if (c=='=' && d=='?') {
5505 /* checked. skip header, start decode */
5506 Fifo(mime_input++) = (unsigned char)c;
5507 /* mime_last_input = mime_input; */
5512 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
5514 /* Should we check length mod 4? */
5515 Fifo(mime_input++) = (unsigned char)c;
5518 /* In case of Incomplete MIME, no MIME decode */
5519 Fifo(mime_input++) = (unsigned char)c;
5520 mime_last = mime_input; /* point undecoded buffer */
5521 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
5522 switch_mime_getc(); /* anyway we need buffered getc */
/* Map one base64 alphabet character to its 6-bit value:
   A..Z to 0-25, a..z to 26-51, 0..9 to 52-61, plus to 62, slash to 63.
   The odd-looking character constants are ASCII arithmetic spelled out in
   the inline comments (for example, subtracting G equals subtracting a and
   adding 26). The range tests selecting the first two branches and the
   return statement are on lines not shown here. */
5526 nkf_char base64decode(nkf_char c)
5531 i = c - 'A'; /* A..Z 0-25 */
5533 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
5535 } else if (c > '/') {
5536 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
5537 } else if (c == '+') {
5538 i = '>' /* 62 */ ; /* + 62 */
5540 i = '?' /* 63 */ ; /* / 63 */
/* State shared by the MIME encoder below. */
/* Base64 alphabet, indexed by 6-bit value. */
5545 static const char basis_64[] =
5546 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Bits carried over between calls while a base64 group is being emitted. */
5548 static nkf_char b64c;
/* Capacity of the pending-output buffer; the array has one spare slot. */
5549 #define MIMEOUT_BUF_LENGTH (60)
5550 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
5551 int mimeout_buf_count = 0;
5552 int mimeout_preserve_space = 0;
/* Convert a value 0..15 to an upper-case hex digit character.
   NOTE(review): the argument is not parenthesized in the expansion, so
   callers must pass a fully parenthesized expression, as the visible call
   sites in mimeout_addchar do. */
5553 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
/* Start a MIME encoded word for output mode `mode`.
   The header pattern is the mime_pattern entry whose mime_encode value
   equals mode, defaulting to mime_pattern[0]. mimeout_mode is taken from
   mime_encode_method at the index where the search stopped. Leading white
   space held in mimeout_buf is written out through o_mputc unless
   mimeout_preserve_space is set, and that flag is then cleared. Finally
   the buffered characters are detached (count reset to 0) and fed back
   through mime_putc. Emission of the header itself and the loop bounds are
   on lines not shown here. */
5555 void open_mime(nkf_char mode)
5557 const unsigned char *p;
5560 p = mime_pattern[0];
5561 for(i=0;mime_pattern[i];i++) {
5562 if (mode == mime_encode[i]) {
5563 p = mime_pattern[i];
5567 mimeout_mode = mime_encode_method[i];
5570 if (base64_count>45) {
5571 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
5572 (*o_mputc)(mimeout_buf[i]);
5578 if (!mimeout_preserve_space && mimeout_buf_count>0
5579 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
5580 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
5584 if (!mimeout_preserve_space) {
5585 for (;i<mimeout_buf_count;i++) {
5586 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
5587 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
5588 (*o_mputc)(mimeout_buf[i]);
5595 mimeout_preserve_space = FALSE;
5601 j = mimeout_buf_count;
5602 mimeout_buf_count = 0;
5604 mime_putc(mimeout_buf[i]);
/* Finish the current MIME encoded word.
   Depending on mimeout_mode, the bits still held in b64c are flushed as one
   final base64 character: the low 2 bits shifted left by 4, or the low 4
   bits shifted left by 2. The case labels, any padding and the terminator
   output are on lines not shown here. */
5608 void close_mime(void)
5618 switch(mimeout_mode) {
5623 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
5629 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
5635 if (mimeout_f!=FIXED_MIME) {
5637 } else if (mimeout_mode != 'Q')
/* Encode one character c into the open MIME word, according to
   mimeout_mode.
   In the quoted-printable branch a non-alphanumeric c is written as two hex
   digits built with itoh4. In the base64 branches a 6-bit group is emitted
   from c and, where needed, combined with the bits saved in b64c. The case
   labels and the updates of b64c, mimeout_mode and base64_count are on
   lines not shown here. */
5642 void mimeout_addchar(nkf_char c)
5644 switch(mimeout_mode) {
5649 } else if(!nkf_isalnum(c)) {
5651 (*o_mputc)(itoh4(((c>>4)&0xf)));
5652 (*o_mputc)(itoh4((c&0xf)));
5661 (*o_mputc)(basis_64[c>>2]);
5666 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5672 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
5673 (*o_mputc)(basis_64[c & 0x3F]);
5684 /*nkf_char mime_lastchar2, mime_lastchar1;*/
/* Line-length check run before a character pair (c2, c1) is MIME encoded.
   When base64_count plus the encoded size of the pending buffer
   (mimeout_buf_count/3*4) exceeds the threshold (73 in one branch, 66 in
   the other), the encoded word is ended by sending EOF through
   o_base64conv, followed by a newline and a space to continue on a folded
   line. The conditions selecting between the two thresholds are on lines
   not shown here. The tail of the block is commented-out code. */
5686 void mime_prechar(nkf_char c2, nkf_char c1)
5690 if (base64_count + mimeout_buf_count/3*4> 73){
5691 (*o_base64conv)(EOF,0);
5692 (*o_base64conv)(0,NL);
5693 (*o_base64conv)(0,SPACE);
5696 if (base64_count + mimeout_buf_count/3*4> 66){
5697 (*o_base64conv)(EOF,0);
5698 (*o_base64conv)(0,NL);
5699 (*o_base64conv)(0,SPACE);
5701 }/*else if (mime_lastchar2){
5702 if (c1 <=DEL && !nkf_isspace(c1)){
5703 (*o_base64conv)(0,SPACE);
5707 if (c2 && mime_lastchar2 == 0
5708 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
5709 (*o_base64conv)(0,SPACE);
5712 /*mime_lastchar2 = c2;
5713 mime_lastchar1 = c1;*/
/* Feed one output character c (or EOF to flush) through the MIME encoder.
   Visible structure:
   - With mimeout_f == FIXED_MIME the character is handled directly, with
     line-length checks at base64_count > 71.
   - Otherwise EOF flushes the pending mimeout_buf through mimeout_addchar.
   - While no encoded word is open (!mimeout_mode), characters up to DEL in
     ASCII or ISO8859_1 output mode are collected in mimeout_buf and written
     out unencoded at line breaks; open_mime(output_mode) is called once the
     buffer exceeds MIMEOUT_BUF_LENGTH or on the path for other characters.
   - While an encoded word is open, buffered text is either added to the
     word with mimeout_addchar or written out plainly through o_mputc,
     depending on the last buffered character and on c.
   Many branch bodies and closing braces are on lines not shown here. */
5716 void mime_putc(nkf_char c)
5721 if (mimeout_f == FIXED_MIME){
5722 if (mimeout_mode == 'Q'){
5723 if (base64_count > 71){
5724 if (c!=CR && c!=NL) {
5731 if (base64_count > 71){
5736 if (c == EOF) { /* c==EOF */
5740 if (c != EOF) { /* c!=EOF */
5746 /* mimeout_f != FIXED_MIME */
5748 if (c == EOF) { /* c==EOF */
5749 j = mimeout_buf_count;
5750 mimeout_buf_count = 0;
5753 if (!nkf_isblank(mimeout_buf[j-1])) {
5755 if (nkf_isspace(mimeout_buf[i]) && base64_count < 71){
5758 mimeout_addchar(mimeout_buf[i]);
5762 mimeout_addchar(mimeout_buf[i]);
5766 mimeout_addchar(mimeout_buf[i]);
5772 mimeout_addchar(mimeout_buf[i]);
5778 if (mimeout_mode=='Q') {
5779 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
5780 if (c == CR || c == NL) {
5785 } else if (c <= SPACE) {
5787 if (base64_count > 70) {
5791 if (!nkf_isblank(c)) {
5802 if (mimeout_buf_count > 0){
5803 lastchar = mimeout_buf[mimeout_buf_count - 1];
5808 if (!mimeout_mode) {
5809 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
5810 if (nkf_isspace(c)) {
5811 if (c==CR || c==NL) {
5814 for (i=0;i<mimeout_buf_count;i++) {
5815 (*o_mputc)(mimeout_buf[i]);
5816 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
5822 mimeout_buf[0] = (char)c;
5823 mimeout_buf_count = 1;
5825 if (base64_count > 1
5826 && base64_count + mimeout_buf_count > 76
5827 && mimeout_buf[0] != CR && mimeout_buf[0] != NL){
5830 if (!nkf_isspace(mimeout_buf[0])){
5835 mimeout_buf[mimeout_buf_count++] = (char)c;
5836 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5837 open_mime(output_mode);
5842 if (lastchar==CR || lastchar == NL){
5843 for (i=0;i<mimeout_buf_count;i++) {
5844 (*o_mputc)(mimeout_buf[i]);
5847 mimeout_buf_count = 0;
5849 if (lastchar==SPACE) {
5850 for (i=0;i<mimeout_buf_count-1;i++) {
5851 (*o_mputc)(mimeout_buf[i]);
5854 mimeout_buf[0] = SPACE;
5855 mimeout_buf_count = 1;
5857 open_mime(output_mode);
5860 /* mimeout_mode == 'B', 1, 2 */
5861 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
5862 if (lastchar == CR || lastchar == NL){
5863 if (nkf_isblank(c)) {
5864 for (i=0;i<mimeout_buf_count;i++) {
5865 mimeout_addchar(mimeout_buf[i]);
5867 mimeout_buf_count = 0;
5868 } else if (SPACE<c && c<DEL) {
5870 for (i=0;i<mimeout_buf_count;i++) {
5871 (*o_mputc)(mimeout_buf[i]);
5874 mimeout_buf_count = 0;
5877 if (c==SPACE || c==TAB || c==CR || c==NL) {
5878 for (i=0;i<mimeout_buf_count;i++) {
5879 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
5881 for (i=0;i<mimeout_buf_count;i++) {
5882 (*o_mputc)(mimeout_buf[i]);
5885 mimeout_buf_count = 0;
5888 mimeout_buf[mimeout_buf_count++] = (char)c;
5889 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5891 for (i=0;i<mimeout_buf_count;i++) {
5892 (*o_mputc)(mimeout_buf[i]);
5895 mimeout_buf_count = 0;
5899 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
5900 mimeout_buf[mimeout_buf_count++] = (char)c;
5901 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5902 j = mimeout_buf_count;
5903 mimeout_buf_count = 0;
5905 mimeout_addchar(mimeout_buf[i]);
5912 if (mimeout_buf_count>0) {
5913 j = mimeout_buf_count;
5914 mimeout_buf_count = 0;
5916 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
5918 mimeout_addchar(mimeout_buf[i]);
5924 (*o_mputc)(mimeout_buf[i]);
5926 open_mime(output_mode);
/* Body of a re-initialisation routine, compiled only for PERL_XS or
   WIN32DLL builds. The signature line is not visible in this listing
   (presumably reinit -- TODO confirm).
   It assigns start-up values to the option flags, clears prefix_table,
   resets the MIME output buffer count, points the output conversion hooks
   at no_connection, points the ungetc hooks at std_ungetc, and clears the
   detected input code name. */
5933 #if defined(PERL_XS) || defined(WIN32DLL)
5937 struct input_code *p = input_code_list;
5950 mime_f = STRICT_MIME;
5951 mime_decode_f = FALSE;
5956 #if defined(MSDOS) || defined(__OS2__)
5961 iso2022jp_f = FALSE;
5962 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5963 ms_ucs_map_f = UCS_MAP_ASCII;
5965 #ifdef UTF8_INPUT_ENABLE
5966 no_cp932ext_f = FALSE;
5967 no_best_fit_chars_f = FALSE;
5968 encode_fallback = NULL;
5969 unicode_subchar = '?';
5970 input_endian = ENDIAN_BIG;
5972 #ifdef UTF8_OUTPUT_ENABLE
5973 output_bom_f = FALSE;
5974 output_endian = ENDIAN_BIG;
5976 #ifdef UNICODE_NORMALIZATION
5989 is_inputcode_mixed = FALSE;
5990 is_inputcode_set = FALSE;
5994 #ifdef SHIFTJIS_CP932
6004 for (i = 0; i < 256; i++){
6005 prefix_table[i] = 0;
6009 mimeout_buf_count = 0;
6014 fold_preserve_f = FALSE;
6017 kanji_intro = DEFAULT_J;
6018 ascii_intro = DEFAULT_R;
6019 fold_margin = FOLD_MARGIN;
6020 output_conv = DEFAULT_CONV;
6021 oconv = DEFAULT_CONV;
6022 o_zconv = no_connection;
6023 o_fconv = no_connection;
6024 o_crconv = no_connection;
6025 o_rot_conv = no_connection;
6026 o_hira_conv = no_connection;
6027 o_base64conv = no_connection;
6028 o_iso2022jp_check_conv = no_connection;
6031 i_ungetc = std_ungetc;
6033 i_bungetc = std_ungetc;
6036 i_mungetc = std_ungetc;
6037 i_mgetc_buf = std_getc;
6038 i_mungetc_buf = std_ungetc;
6039 output_mode = ASCII;
6042 mime_decode_mode = FALSE;
6048 z_prev2=0,z_prev1=0;
6050 iconv_for_check = 0;
6052 input_codename = "";
/* Placeholder for an unconnected two-argument conversion hook: forwards to
   no_connection2 with a third argument of 0. */
6059 void no_connection(nkf_char c2, nkf_char c1)
6061 no_connection2(c2,c1,0);
/* Placeholder for an unconnected conversion hook: reports an internal
   module connection failure on stderr. The arguments are not used in the
   visible lines. The statement between the message and the return is on a
   line not shown here; the return value 0 only exists to satisfy lint. */
6064 nkf_char no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
6066 fprintf(stderr,"nkf internal module connection failure.\n");
6068 return 0; /* LINT */
/* Body of the usage printer (the signature line is not visible in this
   listing). It writes the option summary to stderr; fprintf is redirected
   to dllprintf by the define below, and the preprocessor guard around that
   define is on lines not shown here. Which output code is marked DEFAULT
   depends on the DEFAULT_CODE_* macro in effect.
   NOTE(review): the help text for option I spells the word character as
   charactor; it is a runtime string and is left unchanged here. */
6073 #define fprintf dllprintf
6077 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
6078 fprintf(stderr,"Flags:\n");
6079 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
6080 #ifdef DEFAULT_CODE_SJIS
6081 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
6083 #ifdef DEFAULT_CODE_JIS
6084 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
6086 #ifdef DEFAULT_CODE_EUC
6087 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
6089 #ifdef DEFAULT_CODE_UTF8
6090 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
6092 #ifdef UTF8_OUTPUT_ENABLE
6093 fprintf(stderr," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
6095 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
6096 #ifdef UTF8_INPUT_ENABLE
6097 fprintf(stderr," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
6099 fprintf(stderr,"t no conversion\n");
6100 fprintf(stderr,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
6101 fprintf(stderr,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n");
6102 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
6103 fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
6104 fprintf(stderr,"v Show this usage. V: show version\n");
6105 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
6106 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
6107 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
6108 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
6109 fprintf(stderr,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n");
6110 fprintf(stderr," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n");
6111 fprintf(stderr," 4: JISX0208 Katakana to JISX0201 Katakana\n");
6112 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
6113 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
6115 fprintf(stderr,"T Text mode output\n");
6117 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
6118 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
6119 fprintf(stderr,"d,c Convert line breaks -d: LF -c: CRLF\n");
6120 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
6121 fprintf(stderr,"\n");
6122 fprintf(stderr,"Long name options\n");
6123 fprintf(stderr," --ic=<input codeset> --oc=<output codeset>\n");
6124 fprintf(stderr," Specify the input or output codeset\n");
6125 fprintf(stderr," --fj --unix --mac --windows\n");
6126 fprintf(stderr," --jis --euc --sjis --utf8 --utf16 --mime --base64\n");
6127 fprintf(stderr," Convert for the system or code\n");
6128 fprintf(stderr," --hiragana --katakana --katakana-hiragana\n");
6129 fprintf(stderr," To Hiragana/Katakana Conversion\n");
6130 fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
6132 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
6134 #ifdef NUMCHAR_OPTION
6135 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
6137 #ifdef UTF8_INPUT_ENABLE
6138 fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
6139 fprintf(stderr," Specify how nkf handles unassigned characters\n");
6142 fprintf(stderr," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n");
6143 fprintf(stderr," Overwrite original listed files by filtered result\n");
6144 fprintf(stderr," --overwrite preserves timestamp of original files\n");
6146 fprintf(stderr," -g --guess Guess the input code\n");
6147 fprintf(stderr," --help --version Show this help/the version\n");
6148 fprintf(stderr," For more information, see also man nkf\n");
6149 fprintf(stderr,"\n");
/* Body of the version printer (the signature line is not visible in this
   listing). It writes the banner with NKF_VERSION and NKF_RELEASE_DATE to
   stderr, followed by the CopyRight string. The platform-specific string
   fragments selected by the preprocessor conditions are on lines not shown
   here. */
6155 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
6156 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__) && !defined(__OS2__)
6159 #if defined(MSDOS) && defined(__WIN16__)
6162 #if defined(MSDOS) && defined(__WIN32__)
6168 ,NKF_VERSION,NKF_RELEASE_DATE);
6169 fprintf(stderr,"\n%s\n",CopyRight);
6174 ** Patch authors
6175 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
6176 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
6177 ** ohta@src.ricoh.co.jp (Junn Ohta)
6178 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
6179 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
6180 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
6181 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
6182 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
6183 ** GHG00637@nifty-serve.or.jp (COW)