1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be used as a drop-in replacement for the previous nkf.
32 ** When started as, e.g., "nkf -e", input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp as is.
35 ** It is still quite likely that bugs remain
36 ** (especially in auto-detection, mixed encodings, and error handling).
38 ** If you find any problem, please contact
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.63 2005/03/03 22:03:17 naruse Exp $ */
43 #define NKF_VERSION "2.0.4"
44 #define NKF_RELEASE_DATE "2005-03-04"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
195 /* Input Assumption */
199 #define LATIN1_INPUT 6
201 #define STRICT_MIME 8
206 #define JAPANESE_EUC 10
210 #define UTF8_INPUT 13
211 #define UTF16LE_INPUT 14
212 #define UTF16BE_INPUT 15
/*
 * is_alnum(c): nonzero when c is an ASCII letter ('a'-'z', 'A'-'Z') or a
 * decimal digit ('0'-'9'), zero otherwise.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (for example `ch | 0x20` or `flag ? x : y`) is compared as a whole instead
 * of being torn apart by operator precedence.
 *
 * Note: c is still evaluated more than once; do not pass an expression with
 * side effects.
 */
#define is_alnum(c) \
    ((('a' <= (c)) && ((c) <= 'z')) || \
     (('A' <= (c)) && ((c) <= 'Z')) || \
     (('0' <= (c)) && ((c) <= '9')))
235 #define HOLD_SIZE 1024
236 #define IOBUF_SIZE 16384
238 #define DEFAULT_J 'B'
239 #define DEFAULT_R 'B'
241 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
242 #define SJ6394 0x0161 /* 63 - 94 ku offset */
244 #define RANGE_NUM_MAX 18
249 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
250 #define sizeof_euc_utf8 94
251 #define sizeof_euc_to_utf8_1byte 94
252 #define sizeof_euc_to_utf8_2bytes 94
253 #define sizeof_utf8_to_euc_C2 64
254 #define sizeof_utf8_to_euc_E5B8 64
255 #define sizeof_utf8_to_euc_2bytes 112
256 #define sizeof_utf8_to_euc_3bytes 112
259 /* MIME preprocessor */
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
266 /* function prototype */
268 #ifdef ANSI_C_PROTOTYPE
270 #define STATIC static
282 void (*status_func)PROTO((struct input_code *, int));
283 int (*iconv_func)PROTO((int c2, int c1, int c0));
287 STATIC char *input_codename = "";
289 STATIC int noconvert PROTO((FILE *f));
290 STATIC int kanji_convert PROTO((FILE *f));
291 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
292 STATIC int push_hold_buf PROTO((int c2));
293 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
294 STATIC int s_iconv PROTO((int c2,int c1,int c0));
295 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
296 STATIC int e_iconv PROTO((int c2,int c1,int c0));
297 #ifdef UTF8_INPUT_ENABLE
298 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
299 STATIC int w_iconv PROTO((int c2,int c1,int c0));
300 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
301 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
302 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
304 #ifdef UTF8_OUTPUT_ENABLE
305 STATIC int e2w_conv PROTO((int c2,int c1));
306 STATIC void w_oconv PROTO((int c2,int c1));
307 STATIC void w_oconv16 PROTO((int c2,int c1));
309 STATIC void e_oconv PROTO((int c2,int c1));
310 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
311 STATIC void s_oconv PROTO((int c2,int c1));
312 STATIC void j_oconv PROTO((int c2,int c1));
313 STATIC void fold_conv PROTO((int c2,int c1));
314 STATIC void cr_conv PROTO((int c2,int c1));
315 STATIC void z_conv PROTO((int c2,int c1));
316 STATIC void rot_conv PROTO((int c2,int c1));
317 STATIC void hira_conv PROTO((int c2,int c1));
318 STATIC void base64_conv PROTO((int c2,int c1));
319 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
320 STATIC void no_connection PROTO((int c2,int c1));
321 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
323 STATIC void code_score PROTO((struct input_code *ptr));
324 STATIC void code_status PROTO((int c));
326 STATIC void std_putc PROTO((int c));
327 STATIC int std_getc PROTO((FILE *f));
328 STATIC int std_ungetc PROTO((int c,FILE *f));
330 STATIC int broken_getc PROTO((FILE *f));
331 STATIC int broken_ungetc PROTO((int c,FILE *f));
333 STATIC int mime_begin PROTO((FILE *f));
334 STATIC int mime_getc PROTO((FILE *f));
335 STATIC int mime_ungetc PROTO((int c,FILE *f));
337 STATIC int mime_begin_strict PROTO((FILE *f));
338 STATIC int mime_getc_buf PROTO((FILE *f));
339 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
340 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
342 STATIC int base64decode PROTO((int c));
343 STATIC void mime_prechar PROTO((int c2, int c1));
344 STATIC void mime_putc PROTO((int c));
345 STATIC void open_mime PROTO((int c));
346 STATIC void close_mime PROTO(());
347 STATIC void usage PROTO(());
348 STATIC void version PROTO(());
349 STATIC void options PROTO((unsigned char *c));
350 #if defined(PERL_XS) || defined(WIN32DLL)
351 STATIC void reinit PROTO(());
356 static unsigned char stdibuf[IOBUF_SIZE];
357 static unsigned char stdobuf[IOBUF_SIZE];
358 static unsigned char hold_buf[HOLD_SIZE*2];
359 static int hold_count;
361 /* MIME preprocessor fifo */
363 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
364 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
365 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
366 static unsigned char mime_buf[MIME_BUF_SIZE];
367 static unsigned int mime_top = 0;
368 static unsigned int mime_last = 0; /* decoded */
369 static unsigned int mime_input = 0; /* undecoded */
/* File-scope conversion flags and their initial values. Several of them
   (e.g. iso8859_f, mime_f, mime_decode_f, mimebuf_f, mimeout_f) are assigned
   by the option parser further down in this file. */
372 static int unbuf_f = FALSE;
373 static int estab_f = FALSE;
374 static int nop_f = FALSE;
375 static int binmode_f = TRUE; /* binary mode */
376 static int rot_f = FALSE; /* ROT13/47 mode */
377 static int hira_f = FALSE; /* hiragana/katakana conversion */
378 static int input_f = FALSE; /* non fixed input code */
379 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
380 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
381 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
382 static int mimebuf_f = FALSE; /* MIME buffered input */
383 static int broken_f = FALSE; /* convert ESC-less broken JIS */
384 static int iso8859_f = FALSE; /* ISO8859 through */
385 static int mimeout_f = FALSE; /* base64 mode */
386 #if defined(MSDOS) || defined(__OS2__)
387 static int x0201_f = TRUE; /* Assume JISX0201 kana */
389 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
391 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
392 #ifdef UTF8_OUTPUT_ENABLE
393 static int unicode_bom_f= 0; /* Output Unicode BOM */
394 static int w_oconv16_LE = 0; /* utf-16 little endian */
395 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
399 #ifdef NUMCHAR_OPTION
401 #define CLASS_MASK 0x0f000000
402 #define CLASS_UTF16 0x01000000
/* Three parallel groups (cap, url, numchar). Each has an enable flag, a
   getc/ungetc function-pointer pair that initially points at
   std_getc/std_ungetc, and prototypes for the group's own getc/ungetc. */
406 static int cap_f = FALSE;
407 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
408 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
409 STATIC int cap_getc PROTO((FILE *f));
410 STATIC int cap_ungetc PROTO((int c,FILE *f));
412 static int url_f = FALSE;
413 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
414 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
415 STATIC int url_getc PROTO((FILE *f));
416 STATIC int url_ungetc PROTO((int c,FILE *f));
418 static int numchar_f = FALSE;
419 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
420 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
421 STATIC int numchar_getc PROTO((FILE *f));
422 STATIC int numchar_ungetc PROTO((int c,FILE *f));
426 static int noout_f = FALSE;
427 STATIC void no_putc PROTO((int c));
428 static int debug_f = FALSE;
429 STATIC void debug PROTO((char *str));
432 static int guess_f = FALSE;
433 STATIC void print_guessed_code PROTO((char *filename));
434 STATIC void set_input_codename PROTO((char *codename));
435 static int is_inputcode_mixed = FALSE;
436 static int is_inputcode_set = FALSE;
439 static int exec_f = 0;
442 #ifdef SHIFTJIS_CP932
443 STATIC int cp932_f = TRUE;
444 #define CP932_TABLE_BEGIN (0xfa)
445 #define CP932_TABLE_END (0xfc)
447 STATIC int cp932inv_f = TRUE;
448 #define CP932INV_TABLE_BEGIN (0xed)
449 #define CP932INV_TABLE_END (0xee)
451 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
452 #endif /* SHIFTJIS_CP932 */
455 STATIC int x0212_f = FALSE;
456 static int x0212_shift PROTO((int c));
457 static int x0212_unshift PROTO((int c));
460 STATIC unsigned char prefix_table[256];
462 STATIC void e_status PROTO((struct input_code *, int));
463 STATIC void s_status PROTO((struct input_code *, int));
465 #ifdef UTF8_INPUT_ENABLE
466 STATIC void w_status PROTO((struct input_code *, int));
467 STATIC void w16_status PROTO((struct input_code *, int));
468 static int utf16_mode = UTF16LE_INPUT;
471 struct input_code input_code_list[] = {
472 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
473 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
474 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
475 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
479 static int mimeout_mode = 0;
480 static int base64_count = 0;
482 /* X0208 -> ASCII converter */
485 static int f_line = 0; /* chars in line */
486 static int f_prev = 0;
487 static int fold_preserve_f = FALSE; /* preserve new lines */
488 static int fold_f = FALSE;
489 static int fold_len = 0;
492 static unsigned char kanji_intro = DEFAULT_J;
493 static unsigned char ascii_intro = DEFAULT_R;
497 #define FOLD_MARGIN 10
498 #define DEFAULT_FOLD 60
500 static int fold_margin = FOLD_MARGIN;
504 #ifdef DEFAULT_CODE_JIS
505 # define DEFAULT_CONV j_oconv
507 #ifdef DEFAULT_CODE_SJIS
508 # define DEFAULT_CONV s_oconv
510 #ifdef DEFAULT_CODE_EUC
511 # define DEFAULT_CONV e_oconv
513 #ifdef DEFAULT_CODE_UTF8
514 # define DEFAULT_CONV w_oconv
517 /* process default */
518 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
520 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
521 /* s_iconv or oconv */
522 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
524 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
525 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
526 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
527 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
528 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
529 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
530 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
532 /* static redirections */
/* Function pointers through which character I/O is routed. All of them
   start out pointing at the std_putc / std_getc / std_ungetc primitives. */
534 static void (*o_putc)PROTO((int c)) = std_putc;
536 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
537 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
539 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
540 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
542 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
544 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
545 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
547 /* for strict mime */
548 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
549 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
552 static int output_mode = ASCII, /* output kanji mode */
553 input_mode = ASCII, /* input kanji mode */
554 shift_mode = FALSE; /* TRUE shift out, or X0201 */
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
557 /* X0201 / X0208 conversion tables */
559 /* X0201 kana conversion table */
562 unsigned char cv[]= {
563 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
564 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
565 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
566 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
567 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
568 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
569 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
570 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
571 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
572 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
573 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
574 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
575 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
576 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
577 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
578 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
582 /* X0201 kana conversion table for dakuten */
585 unsigned char dv[]= {
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
591 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
592 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
593 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
594 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
595 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
597 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 /* X0201 kana conversion table for han-dakuten */
607 unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 /* X0208 kigou conversion table */
628 /* 0x8140 - 0x819e */
630 unsigned char fv[] = {
632 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
633 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
634 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
636 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
637 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
638 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
640 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
649 static int file_out = FALSE;
651 static int overwrite = FALSE;
654 static int crmode_f = 0; /* CR, NL, CRLF */
655 #ifdef EASYWIN /*Easy Win */
656 static int end_check;
659 #define STD_GC_BUFSIZE (256)
660 int std_gc_buf[STD_GC_BUFSIZE];
664 #include "nkf32dll.c"
665 #elif defined(PERL_XS)
675 char *outfname = NULL;
678 #ifdef EASYWIN /*Easy Win */
679 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
682 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
683 cp = (unsigned char *)*argv;
688 if (pipe(fds) < 0 || (pid = fork()) < 0){
699 execvp(argv[1], &argv[1]);
713 if(x0201_f == WISH_TRUE)
714 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
716 if (binmode_f == TRUE)
718 if (freopen("","wb",stdout) == NULL)
725 setbuf(stdout, (char *) NULL);
727 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
730 if (binmode_f == TRUE)
732 if (freopen("","rb",stdin) == NULL) return (-1);
736 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
740 kanji_convert(stdin);
741 if (guess_f) print_guessed_code(NULL);
746 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
755 /* reopen file for stdout */
756 if (file_out == TRUE) {
759 outfname = malloc(strlen(origfname)
760 + strlen(".nkftmpXXXXXX")
766 strcpy(outfname, origfname);
770 for (i = strlen(outfname); i; --i){
771 if (outfname[i - 1] == '/'
772 || outfname[i - 1] == '\\'){
778 strcat(outfname, "ntXXXXXX");
780 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
783 strcat(outfname, ".nkftmpXXXXXX");
784 fd = mkstemp(outfname);
787 || (fd_backup = dup(fileno(stdout))) < 0
788 || dup2(fd, fileno(stdout)) < 0
799 outfname = "nkf.out";
802 if(freopen(outfname, "w", stdout) == NULL) {
806 if (binmode_f == TRUE) {
808 if (freopen("","wb",stdout) == NULL)
815 if (binmode_f == TRUE)
817 if (freopen("","rb",fin) == NULL)
822 setvbuffer(fin, stdibuf, IOBUF_SIZE);
826 char *filename = NULL;
828 if (nfiles > 1) filename = origfname;
829 if (guess_f) print_guessed_code(filename);
835 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
843 if (dup2(fd_backup, fileno(stdout)) < 0){
846 if (stat(origfname, &sb)) {
847 fprintf(stderr, "Can't stat %s\n", origfname);
849 /* restore the original file's permissions */
850 if (chmod(outfname, sb.st_mode)) {
851 fprintf(stderr, "Can't set permission %s\n", outfname);
854 /* restore the original file's timestamps */
855 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
856 tb[0] = tb[1] = sb.st_mtime;
857 if (utime(outfname, tb)) {
858 fprintf(stderr, "Can't set timestamp %s\n", outfname);
861 tb.actime = sb.st_atime;
862 tb.modtime = sb.st_mtime;
863 if (utime(outfname, &tb)) {
864 fprintf(stderr, "Can't set timestamp %s\n", outfname);
868 if (unlink(origfname)){
872 if (rename(outfname, origfname)) {
874 fprintf(stderr, "Can't rename %s to %s\n",
875 outfname, origfname);
883 #ifdef EASYWIN /*Easy Win */
884 if (file_out == FALSE)
885 scanf("%d",&end_check);
888 #else /* for Other OS */
889 if (file_out == TRUE)
894 #endif /* WIN32DLL */
919 {"katakana-hiragana","h3"},
926 #ifdef UTF8_OUTPUT_ENABLE
931 #ifdef UTF8_INPUT_ENABLE
933 {"utf16-input", "W16"},
942 #ifdef NUMCHAR_OPTION
943 {"numchar-input", ""},
949 #ifdef SHIFTJIS_CP932
959 static int option_mode = 0;
966 unsigned char *p = NULL;
978 case '-': /* literal options */
979 if (!*cp) { /* ignore the rest of arguments */
983 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
985 p = (unsigned char *)long_option[i].name;
986 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
994 cp = (unsigned char *)long_option[i].alias;
997 if (strcmp(long_option[i].name, "overwrite") == 0){
1004 if (strcmp(long_option[i].name, "cap-input") == 0){
1008 if (strcmp(long_option[i].name, "url-input") == 0){
1013 #ifdef NUMCHAR_OPTION
1014 if (strcmp(long_option[i].name, "numchar-input") == 0){
1020 if (strcmp(long_option[i].name, "no-output") == 0){
1024 if (strcmp(long_option[i].name, "debug") == 0){
1029 if (strcmp(long_option[i].name, "cp932") == 0){
1030 #ifdef SHIFTJIS_CP932
1034 #ifdef UTF8_OUTPUT_ENABLE
1035 ms_ucs_map_f = TRUE;
1039 if (strcmp(long_option[i].name, "no-cp932") == 0){
1040 #ifdef SHIFTJIS_CP932
1044 #ifdef UTF8_OUTPUT_ENABLE
1045 ms_ucs_map_f = FALSE;
1049 #ifdef SHIFTJIS_CP932
1050 if (strcmp(long_option[i].name, "cp932inv") == 0){
1057 if (strcmp(long_option[i].name, "x0212") == 0){
1064 if (strcmp(long_option[i].name, "exec-in") == 0){
1068 if (strcmp(long_option[i].name, "exec-out") == 0){
1073 #ifdef UTF8_OUTPUT_ENABLE
1074 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1075 ms_ucs_map_f = TRUE;
1079 if (strcmp(long_option[i].name, "prefix=") == 0){
1080 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1081 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1082 prefix_table[p[i]] = p[1];
1089 case 'b': /* buffered mode */
1092 case 'u': /* non bufferd mode */
1095 case 't': /* transparent mode */
1098 case 'j': /* JIS output */
1100 output_conv = j_oconv;
1102 case 'e': /* AT&T EUC output */
1103 output_conv = e_oconv;
1105 case 's': /* SJIS output */
1106 output_conv = s_oconv;
1108 case 'l': /* ISO8859 Latin-1 support, no conversion */
1109 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1110 input_f = LATIN1_INPUT;
1112 case 'i': /* Kanji IN ESC-$-@/B */
1113 if (*cp=='@'||*cp=='B')
1114 kanji_intro = *cp++;
1116 case 'o': /* ASCII IN ESC-(-J/B */
1117 if (*cp=='J'||*cp=='B'||*cp=='H')
1118 ascii_intro = *cp++;
1125 if ('9'>= *cp && *cp>='0')
1126 hira_f |= (*cp++ -'0');
1133 #if defined(MSDOS) || defined(__OS2__)
1148 #ifdef UTF8_OUTPUT_ENABLE
1149 case 'w': /* UTF-8 output */
1150 if ('1'== cp[0] && '6'==cp[1]) {
1151 output_conv = w_oconv16; cp+=2;
1153 unicode_bom_f=2; cp++;
1156 unicode_bom_f=1; cp++;
1158 } else if (cp[0] == 'B') {
1159 unicode_bom_f=2; cp++;
1161 unicode_bom_f=1; cp++;
1164 } else if (cp[0] == '8') {
1165 output_conv = w_oconv; cp++;
1168 unicode_bom_f=1; cp++;
1171 output_conv = w_oconv;
1174 #ifdef UTF8_INPUT_ENABLE
1175 case 'W': /* UTF-8 input */
1176 if ('1'== cp[0] && '6'==cp[1]) {
1177 input_f = UTF16LE_INPUT;
1180 } else if (cp[0] == 'B') {
1182 input_f = UTF16BE_INPUT;
1184 } else if (cp[0] == '8') {
1186 input_f = UTF8_INPUT;
1188 input_f = UTF8_INPUT;
1191 /* Input code assumption */
1192 case 'J': /* JIS input */
1193 case 'E': /* AT&T EUC input */
1194 input_f = JIS_INPUT;
1196 case 'S': /* MS Kanji input */
1197 input_f = SJIS_INPUT;
1198 if (x0201_f==NO_X0201) x0201_f=TRUE;
1200 case 'Z': /* Convert X0208 alphabet to asii */
1201 /* bit:0 Convert X0208
1202 bit:1 Convert Kankaku to one space
1203 bit:2 Convert Kankaku to two spaces
1204 bit:3 Convert HTML Entity
1206 if ('9'>= *cp && *cp>='0')
1207 alpha_f |= 1<<(*cp++ -'0');
1211 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1212 x0201_f = FALSE; /* No X0201->X0208 conversion */
1214 ESC-(-I in JIS, EUC, MS Kanji
1215 SI/SO in JIS, EUC, MS Kanji
1216 SSO in EUC, JIS, not in MS Kanji
1217 MS Kanji (0xa0-0xdf)
1219 ESC-(-I in JIS (0x20-0x5f)
1220 SSO in EUC (0xa0-0xdf)
1221 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1224 case 'X': /* Assume X0201 kana */
1225 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1228 case 'F': /* prserve new lines */
1229 fold_preserve_f = TRUE;
1230 case 'f': /* folding -f60 or -f */
1233 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1235 fold_len += *cp++ - '0';
1237 if (!(0<fold_len && fold_len<BUFSIZ))
1238 fold_len = DEFAULT_FOLD;
1242 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1244 fold_margin += *cp++ - '0';
1248 case 'm': /* MIME support */
1249 mime_decode_f = TRUE;
1250 if (*cp=='B'||*cp=='Q') {
1251 mime_decode_mode = *cp++;
1252 mimebuf_f = FIXED_MIME;
1253 } else if (*cp=='N') {
1254 mime_f = TRUE; cp++;
1255 } else if (*cp=='S') {
1256 mime_f = STRICT_MIME; cp++;
1257 } else if (*cp=='0') {
1258 mime_f = FALSE; cp++;
1261 case 'M': /* MIME output */
1264 mimeout_f = FIXED_MIME; cp++;
1265 } else if (*cp=='Q') {
1267 mimeout_f = FIXED_MIME; cp++;
1272 case 'B': /* Broken JIS support */
1274 bit:1 allow any x on ESC-(-x or ESC-$-x
1275 bit:2 reset to ascii on NL
1277 if ('9'>= *cp && *cp>='0')
1278 broken_f |= 1<<(*cp++ -'0');
1283 case 'O':/* for Output file */
1287 case 'c':/* add cr code */
1290 case 'd':/* delete cr code */
1293 case 'I': /* ISO-2022-JP output */
1296 case 'L': /* line mode */
1297 if (*cp=='u') { /* unix */
1298 crmode_f = NL; cp++;
1299 } else if (*cp=='m') { /* mac */
1300 crmode_f = CR; cp++;
1301 } else if (*cp=='w') { /* windows */
1302 crmode_f = CRLF; cp++;
1303 } else if (*cp=='0') { /* no conversion */
1313 /* multiple options in one string are allowed for the Perl module */
1314 while(*cp && *cp!='-') cp++;
1318 /* bogus option but ignored */
1324 #ifdef ANSI_C_PROTOTYPE
1325 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1327 struct input_code * find_inputcode_byfunc(iconv_func)
1328 int (*iconv_func)();
1332 struct input_code *p = input_code_list;
1334 if (iconv_func == p->iconv_func){
1344 static int (*iconv_for_check)() = 0;
1347 #ifdef ANSI_C_PROTOTYPE
1348 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1350 void set_iconv(f, iconv_func)
1352 int (*iconv_func)();
1355 #ifdef INPUT_CODE_FIX
1363 #ifdef INPUT_CODE_FIX
1364 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1370 if (estab_f && iconv_for_check != iconv){
1371 struct input_code *p = find_inputcode_byfunc(iconv);
1373 set_input_codename(p->name);
1374 debug(input_codename);
1376 iconv_for_check = iconv;
/* Score bits used while guessing the input encoding. Each constant is the
   previous one shifted left by one bit, so they can be OR-ed together. */
1381 #define SCORE_L2 (1) /* JIS level-2 kanji */
1382 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1383 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1384 #ifdef SHIFTJIS_CP932
1385 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 */
1386 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* character that does not exist */
1388 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* character that does not exist */
1390 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* encoding designated by MIME */
1391 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1393 #define SCORE_INIT (SCORE_iMIME)
1395 int score_table_A0[] = {
1398 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1399 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1402 int score_table_F0[] = {
1403 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1404 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1405 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1406 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1409 void set_code_score(ptr, score)
1410 struct input_code *ptr;
1414 ptr->score |= score;
1418 void clr_code_score(ptr, score)
1419 struct input_code *ptr;
1423 ptr->score &= ~score;
1427 void code_score(ptr)
1428 struct input_code *ptr;
1430 int c2 = ptr->buf[0];
1431 int c1 = ptr->buf[1];
1433 set_code_score(ptr, SCORE_ERROR);
1434 }else if (c2 == SSO){
1435 set_code_score(ptr, SCORE_KANA);
1436 #ifdef UTF8_OUTPUT_ENABLE
1437 }else if (!e2w_conv(c2, c1)){
1438 set_code_score(ptr, SCORE_NO_EXIST);
1440 }else if ((c2 & 0x70) == 0x20){
1441 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1442 }else if ((c2 & 0x70) == 0x70){
1443 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1444 }else if ((c2 & 0x70) >= 0x50){
1445 set_code_score(ptr, SCORE_L2);
1449 void status_disable(ptr)
1450 struct input_code *ptr;
1455 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1458 void status_push_ch(ptr, c)
1459 struct input_code *ptr;
1462 ptr->buf[ptr->index++] = c;
1465 void status_clear(ptr)
1466 struct input_code *ptr;
1472 void status_reset(ptr)
1473 struct input_code *ptr;
1476 ptr->score = SCORE_INIT;
1479 void status_reinit(ptr)
1480 struct input_code *ptr;
1483 ptr->_file_stat = 0;
1486 void status_check(ptr, c)
1487 struct input_code *ptr;
1490 if (c <= DEL && estab_f){
1495 void s_status(ptr, c)
1496 struct input_code *ptr;
1501 status_check(ptr, c);
1506 #ifdef NUMCHAR_OPTION
1507 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1510 }else if (0xa1 <= c && c <= 0xdf){
1511 status_push_ch(ptr, SSO);
1512 status_push_ch(ptr, c);
1515 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1517 status_push_ch(ptr, c);
1518 #ifdef SHIFTJIS_CP932
1520 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1522 status_push_ch(ptr, c);
1523 #endif /* SHIFTJIS_CP932 */
1525 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1527 status_push_ch(ptr, c);
1528 #endif /* X0212_ENABLE */
1530 status_disable(ptr);
1534 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1535 status_push_ch(ptr, c);
1536 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1540 status_disable(ptr);
1544 #ifdef SHIFTJIS_CP932
1545 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1546 status_push_ch(ptr, c);
1547 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1548 set_code_score(ptr, SCORE_CP932);
1553 #endif /* SHIFTJIS_CP932 */
1554 #ifndef X0212_ENABLE
1555 status_disable(ptr);
1561 void e_status(ptr, c)
1562 struct input_code *ptr;
1567 status_check(ptr, c);
1572 #ifdef NUMCHAR_OPTION
1573 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1576 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1578 status_push_ch(ptr, c);
1580 }else if (0x8f == c){
1582 status_push_ch(ptr, c);
1583 #endif /* X0212_ENABLE */
1585 status_disable(ptr);
1589 if (0xa1 <= c && c <= 0xfe){
1590 status_push_ch(ptr, c);
1594 status_disable(ptr);
1599 if (0xa1 <= c && c <= 0xfe){
1601 status_push_ch(ptr, c);
1603 status_disable(ptr);
1605 #endif /* X0212_ENABLE */
1609 #ifdef UTF8_INPUT_ENABLE
1610 void w16_status(ptr, c)
1611 struct input_code *ptr;
1618 if (ptr->_file_stat == 0){
1619 if (c == 0xfe || c == 0xff){
1621 status_push_ch(ptr, c);
1622 ptr->_file_stat = 1;
1624 status_disable(ptr);
1625 ptr->_file_stat = -1;
1627 }else if (ptr->_file_stat > 0){
1629 status_push_ch(ptr, c);
1630 }else if (ptr->_file_stat < 0){
1631 status_disable(ptr);
1637 status_disable(ptr);
1638 ptr->_file_stat = -1;
1640 status_push_ch(ptr, c);
1647 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1648 status_push_ch(ptr, c);
1651 status_disable(ptr);
1652 ptr->_file_stat = -1;
1658 void w_status(ptr, c)
1659 struct input_code *ptr;
1664 status_check(ptr, c);
1669 #ifdef NUMCHAR_OPTION
1670 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1673 }else if (0xc0 <= c && c <= 0xdf){
1675 status_push_ch(ptr, c);
1676 }else if (0xe0 <= c && c <= 0xef){
1678 status_push_ch(ptr, c);
1680 status_disable(ptr);
1685 if (0x80 <= c && c <= 0xbf){
1686 status_push_ch(ptr, c);
1687 if (ptr->index > ptr->stat){
1688 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1689 && ptr->buf[2] == 0xbf);
1690 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1691 &ptr->buf[0], &ptr->buf[1]);
1698 status_disable(ptr);
1709 int action_flag = 1;
1710 struct input_code *result = 0;
1711 struct input_code *p = input_code_list;
1713 (p->status_func)(p, c);
1716 }else if(p->stat == 0){
1727 if (result && !estab_f){
1728 set_iconv(TRUE, result->iconv_func);
1729 }else if (c <= DEL){
1730 struct input_code *ptr = input_code_list;
1745 return std_gc_buf[--std_gc_ndx];
1756 if (std_gc_ndx == STD_GC_BUFSIZE){
1759 std_gc_buf[std_gc_ndx++] = c;
1779 while ((c = (*i_getc)(f)) != EOF)
1788 oconv = output_conv;
1791 /* replace continuation module, from output side */
1793 /* output redirection */
1795 if (noout_f || guess_f){
1802 if (mimeout_f == TRUE) {
1803 o_base64conv = oconv; oconv = base64_conv;
1805 /* base64_count = 0; */
1809 o_crconv = oconv; oconv = cr_conv;
1812 o_rot_conv = oconv; oconv = rot_conv;
1815 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1818 o_hira_conv = oconv; oconv = hira_conv;
1821 o_fconv = oconv; oconv = fold_conv;
1824 if (alpha_f || x0201_f) {
1825 o_zconv = oconv; oconv = z_conv;
1829 i_ungetc = std_ungetc;
1830 /* input redirection */
1833 i_cgetc = i_getc; i_getc = cap_getc;
1834 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1837 i_ugetc = i_getc; i_getc = url_getc;
1838 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1841 #ifdef NUMCHAR_OPTION
1843 i_ngetc = i_getc; i_getc = numchar_getc;
1844 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1847 if (mime_f && mimebuf_f==FIXED_MIME) {
1848 i_mgetc = i_getc; i_getc = mime_getc;
1849 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1852 i_bgetc = i_getc; i_getc = broken_getc;
1853 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1855 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1856 set_iconv(-TRUE, e_iconv);
1857 } else if (input_f == SJIS_INPUT) {
1858 set_iconv(-TRUE, s_iconv);
1859 #ifdef UTF8_INPUT_ENABLE
1860 } else if (input_f == UTF8_INPUT) {
1861 set_iconv(-TRUE, w_iconv);
1862 } else if (input_f == UTF16LE_INPUT) {
1863 set_iconv(-TRUE, w_iconv16);
1866 set_iconv(FALSE, e_iconv);
1870 struct input_code *p = input_code_list;
1878 Conversion main loop. Code detection only.
1888 module_connection();
1893 output_mode = ASCII;
1896 #define NEXT continue /* no output, get next */
1897 #define SEND ; /* output c1 and c2, get next */
1898 #define LAST break /* end of loop, go closing */
1900 while ((c1 = (*i_getc)(f)) != EOF) {
1905 /* in case of 8th bit is on */
1906 if (!estab_f&&!mime_decode_mode) {
1907 /* in case of not established yet */
1908 /* It is still ambiguous */
1909 if (h_conv(f, c2, c1)==EOF)
1915 /* in case of already established */
1917 /* ignore bogus code */
1923 /* second byte, 7 bit code */
1924 /* it might be kanji shifted */
1925 if ((c1 == DEL) || (c1 <= SPACE)) {
1926 /* ignore bogus first code */
1934 #ifdef UTF8_INPUT_ENABLE
1943 #ifdef NUMCHAR_OPTION
1944 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1947 } else if (c1 > DEL) {
1949 if (!estab_f && !iso8859_f) {
1950 /* not established yet */
1953 } else { /* estab_f==TRUE */
1958 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1959 /* SJIS X0201 Case... */
1960 if(iso2022jp_f && x0201_f==NO_X0201) {
1961 (*oconv)(GETA1, GETA2);
1968 } else if (c1==SSO && iconv != s_iconv) {
1969 /* EUC X0201 Case */
1970 c1 = (*i_getc)(f); /* skip SSO */
1972 if (SSP<=c1 && c1<0xe0) {
1973 if(iso2022jp_f && x0201_f==NO_X0201) {
1974 (*oconv)(GETA1, GETA2);
1981 } else { /* bogus code, skip SSO and one byte */
1985 /* already established */
1990 } else if ((c1 > SPACE) && (c1 != DEL)) {
1991 /* in case of Roman characters */
1993 /* output 1 shifted byte */
1997 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1998 /* output 1 shifted byte */
1999 if(iso2022jp_f && x0201_f==NO_X0201) {
2000 (*oconv)(GETA1, GETA2);
2007 /* look like bogus code */
2010 } else if (input_mode == X0208) {
2011 /* in case of Kanji shifted */
2014 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2015 /* Check MIME code */
2016 if ((c1 = (*i_getc)(f)) == EOF) {
2019 } else if (c1 == '?') {
2020 /* =? is mime conversion start sequence */
2021 if(mime_f == STRICT_MIME) {
2022 /* check in real detail */
2023 if (mime_begin_strict(f) == EOF)
2027 } else if (mime_begin(f) == EOF)
2037 /* normal ASCII code */
2040 } else if (c1 == SI) {
2043 } else if (c1 == SO) {
2046 } else if (c1 == ESC ) {
2047 if ((c1 = (*i_getc)(f)) == EOF) {
2048 /* (*oconv)(0, ESC); don't send bogus code */
2050 } else if (c1 == '$') {
2051 if ((c1 = (*i_getc)(f)) == EOF) {
2053 (*oconv)(0, ESC); don't send bogus code
2054 (*oconv)(0, '$'); */
2056 } else if (c1 == '@'|| c1 == 'B') {
2057 /* This is kanji introduction */
2060 set_input_codename("ISO-2022-JP");
2061 debug(input_codename);
2063 } else if (c1 == '(') {
2064 if ((c1 = (*i_getc)(f)) == EOF) {
2065 /* don't send bogus code
2071 } else if (c1 == '@'|| c1 == 'B') {
2072 /* This is kanji introduction */
2077 } else if (c1 == 'D'){
2081 #endif /* X0212_ENABLE */
2083 /* could be some special code */
2090 } else if (broken_f&0x2) {
2091 /* accept any ESC-(-x as broken code ... */
2101 } else if (c1 == '(') {
2102 if ((c1 = (*i_getc)(f)) == EOF) {
2103 /* don't send bogus code
2105 (*oconv)(0, '('); */
2109 /* This is X0201 kana introduction */
2110 input_mode = X0201; shift_mode = X0201;
2112 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2113 /* This is X0208 kanji introduction */
2114 input_mode = ASCII; shift_mode = FALSE;
2116 } else if (broken_f&0x2) {
2117 input_mode = ASCII; shift_mode = FALSE;
2122 /* maintain various input_mode here */
2126 } else if ( c1 == 'N' || c1 == 'n' ){
2128 c3 = (*i_getc)(f); /* skip SS2 */
2129 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2144 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2145 input_mode = ASCII; set_iconv(FALSE, 0);
2147 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2148 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2156 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2157 if ((c1=(*i_getc)(f))!=EOF) {
2161 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2177 if (input_mode == X0208)
2178 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2180 else if (input_mode == X0212)
2181 (*oconv)((0x8f << 8) | c2, c1);
2182 #endif /* X0212_ENABLE */
2183 else if (input_mode)
2184 (*oconv)(input_mode, c1); /* other special case */
2185 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2186 int c0 = (*i_getc)(f);
2189 (*iconv)(c2, c1, c0);
2195 /* goto next_word */
2199 (*iconv)(EOF, 0, 0);
2200 if (!is_inputcode_set)
2202 iconv == e_iconv ? "EUC-JP" :
2203 iconv == s_iconv ? "Shift_JIS" :
2204 iconv == w_iconv ? "UTF-8" :
2205 iconv == w_iconv16 ? "UTF-16" :
2219 /** it must NOT be in the kanji shifted sequence */
2220 /** it must NOT be written in JIS7 */
2221 /** and it must be after 2 byte 8bit code */
2228 while ((c1 = (*i_getc)(f)) != EOF) {
2234 if (push_hold_buf(c1) == EOF || estab_f){
2240 struct input_code *p = input_code_list;
2241 struct input_code *result = p;
2246 if (p->score < result->score){
2251 set_iconv(FALSE, result->iconv_func);
2256 ** 1) EOF is detected, or
2257 ** 2) Code is established, or
2258 ** 3) Buffer is FULL (but last word is pushed)
2260 ** in 1) and 3) cases, we continue to use
2261 ** Kanji codes by oconv and leave estab_f unchanged.
2266 while (wc < hold_count){
2267 c2 = hold_buf[wc++];
2269 #ifdef NUMCHAR_OPTION
2270 || (c2 & CLASS_MASK) == CLASS_UTF16
2275 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2276 (*iconv)(X0201, c2, 0);
2279 if (wc < hold_count){
2280 c1 = hold_buf[wc++];
2289 if ((*iconv)(c2, c1, 0) < 0){
2291 if (wc < hold_count){
2292 c0 = hold_buf[wc++];
2301 (*iconv)(c2, c1, c0);
2314 if (hold_count >= HOLD_SIZE*2)
2316 hold_buf[hold_count++] = c2;
2317 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2320 int s2e_conv(c2, c1, p2, p1)
2325 #ifdef SHIFTJIS_CP932
2326 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2327 extern unsigned short shiftjis_cp932[3][189];
2328 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2334 #endif /* SHIFTJIS_CP932 */
2336 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2337 extern unsigned short shiftjis_x0212[3][189];
2338 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2341 c2 = (0x8f << 8) | (val >> 8);
2353 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2355 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2363 c2 = x0212_unshift(c2);
2378 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2381 int ret = s2e_conv(c2, c1, &c2, &c1);
2382 if (ret) return ret;
2396 }else if (c2 == 0x8f){
2400 c2 = (c2 << 8) | (c1 & 0x7f);
2402 #ifdef SHIFTJIS_CP932
2405 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2406 s2e_conv(s2, s1, &c2, &c1);
2407 if ((c2 & 0xff00) == 0){
2413 #endif /* SHIFTJIS_CP932 */
2414 #endif /* X0212_ENABLE */
2415 } else if (c2 == SSO){
2418 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2428 #ifdef UTF8_INPUT_ENABLE
2430 w2e_conv(c2, c1, c0, p2, p1)
2434 extern unsigned short * utf8_to_euc_2bytes[];
2435 extern unsigned short ** utf8_to_euc_3bytes[];
2438 if (0xc0 <= c2 && c2 <= 0xef) {
2439 unsigned short **pp;
2442 if (c0 == 0) return -1;
2443 pp = utf8_to_euc_3bytes[c2 - 0x80];
2444 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2446 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2448 #ifdef NUMCHAR_OPTION
2451 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2456 } else if (c2 == X0201) {
2469 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2477 w16w_conv(val, p2, p1, p0)
2485 }else if (val < 0x800){
2486 *p2 = 0xc0 | (val >> 6);
2487 *p1 = 0x80 | (val & 0x3f);
2490 *p2 = 0xe0 | (val >> 12);
2491 *p1 = 0x80 | ((val >> 6) & 0x3f);
2492 *p0 = 0x80 | (val & 0x3f);
2497 ww16_conv(c2, c1, c0)
2502 val = (c2 & 0x0f) << 12;
2503 val |= (c1 & 0x3f) << 6;
2505 }else if (c2 >= 0xc0){
2506 val = (c2 & 0x1f) << 6;
2515 w16e_conv(val, p2, p1)
2519 extern unsigned short * utf8_to_euc_2bytes[];
2520 extern unsigned short ** utf8_to_euc_3bytes[];
2522 unsigned short **pp;
2526 w16w_conv(val, &c2, &c1, &c0);
2529 pp = utf8_to_euc_3bytes[c2 - 0x80];
2530 psize = sizeof_utf8_to_euc_C2;
2531 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2533 pp = utf8_to_euc_2bytes;
2534 psize = sizeof_utf8_to_euc_2bytes;
2535 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2537 #ifdef NUMCHAR_OPTION
2540 *p1 = CLASS_UTF16 | val;
2552 w_iconv16(c2, c1, c0)
2557 if (c2==0376 && c1==0377){
2558 utf16_mode = UTF16LE_INPUT;
2560 } else if (c2==0377 && c1==0376){
2561 utf16_mode = UTF16BE_INPUT;
2564 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2566 tmp=c1; c1=c2; c2=tmp;
2568 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2572 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2573 if (ret) return ret;
2579 w_iconv_common(c1, c0, pp, psize, p2, p1)
2581 unsigned short **pp;
2589 if (pp == 0) return 1;
2592 if (c1 < 0 || psize <= c1) return 1;
2594 if (p == 0) return 1;
2597 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2599 if (val == 0) return 1;
2606 if (c2 == SO) c2 = X0201;
2615 #ifdef UTF8_OUTPUT_ENABLE
2620 extern unsigned short euc_to_utf8_1byte[];
2621 extern unsigned short * euc_to_utf8_2bytes[];
2622 extern unsigned short * euc_to_utf8_2bytes_ms[];
2626 p = euc_to_utf8_1byte;
2628 } else if (c2 >> 8 == 0x8f){
2629 extern unsigned short * x0212_to_utf8_2bytes[];
2630 c2 = (c2&0x7f) - 0x21;
2631 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2632 p = x0212_to_utf8_2bytes[c2];
2638 c2 = (c2&0x7f) - 0x21;
2639 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2640 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2645 c1 = (c1 & 0x7f) - 0x21;
2646 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2662 if (unicode_bom_f==2) {
2669 #ifdef NUMCHAR_OPTION
2670 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2671 w16w_conv(c1, &c2, &c1, &c0);
2675 if (c0) (*o_putc)(c0);
2682 output_mode = ASCII;
2684 } else if (c2 == ISO8859_1) {
2685 output_mode = ISO8859_1;
2686 (*o_putc)(c1 | 0x080);
2690 val = e2w_conv(c2, c1);
2692 w16w_conv(val, &c2, &c1, &c0);
2696 if (c0) (*o_putc)(c0);
2712 if (unicode_bom_f==2) {
2714 (*o_putc)((unsigned char)'\377');
2718 (*o_putc)((unsigned char)'\377');
2723 if (c2 == ISO8859_1) {
2726 #ifdef NUMCHAR_OPTION
2727 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2728 c2 = (c1 >> 8) & 0xff;
2732 unsigned short val = e2w_conv(c2, c1);
2733 c2 = (val >> 8) & 0xff;
2752 #ifdef NUMCHAR_OPTION
2753 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2754 w16e_conv(c1, &c2, &c1);
2755 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2763 } else if (c2 == 0) {
2764 output_mode = ASCII;
2766 } else if (c2 == X0201) {
2767 output_mode = JAPANESE_EUC;
2768 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2769 } else if (c2 == ISO8859_1) {
2770 output_mode = ISO8859_1;
2771 (*o_putc)(c1 | 0x080);
2773 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2774 output_mode = JAPANESE_EUC;
2775 #ifdef SHIFTJIS_CP932
2778 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2779 s2e_conv(s2, s1, &c2, &c1);
2783 if ((c2 & 0xff00) >> 8 == 0x8f){
2786 (*o_putc)((c2 & 0x7f) | 0x080);
2787 (*o_putc)(c1 | 0x080);
2790 (*o_putc)((c2 & 0x7f) | 0x080);
2791 (*o_putc)(c1 | 0x080);
2795 if ((c1<0x21 || 0x7e<c1) ||
2796 (c2<0x21 || 0x7e<c2)) {
2797 set_iconv(FALSE, 0);
2798 return; /* too late to rescue this char */
2800 output_mode = JAPANESE_EUC;
2801 (*o_putc)(c2 | 0x080);
2802 (*o_putc)(c1 | 0x080);
2812 if ((ret & 0xff00) == 0x8f00){
2813 if (0x75 <= c && c <= 0x7f){
2814 ret = c + (0x109 - 0x75);
2817 if (0x75 <= c && c <= 0x7f){
2818 ret = c + (0x113 - 0x75);
2825 int x0212_unshift(c)
2829 if (0x7f <= c && c <= 0x88){
2830 ret = c + (0x75 - 0x7f);
2831 }else if (0x89 <= c && c <= 0x92){
2832 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
2836 #endif /* X0212_ENABLE */
2839 e2s_conv(c2, c1, p2, p1)
2840 int c2, c1, *p2, *p1;
2843 unsigned short *ptr;
2845 extern unsigned short *x0212_shiftjis[];
2847 if ((c2 & 0xff00) == 0x8f00){
2849 if (0x21 <= ndx && ndx <= 0x7e){
2850 ptr = x0212_shiftjis[ndx - 0x21];
2852 val = ptr[(c1 & 0x7f) - 0x21];
2862 c2 = x0212_shift(c2);
2864 #endif /* X0212_ENABLE */
2865 if ((c2 & 0xff00) == 0x8f00){
2868 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2869 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2878 #ifdef NUMCHAR_OPTION
2879 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2880 w16e_conv(c1, &c2, &c1);
2886 } else if (c2 == 0) {
2887 output_mode = ASCII;
2889 } else if (c2 == X0201) {
2890 output_mode = SHIFT_JIS;
2892 } else if (c2 == ISO8859_1) {
2893 output_mode = ISO8859_1;
2894 (*o_putc)(c1 | 0x080);
2896 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2897 output_mode = SHIFT_JIS;
2898 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2904 if ((c1<0x20 || 0x7e<c1) ||
2905 (c2<0x20 || 0x7e<c2)) {
2906 set_iconv(FALSE, 0);
2907 return; /* too late to rescue this char */
2909 output_mode = SHIFT_JIS;
2910 e2s_conv(c2, c1, &c2, &c1);
2912 #ifdef SHIFTJIS_CP932
2914 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2915 extern unsigned short cp932inv[2][189];
2916 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2922 #endif /* SHIFTJIS_CP932 */
2925 if (prefix_table[(unsigned char)c1]){
2926 (*o_putc)(prefix_table[(unsigned char)c1]);
2937 #ifdef NUMCHAR_OPTION
2938 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2939 w16e_conv(c1, &c2, &c1);
2943 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2946 (*o_putc)(ascii_intro);
2947 output_mode = ASCII;
2951 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2952 if (output_mode!=X0212) {
2953 output_mode = X0212;
2959 (*o_putc)(c2 & 0x7f);
2962 } else if (c2==X0201) {
2963 if (output_mode!=X0201) {
2964 output_mode = X0201;
2970 } else if (c2==ISO8859_1) {
2971 /* iso8859 introduction, or 8th bit on */
2972 /* Can we convert in 7bit form using ESC-'-'-A ?
2974 output_mode = ISO8859_1;
2976 } else if (c2 == 0) {
2977 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2980 (*o_putc)(ascii_intro);
2981 output_mode = ASCII;
2985 if (output_mode != X0208) {
2986 output_mode = X0208;
2989 (*o_putc)(kanji_intro);
2991 if (c1<0x20 || 0x7e<c1)
2993 if (c2<0x20 || 0x7e<c2)
3005 mime_prechar(c2, c1);
3006 (*o_base64conv)(c2,c1);
3010 static int broken_buf[3];
3011 static int broken_counter = 0;
3012 static int broken_last = 0;
3019 if (broken_counter>0) {
3020 return broken_buf[--broken_counter];
3023 if (c=='$' && broken_last != ESC
3024 && (input_mode==ASCII || input_mode==X0201)) {
3027 if (c1=='@'|| c1=='B') {
3028 broken_buf[0]=c1; broken_buf[1]=c;
3035 } else if (c=='(' && broken_last != ESC
3036 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3039 if (c1=='J'|| c1=='B') {
3040 broken_buf[0]=c1; broken_buf[1]=c;
3058 if (broken_counter<2)
3059 broken_buf[broken_counter++]=c;
3063 static int prev_cr = 0;
3071 if (! (c2==0&&c1==NL) ) {
3077 } else if (c1=='\r') {
3079 } else if (c1=='\n') {
3080 if (crmode_f==CRLF) {
3081 (*o_crconv)(0,'\r');
3082 } else if (crmode_f==CR) {
3083 (*o_crconv)(0,'\r');
3087 } else if (c1!='\032' || crmode_f!=NL){
3093 Return value of fold_conv()
3095 \n add newline and output char
3096 \r add newline and output nothing
3099 1 (or else) normal output
3101 fold state in prev (previous character)
3103 >0x80 Japanese (X0208/X0201)
3108 This fold algorithm does not preserve leading spaces in a line.
3109 This is the main difference from fmt.
/*
 * Width, in columns, that a character contributes to the current fold line:
 * 2 when the first byte c2 is non-zero, 1 otherwise.  c1 is accepted for
 * symmetry with the converter signatures but is not examined.
 * The argument is parenthesized so that expressions containing low-precedence
 * operators (?:, =) are evaluated as a unit.
 */
#define char_size(c2,c1) ((c2) ? 2 : 1)
3121 if (c1== '\r' && !fold_preserve_f) {
3122 fold_state=0; /* ignore cr */
3123 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3125 fold_state=0; /* ignore cr */
3126 } else if (c1== BS) {
3127 if (f_line>0) f_line--;
3129 } else if (c2==EOF && f_line != 0) { /* close open last line */
3131 } else if ((c1=='\n' && !fold_preserve_f)
3132 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3133 && fold_preserve_f)) {
3135 if (fold_preserve_f) {
3139 } else if ((f_prev == c1 && !fold_preserve_f)
3140 || (f_prev == '\n' && fold_preserve_f)
3141 ) { /* duplicate newline */
3144 fold_state = '\n'; /* output two newline */
3150 if (f_prev&0x80) { /* Japanese? */
3152 fold_state = 0; /* ignore given single newline */
3153 } else if (f_prev==' ') {
3157 if (++f_line<=fold_len)
3161 fold_state = '\r'; /* fold and output nothing */
3165 } else if (c1=='\f') {
3170 fold_state = '\n'; /* output newline and clear */
3171 } else if ( (c2==0 && c1==' ')||
3172 (c2==0 && c1=='\t')||
3173 (c2=='!'&& c1=='!')) {
3174 /* X0208 kankaku or ascii space */
3175 if (f_prev == ' ') {
3176 fold_state = 0; /* remove duplicate spaces */
3179 if (++f_line<=fold_len)
3180 fold_state = ' '; /* output ASCII space only */
3182 f_prev = ' '; f_line = 0;
3183 fold_state = '\r'; /* fold and output nothing */
3187 prev0 = f_prev; /* we still need this one... , but almost done */
3189 if (c2 || c2==X0201)
3190 f_prev |= 0x80; /* this is Japanese */
3191 f_line += char_size(c2,c1);
3192 if (f_line<=fold_len) { /* normal case */
3195 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
3196 f_line = char_size(c2,c1);
3197 fold_state = '\n'; /* We can't wait, do fold now */
3198 } else if (c2==X0201) {
3199 /* simple kinsoku rules return 1 means no folding */
3200 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3201 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3202 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3203 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3204 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3205 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3206 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3208 fold_state = '\n';/* add one new f_line before this character */
3211 fold_state = '\n';/* add one new f_line before this character */
3214 /* kinsoku point in ASCII */
3215 if ( c1==')'|| /* { [ ( */
3226 /* just after special */
3227 } else if (!is_alnum(prev0)) {
3228 f_line = char_size(c2,c1);
3230 } else if ((prev0==' ') || /* ignored new f_line */
3231 (prev0=='\n')|| /* ignored new f_line */
3232 (prev0&0x80)) { /* X0208 - ASCII */
3233 f_line = char_size(c2,c1);
3234 fold_state = '\n';/* add one new f_line before this character */
3236 fold_state = 1; /* default no fold in ASCII */
3240 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3241 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3242 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3243 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3244 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3245 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3246 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3247 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3248 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3249 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3250 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3251 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3252 /* default no fold in kinsoku */
3255 f_line = char_size(c2,c1);
3256 /* add one new f_line before this character */
3259 f_line = char_size(c2,c1);
3261 /* add one new f_line before this character */
3266 /* terminator process */
3267 switch(fold_state) {
3286 int z_prev2=0,z_prev1=0;
3293 /* if (c2) c1 &= 0x7f; assertion */
3295 if (x0201_f && z_prev2==X0201) { /* X0201 */
3296 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3298 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3300 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3302 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3306 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3315 if (x0201_f && c2==X0201) {
3316 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3317 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3318 z_prev1 = c1; z_prev2 = c2;
3321 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3326 /* JISX0208 Alphabet */
3327 if (alpha_f && c2 == 0x23 ) {
3329 } else if (alpha_f && c2 == 0x21 ) {
3330 /* JISX0208 Kigou */
3335 } else if (alpha_f&0x4) {
3340 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3346 case '>': entity = ">"; break;
3347 case '<': entity = "<"; break;
3348 case '\"': entity = """; break;
3349 case '&': entity = "&"; break;
3352 while (*entity) (*o_zconv)(0, *entity++);
3362 #define rot13(c) ( \
3364 (c <= 'M') ? (c + 13): \
3365 (c <= 'Z') ? (c - 13): \
3367 (c <= 'm') ? (c + 13): \
3368 (c <= 'z') ? (c - 13): \
3372 #define rot47(c) ( \
3374 ( c <= 'O' ) ? (c + 47) : \
3375 ( c <= '~' ) ? (c - 47) : \
3383 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3389 (*o_rot_conv)(c2,c1);
3396 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3398 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3401 (*o_hira_conv)(c2,c1);
3406 iso2022jp_check_conv(c2,c1)
3409 static int range[RANGE_NUM_MAX][2] = {
3432 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3436 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3441 for (i = 0; i < RANGE_NUM_MAX; i++) {
3442 start = range[i][0];
3445 if (c >= start && c <= end) {
3450 (*o_iso2022jp_check_conv)(c2,c1);
3454 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3456 unsigned char *mime_pattern[] = {
3457 (unsigned char *)"\075?EUC-JP?B?",
3458 (unsigned char *)"\075?SHIFT_JIS?B?",
3459 (unsigned char *)"\075?ISO-8859-1?Q?",
3460 (unsigned char *)"\075?ISO-8859-1?B?",
3461 (unsigned char *)"\075?ISO-2022-JP?B?",
3462 (unsigned char *)"\075?ISO-2022-JP?Q?",
3463 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3464 (unsigned char *)"\075?UTF-8?B?",
3465 (unsigned char *)"\075?UTF-8?Q?",
3467 (unsigned char *)"\075?US-ASCII?Q?",
3472 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3473 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3474 e_iconv, s_iconv, 0, 0, 0, 0,
3475 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3481 int mime_encode[] = {
3482 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3483 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3490 int mime_encode_method[] = {
3491 'B', 'B','Q', 'B', 'B', 'Q',
3492 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3500 #define MAXRECOVER 20
/*
 * ASCII-only character classification helpers.
 * I don't trust the portability of toupper() and friends, so these are
 * written out as plain range comparisons.
 *
 * Every use of the argument is parenthesized so that an expression such as
 * `x & 0x7f` or `cond ? a : b` is classified as a whole.  The argument is
 * still evaluated more than once: do not pass expressions with side effects.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
3514 if (i_getc!=mime_getc) {
3515 i_mgetc = i_getc; i_getc = mime_getc;
3516 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3517 if(mime_f==STRICT_MIME) {
3518 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3519 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3525 unswitch_mime_getc()
3527 if(mime_f==STRICT_MIME) {
3528 i_mgetc = i_mgetc_buf;
3529 i_mungetc = i_mungetc_buf;
3532 i_ungetc = i_mungetc;
3536 mime_begin_strict(f)
3541 unsigned char *p,*q;
3542 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3544 mime_decode_mode = FALSE;
3545 /* =? has been checked */
3547 p = mime_pattern[j];
3550 for(i=2;p[i]>' ';i++) { /* start at =? */
3551 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3552 /* pattern fails, try next one */
3554 while ((p = mime_pattern[++j])) {
3555 for(k=2;k<i;k++) /* assume length(p) > i */
3556 if (p[k]!=q[k]) break;
3557 if (k==i && nkf_toupper(c1)==p[k]) break;
3559 if (p) continue; /* found next one, continue */
3560 /* all fails, output from recovery buffer */
3568 mime_decode_mode = p[i-2];
3570 set_iconv(FALSE, mime_priority_func[j]);
3571 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3573 if (mime_decode_mode=='B') {
3574 mimebuf_f = unbuf_f;
3576 /* do MIME integrity check */
3577 return mime_integrity(f,mime_pattern[j]);
3589 /* we don't keep eof of Fifo, because it contains ?= as
3590 a terminator. It was checked in mime_integrity. */
3591 return ((mimebuf_f)?
3592 (*i_mgetc_buf)(f):Fifo(mime_input++));
3596 mime_ungetc_buf(c,f)
3601 (*i_mungetc_buf)(c,f);
3603 Fifo(--mime_input)=c;
3614 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3615 /* re-read and convert again from mime_buffer. */
3617 /* =? has been checked */
3619 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3620 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3621 /* We accept any character type even if it is broken up by newlines */
3622 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3623 if (c1=='\n'||c1==' '||c1=='\r'||
3624 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3626 /* Failed. But this could be another MIME preamble */
3634 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3635 if (!(++i<MAXRECOVER) || c1==EOF) break;
3636 if (c1=='b'||c1=='B') {
3637 mime_decode_mode = 'B';
3638 } else if (c1=='q'||c1=='Q') {
3639 mime_decode_mode = 'Q';
3643 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3644 if (!(++i<MAXRECOVER) || c1==EOF) break;
3646 mime_decode_mode = FALSE;
3652 if (!mime_decode_mode) {
3653 /* false MIME preamble, restart from mime_buffer */
3654 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3655 /* Since we are in MIME mode until buffer becomes empty, */
3656 /* we never go into mime_begin again for a while. */
3659 /* discard mime preamble, and goto MIME mode */
3661 /* do no MIME integrity check */
3662 return c1; /* used only for checking EOF */
3677 fprintf(stderr, "%s\n", str);
3683 set_input_codename (codename)
3688 strcmp(codename, "") != 0 &&
3689 strcmp(codename, input_codename) != 0)
3691 is_inputcode_mixed = TRUE;
3693 input_codename = codename;
3694 is_inputcode_set = TRUE;
3699 print_guessed_code (filename)
3702 char *codename = "BINARY";
3703 if (!is_inputcode_mixed) {
3704 if (strcmp(input_codename, "") == 0) {
3707 codename = input_codename;
3710 if (filename != NULL) printf("%s:", filename);
3711 printf("%s\n", codename);
3719 if (nkf_isdigit(x)) return x - '0';
3720 return nkf_toupper(x) - 'A' + 10;
3725 #ifdef ANSI_C_PROTOTYPE
3726 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3729 hex_getc(ch, f, g, u)
3742 if (!nkf_isxdigit(c2)){
3747 if (!nkf_isxdigit(c3)){
3752 return (hex2bin(c2) << 4) | hex2bin(c3);
3759 return hex_getc(':', f, i_cgetc, i_cungetc);
3767 return (*i_cungetc)(c, f);
3774 return hex_getc('%', f, i_ugetc, i_uungetc);
3782 return (*i_uungetc)(c, f);
3786 #ifdef NUMCHAR_OPTION
3791 int (*g)() = i_ngetc;
3792 int (*u)() = i_nungetc;
3803 if (buf[i] == 'x' || buf[i] == 'X'){
3804 for (j = 0; j < 5; j++){
3806 if (!nkf_isxdigit(buf[i])){
3813 c |= hex2bin(buf[i]);
3816 for (j = 0; j < 6; j++){
3820 if (!nkf_isdigit(buf[i])){
3827 c += hex2bin(buf[i]);
3833 return CLASS_UTF16 | c;
3843 numchar_ungetc(c, f)
3847 return (*i_nungetc)(c, f);
3856 int c1, c2, c3, c4, cc;
3857 int t1, t2, t3, t4, mode, exit_mode;
3861 int lwsp_size = 128;
3863 if (mime_top != mime_last) { /* Something is in FIFO */
3864 return Fifo(mime_top++);
3866 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3867 mime_decode_mode=FALSE;
3868 unswitch_mime_getc();
3869 return (*i_getc)(f);
3872 if (mimebuf_f == FIXED_MIME)
3873 exit_mode = mime_decode_mode;
3876 if (mime_decode_mode == 'Q') {
3877 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3879 if (c1=='_') return ' ';
3880 if (c1!='=' && c1!='?') {
3884 mime_decode_mode = exit_mode; /* prepare for quit */
3885 if (c1<=' ') return c1;
3886 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3887 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3888 /* end Q encoding */
3889 input_mode = exit_mode;
3891 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3892 if (lwsp_buf==NULL) {
3893 perror("can't malloc");
3896 while ((c1=(*i_getc)(f))!=EOF) {
3901 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3909 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
3910 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3925 lwsp_buf[lwsp_count] = c1;
3926 if (lwsp_count++>lwsp_size){
3928 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3929 if (lwsp_buf_new==NULL) {
3932 perror("can't realloc");
3935 lwsp_buf = lwsp_buf_new;
3941 if (lwsp_count > 0) {
3942 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3946 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3947 i_ungetc(lwsp_buf[lwsp_count],f);
3955 if (c1=='='&&c2<' ') { /* this is soft wrap */
3956 while((c1 = (*i_mgetc)(f)) <=' ') {
3957 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3959 mime_decode_mode = 'Q'; /* still in MIME */
3960 goto restart_mime_q;
3963 mime_decode_mode = 'Q'; /* still in MIME */
3967 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3968 if (c2<=' ') return c2;
3969 mime_decode_mode = 'Q'; /* still in MIME */
3970 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3971 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3972 return ((hex(c2)<<4) + hex(c3));
3975 if (mime_decode_mode != 'B') {
3976 mime_decode_mode = FALSE;
3977 return (*i_mgetc)(f);
3981 /* Base64 encoding */
3983 MIME allows line breaks in the middle of
3984 Base64, but we are very pessimistic when decoding
3985 in unbuffered mode, because MIME-encoded text may be broken by
3986 less or an editor's control sequence (such as ESC-[-K) in unbuffered
3987 mode. Incomplete MIME is ignored.
3989 mode = mime_decode_mode;
3990 mime_decode_mode = exit_mode; /* prepare for quit */
3992 while ((c1 = (*i_mgetc)(f))<=' ') {
3997 if ((c2 = (*i_mgetc)(f))<=' ') {
4000 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4001 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4004 if ((c1 == '?') && (c2 == '=')) {
4007 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4008 if (lwsp_buf==NULL) {
4009 perror("can't malloc");
4012 while ((c1=(*i_getc)(f))!=EOF) {
4017 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4025 if ((c1=(*i_getc)(f))!=EOF) {
4029 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4044 lwsp_buf[lwsp_count] = c1;
4045 if (lwsp_count++>lwsp_size){
4047 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4048 if (lwsp_buf_new==NULL) {
4051 perror("can't realloc");
4054 lwsp_buf = lwsp_buf_new;
4060 if (lwsp_count > 0) {
4061 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4065 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4066 i_ungetc(lwsp_buf[lwsp_count],f);
4075 if ((c3 = (*i_mgetc)(f))<=' ') {
4078 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4079 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4083 if ((c4 = (*i_mgetc)(f))<=' ') {
4086 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4087 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4091 mime_decode_mode = mode; /* still in MIME sigh... */
4093 /* BASE 64 decoding */
4095 t1 = 0x3f & base64decode(c1);
4096 t2 = 0x3f & base64decode(c2);
4097 t3 = 0x3f & base64decode(c3);
4098 t4 = 0x3f & base64decode(c4);
4099 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4101 Fifo(mime_last++) = cc;
4102 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4104 Fifo(mime_last++) = cc;
4105 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4107 Fifo(mime_last++) = cc;
4112 return Fifo(mime_top++);
4120 Fifo(--mime_top) = c;
4131 /* In buffered mode, read until =? or NL or buffer full
4133 mime_input = mime_top;
4134 mime_last = mime_top;
4135 while(*p) Fifo(mime_input++) = *p++;
4138 while((c=(*i_getc)(f))!=EOF) {
4139 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4140 break; /* buffer full */
4142 if (c=='=' && d=='?') {
4143 /* checked. skip header, start decode */
4144 Fifo(mime_input++) = c;
4145 /* mime_last_input = mime_input; */
4150 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4152 /* Should we check length mod 4? */
4153 Fifo(mime_input++) = c;
4156 /* In case of Incomplete MIME, no MIME decode */
4157 Fifo(mime_input++) = c;
4158 mime_last = mime_input; /* point undecoded buffer */
4159 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4160 switch_mime_getc(); /* anyway we need buffered getc */
4171 i = c - 'A'; /* A..Z 0-25 */
4173 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4175 } else if (c > '/') {
4176 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4177 } else if (c == '+') {
4178 i = '>' /* 62 */ ; /* + 62 */
4180 i = '?' /* 63 */ ; /* / 63 */
/* Base64 alphabet: basis_64[v] is the output character for the 6-bit
   value v (0..63). */
static char basis_64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Capacity, in characters, of the pending MIME output buffer. */
#define MIMEOUT_BUF_LENGTH (60)

/* Characters held back before being written out.  The array has one slot
   more than MIMEOUT_BUF_LENGTH because writers store a character first and
   only afterwards compare the count against the limit. */
char mimeout_buf[MIMEOUT_BUF_LENGTH+1];

/* Number of characters currently stored in mimeout_buf. */
int mimeout_buf_count = 0;

/* Flag tested (as a boolean) by the MIME-opening code before it deals with
   whitespace held in mimeout_buf.
   NOTE(review): where it is set to non-zero is not visible here - confirm. */
int mimeout_preserve_space = 0;

/* Convert a nibble value (0..15) to its uppercase hexadecimal digit
   character.  The argument is parenthesized so that expressions such as
   itoh4(x & 0xf) expand with the intended precedence; note that it is still
   evaluated more than once, so avoid arguments with side effects. */
#define itoh4(c) ((c)>=10?(c)+'A'-10:(c)+'0')
4202 p = mime_pattern[0];
4203 for(i=0;mime_encode[i];i++) {
4204 if (mode == mime_encode[i]) {
4205 p = mime_pattern[i];
4209 mimeout_mode = mime_encode_method[i];
4212 if (base64_count>45) {
4213 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
4214 (*o_mputc)(mimeout_buf[i]);
4220 if (!mimeout_preserve_space && mimeout_buf_count>0
4221 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4222 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4226 if (!mimeout_preserve_space) {
4227 for (;i<mimeout_buf_count;i++) {
4228 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4229 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4230 (*o_mputc)(mimeout_buf[i]);
4237 mimeout_preserve_space = FALSE;
4243 j = mimeout_buf_count;
4244 mimeout_buf_count = 0;
4246 mime_putc(mimeout_buf[i]);
4262 switch(mimeout_mode) {
4267 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4273 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4279 if (mimeout_f!=FIXED_MIME) {
4281 } else if (mimeout_mode != 'Q')
4290 switch(mimeout_mode) {
4295 } else if (c==CR||c==NL) {
4298 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
4300 (*o_mputc)(itoh4(((c>>4)&0xf)));
4301 (*o_mputc)(itoh4((c&0xf)));
4310 (*o_mputc)(basis_64[c>>2]);
4315 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4321 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4322 (*o_mputc)(basis_64[c & 0x3F]);
/* Last (c2, c1) character pair recorded by the MIME output pre-processing:
   the visible mime_prechar fragment compares against these values and then
   overwrites them with its own c2 and c1 arguments. */
4333 int mime_lastchar2, mime_lastchar1;
4335 void mime_prechar(c2, c1)
4340 if (base64_count + mimeout_buf_count/3*4> 66){
4341 (*o_base64conv)(EOF,0);
4342 (*o_base64conv)(0,NL);
4343 (*o_base64conv)(0,SPACE);
4345 }/*else if (mime_lastchar2){
4346 if (c1 <=DEL && !nkf_isspace(c1)){
4347 (*o_base64conv)(0,SPACE);
4351 if (c2 && mime_lastchar2 == 0
4352 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
4353 (*o_base64conv)(0,SPACE);
4356 mime_lastchar2 = c2;
4357 mime_lastchar1 = c1;
4368 if (mimeout_f == FIXED_MIME){
4369 if (mimeout_mode == 'Q'){
4370 if (base64_count > 71){
4371 if (c!=CR && c!=NL) {
4378 if (base64_count > 71){
4383 if (c == EOF) { /* c==EOF */
4387 if (c != EOF) { /* c==EOF */
4393 /* mimeout_f != FIXED_MIME */
4395 if (c == EOF) { /* c==EOF */
4396 j = mimeout_buf_count;
4397 mimeout_buf_count = 0;
4400 /*if (nkf_isspace(mimeout_buf[i])){
4403 mimeout_addchar(mimeout_buf[i]);
4407 (*o_mputc)(mimeout_buf[i]);
4413 if (mimeout_mode=='Q') {
4414 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4426 if (mimeout_buf_count > 0){
4427 lastchar = mimeout_buf[mimeout_buf_count - 1];
4432 if (!mimeout_mode) {
4433 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
4434 if (nkf_isspace(c)) {
4435 if (c==CR || c==NL) {
4438 for (i=0;i<mimeout_buf_count;i++) {
4439 (*o_mputc)(mimeout_buf[i]);
4440 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
4447 mimeout_buf_count = 1;
4449 if (base64_count > 1
4450 && base64_count + mimeout_buf_count > 76){
4453 if (!nkf_isspace(mimeout_buf[0])){
4458 mimeout_buf[mimeout_buf_count++] = c;
4459 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4460 open_mime(output_mode);
4465 if (lastchar==CR || lastchar == NL){
4466 for (i=0;i<mimeout_buf_count;i++) {
4467 (*o_mputc)(mimeout_buf[i]);
4470 mimeout_buf_count = 0;
4472 if (lastchar==SPACE) {
4473 for (i=0;i<mimeout_buf_count-1;i++) {
4474 (*o_mputc)(mimeout_buf[i]);
4477 mimeout_buf[0] = SPACE;
4478 mimeout_buf_count = 1;
4480 open_mime(output_mode);
4483 /* mimeout_mode == 'B', 1, 2 */
4484 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4485 if (lastchar == CR || lastchar == NL){
4486 if (nkf_isblank(c)) {
4487 for (i=0;i<mimeout_buf_count;i++) {
4488 mimeout_addchar(mimeout_buf[i]);
4490 mimeout_buf_count = 0;
4491 } else if (SPACE<c && c<DEL) {
4493 for (i=0;i<mimeout_buf_count;i++) {
4494 (*o_mputc)(mimeout_buf[i]);
4497 mimeout_buf_count = 0;
4500 if (c==SPACE || c==TAB || c==CR || c==NL) {
4501 for (i=0;i<mimeout_buf_count;i++) {
4502 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
4504 for (i=0;i<mimeout_buf_count;i++) {
4505 (*o_mputc)(mimeout_buf[i]);
4508 mimeout_buf_count = 0;
4511 mimeout_buf[mimeout_buf_count++] = c;
4512 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4514 for (i=0;i<mimeout_buf_count;i++) {
4515 (*o_mputc)(mimeout_buf[i]);
4518 mimeout_buf_count = 0;
4522 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
4523 mimeout_buf[mimeout_buf_count++] = c;
4524 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4525 j = mimeout_buf_count;
4526 mimeout_buf_count = 0;
4528 mimeout_addchar(mimeout_buf[i]);
4535 if (mimeout_buf_count>0) {
4536 j = mimeout_buf_count;
4537 mimeout_buf_count = 0;
4539 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4541 mimeout_addchar(mimeout_buf[i]);
4547 (*o_mputc)(mimeout_buf[i]);
4549 open_mime(output_mode);
4556 #if defined(PERL_XS) || defined(WIN32DLL)
4561 struct input_code *p = input_code_list;
4574 mime_f = STRICT_MIME;
4575 mime_decode_f = FALSE;
4580 #if defined(MSDOS) || defined(__OS2__)
4585 iso2022jp_f = FALSE;
4586 #ifdef UTF8_OUTPUT_ENABLE
4589 ms_ucs_map_f = FALSE;
4601 is_inputcode_mixed = FALSE;
4602 is_inputcode_set = FALSE;
4606 #ifdef SHIFTJIS_CP932
4612 for (i = 0; i < 256; i++){
4613 prefix_table[i] = 0;
4616 #ifdef UTF8_INPUT_ENABLE
4617 utf16_mode = UTF16LE_INPUT;
4619 mimeout_buf_count = 0;
4624 fold_preserve_f = FALSE;
4627 kanji_intro = DEFAULT_J;
4628 ascii_intro = DEFAULT_R;
4629 fold_margin = FOLD_MARGIN;
4630 output_conv = DEFAULT_CONV;
4631 oconv = DEFAULT_CONV;
4632 o_zconv = no_connection;
4633 o_fconv = no_connection;
4634 o_crconv = no_connection;
4635 o_rot_conv = no_connection;
4636 o_hira_conv = no_connection;
4637 o_base64conv = no_connection;
4638 o_iso2022jp_check_conv = no_connection;
4641 i_ungetc = std_ungetc;
4643 i_bungetc = std_ungetc;
4646 i_mungetc = std_ungetc;
4647 i_mgetc_buf = std_getc;
4648 i_mungetc_buf = std_ungetc;
4649 output_mode = ASCII;
4652 mime_decode_mode = FALSE;
4658 z_prev2=0,z_prev1=0;
4660 iconv_for_check = 0;
4662 input_codename = "";
4670 no_connection(c2,c1)
4673 no_connection2(c2,c1,0);
4677 no_connection2(c2,c1,c0)
4680 fprintf(stderr,"nkf internal module connection failure.\n");
4682 return 0; /* LINT */
4687 #define fprintf dllprintf
4692 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4693 fprintf(stderr,"Flags:\n");
4694 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4695 #ifdef DEFAULT_CODE_SJIS
4696 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4698 #ifdef DEFAULT_CODE_JIS
4699 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4701 #ifdef DEFAULT_CODE_EUC
4702 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4704 #ifdef DEFAULT_CODE_UTF8
4705 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4707 #ifdef UTF8_OUTPUT_ENABLE
4708 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4710 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4711 #ifdef UTF8_INPUT_ENABLE
4712 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4714 fprintf(stderr,"t no conversion\n");
4715 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4716 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4717 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
4718 fprintf(stderr,"v Show this usage. V: show version\n");
4719 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4720 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4721 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4722 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4723 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4724 fprintf(stderr," 3: Convert HTML Entity\n");
4725 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4726 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4728 fprintf(stderr,"T Text mode output\n");
4730 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4731 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4732 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
4733 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4734 fprintf(stderr,"long name options\n");
4735 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4736 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4737 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
4738 fprintf(stderr," --x0212 Convert JISX0212\n");
4739 fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
4741 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
4743 #ifdef NUMCHAR_OPTION
4744 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4746 #ifdef UTF8_OUTPUT_ENABLE
4747 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4750 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4752 fprintf(stderr," -g, --guess Guess the input code\n");
4753 fprintf(stderr," --help,--version\n");
4760 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4761 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4764 #if defined(MSDOS) && defined(__WIN16__)
4767 #if defined(MSDOS) && defined(__WIN32__)
4773 ,NKF_VERSION,NKF_RELEASE_DATE);
4774 fprintf(stderr,"\n%s\n",CopyRight);
4779 ** パッチ制作者 (patch authors)
4780 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4781 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4782 ** ohta@src.ricoh.co.jp (Junn Ohta)
4783 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4784 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4785 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4786 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4787 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4788 ** GHG00637@nifty-serve.or.jp (COW)