1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2003 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS)
113 #if defined(MSDOS) || defined(__OS2__)
120 #define setbinmode(fp) fsetbin(fp)
121 #else /* Microsoft C, Turbo C */
122 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
124 #else /* UNIX,OS/2 */
125 #define setbinmode(fp)
128 #ifdef _IOFBF /* SysV and MSDOS */
129 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
131 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
134 /*Borland C++ 4.5 EasyWin*/
135 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
141 /* added by satoru@isoternet.org */
142 #include <sys/stat.h>
152 /* state of output_mode and input_mode
169 /* Input Assumption */
173 #define LATIN1_INPUT 6
175 #define STRICT_MIME 8
180 #define JAPANESE_EUC 10
184 #define UTF8_INPUT 13
185 #define UTF16_INPUT 14
186 #define UTF16BE_INPUT 15
/* is_alnum(c): non-zero when c is an ASCII letter ('a'-'z', 'A'-'Z') or
   digit ('0'-'9').
   NOTE(review): c is expanded unparenthesized and up to six times, so pass
   only a simple, side-effect-free expression. */
204 #define is_alnum(c) \
205 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
207 #define HOLD_SIZE 1024
208 #define IOBUF_SIZE 16384
210 #define DEFAULT_J 'B' /* initial value of kanji_intro */
211 #define DEFAULT_R 'B' /* initial value of ascii_intro */
213 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
214 #define SJ6394 0x0161 /* 63 - 94 ku offset */
216 #define RANGE_NUM_MAX 18
221 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
222 #define sizeof_euc_utf8 94
223 #define sizeof_euc_to_utf8_1byte 94
224 #define sizeof_euc_to_utf8_2bytes 94
225 #define sizeof_utf8_to_euc_C2 64
226 #define sizeof_utf8_to_euc_E5B8 64
227 #define sizeof_utf8_to_euc_2bytes 112
228 #define sizeof_utf8_to_euc_3bytes 112
231 /* MIME preprocessor */
234 #ifdef EASYWIN /*Easy Win */
235 extern POINT _BufferSize;
238 /* function prototype */
240 #ifdef ANSI_C_PROTOTYPE
242 #define STATIC static
254 void (*status_func)PROTO((struct input_code *, int));
255 int (*iconv_func)PROTO((int c2, int c1, int c0));
258 STATIC int noconvert PROTO((FILE *f));
259 STATIC int kanji_convert PROTO((FILE *f));
260 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
261 STATIC int push_hold_buf PROTO((int c2));
262 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
263 STATIC int s_iconv PROTO((int c2,int c1,int c0));
264 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
265 STATIC int e_iconv PROTO((int c2,int c1,int c0));
266 #ifdef UTF8_INPUT_ENABLE
267 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
268 STATIC int w_iconv PROTO((int c2,int c1,int c0));
269 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
270 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
272 #ifdef UTF8_OUTPUT_ENABLE
273 STATIC int e2w_conv PROTO((int c2,int c1));
274 STATIC void w_oconv PROTO((int c2,int c1));
275 STATIC void w_oconv16 PROTO((int c2,int c1));
277 STATIC void e_oconv PROTO((int c2,int c1));
278 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
279 STATIC void s_oconv PROTO((int c2,int c1));
280 STATIC void j_oconv PROTO((int c2,int c1));
281 STATIC void fold_conv PROTO((int c2,int c1));
282 STATIC void cr_conv PROTO((int c2,int c1));
283 STATIC void z_conv PROTO((int c2,int c1));
284 STATIC void rot_conv PROTO((int c2,int c1));
285 STATIC void hira_conv PROTO((int c2,int c1));
286 STATIC void base64_conv PROTO((int c2,int c1));
287 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
288 STATIC void no_connection PROTO((int c2,int c1));
289 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
291 STATIC void code_score PROTO((struct input_code *ptr));
292 STATIC void code_status PROTO((int c));
294 STATIC void std_putc PROTO((int c));
295 STATIC int std_getc PROTO((FILE *f));
296 STATIC int std_ungetc PROTO((int c,FILE *f));
298 STATIC int broken_getc PROTO((FILE *f));
299 STATIC int broken_ungetc PROTO((int c,FILE *f));
301 STATIC int mime_begin PROTO((FILE *f));
302 STATIC int mime_getc PROTO((FILE *f));
303 STATIC int mime_ungetc PROTO((int c,FILE *f));
305 STATIC int mime_begin_strict PROTO((FILE *f));
306 STATIC int mime_getc_buf PROTO((FILE *f));
307 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
308 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
310 STATIC int base64decode PROTO((int c));
311 STATIC void mime_putc PROTO((int c));
312 STATIC void open_mime PROTO((int c));
313 STATIC void close_mime PROTO(());
314 STATIC void usage PROTO(());
315 STATIC void version PROTO(());
316 STATIC void options PROTO((unsigned char *c));
318 STATIC void reinit PROTO(());
323 static unsigned char stdibuf[IOBUF_SIZE];
324 static unsigned char stdobuf[IOBUF_SIZE];
325 static unsigned char hold_buf[HOLD_SIZE*2];
326 static int hold_count;
328 /* MIME preprocessor fifo */
330 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer; must stay a power of two for the mask below */
331 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) /* ANDed with an index to wrap it into the buffer */
332 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] /* buffer byte at position n, wrapped by the mask */
333 static unsigned char mime_buf[MIME_BUF_SIZE];
334 static unsigned int mime_top = 0;
335 static unsigned int mime_last = 0; /* decoded */
336 static unsigned int mime_input = 0; /* undecoded */
339 static int unbuf_f = FALSE;
340 static int estab_f = FALSE; /* TRUE once the input code is established */
341 static int nop_f = FALSE;
342 static int binmode_f = TRUE; /* binary mode */
343 static int rot_f = FALSE; /* rot13/47 mode */
344 static int hira_f = FALSE; /* hiragana/katakana conversion */
345 static int input_f = FALSE; /* non fixed input code */
346 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
347 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
348 static int mimebuf_f = FALSE; /* MIME buffered input */
349 static int broken_f = FALSE; /* convert ESC-less broken JIS */
350 static int iso8859_f = FALSE; /* ISO8859 through */
351 static int mimeout_f = FALSE; /* base64 mode */
352 #if defined(MSDOS) || defined(__OS2__)
353 static int x0201_f = TRUE; /* Assume JISX0201 kana */
355 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
357 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
358 #ifdef UTF8_OUTPUT_ENABLE
359 static int w_oconv16_begin_f= 0; /* utf-16 header */
364 static int cap_f = FALSE;
365 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
366 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
367 STATIC int cap_getc PROTO((FILE *f));
368 STATIC int cap_ungetc PROTO((int c,FILE *f));
370 static int url_f = FALSE;
371 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
372 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
373 STATIC int url_getc PROTO((FILE *f));
374 STATIC int url_ungetc PROTO((int c,FILE *f));
376 static int numchar_f = FALSE;
377 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc: receives the previous i_getc when numchar_getc is installed */
378 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc; /* matching unget: receives the previous i_ungetc */
379 STATIC int numchar_getc PROTO((FILE *f));
380 STATIC int numchar_ungetc PROTO((int c,FILE *f));
384 static int noout_f = FALSE;
385 STATIC void no_putc PROTO((int c));
386 static int debug_f = FALSE;
387 STATIC void debug PROTO((char *str));
390 #ifdef SHIFTJIS_CP932
391 STATIC int cp932_f = FALSE;
392 #define CP932_TABLE_BEGIN (0xfa)
393 #define CP932_TABLE_END (0xfc)
395 #endif /* SHIFTJIS_CP932 */
397 STATIC void e_status PROTO((struct input_code *, int));
398 STATIC void s_status PROTO((struct input_code *, int));
400 #ifdef UTF8_INPUT_ENABLE
401 STATIC void w_status PROTO((struct input_code *, int));
402 static int utf16_mode = UTF16_INPUT;
405 struct input_code input_code_list[] = {
406 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv},
407 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv},
408 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv},
412 static int mimeout_mode = 0;
413 static int base64_count = 0;
415 /* X0208 -> ASCII converter */
418 static int f_line = 0; /* chars in line */
419 static int f_prev = 0;
420 static int fold_preserve_f = FALSE; /* preserve new lines */
421 static int fold_f = FALSE;
422 static int fold_len = 0;
425 static unsigned char kanji_intro = DEFAULT_J,
426 ascii_intro = DEFAULT_R;
430 #define FOLD_MARGIN 10
431 #define DEFAULT_FOLD 60
433 static int fold_margin = FOLD_MARGIN;
437 #ifdef DEFAULT_CODE_JIS
438 # define DEFAULT_CONV j_oconv
440 #ifdef DEFAULT_CODE_SJIS
441 # define DEFAULT_CONV s_oconv
443 #ifdef DEFAULT_CODE_EUC
444 # define DEFAULT_CONV e_oconv
446 #ifdef DEFAULT_CODE_UTF8
447 # define DEFAULT_CONV w_oconv
450 /* process default */
451 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
453 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
454 /* s_iconv or oconv */
455 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
457 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
458 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
459 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
460 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
461 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
462 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
463 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
465 /* static redirections */
467 static void (*o_putc)PROTO((int c)) = std_putc;
469 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
470 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
472 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc: receives the previous i_getc when broken_getc is installed */
473 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc; /* matching unget: receives the previous i_ungetc */
475 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
477 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
478 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
480 /* for strict mime */
481 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
482 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
485 static int output_mode = ASCII, /* output kanji mode */
486 input_mode = ASCII, /* input kanji mode */
487 shift_mode = FALSE; /* TRUE shift out, or X0201 */
488 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
490 /* X0201 / X0208 conversion tables */
492 /* X0201 kana conversion table */
495 unsigned char cv[]= {
496 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
497 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
498 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
499 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
500 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
501 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
502 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
503 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
504 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
505 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
506 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
507 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
508 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
509 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
510 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
511 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
515 /* X0201 kana conversion table for dakuten */
518 unsigned char dv[]= {
519 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
520 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
521 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
522 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
524 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
525 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
526 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
527 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
528 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
529 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
530 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
531 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
532 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
534 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
537 /* X0201 kana conversion table for han-dakuten */
540 unsigned char ev[]= {
541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
542 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
543 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
544 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
545 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
546 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
547 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
548 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
549 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
550 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
551 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
552 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
553 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
554 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
555 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
556 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
560 /* X0208 kigou conversion table */
561 /* 0x8140 - 0x819e */
563 unsigned char fv[] = {
565 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
566 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
567 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
568 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
569 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
570 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
571 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
572 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
573 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
574 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
575 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
582 static int file_out = FALSE;
584 static int overwrite = FALSE;
587 static int crmode_f = 0; /* CR, NL, CRLF */
588 #ifdef EASYWIN /*Easy Win */
589 static int end_check;
601 #ifdef EASYWIN /*Easy Win */
602 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
605 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
606 cp = (unsigned char *)*argv;
609 if(x0201_f == WISH_TRUE)
610 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
612 if (binmode_f == TRUE)
614 if (freopen("","wb",stdout) == NULL)
621 setbuf(stdout, (char *) NULL);
623 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
626 if (binmode_f == TRUE)
628 if (freopen("","rb",stdin) == NULL) return (-1);
632 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
636 kanji_convert(stdin);
642 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
651 /* reopen file for stdout */
652 if (file_out == TRUE) {
655 outfname = malloc(strlen(origfname)
656 + strlen(".nkftmpXXXXXX")
662 strcpy(outfname, origfname);
666 for (i = strlen(outfname); i; --i){
667 if (outfname[i - 1] == '/'
668 || outfname[i - 1] == '\\'){
674 strcat(outfname, "ntXXXXXX");
676 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
679 strcat(outfname, ".nkftmpXXXXXX");
680 fd = mkstemp(outfname);
683 || (fd_backup = dup(fileno(stdout))) < 0
684 || dup2(fd, fileno(stdout)) < 0
695 outfname = "nkf.out";
698 if(freopen(outfname, "w", stdout) == NULL) {
702 if (binmode_f == TRUE) {
704 if (freopen("","wb",stdout) == NULL)
711 if (binmode_f == TRUE)
713 if (freopen("","rb",fin) == NULL)
718 setvbuffer(fin, stdibuf, IOBUF_SIZE);
735 if (dup2(fd_backup, fileno(stdout)) < 0){
738 if (stat(origfname, &sb)) {
739 fprintf(stderr, "Can't stat %s\n", origfname);
741 /* restore permissions */
742 if (chmod(outfname, sb.st_mode)) {
743 fprintf(stderr, "Can't set permission %s\n", outfname);
746 tb[0] = tb[1] = sb.st_mtime;
747 /* restore timestamp */
748 if (utime(outfname, tb)) {
749 fprintf(stderr, "Can't set timestamp %s\n", outfname);
752 if (unlink(origfname)){
756 tb.actime = sb.st_atime;
757 tb.modtime = sb.st_mtime;
758 /* restore timestamp */
759 if (utime(outfname, &tb)) {
760 fprintf(stderr, "Can't set timestamp %s\n", outfname);
763 if (rename(outfname, origfname)) {
765 fprintf(stderr, "Can't rename %s to %s\n",
766 outfname, origfname);
774 #ifdef EASYWIN /*Easy Win */
775 if (file_out == FALSE)
776 scanf("%d",&end_check);
779 #else /* for Other OS */
780 if (file_out == TRUE)
810 {"katakana-hiragana","h3"},
811 #ifdef UTF8_OUTPUT_ENABLE
815 #ifdef UTF8_INPUT_ENABLE
817 {"utf16-input", "W16"},
825 {"numchar-input", ""},
831 #ifdef SHIFTJIS_CP932
836 static int option_mode;
851 case '-': /* literal options */
852 if (!*cp) { /* ignore the rest of arguments */
856 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
858 p = (unsigned char *)long_option[i].name;
859 for (j=0;*p && *p++ == cp[j];j++);
860 if (! *p && !cp[j]) break;
863 cp = (unsigned char *)long_option[i].alias;
866 if (strcmp(long_option[i].name, "overwrite") == 0){
873 if (strcmp(long_option[i].name, "cap-input") == 0){
877 if (strcmp(long_option[i].name, "url-input") == 0){
881 if (strcmp(long_option[i].name, "numchar-input") == 0){
887 if (strcmp(long_option[i].name, "no-output") == 0){
891 if (strcmp(long_option[i].name, "debug") == 0){
896 #ifdef SHIFTJIS_CP932
897 if (strcmp(long_option[i].name, "cp932") == 0){
904 case 'b': /* buffered mode */
907 case 'u': /* unbuffered mode */
910 case 't': /* transparent mode */
913 case 'j': /* JIS output */
915 output_conv = j_oconv;
917 case 'e': /* AT&T EUC output */
918 output_conv = e_oconv;
920 case 's': /* SJIS output */
921 output_conv = s_oconv;
923 case 'l': /* ISO8859 Latin-1 support, no conversion */
924 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
925 input_f = LATIN1_INPUT;
927 case 'i': /* Kanji IN ESC-$-@/B */
928 if (*cp=='@'||*cp=='B')
931 case 'o': /* ASCII IN ESC-(-J/B */
932 if (*cp=='J'||*cp=='B'||*cp=='H')
940 if ('9'>= *cp && *cp>='0')
941 hira_f |= (*cp++ -'0');
948 #if defined(MSDOS) || defined(__OS2__)
963 #ifdef UTF8_OUTPUT_ENABLE
964 case 'w': /* UTF-8 output */
965 if ('1'== cp[0] && '6'==cp[1]) {
966 output_conv = w_oconv16; cp+=2;
968 w_oconv16_begin_f=2; cp++;
971 output_conv = w_oconv;
974 #ifdef UTF8_INPUT_ENABLE
975 case 'W': /* UTF-8 input */
976 if ('1'== cp[0] && '6'==cp[1]) {
977 input_f = UTF16_INPUT;
979 input_f = UTF8_INPUT;
982 /* Input code assumption */
983 case 'J': /* JIS input */
984 case 'E': /* AT&T EUC input */
987 case 'S': /* MS Kanji input */
988 input_f = SJIS_INPUT;
989 if (x0201_f==NO_X0201) x0201_f=TRUE;
991 case 'Z': /* Convert X0208 alphabet to ASCII */
992 /* bit:0 Convert X0208
993 bit:1 Convert Kankaku to one space
994 bit:2 Convert Kankaku to two spaces
995 bit:3 Convert HTML Entity
997 if ('9'>= *cp && *cp>='0')
998 alpha_f |= 1<<(*cp++ -'0');
1002 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1003 x0201_f = FALSE; /* No X0201->X0208 conversion */
1005 ESC-(-I in JIS, EUC, MS Kanji
1006 SI/SO in JIS, EUC, MS Kanji
1007 SSO in EUC, JIS, not in MS Kanji
1008 MS Kanji (0xa0-0xdf)
1010 ESC-(-I in JIS (0x20-0x5f)
1011 SSO in EUC (0xa0-0xdf)
1012 0xa0-0xd in MS Kanji (0xa0-0xdf)
1015 case 'X': /* Assume X0201 kana */
1016 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1019 case 'F': /* preserve new lines */
1020 fold_preserve_f = TRUE;
1021 case 'f': /* folding -f60 or -f */
1024 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1026 fold_len += *cp++ - '0';
1028 if (!(0<fold_len && fold_len<BUFSIZ))
1029 fold_len = DEFAULT_FOLD;
1033 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1035 fold_margin += *cp++ - '0';
1039 case 'm': /* MIME support */
1040 if (*cp=='B'||*cp=='Q') {
1041 mime_decode_mode = *cp++;
1042 mimebuf_f = FIXED_MIME;
1043 } else if (*cp=='N') {
1044 mime_f = TRUE; cp++;
1045 } else if (*cp=='S') {
1046 mime_f = STRICT_MIME; cp++;
1047 } else if (*cp=='0') {
1048 mime_f = FALSE; cp++;
1051 case 'M': /* MIME output */
1054 mimeout_f = FIXED_MIME; cp++;
1055 } else if (*cp=='Q') {
1057 mimeout_f = FIXED_MIME; cp++;
1062 case 'B': /* Broken JIS support */
1064 bit:1 allow any x on ESC-(-x or ESC-$-x
1065 bit:2 reset to ascii on NL
1067 if ('9'>= *cp && *cp>='0')
1068 broken_f |= 1<<(*cp++ -'0');
1073 case 'O':/* for Output file */
1077 case 'c':/* add cr code */
1080 case 'd':/* delete cr code */
1083 case 'I': /* ISO-2022-JP output */
1086 case 'L': /* line mode */
1087 if (*cp=='u') { /* unix */
1088 crmode_f = NL; cp++;
1089 } else if (*cp=='m') { /* mac */
1090 crmode_f = CR; cp++;
1091 } else if (*cp=='w') { /* windows */
1092 crmode_f = CRLF; cp++;
1093 } else if (*cp=='0') { /* no conversion */
1098 /* multiple options in a string are allowed for the Perl module */
1099 while(*cp && *cp!='-') cp++;
1103 /* bogus option but ignored */
1109 #ifdef ANSI_C_PROTOTYPE
1110 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1112 void set_iconv(f, iconv_func)
1114 int (*iconv_func)();
1118 static int (*iconv_for_check)() = 0;
1120 #ifdef INPUT_CODE_FIX
1128 #ifdef INPUT_CODE_FIX
1129 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1135 if (estab_f && iconv_for_check != iconv){
1136 #ifdef UTF8_INPUT_ENABLE
1137 if (iconv == w_iconv) debug("UTF-8\n");
1138 if (iconv == w_iconv16) debug("UTF-16\n");
1140 if (iconv == s_iconv) debug("Shift_JIS\n");
1141 if (iconv == e_iconv) debug("EUC-JP\n");
1142 iconv_for_check = iconv;
1147 #define SCORE_KANA (1) /* so-called half-width kana */
1148 #define SCORE_DEPEND (SCORE_KANA << 1) /* machine-dependent characters */
1149 #ifdef SHIFTJIS_CP932
1150 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character re-read (remapped) via CP932 */
1151 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1153 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1155 #define SCORE_ERROR (SCORE_NO_EXIST << 1) /* error */
1156 int score_table_A0[] = {
1159 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1160 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1163 int score_table_F0[] = {
1165 0, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1166 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1167 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1170 void set_code_score(ptr, score)
1171 struct input_code *ptr;
1174 ptr->score |= score;
1177 void code_score(ptr)
1178 struct input_code *ptr;
1180 int c2 = ptr->buf[0];
1181 int c1 = ptr->buf[1];
1183 set_code_score(ptr, SCORE_ERROR);
1184 }else if ((c2 & 0xf0) == 0xa0){
1185 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1186 }else if ((c2 & 0xf0) == 0xf0){
1187 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1188 }else if (c2 == SSO){
1189 set_code_score(ptr, SCORE_KANA);
1191 #ifdef UTF8_OUTPUT_ENABLE
1192 else if (!e2w_conv(c2, c1)){
1193 set_code_score(ptr, SCORE_NO_EXIST);
1198 void status_disable(ptr)
1199 struct input_code *ptr;
1204 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1207 void status_push_ch(ptr, c)
1208 struct input_code *ptr;
1211 ptr->buf[ptr->index++] = c;
1214 void status_reset(ptr)
1215 struct input_code *ptr;
1222 void status_check(ptr, c)
1223 struct input_code *ptr;
1226 if (c <= DEL && estab_f){
1231 void s_status(ptr, c)
1232 struct input_code *ptr;
1237 status_check(ptr, c);
1242 }else if (0xa1 <= c && c <= 0xdf){
1243 status_push_ch(ptr, SSO);
1244 status_push_ch(ptr, c);
1247 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1249 status_push_ch(ptr, c);
1250 #ifdef SHIFTJIS_CP932
1252 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1254 status_push_ch(ptr, c);
1255 #endif /* SHIFTJIS_CP932 */
1257 status_disable(ptr);
1261 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1262 status_push_ch(ptr, c);
1263 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1267 status_disable(ptr);
1270 #ifdef SHIFTJIS_CP932
1272 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1273 status_push_ch(ptr, c);
1274 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1275 set_code_score(ptr, SCORE_CP932);
1280 status_disable(ptr);
1282 #endif /* SHIFTJIS_CP932 */
1286 void e_status(ptr, c)
1287 struct input_code *ptr;
1292 status_check(ptr, c);
1297 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1299 status_push_ch(ptr, c);
1301 status_disable(ptr);
1305 if (0xa1 <= c && c <= 0xfe){
1306 status_push_ch(ptr, c);
1310 status_disable(ptr);
1316 #ifdef UTF8_INPUT_ENABLE
1317 void w_status(ptr, c)
1318 struct input_code *ptr;
1323 status_check(ptr, c);
1328 }else if (0xc0 <= c && c <= 0xdf){
1330 status_push_ch(ptr, c);
1331 }else if (0xe0 <= c && c <= 0xef){
1333 status_push_ch(ptr, c);
1335 status_disable(ptr);
1340 if (0x80 <= c && c <= 0xbf){
1341 status_push_ch(ptr, c);
1342 if (ptr->index > ptr->stat){
1343 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1344 &ptr->buf[0], &ptr->buf[1]);
1349 status_disable(ptr);
1360 int action_flag = 1;
1361 struct input_code *result = 0;
1362 struct input_code *p = input_code_list;
1364 (p->status_func)(p, c);
1367 }else if(p->stat == 0){
1379 set_iconv(TRUE, result->iconv_func);
1380 }else if (c <= DEL){
1381 struct input_code *ptr = input_code_list;
1391 #define STD_GC_BUFSIZE (256)
1392 int std_gc_buf[STD_GC_BUFSIZE];
1402 return std_gc_buf[--std_gc_ndx];
1414 if (std_gc_ndx == STD_GC_BUFSIZE){
1417 std_gc_buf[std_gc_ndx++] = c;
1437 while ((c = (*i_getc)(f)) != EOF)
1446 oconv = output_conv;
1449 /* replace continuation module, from output side */
1451 /* output redirection */
1460 if (mimeout_f == TRUE) {
1461 o_base64conv = oconv; oconv = base64_conv;
1463 /* base64_count = 0; */
1467 o_crconv = oconv; oconv = cr_conv;
1470 o_rot_conv = oconv; oconv = rot_conv;
1473 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1476 o_hira_conv = oconv; oconv = hira_conv;
1479 o_fconv = oconv; oconv = fold_conv;
1482 if (alpha_f || x0201_f) {
1483 o_zconv = oconv; oconv = z_conv;
1487 /* input redirection */
1490 i_cgetc = i_getc; i_getc = cap_getc;
1491 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1494 i_ugetc = i_getc; i_getc = url_getc;
1495 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1498 i_ngetc = i_getc; i_getc = numchar_getc;
1499 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1502 if (mime_f && mimebuf_f==FIXED_MIME) {
1503 i_mgetc = i_getc; i_getc = mime_getc;
1504 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1507 i_bgetc = i_getc; i_getc = broken_getc;
1508 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1510 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1511 set_iconv(-TRUE, e_iconv);
1512 } else if (input_f == SJIS_INPUT) {
1513 set_iconv(-TRUE, s_iconv);
1514 #ifdef UTF8_INPUT_ENABLE
1515 } else if (input_f == UTF8_INPUT) {
1516 set_iconv(-TRUE, w_iconv);
1517 } else if (input_f == UTF16_INPUT) {
1518 set_iconv(-TRUE, w_iconv16);
1521 set_iconv(FALSE, e_iconv);
1525 struct input_code *p = input_code_list;
1533 Conversion main loop. Code detection only.
1543 module_connection();
1548 output_mode = ASCII;
1551 #define NEXT continue /* no output, get next */
1552 #define SEND ; /* output c1 and c2, get next */
1553 #define LAST break /* end of loop, go closing */
1555 while ((c1 = (*i_getc)(f)) != EOF) {
1560 /* in case of 8th bit is on */
1562 /* in case of not established yet */
1563 /* It is still ambiguous */
1564 if (h_conv(f, c2, c1)==EOF)
1570 /* in case of already established */
1572 /* ignore bogus code */
1578 /* second byte, 7 bit code */
1579 /* it might be kanji shifted */
1580 if ((c1 == DEL) || (c1 <= SPACE)) {
1581 /* ignore bogus first code */
1589 #ifdef UTF8_INPUT_ENABLE
1598 } else if (c1 > DEL) {
1600 if (!estab_f && !iso8859_f) {
1601 /* not established yet */
1604 } else { /* estab_f==TRUE */
1609 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1610 /* SJIS X0201 Case... */
1611 if(iso2022jp_f && x0201_f==NO_X0201) {
1612 (*oconv)(GETA1, GETA2);
1619 } else if (c1==SSO && iconv != s_iconv) {
1620 /* EUC X0201 Case */
1621 c1 = (*i_getc)(f); /* skip SSO */
1623 if (SSP<=c1 && c1<0xe0) {
1624 if(iso2022jp_f && x0201_f==NO_X0201) {
1625 (*oconv)(GETA1, GETA2);
1632 } else { /* bogus code, skip SSO and one byte */
1636 /* already established */
1641 } else if ((c1 > SPACE) && (c1 != DEL)) {
1642 /* in case of Roman characters */
1644 /* output 1 shifted byte */
1648 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1649 /* output 1 shifted byte */
1650 if(iso2022jp_f && x0201_f==NO_X0201) {
1651 (*oconv)(GETA1, GETA2);
1658 /* look like bogus code */
1661 } else if (input_mode == X0208) {
1662 /* in case of Kanji shifted */
1665 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1666 /* Check MIME code */
1667 if ((c1 = (*i_getc)(f)) == EOF) {
1670 } else if (c1 == '?') {
1671 /* =? is mime conversion start sequence */
1672 if(mime_f == STRICT_MIME) {
1673 /* check in real detail */
1674 if (mime_begin_strict(f) == EOF)
1678 } else if (mime_begin(f) == EOF)
1688 /* normal ASCII code */
1691 } else if (c1 == SI) {
1694 } else if (c1 == SO) {
1697 } else if (c1 == ESC ) {
1698 if ((c1 = (*i_getc)(f)) == EOF) {
1699 /* (*oconv)(0, ESC); don't send bogus code */
1701 } else if (c1 == '$') {
1702 if ((c1 = (*i_getc)(f)) == EOF) {
1704 (*oconv)(0, ESC); don't send bogus code
1705 (*oconv)(0, '$'); */
1707 } else if (c1 == '@'|| c1 == 'B') {
1708 /* This is kanji introduction */
1712 } else if (c1 == '(') {
1713 if ((c1 = (*i_getc)(f)) == EOF) {
1714 /* don't send bogus code
1720 } else if (c1 == '@'|| c1 == 'B') {
1721 /* This is kanji introduction */
1726 /* could be some special code */
1733 } else if (broken_f&0x2) {
1734 /* accept any ESC-(-x as broken code ... */
1744 } else if (c1 == '(') {
1745 if ((c1 = (*i_getc)(f)) == EOF) {
1746 /* don't send bogus code
1748 (*oconv)(0, '('); */
1752 /* This is X0201 kana introduction */
1753 input_mode = X0201; shift_mode = X0201;
1755 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
1756 /* This is X0208 kanji introduction */
1757 input_mode = ASCII; shift_mode = FALSE;
1759 } else if (broken_f&0x2) {
1760 input_mode = ASCII; shift_mode = FALSE;
1765 /* maintain various input_mode here */
1769 } else if ( c1 == 'N' || c1 == 'n' ){
1771 c1 = (*i_getc)(f); /* skip SS2 */
1772 if ( SPACE<=c1 && c1 < 0xe0 ) {
1781 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
1782 input_mode = ASCII; set_iconv(FALSE, 0);
1788 if (input_mode == X0208)
1789 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
1790 else if (input_mode)
1791 (*oconv)(input_mode, c1); /* other special case */
1792 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
1793 int c0 = (*i_getc)(f);
1796 (*iconv)(c2, c1, c0);
1802 /* goto next_word */
1806 (*iconv)(EOF, 0, 0);
1819 /** it must NOT be in the kanji shifted sequence */
1820 /** it must NOT be written in JIS7 */
1821 /** and it must be after 2 byte 8bit code */
1828 while ((c1 = (*i_getc)(f)) != EOF) {
1834 if (push_hold_buf(c1) == EOF || estab_f){
1840 struct input_code *p = input_code_list;
1841 struct input_code *result = p;
1843 if (p->score < result->score){
1848 set_iconv(FALSE, p->iconv_func);
1853 ** 1) EOF is detected, or
1854 ** 2) Code is established, or
1855 ** 3) Buffer is FULL (but last word is pushed)
1857 ** in 1) and 3) cases, we continue to use
1858 ** Kanji codes by oconv and leave estab_f unchanged.
1862 while (wc < hold_count){
1863 c2 = hold_buf[wc++];
1867 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
1868 (*iconv)(X0201, c2, 0);
1871 if (wc < hold_count){
1872 c1 = hold_buf[wc++];
1875 if (c1 == EOF) break;
1878 if ((*iconv)(c2, c1, 0) < 0){
1880 if (wc < hold_count){
1881 c0 = hold_buf[wc++];
1884 if (c0 == EOF) break;
1887 (*iconv)(c2, c1, c0);
1901 if (hold_count >= HOLD_SIZE*2)
1903 hold_buf[hold_count++] = c2;
1904 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
1907 int s2e_conv(c2, c1, p2, p1)
1911 #ifdef SHIFTJIS_CP932
1912 if (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
1913 extern unsigned short shiftjis_cp932[3][189];
1914 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1915 if (c1 == 0) return 1;
1919 #endif /* SHIFTJIS_CP932 */
1920 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
1922 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
1939 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1942 int ret = s2e_conv(c2, c1, &c2, &c1);
1943 if (ret) return ret;
1956 } else if (c2 == SSO){
1959 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1969 #ifdef UTF8_INPUT_ENABLE
1971 w2e_conv(c2, c1, c0, p2, p1)
1975 extern unsigned short * utf8_to_euc_2bytes[];
1976 extern unsigned short ** utf8_to_euc_3bytes[];
1978 if (0xc0 <= c2 && c2 <= 0xef) {
1979 unsigned short **pp;
1982 if (c0 == 0) return -1;
1983 pp = utf8_to_euc_3bytes[c2 - 0x80];
1984 return w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
1986 return w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
1988 } else if (c2 == X0201) {
2001 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2009 w16w_conv(val, p2, p1, p0)
2017 }else if (val < 0x800){
2018 *p2 = 0xc0 | (val >> 6);
2019 *p1 = 0x80 | (val & 0x3f);
2022 *p2 = 0xe0 | (val >> 12);
2023 *p1 = 0x80 | ((val >> 6) & 0x3f);
2024 *p0 = 0x80 | (val & 0x3f);
2029 w16e_conv(val, p2, p1)
2033 extern unsigned short * utf8_to_euc_2bytes[];
2034 extern unsigned short ** utf8_to_euc_3bytes[];
2036 unsigned short **pp;
2039 w16w_conv(val, &c2, &c1, &c0);
2042 pp = utf8_to_euc_3bytes[c2 - 0x80];
2043 psize = sizeof_utf8_to_euc_C2;
2045 pp = utf8_to_euc_2bytes;
2046 psize = sizeof_utf8_to_euc_2bytes;
2048 return w_iconv_common(c1, c0, pp, psize, p2, p1);
2054 w_iconv16(c2, c1, c0)
2059 if (c2==0376 && c1==0377){
2060 utf16_mode = UTF16_INPUT;
2062 } else if (c2==0377 && c1==0376){
2063 utf16_mode = UTF16BE_INPUT;
2066 if (utf16_mode == UTF16BE_INPUT) {
2068 tmp=c1; c1=c2; c2=tmp;
2070 if (c2==0 || c2==EOF) {
2074 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2075 if (ret) return ret;
2081 w_iconv_common(c1, c0, pp, psize, p2, p1)
2083 unsigned short **pp;
2091 if (pp == 0) return 1;
2094 if (c1 < 0 || psize <= c1) return 1;
2096 if (p == 0) return 1;
2099 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2101 if (val == 0) return 1;
2104 if (c2 == SO) c2 = X0201;
2113 #ifdef UTF8_OUTPUT_ENABLE
2118 extern unsigned short euc_to_utf8_1byte[];
2119 extern unsigned short * euc_to_utf8_2bytes[];
2123 p = euc_to_utf8_1byte;
2126 c2 = (c2&0x7f) - 0x21;
2127 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2128 p = euc_to_utf8_2bytes[c2];
2133 c1 = (c1 & 0x7f) - 0x21;
2134 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2147 } else if (c2 == 0) {
2148 output_mode = ASCII;
2150 } else if (c2 == ISO8859_1) {
2151 output_mode = ISO8859_1;
2152 (*o_putc)(c1 | 0x080);
2154 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2157 if (0 < val && val < 0x80){
2159 }else if (val < 0x800){
2160 (*o_putc)(0xc0 | (val >> 6));
2161 (*o_putc)(0x80 | (val & 0x3f));
2163 (*o_putc)(0xe0 | (val >> 12));
2164 (*o_putc)(0x80 | ((val >> 6) & 0x3f));
2165 (*o_putc)(0x80 | (val & 0x3f));
2176 if (w_oconv16_begin_f==2) {
2179 w_oconv16_begin_f=1;
2184 } else if (c2 == 0) {
2187 } else if (c2 == ISO8859_1) {
2189 (*o_putc)(c1 | 0x080);
2191 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2192 (*o_putc)((val&0xff00)>>8);
2193 (*o_putc)(val&0xff);
2207 } else if (c2 == 0) {
2208 output_mode = ASCII;
2210 } else if (c2 == X0201) {
2211 output_mode = JAPANESE_EUC;
2212 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2213 } else if (c2 == ISO8859_1) {
2214 output_mode = ISO8859_1;
2215 (*o_putc)(c1 | 0x080);
2217 if ((c1<0x20 || 0x7e<c1) ||
2218 (c2<0x20 || 0x7e<c2)) {
2219 set_iconv(FALSE, 0);
2220 return; /* too late to rescue this char */
2222 output_mode = JAPANESE_EUC;
2223 (*o_putc)(c2 | 0x080);
2224 (*o_putc)(c1 | 0x080);
2229 e2s_conv(c2, c1, p2, p1)
2230 int c2, c1, *p2, *p1;
2232 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2233 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2244 } else if (c2 == 0) {
2245 output_mode = ASCII;
2247 } else if (c2 == X0201) {
2248 output_mode = SHIFT_JIS;
2250 } else if (c2 == ISO8859_1) {
2251 output_mode = ISO8859_1;
2252 (*o_putc)(c1 | 0x080);
2254 if ((c1<0x20 || 0x7e<c1) ||
2255 (c2<0x20 || 0x7e<c2)) {
2256 set_iconv(FALSE, 0);
2257 return; /* too late to rescue this char */
2259 output_mode = SHIFT_JIS;
2260 e2s_conv(c2, c1, &c2, &c1);
2272 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2275 (*o_putc)(ascii_intro);
2276 output_mode = ASCII;
2279 } else if (c2==X0201) {
2280 if (output_mode!=X0201) {
2281 output_mode = X0201;
2287 } else if (c2==ISO8859_1) {
2288 /* iso8859 introduction, or 8th bit on */
2289 /* Can we convert in 7bit form using ESC-'-'-A ?
2291 output_mode = ISO8859_1;
2293 } else if (c2 == 0) {
2294 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2297 (*o_putc)(ascii_intro);
2298 output_mode = ASCII;
2302 if (output_mode != X0208) {
2303 output_mode = X0208;
2306 (*o_putc)(kanji_intro);
2308 if (c1<0x20 || 0x7e<c1)
2310 if (c2<0x20 || 0x7e<c2)
2322 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2324 } else if (base64_count>66 && mimeout_mode) {
2325 (*o_base64conv)(EOF,0);
2327 (*o_putc)('\t'); base64_count += 7;
2329 (*o_base64conv)(c2,c1);
2333 static int broken_buf[3];
2334 static int broken_counter = 0;
2335 static int broken_last = 0;
2342 if (broken_counter>0) {
2343 return broken_buf[--broken_counter];
2346 if (c=='$' && broken_last != ESC
2347 && (input_mode==ASCII || input_mode==X0201)) {
2350 if (c1=='@'|| c1=='B') {
2351 broken_buf[0]=c1; broken_buf[1]=c;
2358 } else if (c=='(' && broken_last != ESC
2359 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2362 if (c1=='J'|| c1=='B') {
2363 broken_buf[0]=c1; broken_buf[1]=c;
2381 if (broken_counter<2)
2382 broken_buf[broken_counter++]=c;
2386 static int prev_cr = 0;
2394 if (! (c2==0&&c1==NL) ) {
2400 } else if (c1=='\r') {
2402 } else if (c1=='\n') {
2403 if (crmode_f==CRLF) {
2404 (*o_crconv)(0,'\r');
2405 } else if (crmode_f==CR) {
2406 (*o_crconv)(0,'\r');
2410 } else if (c1!='\032' || crmode_f!=NL){
2416 Return value of fold_conv()
2418 \n add newline and output char
2419 \r add newline and output nothing
2422 1 (or else) normal output
2424 fold state in prev (previous character)
2426 >0x80 Japanese (X0208/X0201)
2431 This fold algorithm does not preserve leading space in a line.
2432 This is the main difference from fmt.
/*
 * Width counted for one character when tracking the folded line length:
 * 2 when c2 is non-zero, otherwise 1.  c1 is accepted but not used.
 * The parameter is parenthesized so that an expression argument
 * (e.g. a conditional expression) expands with the intended precedence.
 */
#define char_size(c2,c1) ((c2) ? 2 : 1)
2444 if (c1== '\r' && !fold_preserve_f) {
2445 fold_state=0; /* ignore cr */
2446 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2448 fold_state=0; /* ignore cr */
2449 } else if (c1== BS) {
2450 if (f_line>0) f_line--;
2452 } else if (c2==EOF && f_line != 0) { /* close open last line */
2454 } else if ((c1=='\n' && !fold_preserve_f)
2455 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2456 && fold_preserve_f)) {
2458 if (fold_preserve_f) {
2462 } else if ((f_prev == c1 && !fold_preserve_f)
2463 || (f_prev == '\n' && fold_preserve_f)
2464 ) { /* duplicate newline */
2467 fold_state = '\n'; /* output two newline */
2473 if (f_prev&0x80) { /* Japanese? */
2475 fold_state = 0; /* ignore given single newline */
2476 } else if (f_prev==' ') {
2480 if (++f_line<=fold_len)
2484 fold_state = '\r'; /* fold and output nothing */
2488 } else if (c1=='\f') {
2493 fold_state = '\n'; /* output newline and clear */
2494 } else if ( (c2==0 && c1==' ')||
2495 (c2==0 && c1=='\t')||
2496 (c2=='!'&& c1=='!')) {
2497 /* X0208 kankaku or ascii space */
2498 if (f_prev == ' ') {
2499 fold_state = 0; /* remove duplicate spaces */
2502 if (++f_line<=fold_len)
2503 fold_state = ' '; /* output ASCII space only */
2505 f_prev = ' '; f_line = 0;
2506 fold_state = '\r'; /* fold and output nothing */
2510 prev0 = f_prev; /* we still need this one... , but almost done */
2512 if (c2 || c2==X0201)
2513 f_prev |= 0x80; /* this is Japanese */
2514 f_line += char_size(c2,c1);
2515 if (f_line<=fold_len) { /* normal case */
2518 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2519 f_line = char_size(c2,c1);
2520 fold_state = '\n'; /* We can't wait, do fold now */
2521 } else if (c2==X0201) {
2522 /* simple kinsoku rules return 1 means no folding */
2523 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2524 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2525 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2526 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2527 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2528 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2529 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2531 fold_state = '\n';/* add one new f_line before this character */
2534 fold_state = '\n';/* add one new f_line before this character */
2537 /* kinsoku point in ASCII */
2538 if ( c1==')'|| /* { [ ( */
2549 /* just after special */
2550 } else if (!is_alnum(prev0)) {
2551 f_line = char_size(c2,c1);
2553 } else if ((prev0==' ') || /* ignored new f_line */
2554 (prev0=='\n')|| /* ignored new f_line */
2555 (prev0&0x80)) { /* X0208 - ASCII */
2556 f_line = char_size(c2,c1);
2557 fold_state = '\n';/* add one new f_line before this character */
2559 fold_state = 1; /* default no fold in ASCII */
2563 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2564 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2565 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2566 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2567 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2568 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2569 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2570 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2571 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2572 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2573 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2574 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2575 /* default no fold in kinsoku */
2578 f_line = char_size(c2,c1);
2579 /* add one new f_line before this character */
2582 f_line = char_size(c2,c1);
2584 /* add one new f_line before this character */
2589 /* terminator process */
2590 switch(fold_state) {
2609 int z_prev2=0,z_prev1=0;
2616 /* if (c2) c1 &= 0x7f; assertion */
2618 if (x0201_f && z_prev2==X0201) { /* X0201 */
2619 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
2621 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
2623 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
2625 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
2629 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
2638 if (x0201_f && c2==X0201) {
2639 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
2640 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
2641 z_prev1 = c1; z_prev2 = c2;
2644 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
2649 /* JISX0208 Alphabet */
2650 if (alpha_f && c2 == 0x23 ) {
2652 } else if (alpha_f && c2 == 0x21 ) {
2653 /* JISX0208 Kigou */
2658 } else if (alpha_f&0x4) {
2663 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
2669 case '>': entity = ">"; break;
2670 case '<': entity = "<"; break;
2671 case '\"': entity = """; break;
2672 case '&': entity = "&"; break;
2675 while (*entity) (*o_zconv)(0, *entity++);
2685 #define rot13(c) ( \
2687 (c <= 'M') ? (c + 13): \
2688 (c <= 'Z') ? (c - 13): \
2690 (c <= 'm') ? (c + 13): \
2691 (c <= 'z') ? (c - 13): \
2695 #define rot47(c) ( \
2697 ( c <= 'O' ) ? (c + 47) : \
2698 ( c <= '~' ) ? (c - 47) : \
2706 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
2712 (*o_rot_conv)(c2,c1);
2719 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
2721 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
2724 (*o_hira_conv)(c2,c1);
2729 iso2022jp_check_conv(c2,c1)
2732 static int range[RANGE_NUM_MAX][2] = {
2755 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
2759 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
2764 for (i = 0; i < RANGE_NUM_MAX; i++) {
2765 start = range[i][0];
2768 if (c >= start && c <= end) {
2773 (*o_iso2022jp_check_conv)(c2,c1);
2777 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
2779 unsigned char *mime_pattern[] = {
2780 (unsigned char *)"\075?EUC-JP?B?",
2781 (unsigned char *)"\075?SHIFT_JIS?B?",
2782 (unsigned char *)"\075?ISO-8859-1?Q?",
2783 (unsigned char *)"\075?ISO-2022-JP?B?",
2784 (unsigned char *)"\075?ISO-2022-JP?Q?",
2785 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2786 (unsigned char *)"\075?UTF-8?B?",
2791 int mime_encode[] = {
2792 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, X0208, X0201,
2793 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2799 int mime_encode_method[] = {
2800 'B', 'B','Q', 'B', 'Q',
2801 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/* Size of the recovery buffer / scan limit used while matching a MIME preamble. */
#define MAXRECOVER 20

/*
 * Hand-rolled ASCII character helpers (the portability of toupper is not
 * trusted).  Every use of the parameter is parenthesized so that an
 * expression argument expands with the intended precedence.  The argument
 * is still evaluated more than once: do not pass expressions with side
 * effects.
 */
#define nkf_toupper(c)  (('a' <= (c) && (c) <= 'z') ? ((c) - ('a' - 'A')) : (c))
#define nkf_isdigit(c)  ('0' <= (c) && (c) <= '9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
2818 if (i_getc!=mime_getc) {
2819 i_mgetc = i_getc; i_getc = mime_getc;
2820 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2821 if(mime_f==STRICT_MIME) {
2822 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
2823 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
2829 unswitch_mime_getc()
2831 if(mime_f==STRICT_MIME) {
2832 i_mgetc = i_mgetc_buf;
2833 i_mungetc = i_mungetc_buf;
2836 i_ungetc = i_mungetc;
2840 mime_begin_strict(f)
2845 unsigned char *p,*q;
2846 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
2848 mime_decode_mode = FALSE;
2849 /* =? has been checked */
2851 p = mime_pattern[j];
2854 for(i=2;p[i]>' ';i++) { /* start at =? */
2855 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
2856 /* pattern fails, try next one */
2858 while ((p = mime_pattern[++j])) {
2859 for(k=2;k<i;k++) /* assume length(p) > i */
2860 if (p[k]!=q[k]) break;
2861 if (k==i && nkf_toupper(c1)==p[k]) break;
2863 if (p) continue; /* found next one, continue */
2864 /* all fails, output from recovery buffer */
2872 mime_decode_mode = p[i-2];
2873 if (mime_decode_mode=='B') {
2874 mimebuf_f = unbuf_f;
2876 /* do MIME integrity check */
2877 return mime_integrity(f,mime_pattern[j]);
2889 /* we don't keep eof of Fifo, because it contains ?= as
2890 a terminator. It was checked in mime_integrity. */
2891 return ((mimebuf_f)?
2892 (*i_mgetc_buf)(f):Fifo(mime_input++));
2896 mime_ungetc_buf(c,f)
2901 (*i_mungetc_buf)(c,f);
2903 Fifo(--mime_input)=c;
2914 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
2915 /* re-read and convert again from mime_buffer. */
2917 /* =? has been checked */
2919 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
2920 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
2921 /* We accept any character type even if it is broken by new lines */
2922 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
2923 if (c1=='\n'||c1==' '||c1=='\r'||
2924 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
2926 /* Failed. But this could be another MIME preamble */
2934 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2935 if (!(++i<MAXRECOVER) || c1==EOF) break;
2936 if (c1=='b'||c1=='B') {
2937 mime_decode_mode = 'B';
2938 } else if (c1=='q'||c1=='Q') {
2939 mime_decode_mode = 'Q';
2943 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2944 if (!(++i<MAXRECOVER) || c1==EOF) break;
2946 mime_decode_mode = FALSE;
2952 if (!mime_decode_mode) {
2953 /* false MIME preamble, restart from mime_buffer */
2954 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
2955 /* Since we are in MIME mode until buffer becomes empty, */
2956 /* we never go into mime_begin again for a while. */
2959 /* discard mime preamble, and goto MIME mode */
2961 /* do no MIME integrity check */
2962 return c1; /* used only for checking EOF */
2987 if (nkf_isdigit(x)) return x - '0';
2988 return nkf_toupper(x) - 'A' + 10;
2991 #ifdef ANSI_C_PROTOTYPE
2992 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
2995 hex_getc(ch, f, g, u)
3008 if (!nkf_isxdigit(c2)){
3013 if (!nkf_isxdigit(c3)){
3018 return (hex2bin(c2) << 4) | hex2bin(c3);
3025 return hex_getc(':', f, i_cgetc, i_cungetc);
3033 return (*i_cungetc)(c, f);
3040 return hex_getc('%', f, i_ugetc, i_uungetc);
3048 return (*i_uungetc)(c, f);
3055 int (*g)() = i_ngetc;
3056 int (*u)() = i_nungetc;
3067 if (buf[i] == 'x' || buf[i] == 'X'){
3068 for (j = 0; j < 5; j++){
3070 if (!nkf_isxdigit(buf[i])){
3077 c |= hex2bin(buf[i]);
3080 for (j = 0; j < 6; j++){
3084 if (!nkf_isdigit(buf[i])){
3091 c += hex2bin(buf[i]);
3102 w16w_conv(c, &c2, &c1, &c0);
3103 if (iconv == w_iconv){
3110 if (w2e_conv(c2, c1, c0, &c2, &c1) == 0){
3113 if (iconv == s_iconv){
3114 e2s_conv(c2, c1, &c2, &c1);
3129 numchar_ungetc(c, f)
3133 return (*i_nungetc)(c, f);
3142 int c1, c2, c3, c4, cc;
3143 int t1, t2, t3, t4, mode, exit_mode;
3145 if (mime_top != mime_last) { /* Something is in FIFO */
3146 return Fifo(mime_top++);
3148 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3149 mime_decode_mode=FALSE;
3150 unswitch_mime_getc();
3151 return (*i_getc)(f);
3154 if (mimebuf_f == FIXED_MIME)
3155 exit_mode = mime_decode_mode;
3158 if (mime_decode_mode == 'Q') {
3159 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3161 if (c1=='_') return ' ';
3162 if (c1!='=' && c1!='?') {
3166 mime_decode_mode = exit_mode; /* prepare for quit */
3167 if (c1<=' ') return c1;
3168 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3169 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3170 /* end Q encoding */
3171 input_mode = exit_mode;
3172 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3173 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3176 if (c1=='='&&c2<' ') { /* this is soft wrap */
3177 while((c1 = (*i_mgetc)(f)) <=' ') {
3178 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3180 mime_decode_mode = 'Q'; /* still in MIME */
3181 goto restart_mime_q;
3184 mime_decode_mode = 'Q'; /* still in MIME */
3188 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3189 if (c2<=' ') return c2;
3190 mime_decode_mode = 'Q'; /* still in MIME */
3191 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3192 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3193 return ((hex(c2)<<4) + hex(c3));
3196 if (mime_decode_mode != 'B') {
3197 mime_decode_mode = FALSE;
3198 return (*i_mgetc)(f);
3202 /* Base64 encoding */
3204 MIME allows line break in the middle of
3205 Base64, but we are very pessimistic in decoding
3206 in unbuf mode because MIME encoded code may be broken by
3207 less or editor's control sequence (such as ESC-[-K in unbuffered
3208 mode. ignore incomplete MIME.
3210 mode = mime_decode_mode;
3211 mime_decode_mode = exit_mode; /* prepare for quit */
3213 while ((c1 = (*i_mgetc)(f))<=' ') {
3218 if ((c2 = (*i_mgetc)(f))<=' ') {
3221 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3222 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3225 if ((c1 == '?') && (c2 == '=')) {
3227 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3228 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3232 if ((c3 = (*i_mgetc)(f))<=' ') {
3235 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3236 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3240 if ((c4 = (*i_mgetc)(f))<=' ') {
3243 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3244 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3248 mime_decode_mode = mode; /* still in MIME sigh... */
3250 /* BASE 64 decoding */
3252 t1 = 0x3f & base64decode(c1);
3253 t2 = 0x3f & base64decode(c2);
3254 t3 = 0x3f & base64decode(c3);
3255 t4 = 0x3f & base64decode(c4);
3256 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3258 Fifo(mime_last++) = cc;
3259 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3261 Fifo(mime_last++) = cc;
3262 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3264 Fifo(mime_last++) = cc;
3269 return Fifo(mime_top++);
3277 Fifo(--mime_top) = c;
3288 /* In buffered mode, read until =? or NL or buffer full
3290 mime_input = mime_top;
3291 mime_last = mime_top;
3292 while(*p) Fifo(mime_input++) = *p++;
3295 while((c=(*i_getc)(f))!=EOF) {
3296 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3297 break; /* buffer full */
3299 if (c=='=' && d=='?') {
3300 /* checked. skip header, start decode */
3301 Fifo(mime_input++) = c;
3302 /* mime_last_input = mime_input; */
3307 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3309 /* Should we check length mod 4? */
3310 Fifo(mime_input++) = c;
3313 /* In case of Incomplete MIME, no MIME decode */
3314 Fifo(mime_input++) = c;
3315 mime_last = mime_input; /* point undecoded buffer */
3316 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3317 switch_mime_getc(); /* anyway we need buffered getc */
3328 i = c - 'A'; /* A..Z 0-25 */
3330 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3332 } else if (c > '/') {
3333 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3334 } else if (c == '+') {
3335 i = '>' /* 62 */ ; /* + 62 */
3337 i = '?' /* 63 */ ; /* / 63 */
/* Base64 alphabet: basis_64[v] is the output character for the 6-bit value v (0..63). */
3342 static char basis_64[] =
3343 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3353 p = mime_pattern[0];
3354 for(i=0;mime_encode[i];i++) {
3355 if (mode == mime_encode[i]) {
3356 p = mime_pattern[i];
3360 mimeout_mode = mime_encode_method[i];
3362 /* (*o_mputc)(' '); */
/*
 * Convert a nibble value (0..15) to its uppercase hexadecimal digit
 * character.  The parameter is parenthesized so that callers may pass an
 * expression such as (x >> 4) or (x & 0xf) without extra parentheses.
 * The argument is evaluated more than once: avoid side effects.
 */
#define itoh4(c) (((c) >= 10) ? ((c) + 'A' - 10) : ((c) + '0'))
3385 if (mimeout_f==FIXED_MIME) {
3386 if (base64_count>71) {
3394 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3395 && mimeout_f!=FIXED_MIME) {
3396 if (mimeout_mode=='Q') {
3403 if (mimeout_mode!='B' || c!=SPACE) {
3412 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3413 open_mime(output_mode);
3415 } else { /* c==EOF */
3416 switch(mimeout_mode) {
3421 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3427 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3433 if (mimeout_f!=FIXED_MIME) {
3435 } else if (mimeout_mode != 'Q')
3440 switch(mimeout_mode) {
3444 (*o_mputc)(itoh4(((c>>4)&0xf)));
3445 (*o_mputc)(itoh4((c&0xf)));
3452 (*o_mputc)(basis_64[c>>2]);
3457 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3463 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3464 (*o_mputc)(basis_64[c & 0x3F]);
3484 mime_f = STRICT_MIME;
3488 #if defined(MSDOS) || defined(__OS2__)
3493 iso2022jp_f = FALSE;
3495 kanji_intro = DEFAULT_J;
3496 ascii_intro = DEFAULT_R;
3498 output_conv = DEFAULT_CONV;
3499 oconv = DEFAULT_CONV;
3502 i_mungetc = std_ungetc;
3503 i_mgetc_buf = std_getc;
3504 i_mungetc_buf = std_ungetc;
3507 i_ungetc=std_ungetc;
3510 i_bungetc= std_ungetc;
3514 o_crconv = no_connection;
3515 o_rot_conv = no_connection;
3516 o_iso2022jp_check_conv = no_connection;
3517 o_hira_conv = no_connection;
3518 o_fconv = no_connection;
3519 o_zconv = no_connection;
3522 i_ungetc = std_ungetc;
3524 i_mungetc = std_ungetc;
3526 output_mode = ASCII;
3529 mime_decode_mode = FALSE;
3538 struct input_code *p = input_code_list;
3543 #ifdef UTF8_OUTPUT_ENABLE
3544 if (w_oconv16_begin_f) {
3545 w_oconv16_begin_f = 2;
3550 fold_preserve_f = FALSE;
3553 fold_margin = FOLD_MARGIN;
3556 z_prev2=0,z_prev1=0;
3562 no_connection(c2,c1)
3565 no_connection2(c2,c1,0);
3569 no_connection2(c2,c1,c0)
3572 fprintf(stderr,"nkf internal module connection failure.\n");
3580 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
3581 fprintf(stderr,"Flags:\n");
3582 fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
3583 #ifdef DEFAULT_CODE_SJIS
3584 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
3586 #ifdef DEFAULT_CODE_JIS
3587 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
3589 #ifdef DEFAULT_CODE_EUC
3590 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
3592 #ifdef DEFAULT_CODE_UTF8
3593 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
3595 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
3596 fprintf(stderr,"t no conversion\n");
3597 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
3598 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
3599 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
3600 fprintf(stderr,"v Show this usage. V: show version\n");
3601 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
3602 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
3603 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
3604 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
3605 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
3606 fprintf(stderr," 3: Convert HTML Entity\n");
3607 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
3608 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
3610 fprintf(stderr,"T Text mode output\n");
3612 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
3613 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
3614 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
3615 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
3616 fprintf(stderr,"long name options\n");
3617 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
3618 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
3619 fprintf(stderr," --help,--version\n");
3626 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
3627 #if defined(MSDOS) && !defined(_Windows)
3630 #if !defined(__WIN32__) && defined(_Windows)
3633 #if defined(__WIN32__) && defined(_Windows)
3639 ,Version,Patchlevel);
3640 fprintf(stderr,"\n%s\n",CopyRight);
3645 ** Patch authors:
3646 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
3647 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
3648 ** ohta@src.ricoh.co.jp (Junn Ohta)
3649 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
3650 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
3651 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
3652 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
3653 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
3654 ** GHG00637@nifty-serve.or.jp (COW)