5 #endif /* HAVE_CONFIG_H */
\r
7 /** Network Kanji Filter. (PDS Version)
\r
8 ************************************************************************
\r
9 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
\r
10 ** 連絡先: (株)富士通研究所 ソフト3研 市川 至
\r
11 ** (E-Mail Address: ichikawa@flab.fujitsu.co.jp)
\r
12 ** Copyright (C) 1996,1998
\r
13 ** 連絡先: 琉球大学情報工学科 河野 真治 mine/X0208 support
\r
14 ** (E-Mail Address: kono@ie.u-ryukyu.ac.jp)
\r
15 ** 連絡先: COW for DOS & Win16 & Win32 & OS/2
\r
16 ** (E-Mail Address: GHG00637@niftyserve.or.p)
\r
17 ** このソースのいかなる複写,改変,修正も許諾します。ただし、
\r
18 ** その際には、誰が貢献したを示すこの部分を残すこと。
\r
19 ** 再配布や雑誌の付録などの問い合わせも必要ありません。
\r
20 ** このプログラムについては特に何の保証もしない、悪しからず。
\r
21 ** Everyone is permitted to do anything on this program
\r
22 ** including copying, modifying, improving.
\r
23 ** as long as you don't try to pretend that you wrote it.
\r
24 ** i.e., the above copyright notice has to appear in all copies.
\r
25 ** You don't have to ask before copying or publishing.
\r
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
\r
27 ***********************************************************************/
\r
29 /* 以下のソースは、nkf を文字列操作できるよう改造したライブラリである。
\r
31 nkf_conv(元文字列,出力文字列、out モード)
\r
32 出力文字列を NULL としたときは、元文字列を操作する。
\r
33 バグ : 変換され出力される文字列のための領域はある程度とっておくこと。
\r
35 nkf_convert(元文字列、出力文字列、出力文字列の最大の大きさ、
\r
37 kanji_conv に準じる。出力文字列の最大の大きさが指定できる。
\r
38 その大きさ以上になったときはそれ以上の文字の出力は打ち切られる。
\r
40 nkf の convert に与えるオプションを与える文字列。空白で区切って指定する。
\r
43 このプログラムに関しての著作権がらみのことは nkf に準じるものとする。
\r
44 無保証であるので、使用の場合は自らの責任をもってすること。
\r
49 他で用いられないインターフェースは static にした.
\r
50 コンパイラの Warning メッセージを抑制するように ANSI C の形式にした.
\r
51 文字を unsigned char * で SFILE に蓄えるようにした.
\r
53 input_f == FALSE で convert すると,半角カタカナ SJIS が EUC
\r
54 と判断されてしまうバグ(仕様だった?)を直した.
\r
55 しかしながら,SJIS の半角カタカナ 2 文字と EUC は区別できない
\r
56 場合がある.このときは SJIS として変換することにした.
\r
57 EUC_STRICT_CHECK を定義すると EUC-Japan の定義コードを完全にチェックする
\r
73 /* もし,EUC-Japan の完全なチェックをする場合は EUC_STRICT_CHECK を定義
\r
74 * してください.ただし,1 バイトでも EUC-Japan の未定義文字が含まれていると
\r
75 * EUC とみなされなくなってしまいます.他のプログラムで漢字コードを EUC に変換
\r
76 * した場合,EUC の未定義域へマップされる可能性があります.
\r
78 /* #define EUC_STRICT_CHECK */
\r
81 static char *CopyRight =
\r
82 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),1998 S. Kono, COW";
\r
83 static char *Version =
\r
85 static char *Patchlevel =
\r
86 "0/9711/Shinji Kono";
\r
93 ** USAGE: nkf [flags] [file]
\r
96 ** b Output is buffered (DEFAULT)
\r
97 ** u Output is unbuffered
\r
101 ** j Output code is JIS 7 bit (DEFAULT SELECT)
\r
102 ** s Output code is MS Kanji (DEFAULT SELECT)
\r
103 ** e Output code is AT&T JIS (DEFAULT SELECT)
\r
104 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
\r
106 ** m MIME conversion for ISO-2022-JP
\r
107 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
\r
108 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
\r
110 ** r {de/en}crypt ROT13/47
\r
112 ** v display Version
\r
114 ** T Text mode output (for MS-DOS)
\r
116 ** x Do not convert X0201 kana into X0208
\r
117 ** Z Convert X0208 alphabet to ASCII
\r
122 ** B try to fix broken JIS, missing Escape
\r
123 ** B[1-9] broken level
\r
125 ** O Output to 'nkf.out' file
\r
126 ** d Delete \r in line feed
\r
127 ** c Add \r in line feed
\r
129 /******************************/
\r
130 /* デフォルトの出力コード選択 */
\r
131 /* Select DEFAULT_CODE */
\r
132 #define DEFAULT_CODE_JIS
\r
133 /* #define DEFAULT_CODE_SJIS */
\r
134 /* #define DEFAULT_CODE_EUC */
\r
135 /******************************/
\r
137 #if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS)
\r
142 #include <stdlib.h>
\r
143 #ifndef NO_STRING_H
\r
144 #include <string.h>
\r
146 #include <strings.h>
\r
151 #define setbinmode(fp) fsetbin(fp)
\r
152 #else /* Microsoft C, Turbo C */
\r
153 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
\r
155 #else /* UNIX,OS/2 */
\r
156 #define setbinmode(fp)
\r
159 #ifdef _IOFBF /* SysV and MSDOS */
\r
160 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
\r
162 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
\r
165 #include "nkflib.h"
\r
177 /* state of output_mode and input_mode */
\r
183 #define JIS_INPUT 4
\r
184 #define SJIS_INPUT 5
\r
185 #define LATIN1_INPUT 6
\r
186 #define FIXED_MIME 7
\r
187 #define DOUBLE_SPACE -2
\r
188 #define EUC_INPUT 8
\r
200 #define HOLD_SIZE 32
\r
201 #define IOBUF_SIZE 16384
\r
203 #define DEFAULT_J 'B'
\r
204 #define DEFAULT_R 'B'
\r
206 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
\r
207 #define SJ6394 0x0161 /* 63 - 94 ku offset */
\r
211 /* 文字列 を FILE みたいに扱う小細工 */
\r
214 これは nkf の漢字コード変換がファイルに対してのみ対応しているのでそれを
\r
215 文字列操作で使えるようにするためのインターフェースである。ただし、
\r
216 対応している機能は少ないし、必要なものしか作っていない。したがって、
\r
217 これらは nkf の中でしか意味のないものであろう。
\r
219 SFILE は FILE みたいなもので文字列をファイルみたいに扱えるようにする。
\r
220 SFILE を使うためには必ずオープンすること。sopen で mode=="new" または
\r
221 "auto" 指定していなければクローズする必要はない。SFILE の中を直接操作
\r
222 した場合はいろいろ問題が出てくるであろう。
\r
226 sopen は open みたいな関数で、
\r
229 maxsize : 文字列が許容できる最大の大きさ。sputc 時に制限を入れるもの。
\r
230 maxsize に -1 を指定するとこの処理を無視するようになる。
\r
231 そのときは、必要以上の文字を sputc しないように気をつけなけれ
\r
233 mode : newstr、stdout、stdin の文字列を指定できる。
\r
234 例えば mode="new stdout"
\r
235 newstr は自動的に文字列のメモリを maxsize だけ獲得する。
\r
236 ただし、maxsize < 1 のときはディフォルトの値を獲得する。
\r
237 stdout は SFILE の標準出力 stdout となる文字列を指定する。
\r
238 stdin は SFILE の標準入力 stdin となる文字列を指定する。
\r
240 sclose は close みたいな関数で、newstr でオープンされていたときは、
\r
243 sgetc、sungetc、sputc、sputchar はそれぞれ getc、ungetc、putc、putchar
\r
244 に相当する。引数の sf が NULL の時は SEOF を返す。
\r
247 typedef struct __SFILE {
\r
248 unsigned char *pointer; /* 文字列現在のポインタ */
\r
249 unsigned char *head; /* 文字列の最初の位置 */
\r
250 unsigned char *tail; /* 文字列の許容の最後の位置 */
\r
251 char mode[20]; /* 文字列オープンモード newstr,stdout,stdin */
\r
252 /* "newstr stdin" の組合わせはない */
\r
256 static SFILE *sstdout=NULL;
\r
257 static SFILE *sstdin=NULL; /* Never used ? */
\r
259 #define BUFSIZ 1024
\r
260 #endif /* BUFSIZ */
\r
261 static char sfile_buffer[BUFSIZ];
\r
262 #ifndef SAFE_CONVERT_LENGTH
\r
263 #define SAFE_CONVERT_LENGTH(len) (2 * (len) + 7)
\r
264 #endif /* SAFE_CONVERT_LENGTH */
\r
267 static SFILE *sopen(SFILE *, char *string,signed int maxsize,char *md);
\r
268 static void sclose(SFILE *sf);
\r
269 static int sgetc(SFILE *sf);
\r
270 static int sungetc(int c,SFILE *sf);
\r
271 static int sputc(int c,SFILE *sf);
\r
272 #define sputchar(c) sputc(c,sstdout)
\r
275 char *nkf_convert(char *si,char *so,int maxsize,char *in_mode,char *out_mode);
\r
276 char *nkf_conv(char *si,char *so,char *out_mode);
\r
278 static int check_kanji_code(unsigned char *p);
\r
280 /* MIME preprocessor */
\r
282 #undef STRICT_MIME /* do stupid strict mime integrity check */
\r
283 #define GETC(p) ((!mime_mode)?sgetc(p):mime_getc(p))
\r
284 #define UNGETC(c,p) ((!mime_mode)?sungetc(c,p):mime_ungetc(c))
\r
287 #ifdef EASYWIN /*Easy Win */
\r
291 /* function prototype */
\r
293 static int noconvert(SFILE *f);
\r
294 static int kanji_convert(SFILE *f);
\r
295 static int h_conv(SFILE *f,int c2,int c1);
\r
296 static int push_hold_buf(int c2,int c1);
\r
297 static int s_iconv(int c2,int c1);
\r
298 static int e_oconv(int c2,int c1);
\r
299 static int s_oconv(int c2,int c1);
\r
300 static int j_oconv(int c2,int c1);
\r
301 static int line_fold(int c2,int c1);
\r
302 static int pre_convert(int c1,int c2);
\r
303 static int mime_begin(SFILE *f);
\r
304 static int mime_getc(SFILE *f);
\r
305 static int mime_ungetc(unsigned int c);
\r
306 static int mime_integrity(SFILE *f,unsigned char *p);
\r
307 static int base64decode(int c);
\r
308 static int usage(void);
\r
309 static void arguments(char *c);
\r
310 static void reinit();
\r
314 static char stdibuf[IOBUF_SIZE];
\r
315 static char stdobuf[IOBUF_SIZE];
\r
316 static unsigned char hold_buf[HOLD_SIZE*2];
\r
317 static int hold_count;
\r
319 /* MIME preprocessor fifo */
\r
321 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
\r
322 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
\r
323 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
\r
324 static unsigned char mime_buf[MIME_BUF_SIZE];
\r
325 static unsigned int mime_top = 0;
\r
326 static unsigned int mime_last = 0; /* decoded */
\r
327 static unsigned int mime_input = 0; /* undecoded */
\r
330 static int unbuf_f = FALSE;
\r
331 static int estab_f = FALSE;
\r
332 static int nop_f = FALSE;
\r
333 static int binmode_f = TRUE; /* binary mode */
\r
334 static int rot_f = FALSE; /* rot13/47 mode */
\r
335 static int input_f = FALSE; /* non fixed input code */
\r
336 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
\r
337 static int mime_f = TRUE; /* convert MIME B base64 or Q */
\r
338 static int mimebuf_f = FALSE; /* MIME buffered input */
\r
339 static int broken_f = FALSE; /* convert ESC-less broken JIS */
\r
340 static int iso8859_f = FALSE; /* ISO8859 through */
\r
341 #if defined(MSDOS) || defined(__OS2__)
\r
342 static int x0201_f = TRUE; /* Assume JISX0201 kana */
\r
344 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
\r
347 /* X0208 -> ASCII converter */
\r
349 static int c1_return;
\r
351 /* fold parameter */
\r
352 static int line = 0; /* chars in line */
\r
353 static int prev = 0;
\r
354 static int fold_f = FALSE;
\r
355 static int fold_len = 0;
\r
358 static char kanji_intro = DEFAULT_J,
\r
359 ascii_intro = DEFAULT_R;
\r
364 #define FOLD_MARGIN 10
\r
365 #define DEFAULT_FOLD 60
\r
369 #ifdef DEFAULT_CODE_JIS
\r
370 # define DEFAULT_CONV j_oconv
\r
372 #ifdef DEFAULT_CODE_SJIS
\r
373 # define DEFAULT_CONV s_oconv
\r
375 #ifdef DEFAULT_CODE_EUC
\r
376 # define DEFAULT_CONV e_oconv
\r
379 static int (*iconv)(int c2,int c1);
\r
380 /* s_iconv or oconv */
\r
381 static int (*oconv)(int c2,int c1) = DEFAULT_CONV;
\r
384 /* Global states */
\r
385 static int output_mode = ASCII, /* output kanji mode */
\r
386 input_mode = ASCII, /* input kanji mode */
\r
387 shift_mode = FALSE; /* TRUE shift out, or X0201 */
\r
388 static int mime_mode = FALSE; /* MIME mode B base64, Q hex */
\r
390 /* X0201 / X0208 conversion tables */
\r
392 /* X0201 kana conversion table */
\r
394 unsigned char cv[]= {
\r
395 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
\r
396 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
\r
397 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
\r
398 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
\r
399 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
\r
400 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
\r
401 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
\r
402 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
\r
403 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
\r
404 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
\r
405 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
\r
406 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
\r
407 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
\r
408 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
\r
409 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
\r
410 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
\r
414 /* X0201 kana conversion table for dakuten */
\r
416 unsigned char dv[]= {
\r
417 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
418 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
419 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
420 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
421 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
422 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
\r
423 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
\r
424 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
\r
425 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
\r
426 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
\r
427 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
\r
428 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
\r
429 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
430 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
431 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
432 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
435 /* X0201 kana conversion table for han-dakuten */
\r
437 unsigned char ev[]= {
\r
438 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
439 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
440 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
441 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
442 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
443 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
444 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
445 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
446 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
447 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
448 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
\r
449 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
\r
450 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
451 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
452 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
453 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
457 /* X0208 kigou conversion table */
\r
458 /* 0x8140 - 0x819e */
\r
459 unsigned char fv[] = {
\r
461 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
\r
462 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
\r
463 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
\r
464 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
\r
465 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
\r
466 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
\r
467 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
\r
468 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
\r
469 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
\r
470 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
\r
471 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
\r
472 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
\r
479 sopen(SFILE *sf, char *string, signed int maxsize, char *md)
\r
482 strcpy(sf->mode,md);
\r
483 if (strstr(sf->mode,"newstr"))
\r
485 if(maxsize <= sizeof(sfile_buffer))
\r
488 st = (char *)malloc(maxsize);
\r
492 sf->pointer=sf->head=(unsigned char *)st;
\r
493 if (strstr(sf->mode,"stdout"))
\r
495 else if (strstr(sf->mode,"stdin"))
\r
498 maxsize=strlen((char *)st);
\r
500 sf->tail=sf->head+maxsize;
\r
509 if (strstr(sf->mode,"stdout"))
\r
511 if (strstr(sf->mode,"stdin"))
\r
513 if (strstr(sf->mode,"newstr") && sf->head != (unsigned char *)sfile_buffer)
\r
522 if(sf->pointer<sf->tail)
\r
523 return (int)*sf->pointer++;
\r
528 sungetc(int c, SFILE *sf)
\r
532 if (sf->head<sf->pointer) {
\r
533 *--sf->pointer=(unsigned char)c;
\r
540 sputc(int c, SFILE *sf)
\r
544 if (sf->pointer<sf->tail)
\r
545 return (int)(*sf->pointer++=(unsigned char)c);
\r
549 /* public 関数 start */
\r
551 /* nkf 漢字コンバート関数 */
\r
552 /* si must be terminated with '\0' */
\r
554 nkf_convert(char *si, char *so, int maxsize, char *in_mode, char *out_mode)
\r
561 reinit(); /* 使用? */
\r
564 maxsize = SAFE_CONVERT_LENGTH(strlen(si));
\r
565 else if(maxsize == 0)
\r
571 sopen(fi,si,0,"stdin");
\r
572 sopen(fo,so,maxsize,"stdout");
\r
574 sopen(fi,si,0,"stdin");
\r
575 sopen(fo,so,maxsize,"newstr stdout");
\r
581 rot_f = FALSE; /* rot13/47 mode */
\r
582 input_f = FALSE; /* non fixed input code */
\r
583 alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
\r
584 mime_f = FALSE; /* convert MIME base64 */
\r
585 broken_f = FALSE; /* convert ESC-less broken JIS */
\r
586 iso8859_f = FALSE; /* ISO8859 through */
\r
588 x0201_f = TRUE; /* Assume JISX0201 kana */
\r
590 x0201_f = NO_X0201; /* Assume NO JISX0201 */
\r
592 line = 0; /* chars in line */
\r
596 kanji_intro = DEFAULT_J;
\r
597 ascii_intro = DEFAULT_R;
\r
598 output_mode = ASCII; /* output kanji mode */
\r
599 input_mode = ASCII; /* input kanji mode */
\r
600 shift_mode = FALSE; /* TRUE shift out, or X0201 */
\r
601 mime_mode = FALSE; /* MIME mode B base64, Q hex */
\r
604 /* No X0201->X0208 conversion 半角カナを有効に*/
\r
611 /* オプション mode 解析 */
\r
613 if (strstr(out_mode,"EUCK")||strstr(out_mode,"euck")||strstr(out_mode,"ujisk")){
\r
614 /*Hankaku Enable (For WRD File )*/
\r
616 /* No X0201->X0208 conversion 半角カナを有効に*/
\r
619 else if (strstr(out_mode,"SJISK")||strstr(out_mode,"sjisk")){
\r
620 /*Hankaku Enable (For WRD File )*/
\r
622 /* No X0201->X0208 conversion 半角カナを有効に*/
\r
625 else if (strstr(out_mode,"JISK")||strstr(out_mode,"jisk")){
\r
626 /*Hankaku Enable (For WRD File )*/
\r
628 /* No X0201->X0208 conversion 半角カナを有効に*/
\r
631 else if (strstr(out_mode,"EUC")||strstr(out_mode,"euc")||strstr(out_mode,"ujis"))
\r
633 else if (strstr(out_mode,"SJIS")||strstr(out_mode,"sjis"))
\r
635 else if (strstr(out_mode,"JIS")||strstr(out_mode,"jis"))
\r
639 if(in_mode != NULL)
\r
641 if(strstr(in_mode,"EUC")||strstr(in_mode,"euc")||strstr(in_mode,"ujis"))
\r
642 input_f = JIS_INPUT;
\r
643 else if (strstr(in_mode,"SJIS")||strstr(in_mode,"sjis"))
\r
644 input_f = SJIS_INPUT;
\r
645 else if (strstr(in_mode,"JIS")||strstr(in_mode,"jis"))
\r
646 input_f = JIS_INPUT;
\r
651 input_f = check_kanji_code((unsigned char *)si);
\r
653 input_f = SJIS_INPUT;
\r
654 else if(input_f == EUC_INPUT)
\r
655 input_f = JIS_INPUT;
\r
656 if(input_f == SJIS_INPUT && x0201_f == NO_X0201)
\r
666 /* Copy `fo' buffer to `si' */
\r
668 a = fo->pointer - fo->head; /* Stored length */
\r
671 memcpy(si, fo->head, a); /* Do copy */
\r
680 nkf_conv(char *si, char *so, char *mode)
\r
682 return nkf_convert(si,so,-1,NULL,mode);
\r
685 /* public 関数 end */
\r
687 #define IS_SJIS_HANKAKU(c) (0xa0 <= (c) && (c) <= 0xdf)
\r
688 #define IS_SJIS_BYTE1(c) ((0x81 <= (c) && (c) <= 0x9f) ||\
\r
689 (0xe0 <= (c) && (c) <= 0xfc))
\r
690 #define IS_SJIS_BYTE2(c) ((0x40 <= (c) && (c) <= 0x7e) ||\
\r
691 (0x80 <= (c) && (c) <= 0xfc))
\r
693 #define IS_EUC_BYTE1(c) (0xa1 <= (c) && (c) <= 0xf4)
\r
694 #ifdef EUC_STRICT_CHECK
\r
695 #define IS_EUC_BYTE2(c) (0xa1 <= (c) && (c) <= 0xfe)
\r
697 #define IS_EUC_BYTE2(c) (0xa0 <= (c) && (c) <= 0xff)
\r
698 #endif /* EUC_STRICT_CHECK */
\r
701 #ifdef EUC_STRICT_CHECK
\r
702 #define EUC_GAP_LIST_SIZE (16*2)
\r
703 static unsigned int euc_gap_list[EUC_GAP_LIST_SIZE] =
\r
722 #endif /* EUC_STRICT_CHECK */
\r
724 static int check_kanji_code(unsigned char *p)
\r
729 /* check JIS or ASCII code */
\r
733 if(*p < SPACE || *p >= DEL)
\r
737 mode = -1; /* None ASCII */
\r
750 while(*p && *p <= DEL)
\r
759 if(IS_SJIS_HANKAKU(c1))
\r
764 if(IS_SJIS_HANKAKU(c1))
\r
766 #ifdef EUC_STRICT_CHECK
\r
768 #endif /* EUC_STRICT_CHECK */
\r
770 0xa0 0xa1 0xdf 0xf4 0xfe
\r
771 |<-----+---- SH -------->| | | SH: SJIS-HANKAKU
\r
772 |<------- E1 ----------->| | E1: EUC (MSB)
\r
773 |<--------E2------------------->| E2: EUC (LSB)
\r
775 if(!IS_EUC_BYTE1(c1) || !IS_EUC_BYTE2(c2))
\r
777 if(!IS_SJIS_HANKAKU(c2)) /* (0xdf..0xfe] */
\r
779 #ifdef EUC_STRICT_CHECK
\r
783 /* Checking more strictly */
\r
784 c = (((unsigned int)c1)<<8 | (unsigned int)c2);
\r
785 for(i = 0; i < EUC_GAP_LIST_SIZE; i += 2)
\r
786 if(euc_gap_list[i] <= c && c <= euc_gap_list[i + 1])
\r
792 #endif /* EUC_STRICT_CHECK */
\r
795 else if(IS_SJIS_BYTE1(c1) && IS_SJIS_BYTE2(c2))
\r
797 if(!(IS_EUC_BYTE1(c1) && IS_EUC_BYTE2(c2)))
\r
801 else if(IS_EUC_BYTE1(c1) && IS_EUC_BYTE2(c2))
\r
806 p++; /* What? Is this japanese? Try check again. */
\r
813 #ifdef EUC_STRICT_CHECK
\r
814 static void fix_euc_code(unsigned char *s, int len)
\r
818 for(i = 0; i < len - 1; i++)
\r
822 c = (((unsigned int)s[i])<<8 | (unsigned int)s[i + 1]);
\r
823 for(j = 0; j < EUC_GAP_LIST_SIZE; j += 2)
\r
824 if(euc_gap_list[j] <= c && c <= euc_gap_list[j + 1])
\r
834 #endif /* EUC_STRICT_CHECK */
\r
837 static int file_out = FALSE;
\r
838 static int add_cr = FALSE;
\r
839 static int del_cr = FALSE;
\r
840 static int end_check;
\r
852 #ifdef EASYWIN /*Easy Win */
\r
853 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
\r
856 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
\r
861 if(iso8859_f && (oconv != j_oconv || !x0201_f )) {
\r
862 fprintf(stderr,"Mixed ISO8859/JISX0201/SJIS/EUC output is not allowed.\n");
\r
866 if(binmode_f == TRUE)
\r
868 if(freopen("","wb",stdout) == NULL)
\r
871 setbinmode(stdout);
\r
875 setbuf(stdout, (char *) NULL);
\r
877 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
\r
880 if(binmode_f == TRUE)
\r
882 if(freopen("","rb",stdin) == NULL) return (-1);
\r
886 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
\r
890 kanji_convert(stdin);
\r
893 if((fin = fopen(*argv++, "r")) == NULL) {
\r
897 /* reopen file for stdout */
\r
898 if(file_out == TRUE){
\r
900 if(freopen(*argv++, "w", stdout) == NULL) {
\r
906 if(freopen("nkf.out", "w", stdout) == NULL) {
\r
911 if(binmode_f == TRUE) {
\r
913 if(freopen("","wb",stdout) == NULL)
\r
916 setbinmode(stdout);
\r
920 if(binmode_f == TRUE)
\r
922 if(freopen("","rb",fin) == NULL)
\r
927 setvbuffer(fin, stdibuf, IOBUF_SIZE);
\r
931 kanji_convert(fin);
\r
936 #ifdef EASYWIN /*Easy Win */
\r
937 if(file_out == FALSE)
\r
938 scanf("%d",&end_check);
\r
941 #else /* for Other OS */
\r
942 if(file_out == TRUE)
\r
950 arguments(char *cp)
\r
954 case 'b': /* buffered mode */
\r
957 case 'u': /* unbuffered mode */
\r
960 case 't': /* transparent mode */
\r
963 case 'j': /* JIS output */
\r
967 case 'e': /* AT&T EUC output */
\r
970 case 's': /* SJIS output */
\r
973 case 'l': /* ISO8859 Latin-1 support, no conversion */
\r
974 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
\r
975 input_f = LATIN1_INPUT;
\r
977 case 'i': /* Kanji IN ESC-$-@/B */
\r
978 if(*cp=='@'||*cp=='B')
\r
979 kanji_intro = *cp++;
\r
981 case 'o': /* ASCII IN ESC-(-J/B */
\r
982 if(*cp=='J'||*cp=='B'||*cp=='H')
\r
983 ascii_intro = *cp++;
\r
988 #if defined(MSDOS) || defined(__OS2__)
\r
999 /* Input code assumption */
\r
1000 case 'J': /* JIS input */
\r
1001 case 'E': /* AT&T EUC input */
\r
1002 input_f = JIS_INPUT;
\r
1004 case 'S': /* MS Kanji input */
\r
1005 input_f = SJIS_INPUT;
\r
1006 if(x0201_f==NO_X0201) x0201_f=TRUE;
\r
1008 case 'Z': /* Convert X0208 alphabet to ASCII */
\r
1009 /* bit:0 Convert X0208
\r
1010 bit:1 Convert Kankaku to one space
\r
1011 bit:2 Convert Kankaku to two spaces
\r
1013 if('9'>= *cp && *cp>='0')
\r
1014 alpha_f |= 1<<(*cp++ -'0');
\r
1018 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
\r
1019 x0201_f = FALSE; /* No X0201->X0208 conversion */
\r
1021 ESC-(-I in JIS, EUC, MS Kanji
\r
1022 SI/SO in JIS, EUC, MS Kanji
\r
1023 SSO in EUC, JIS, not in MS Kanji
\r
1024 MS Kanji (0xa0-0xdf)
\r
1026 ESC-(-I in JIS (0x20-0x5f)
\r
1027 SSO in EUC (0xa0-0xdf)
\r
1028 0xa0-0xdf in MS Kanji (0xa0-0xdf)
\r
1031 case 'X': /* Assume X0201 kana */
\r
1032 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
\r
1035 case 'f': /* folding -f60 or -f */
\r
1037 fold_len = atoi(cp);
\r
1038 if(!(0<fold_len && fold_len<BUFSIZ))
\r
1039 fold_len = DEFAULT_FOLD;
\r
1040 while('0'<= *cp && *cp <='9') cp++;
\r
1042 case 'm': /* MIME support */
\r
1044 if(*cp=='B'||*cp=='Q') {
\r
1045 mime_mode = *cp++;
\r
1046 mimebuf_f = FIXED_MIME;
\r
1047 } else if (*cp=='0') {
\r
1051 case 'M': /* MIME output */
\r
1052 oconv = j_oconv; /* sorry... not yet done.. */
\r
1054 case 'B': /* Broken JIS support */
\r
1055 /* bit:0 no ESC JIS
\r
1056 bit:1 allow any x on ESC-(-x or ESC-$-x
\r
1057 bit:2 reset to ascii on NL
\r
1059 if('9'>= *cp && *cp>='0')
\r
1060 broken_f |= 1<<(*cp++ -'0');
\r
1065 case 'O':/* for Output file */
\r
1069 case 'c':/* add cr code */
\r
1072 case 'd':/* delete cr code */
\r
1076 /* bogus option but ignored */
\r
1084 noconvert(SFILE *f)
\r
1088 while ((c = sgetc(f)) != EOF)
\r
1097 kanji_convert(SFILE *f)
\r
1103 if(input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
\r
1104 estab_f = TRUE; iconv = oconv;
\r
1105 } else if(input_f == SJIS_INPUT) {
\r
1106 estab_f = TRUE; iconv = s_iconv;
\r
1108 estab_f = FALSE; iconv = oconv;
\r
1110 input_mode = ASCII;
\r
1111 output_mode = ASCII;
\r
1112 shift_mode = FALSE;
\r
1114 #define NEXT continue /* no output, get next */
\r
1115 #define SEND ; /* output c1 and c2, get next */
\r
1116 #define LAST break /* end of loop, go closing */
\r
1118 while ((c1 = GETC(f)) != EOF) {
\r
1122 /* in case of 8th bit is on */
\r
1124 /* in case of not established yet */
\r
1126 /* It is still ambiguous */
\r
1127 h_conv(f, c2, c1);
\r
1130 } else if(c1 < AT) {
\r
1131 /* ignore bogus code */
\r
1136 /* it seems to be MS Kanji */
\r
1142 /* in case of already established */
\r
1144 /* ignore bogus code */
\r
1151 /* it might be kanji shifted */
\r
1152 if((c1 == DEL) || (c1 <= SPACE)) {
\r
1153 /* ignore bogus first code */
\r
1162 if(!estab_f && !iso8859_f) {
\r
1163 /* not established yet */
\r
1165 /* it seems to be MS Kanji */
\r
1168 } else if(c1 < 0xe0) {
\r
1169 /* it seems to be EUC */
\r
1173 /* still ambiguous */
\r
1177 } else { /* estab_f==TRUE */
\r
1180 } else if(SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
\r
1181 /* SJIS X0201 Case... */
\r
1182 /* This is too arrogant, but ... */
\r
1183 if(x0201_f==NO_X0201) {
\r
1189 if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
\r
1190 /* look ahead for X0201/X0208conversion */
\r
1191 if((c2 = GETC(f)) == EOF) {
\r
1192 (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
\r
1194 } else if(c2==(0xde)) { /* 濁点 */
\r
1195 (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
\r
1198 } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) {
\r
1200 (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
\r
1204 UNGETC(c2,f); c2 = 0;
\r
1206 (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
\r
1210 } else if(c1==SSO && iconv != s_iconv) {
\r
1211 /* EUC X0201 Case */
\r
1212 /* This is too arrogant
\r
1213 if(x0201_f == NO_X0201) {
\r
1218 c1 = GETC(f); /* skip SSO */
\r
1220 if(x0201_f && SSP<=c1 && c1<0xe0) {
\r
1221 if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
\r
1222 if((c2 = GETC(f)) == EOF) {
\r
1223 (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
\r
1226 /* forward lookup 濁点/半濁点 */
\r
1228 UNGETC(c2,f); c2 = 0;
\r
1229 (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
\r
1231 } else if((c2 = GETC(f)) == EOF) {
\r
1232 (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
\r
1235 } else if(c2==(0xde)) { /* 濁点 */
\r
1236 (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
\r
1239 } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) {
\r
1241 (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
\r
1245 (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
\r
1246 /* we have to check this c2 */
\r
1247 /* and no way to push back SSO */
\r
1249 goto euc_1byte_check;
\r
1252 (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
\r
1256 } else if(c1 < SSP && iconv != s_iconv) {
\r
1257 /* strange code in EUC */
\r
1258 iconv = s_iconv; /* try SJIS */
\r
1262 /* already established */
\r
1267 } else if((c1 > SPACE) && (c1 != DEL)) {
\r
1268 /* in case of Roman characters */
\r
1271 /* output 1 shifted byte */
\r
1272 if(x0201_f && (!iso8859_f||input_mode==X0201) &&
\r
1273 SSP<=c1 && c1<0xe0 ) {
\r
1274 if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
\r
1275 if((c2 = GETC(f)) == EOF) {
\r
1276 (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
\r
1278 } else if(c2==(0xde&0x7f)) { /* 濁点 */
\r
1279 (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
\r
1282 } else if(c2==(0xdf&0x7f)&&ev[(c1-SSP)*2]) {
\r
1284 (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
\r
1288 UNGETC(c2,f); c2 = 0;
\r
1290 (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
\r
1294 } else if(c1 == '(' && broken_f && input_mode == X0208
\r
1296 /* Try to recover missing escape */
\r
1297 if((c1 = GETC(f)) == EOF) {
\r
1301 if(c1 == 'B' || c1 == 'J' || c1 == 'H') {
\r
1302 input_mode = ASCII; shift_mode = FALSE;
\r
1306 /* do not modify various input_mode */
\r
1307 /* It can be vt100 sequence */
\r
1311 } else if(input_mode == X0208) {
\r
1312 /* in case of Kanji shifted */
\r
1315 /* goto next_byte */
\r
1316 } else if(c1 == '=' && mime_f && !mime_mode ) {
\r
1317 if((c1 = sgetc(f)) == EOF) {
\r
1320 } else if(c1 == '?') {
\r
1321 /* =? is mime conversion start sequence */
\r
1322 if(mime_begin(f) == EOF) /* check in detail */
\r
1331 } else if(c1 == '$' && broken_f && !mime_mode) {
\r
1332 /* try to recover missing escape */
\r
1333 if((c1 = GETC(f)) == EOF) {
\r
1336 } else if(c1 == '@'|| c1 == 'B') {
\r
1337 /* in case of Kanji in ESC sequence */
\r
1338 input_mode = X0208;
\r
1339 shift_mode = FALSE;
\r
1349 } else if(c1 == SI) {
\r
1350 shift_mode = FALSE;
\r
1352 } else if(c1 == SO) {
\r
1353 shift_mode = TRUE;
\r
1355 } else if(c1 == ESC ) {
\r
1356 if((c1 = GETC(f)) == EOF) {
\r
1359 } else if(c1 == '$') {
\r
1360 if((c1 = GETC(f)) == EOF) {
\r
1364 } else if(c1 == '@'|| c1 == 'B') {
\r
1365 /* This is kanji introduction */
\r
1366 input_mode = X0208;
\r
1367 shift_mode = FALSE;
\r
1369 } else if(c1 == '(') {
\r
1370 if((c1 = GETC(f)) == EOF) {
\r
1375 } else if(c1 == '@'|| c1 == 'B') {
\r
1376 /* This is kanji introduction */
\r
1377 input_mode = X0208;
\r
1378 shift_mode = FALSE;
\r
1387 } else if(broken_f&0x2) {
\r
1388 input_mode = X0208;
\r
1389 shift_mode = FALSE;
\r
1397 } else if(c1 == '(') {
\r
1398 if((c1 = GETC(f)) == EOF) {
\r
1404 /* This is X0201 kana introduction */
\r
1405 input_mode = X0201; shift_mode = X0201;
\r
1407 } else if(c1 == 'B' || c1 == 'J' || c1 == 'H') {
\r
1408 /* This is ASCII / JIS-Roman introduction */
\r
1409 input_mode = ASCII; shift_mode = FALSE;
\r
1411 } else if(broken_f&0x2) {
\r
1412 input_mode = ASCII; shift_mode = FALSE;
\r
1417 /* maintain various input_mode here */
\r
1426 } else if(c1 == NL && broken_f&4) {
\r
1427 input_mode = ASCII;
\r
1433 if(input_mode == X0208)
\r
1434 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
\r
1436 (*iconv)(c2, c1); /* can be EUC/SJIS */
\r
1439 /* goto next_word */
\r
1451 h_conv(SFILE *f, int c2, int c1)
\r
1456 /** it must NOT be in the kanji shifted sequence */
\r
1457 /** it must NOT be written in JIS7 */
\r
1458 /** and it must be after 2 byte 8bit code */
\r
1461 push_hold_buf(c2, c1);
\r
1464 while ((c1 = GETC(f)) != EOF) {
\r
1468 /* not established */
\r
1470 /* it is still ambiguous */
\r
1472 } else if(c1 < AT) {
\r
1473 /* ignore bogus first byte */
\r
1477 /* now established */
\r
1478 /* it seems to be MS Kanji */
\r
1488 /* 8th bit is on */
\r
1490 /* it seems to be MS Kanji */
\r
1493 } else if(c1 < 0xe0) {
\r
1494 /* it seems to be EUC */
\r
1498 /* still ambiguous */
\r
1503 /* 7 bit code , then send without any process */
\r
1507 if((push_hold_buf(c2, c1) == EOF) || estab_f)
\r
1514 ** 1) EOF is detected, or
\r
1515 ** 2) Code is established, or
\r
1516 ** 3) Buffer is FULL (but last word is pushed)
\r
1518 ** in 1) and 3) cases, we continue to use
\r
1519 ** Kanji codes by oconv and leave estab_f unchanged.
\r
1522 for (wc = 0; wc < hold_count; wc += 2) {
\r
1523 c2 = hold_buf[wc];
\r
1524 c1 = hold_buf[wc+1];
\r
1533 push_hold_buf(int c2, int c1)
\r
1535 if(hold_count >= HOLD_SIZE*2)
\r
1537 hold_buf[hold_count++] = c2;
\r
1538 hold_buf[hold_count++] = c1;
\r
1539 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
\r
1544 s_iconv(int c2, int c1)
\r
1546 if((c2 == EOF) || (c2 == 0)) {
\r
1549 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
\r
1551 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
\r
1562 int e_oconv(int c2, int c1)
\r
1564 c2 = pre_convert(c1,c2); c1 = c1_return;
\r
1566 switch(line_fold(c2,c1)) {
\r
1568 if(add_cr == TRUE) {
\r
1574 case 0: return VOIDVOID;
\r
1576 c1 = '\n'; c2 = 0;
\r
1584 if(c2==DOUBLE_SPACE) {
\r
1585 sputchar(' '); sputchar(' ');
\r
1590 else if(c2 == 0 && (c1&0x80)) {
\r
1591 sputchar(SSO); sputchar(c1);
\r
1592 } else if(c2 == 0) {
\r
1593 if(c1 == '\n' && add_cr == TRUE)
\r
1597 else if(del_cr == FALSE)
\r
1600 if((c1<0x20 || 0x7e<c1) ||
\r
1601 (c2<0x20 || 0x7e<c2)) {
\r
1603 return VOIDVOID; /* too late to rescue this char */
\r
1605 sputchar(c2 | 0x080);
\r
1606 sputchar(c1 | 0x080);
\r
1614 s_oconv(int c2, int c1)
\r
1616 c2 = pre_convert(c1,c2); c1 = c1_return;
\r
1618 switch(line_fold(c2,c1)) {
\r
1620 if(add_cr == TRUE) {
\r
1627 c1 = '\n'; c2 = 0;
\r
1629 case 0: return VOIDVOID;
\r
1636 if(c2==DOUBLE_SPACE) {
\r
1637 sputchar(' '); sputchar(' ');
\r
1642 else if(c2 == 0) {
\r
1643 if(c1 == '\n' && add_cr == TRUE)
\r
1647 else if(del_cr == FALSE)
\r
1650 if((c1<0x20 || 0x7e<c1) ||
\r
1651 (c2<0x20 || 0x7e<c2)) {
\r
1653 return VOIDVOID; /* too late to rescue this char */
\r
1655 sputchar((((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1)));
\r
1656 sputchar((c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e)));
\r
1662 j_oconv(int c2, int c1)
\r
1664 c2 = pre_convert(c1,c2); c1 = c1_return;
\r
1666 switch(line_fold(c2,c1)) {
\r
1671 sputchar(ascii_intro);
\r
1673 if(add_cr == TRUE) {
\r
1678 output_mode = ASCII;
\r
1681 c1 = '\n'; c2 = 0;
\r
1687 case 0: return VOIDVOID;
\r
1694 sputchar(ascii_intro);
\r
1696 } else if(c2 == 0 && (c1 & 0x80)) {
\r
1697 if(input_mode==X0201 || !iso8859_f) {
\r
1698 if(output_mode!=X0201) {
\r
1702 output_mode = X0201;
\r
1706 /* iso8859 introduction, or 8th bit on */
\r
1707 /* Can we convert in 7bit form using ESC-'-'-A ?
\r
1708 Is this popular? */
\r
1711 } else if(c2 == 0) {
\r
1715 sputchar(ascii_intro);
\r
1716 output_mode = ASCII;
\r
1718 if(c1 == '\n' && add_cr == TRUE)
\r
1722 else if(del_cr == FALSE)
\r
1724 } else if(c2 == DOUBLE_SPACE) {
\r
1728 sputchar(ascii_intro);
\r
1729 output_mode = ASCII;
\r
1732 if(c1 == '\n' && add_cr == TRUE)
\r
1736 else if(del_cr == FALSE)
\r
1739 if(output_mode != X0208) {
\r
1742 sputchar(kanji_intro);
\r
1743 output_mode = X0208;
\r
1745 if(c1<0x20 || 0x7e<c1)
\r
1747 if(c2<0x20 || 0x7e<c2)
\r
1750 if(c1 == '\n' && add_cr == TRUE)
\r
1754 else if(del_cr == FALSE)
\r
1762 #define rot13(c) ( \
\r
1763 ( c < 'A' ) ? c: \
\r
1764 (c <= 'M') ? (c + 13): \
\r
1765 (c <= 'Z') ? (c - 13): \
\r
1766 (c < 'a') ? (c): \
\r
1767 (c <= 'm') ? (c + 13): \
\r
1768 (c <= 'z') ? (c - 13): \
\r
1772 #define rot47(c) ( \
\r
1773 ( c < '!' ) ? c: \
\r
1774 ( c <= 'O' ) ? (c + 47) : \
\r
1775 ( c <= '~' ) ? (c - 47) : \
\r
1781 Return value of line_fold()
\r
1783 \n add newline and output char
\r
1784 \r add newline and output nothing
\r
1787 1 (or else) normal output
\r
1789 fold state in prev (previous character)
\r
1791 >0x80 Japanese (X0208/X0201)
\r
1796 This fold algorithm does not preserve leading spaces in a line.
\r
1797 This is the main difference from fmt.
\r
1801 line_fold(int c2, int c1)
\r
1805 return 0; /* ignore cr */
\r
1807 if(line>0) line--;
\r
1810 if(c2==EOF && line != 0) /* close open last line */
\r
1814 if(prev == c1) { /* duplicate newline */
\r
1817 return '\n'; /* output two newline */
\r
1823 if(prev&0x80) { /* Japanese? */
\r
1825 return 0; /* ignore given single newline */
\r
1826 } else if(prev==' ') {
\r
1830 if(++line<=fold_len)
\r
1834 return '\r'; /* fold and output nothing */
\r
1844 return '\n'; /* output newline and clear */
\r
1846 /* X0208 kankaku or ascii space */
\r
1847 if( (c2==0&&c1==' ')||
\r
1848 (c2==0&&c1=='\t')||
\r
1849 (c2==DOUBLE_SPACE)||
\r
1850 (c2=='!'&& c1=='!')) {
\r
1852 return 0; /* remove duplicate spaces */
\r
1855 if(++line<=fold_len)
\r
1856 return ' '; /* output ASCII space only */
\r
1858 prev = ' '; line = 0;
\r
1859 return '\r'; /* fold and output nothing */
\r
1862 prev0 = prev; /* we still need this one... , but almost done */
\r
1864 if(c2 || (SSP<=c1 && c1<=0xdf))
\r
1865 prev |= 0x80; /* this is Japanese */
\r
1866 line += (c2==0)?1:2;
\r
1867 if(line<=fold_len) { /* normal case */
\r
1870 if(line>=fold_len+FOLD_MARGIN) { /* too many kinsou suspension */
\r
1871 line = (c2==0)?1:2;
\r
1872 return '\n'; /* We can't wait, do fold now */
\r
1874 /* simple kinsoku rules: returning 1 means no folding */
\r
1876 if(c1==0xde) return 1; /* ゛*/
\r
1877 if(c1==0xdf) return 1; /* ゜*/
\r
1878 if(c1==0xa4) return 1; /* 。*/
\r
1879 if(c1==0xa3) return 1; /* ,*/
\r
1880 if(c1==0xa1) return 1; /* 」*/
\r
1881 if(c1==0xb0) return 1; /* - */
\r
1882 if(SSP<=c1 && c1<=0xdf) { /* X0201 */
\r
1884 return '\n';/* add one new line before this character */
\r
1886 /* fold point in ASCII { [ ( */
\r
1897 ((prev0=='\n')|| (prev0==' ')|| /* ignored new line */
\r
1898 (prev0&0x80)) /* X0208 - ASCII */
\r
1901 return '\n';/* add one new line before this character */
\r
1903 return 1; /* default no fold in ASCII */
\r
1906 if(c1=='"') return 1; /* 、 */
\r
1907 if(c1=='#') return 1; /* 。 */
\r
1908 if(c1=='$') return 1; /* , */
\r
1909 if(c1=='%') return 1; /* . */
\r
1910 if(c1=='\'') return 1; /* + */
\r
1911 if(c1=='(') return 1; /* ; */
\r
1912 if(c1==')') return 1; /* ? */
\r
1913 if(c1=='*') return 1; /* ! */
\r
1914 if(c1=='+') return 1; /* ゛ */
\r
1915 if(c1==',') return 1; /* ゜ */
\r
1918 return '\n'; /* add one new line before this character */
\r
1923 pre_convert(int c1, int c2)
\r
1925 if(c2) c1 &= 0x7f;
\r
1927 if(c2==EOF) return c2;
\r
1939 /* JISX0208 Alphabet */
\r
1940 if(alpha_f && c2 == 0x23 ) return 0;
\r
1941 /* JISX0208 Kigou */
\r
1942 if(alpha_f && c2 == 0x21 ) {
\r
1947 } else if(alpha_f&0x4) {
\r
1949 return DOUBLE_SPACE;
\r
1953 } else if(0x20<c1 && c1<0x7f && fv[c1-0x20]) {
\r
1954 c1_return = fv[c1-0x20];
\r
1962 #ifdef STRICT_MIME
\r
1963 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
\r
1965 unsigned char *mime_pattern[] = {
\r
1966 (unsigned char *)"\075?ISO-8859-1?Q?",
\r
1967 (unsigned char *)"\075?ISO-2022-JP?B?",
\r
1968 (unsigned char *)"\075?ISO-2022-JP?Q?",
\r
1969 (unsigned char *)"\075?JAPANESE_EUC?B?",
\r
1970 (unsigned char *)"\075?SHIFT_JIS?B?",
\r
1974 int mime_encode[] = {
\r
1980 #define MAXRECOVER 20
\r
1981 int iso8859_f_save;
\r
1983 #ifdef STRICT_MIME
\r
1985 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
\r
1986 /* I don't trust the portability of toupper */
\r
1989 mime_begin(SFILE *f)
\r
1993 unsigned char *p,*q;
\r
1994 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
\r
1996 mime_mode = FALSE;
\r
1997 /* =? has been checked */
\r
1999 p = mime_pattern[j];
\r
2000 r[0]='='; r[1]='?';
\r
2002 for(i=2;p[i]>' ';i++) { /* start at =? */
\r
2003 if( ((r[i] = c1 = sgetc(f))==EOF) || nkf_toupper(c1) != p[i] ) {
\r
2004 /* pattern fails, try next one */
\r
2006 while (p = mime_pattern[++j]) {
\r
2007 for(k=2;k<i;k++) /* assume length(p) > i */
\r
2008 if(p[k]!=q[k]) break;
\r
2009 if(k==i && nkf_toupper(c1)==p[k]) break;
\r
2011 if(p) continue; /* found next one, continue */
\r
2012 /* all fails, output from recovery buffer */
\r
2014 for(j=0;j<i;j++) {
\r
2020 mime_mode = mime_encode[j];
\r
2021 iso8859_f_save = iso8859_f;
\r
2025 if(mime_mode=='B') {
\r
2026 mimebuf_f = unbuf_f;
\r
2028 /* do MIME integrity check */
\r
2029 return mime_integrity(f,mime_pattern[j]);
\r
2036 #define mime_getc0(f) (mimebuf_f?sgetc(f):Fifo(mime_input++))
\r
2037 #define mime_ungetc0(c,f) (mimebuf_f?sungetc(c,f):mime_input--)
\r
2041 mime_begin(SFILE *f)
\r
2045 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
\r
2047 mime_mode = FALSE;
\r
2048 /* =? has been checked */
\r
2050 r[0]='='; r[1]='?';
\r
2051 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
\r
2052 /* We accept any character type even if it is broken by new lines */
\r
2053 if( (r[i] = c1 = sgetc(f))==EOF) break;
\r
2054 if(c1=='=') break;
\r
2055 if(c1<' '&& c1!='\r' && c1!='\n') break;
\r
2058 if(!(i<MAXRECOVER) || (r[i] = c1 = sgetc(f))==EOF) break;
\r
2059 if(c1=='b'||c1=='B') {
\r
2061 } else if(c1=='q'||c1=='Q') {
\r
2067 if(!(i<MAXRECOVER) || (r[i] = c1 = sgetc(f))==EOF) break;
\r
2071 mime_mode = FALSE;
\r
2076 if(!mime_mode || c1==EOF || i==MAXRECOVER) {
\r
2078 for(j=0;j<i;j++) {
\r
2083 iso8859_f_save = iso8859_f;
\r
2084 /* do no MIME integrity check */
\r
2085 return c1; /* used only for checking EOF */
\r
2088 #define mime_getc0(f) sgetc(f)
\r
2089 #define mime_ungetc0(c,f) sungetc(c,f)
\r
2094 mime_getc(SFILE *f)
\r
2096 int c1, c2, c3, c4, cc;
\r
2097 int t1, t2, t3, t4, mode, exit_mode;
\r
2099 if(mime_top != mime_last) { /* Something is in FIFO */
\r
2100 return Fifo(mime_top++);
\r
2103 if(mimebuf_f == FIXED_MIME)
\r
2104 exit_mode = mime_mode;
\r
2106 exit_mode = FALSE;
\r
2107 if(mime_mode == 'Q') {
\r
2108 if((c1 = mime_getc0(f)) == EOF) return (EOF);
\r
2109 if(c1=='_') return ' ';
\r
2110 if(c1!='=' && c1!='?')
\r
2112 mime_mode = exit_mode; /* prepare for quit */
\r
2113 if(c1<=' ') return c1;
\r
2114 if((c2 = mime_getc0(f)) == EOF) return (EOF);
\r
2115 if(c2<=' ') return c2;
\r
2116 if(c1=='?'&&c2=='=') {
\r
2117 /* end Q encoding */
\r
2118 input_mode = exit_mode;
\r
2119 iso8859_f = iso8859_f_save;
\r
2123 mime_mode = 'Q'; /* still in MIME */
\r
2124 mime_ungetc0(c2,f);
\r
2127 if((c3 = mime_getc0(f)) == EOF) return (EOF);
\r
2128 if(c2<=' ') return c2;
\r
2129 mime_mode = 'Q'; /* still in MIME */
\r
2130 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
\r
2131 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
\r
2132 return ((hex(c2)<<4) + hex(c3));
\r
2135 if(mime_mode != 'B') {
\r
2136 mime_mode = FALSE;
\r
2141 /* Base64 encoding */
\r
2143 MIME allows line break in the middle of
\r
2144 Base64, but we are very pessimistic in decoding
\r
2145 in unbuf mode because MIME encoded code may be broken by
\r
2146 less or editor's control sequence (such as ESC-[-K in unbuffered
\r
2147 mode. ignore incomplete MIME.
\r
2150 mime_mode = exit_mode; /* prepare for quit */
\r
2152 while ((c1 = mime_getc0(f))<=' ') {
\r
2156 if((c2 = mime_getc0(f))<=' ') {
\r
2159 if(mimebuf_f!=FIXED_MIME) input_mode = ASCII;
\r
2162 if((c1 == '?') && (c2 == '=')) {
\r
2163 input_mode = ASCII;
\r
2164 while((c1 = sgetc(f))==' ' /* || c1=='\n' || c1=='\r' */);
\r
2167 if((c3 = mime_getc0(f))<=' ') {
\r
2170 if(mimebuf_f!=FIXED_MIME) input_mode = ASCII;
\r
2173 if((c4 = mime_getc0(f))<=' ') {
\r
2176 if(mimebuf_f!=FIXED_MIME) input_mode = ASCII;
\r
2180 mime_mode = mode; /* still in MIME sigh... */
\r
2182 /* BASE 64 decoding */
\r
2184 t1 = 0x3f & base64decode(c1);
\r
2185 t2 = 0x3f & base64decode(c2);
\r
2186 t3 = 0x3f & base64decode(c3);
\r
2187 t4 = 0x3f & base64decode(c4);
\r
2188 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
\r
2190 Fifo(mime_last++) = cc;
\r
2191 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
\r
2193 Fifo(mime_last++) = cc;
\r
2194 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
\r
2196 Fifo(mime_last++) = cc;
\r
2201 return Fifo(mime_top++);
\r
2205 mime_ungetc(unsigned int c)
\r
2207 Fifo(mime_last++) = c;
\r
2211 #ifdef STRICT_MIME
\r
2213 mime_integrity(SFILE *f, unsigned char *p)
\r
2217 /* In buffered mode, read until =? or NL or buffer full
\r
2219 mime_input = mime_top;
\r
2220 mime_last = mime_top;
\r
2221 while(*p) Fifo(mime_input++) = *p++;
\r
2224 while((c=sgetc(f))!=EOF) {
\r
2225 if(((mime_input-mime_top)&MIME_BUF_MASK)==0) break;
\r
2226 if(c=='=' && d=='?') {
\r
2227 /* checked. skip header, start decode */
\r
2228 Fifo(mime_input++) = c;
\r
2232 if(!( (c=='+'||c=='/'|| c=='=' || c=='?' ||
\r
2233 ('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))))
\r
2235 /* Should we check length mod 4? */
\r
2236 Fifo(mime_input++) = c;
\r
2239 /* In case of Incomplete MIME, no MIME decode */
\r
2240 Fifo(mime_input++) = c;
\r
2241 mime_last = mime_input; /* point undecoded buffer */
\r
2242 mime_mode = 1; /* no decode on Fifo last in mime_getc */
\r
2248 base64decode(int c)
\r
2253 i = c - 'A'; /* A..Z 0-25 */
\r
2255 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
\r
2257 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
\r
2259 i = '>' /* 62 */ ; /* + 62 */
\r
2261 i = '?' /* 63 */ ; /* / 63 */
\r
2271 binmode_f = TRUE;
\r
2276 mimebuf_f = FALSE;
\r
2277 broken_f = FALSE;
\r
2278 iso8859_f = FALSE;
\r
2280 x0201_f = NO_X0201;
\r
2282 kanji_intro = DEFAULT_J;
\r
2283 ascii_intro = DEFAULT_R;
\r
2284 oconv = DEFAULT_CONV;
\r
2285 output_mode = ASCII;
\r
2286 input_mode = ASCII;
\r
2287 shift_mode = FALSE;
\r
2288 mime_mode = FALSE;
\r
2299 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
\r
2300 fprintf(stderr,"Flags:\n");
\r
2301 fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
\r
2302 #ifdef DEFAULT_CODE_SJIS
\r
2303 fprintf(stderr,"j,s,e Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC)\n");
\r
2305 #ifdef DEFAULT_CODE_JIS
\r
2306 fprintf(stderr,"j,s,e Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC)\n");
\r
2308 #ifdef DEFAULT_CODE_EUC
\r
2309 fprintf(stderr,"j,s,e Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT)\n");
\r
2311 fprintf(stderr,"J,S,E Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC)\n");
\r
2312 fprintf(stderr,"t no conversion\n");
\r
2313 fprintf(stderr,"i_ Output sequence to designate JIS-kanji (DEFAULT B)\n");
\r
2314 fprintf(stderr,"o_ Output sequence to designate ASCII (DEFAULT B)\n");
\r
2315 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
\r
2316 fprintf(stderr,"v Show this usage\n");
\r
2317 fprintf(stderr,"m[BQ0] MIME decode [B:base64,Q:quoted,0:no decode]\n");
\r
2318 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
\r
2319 fprintf(stderr,"f Folding: -f60 or -f\n");
\r
2320 fprintf(stderr,"Z[0-2] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces\n");
\r
2321 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
\r
2322 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
\r
2324 fprintf(stderr,"T Text mode output\n");
\r
2326 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
\r
2327 fprintf(stderr,"d,c Delete \\r in line feed, Add \\r in line feed\n");
\r
2328 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
\r
2329 #if defined(MSDOS) && !defined(_Windows)
\r
2332 #if !defined(__WIN32__) && defined(_Windows)
\r
2335 #if defined(__WIN32__) && defined(_Windows)
\r
2341 ,Version,Patchlevel);
\r
2342 fprintf(stderr,"\n%s\n",CopyRight);
\r
2350 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
\r
2351 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
\r
2352 ** ohta@src.ricoh.co.jp (Junn Ohta)
\r
2353 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
\r
2354 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
\r
2355 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
\r
2356 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
\r
2357 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
\r
2358 ** GHG00637@nifty-serve.or.jp (COW)
\r