/** Network Kanji Filter. (PDS Version)
+** -*- coding: ISO-2022-JP -*-
************************************************************************
** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
* \e$B8=:_!"\e(Bnkf \e$B$O\e(B SorceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
* http://sourceforge.jp/projects/nkf/
***********************************************************************/
-#define NKF_IDENT "$Id: nkf.c,v 1.168 2008/01/23 23:24:30 naruse Exp $"
+#define NKF_IDENT "$Id: nkf.c,v 1.175 2008/02/07 19:59:13 naruse Exp $"
#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2008-01-23"
+#define NKF_RELEASE_DATE "2008-02-07"
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
"Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
/* state of output_mode and input_mode
c2 0 means ASCII
- JIS_X_0201
+ JIS_X_0201_1976_K
ISO_8859_1
JIS_X_0208
EOF all termination
#define CR 0x0d
#define ESC 0x1b
#define SP 0x20
-#define AT 0x40
-#define SSP 0xa0
#define DEL 0x7f
#define SI 0x0f
#define SO 0x0e
-#define SSO 0x8e
+#define SS2 0x8e
#define SS3 0x8f
#define CRLF 0x0D0A
CP50222,
ISO_2022_JP_1,
ISO_2022_JP_3,
+ ISO_2022_JP_2004,
SHIFT_JIS,
WINDOWS_31J,
CP10001,
EUC_JP,
+ EUCJP_NKF,
CP51932,
EUCJP_MS,
EUCJP_ASCII,
UTF_32BE_BOM,
UTF_32LE,
UTF_32LE_BOM,
+ BINARY,
NKF_ENCODING_TABLE_SIZE,
- JIS_X_0201=0x1000,
- JIS_X_0208=0x1001,
- JIS_X_0212=0x1002,
- JIS_X_0213_1=0x1003,
- JIS_X_0213_2=0x1004,
- BINARY
+ JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
+ /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
+ /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
+ /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
+ JIS_X_0208 = 0x1168, /* @B */
+ JIS_X_0212 = 0x1159, /* D */
+ /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
+ JIS_X_0213_2 = 0x1229, /* P */
+ JIS_X_0213_1 = 0x1233, /* Q */
};
nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
{CP50222, "CP50222", &NkfEncodingISO_2022_JP},
{ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
{ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
+ {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
{SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
{WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
{CP10001, "CP10001", &NkfEncodingShift_JIS},
{EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
+ {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
{CP51932, "CP51932", &NkfEncodingEUC_JP},
{EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
{EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
{"ISO2022JP-CP932", CP50220},
{"CP50220", CP50220},
{"CP50221", CP50221},
+ {"CSISO2022JP", CP50221},
{"CP50222", CP50222},
{"ISO-2022-JP-1", ISO_2022_JP_1},
{"ISO-2022-JP-3", ISO_2022_JP_3},
+ {"ISO-2022-JP-2004", ISO_2022_JP_2004},
{"SHIFT_JIS", SHIFT_JIS},
{"SJIS", SHIFT_JIS},
{"WINDOWS-31J", WINDOWS_31J},
{"CP10001", CP10001},
{"EUCJP", EUC_JP},
{"EUC-JP", EUC_JP},
+ {"EUCJP-NKF", EUCJP_NKF},
{"CP51932", CP51932},
{"EUC-JP-MS", EUCJP_MS},
{"EUCJP-MS", EUCJP_MS},
#define DEFAULT_R 'B'
-#define RANGE_NUM_MAX 18
#define GETA1 0x22
#define GETA2 0x2e
static nkf_encoding *input_encoding = NULL;
static nkf_encoding *output_encoding = NULL;
-static nkf_char kanji_convert(FILE *f);
-static nkf_char h_conv(FILE *f,nkf_char c2,nkf_char c1);
-static nkf_char push_hold_buf(nkf_char c2);
-static void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
-static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
+static int kanji_convert(FILE *f);
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/* UCS Mapping
* 0: Shift_JIS, eucJP-ascii
static int input_endian = ENDIAN_BIG;
static nkf_char unicode_subchar = '?'; /* the regular substitution character */
static void (*encode_fallback)(nkf_char c) = NULL;
-static nkf_char unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
-static nkf_char w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
-static void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
-static nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
-static nkf_char w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
static void w_status(struct input_code *, nkf_char);
#endif
#ifdef UTF8_OUTPUT_ENABLE
static int output_bom_f = FALSE;
static int output_endian = ENDIAN_BIG;
-static nkf_char e2w_conv(nkf_char c2,nkf_char c1);
#endif
-static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
-static void fold_conv(nkf_char c2,nkf_char c1);
-static void eol_conv(nkf_char c2,nkf_char c1);
-static void z_conv(nkf_char c2,nkf_char c1);
-static void rot_conv(nkf_char c2,nkf_char c1);
-static void hira_conv(nkf_char c2,nkf_char c1);
-static void iso2022jp_check_conv(nkf_char c2,nkf_char c1);
static void std_putc(nkf_char c);
static nkf_char std_getc(FILE *f);
static unsigned char stdibuf[IOBUF_SIZE];
static unsigned char stdobuf[IOBUF_SIZE];
#endif
-static unsigned char hold_buf[HOLD_SIZE*2];
-static int hold_count = 0;
/* flags */
static int unbuf_f = FALSE;
static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
#endif
-#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
-#define CLASS_MASK NKF_INT32_C(0xFF000000)
-#define CLASS_UNICODE NKF_INT32_C(0x01000000)
-#define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
-#define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
-#define is_unicode_capsule(c) ((c & CLASS_MASK) == CLASS_UNICODE)
-#define is_unicode_bmp(c) ((c & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
+#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
+#define CLASS_MASK NKF_INT32_C(0xFF000000)
+#define CLASS_UNICODE NKF_INT32_C(0x01000000)
+#define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
+#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
+#define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
+#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
+#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
+#define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE)
+#define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= NKF_INT32_C(UNICODE_BMP_MAX))
+#define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= NKF_INT32_C(UNICODE_MAX))
#ifdef NUMCHAR_OPTION
static int numchar_f = FALSE;
#endif
static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
-#if !defined PERL_XS
-static void print_guessed_code(char *filename);
-#endif
static void set_input_codename(char *codename);
#ifdef EXEC_IO
/* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
#endif /* SHIFTJIS_CP932 */
-#ifdef X0212_ENABLE
static int x0212_f = FALSE;
-#endif
static int x0213_f = FALSE;
static unsigned char prefix_table[256];
{"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
#ifdef UTF8_INPUT_ENABLE
{"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
- {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
- {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
#endif
{0}
};
static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
/* Global states */
-static int output_mode = ASCII, /* output kanji mode */
- input_mode = ASCII, /* input kanji mode */
- shift_mode = FALSE; /* TRUE shift out, or X0201 */
+static int output_mode = ASCII; /* output kanji mode */
+static int input_mode = ASCII; /* input kanji mode */
static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
/* X0201 / X0208 conversion tables */
+static int option_mode = 0;
static int file_out_f = FALSE;
#ifdef OVERWRITE
static int overwrite_f = FALSE;
nkf_enc_to_index(enc) == CP50221 ||\
nkf_enc_to_index(enc) == CP50222)
-#ifndef DEFAULT_ENCIDX
+#ifdef DEFAULT_CODE_LOCALE
static char* nkf_locale_charmap()
{
#ifdef HAVE_LANGINFO_H
if (enc < 0) enc = 0;
return enc;
}
-#endif
+#endif /* DEFAULT_CODE_LOCALE */
static nkf_encoding* nkf_default_encoding()
{
-#ifdef DEFAULT_ENCIDX
- return nkf_enc_from_index(DEFAULT_ENCIDX);
-#else
- nkf_encoding *enc = nkf_locale_encoding();
- if (enc <= 0) enc = nkf_enc_from_index(ISO_2022_JP);
- return enc;
+ nkf_encoding *enc = 0;
+#ifdef DEFAULT_CODE_LOCALE
+ enc = nkf_locale_encoding();
+#elif DEFAULT_ENCIDX
+ enc = nkf_enc_from_index(DEFAULT_ENCIDX);
#endif
+ return enc;
}
#ifndef PERL_XS
);
fprintf(HELP_OUTPUT,
" Default output encoding: "
-#ifdef DEFAULT_ENCIDX
- "%s\n", nkf_enc_name(nkf_default_encoding())
+#ifdef DEFAULT_CODE_LOCALE
+ "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
+#elif DEFAULT_ENCIDX
+ "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
#else
- "%s (%s)\n", nkf_locale_encoding() ? "LOCALE" : "DEFAULT",
- nkf_enc_name(nkf_default_encoding())
+ "NONE\n"
#endif
);
fprintf(HELP_OUTPUT,
{
(*oconv)(0, '\\');
c &= VALUE_MASK;
- if(!is_unicode_bmp(c)){
+ if(!nkf_char_unicode_bmp_p(c)){
(*oconv)(0, 'U');
(*oconv)(0, '0');
(*oconv)(0, '0');
static void set_input_encoding(nkf_encoding *enc)
{
switch (nkf_enc_to_index(enc)) {
+ case ISO_8859_1:
+ iso8859_f = TRUE;
+ break;
case CP50220:
case CP50221:
case CP50222:
#endif
break;
case ISO_2022_JP_1:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
break;
case ISO_2022_JP_3:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
+ x0213_f = TRUE;
+ break;
+ case ISO_2022_JP_2004:
+ x0212_f = TRUE;
x0213_f = TRUE;
break;
case SHIFT_JIS:
ms_ucs_map_f = UCS_MAP_CP932;
#endif
break;
- case EUC_JP:
break;
case CP10001:
#ifdef SHIFTJIS_CP932
ms_ucs_map_f = UCS_MAP_CP10001;
#endif
break;
+ case EUC_JP:
+ break;
+ case EUCJP_NKF:
+ break;
case CP51932:
#ifdef SHIFTJIS_CP932
cp51932_f = TRUE;
#endif
break;
case ISO_2022_JP_1:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
#ifdef SHIFTJIS_CP932
if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
break;
case ISO_2022_JP_3:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
if (cp932inv_f == TRUE) cp932inv_f = FALSE;
if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = UCS_MAP_CP932;
+ ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
+ break;
+ case EUCJP_NKF:
+ x0212_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_ASCII;
#endif
break;
case CP51932:
#endif
break;
case EUCJP_MS:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_MS;
#endif
break;
case EUCJP_ASCII:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_ASCII;
#endif
break;
case EUC_JISX0213:
case EUC_JIS_2004:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
if (cp932inv_f == TRUE) cp932inv_f = FALSE;
}
}
-static int option_mode = 0;
-
-void options(unsigned char *cp)
+struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
{
- nkf_char i, j;
- unsigned char *p;
- unsigned char *cp_back = NULL;
- char codeset[32];
- nkf_encoding *enc;
-
- if (option_mode==1)
- return;
- while(*cp && *cp++!='-');
- while (*cp || cp_back) {
- if(!*cp){
- cp = cp_back;
- cp_back = NULL;
- continue;
- }
- p = 0;
- switch (*cp++) {
- case '-': /* literal options */
- if (!*cp || *cp == SP) { /* ignore the rest of arguments */
- option_mode = 1;
- return;
- }
- for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
- p = (unsigned char *)long_option[i].name;
- for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
- if (*p == cp[j] || cp[j] == SP){
- p = &cp[j] + 1;
- break;
- }
- p = 0;
+ if (iconv_func){
+ struct input_code *p = input_code_list;
+ while (p->name){
+ if (iconv_func == p->iconv_func){
+ return p;
}
- if (p == 0) {
- fprintf(stderr, "unknown long option: --%s\n", cp);
- return;
- }
- while(*cp && *cp != SP && cp++);
- if (long_option[i].alias[0]){
- cp_back = cp;
- cp = (unsigned char *)long_option[i].alias;
- }else{
- if (strcmp(long_option[i].name, "ic=") == 0){
- nkf_str_upcase((char *)p, codeset, 32);
- enc = nkf_enc_find(codeset);
- if (!enc) continue;
- input_encoding = enc;
- continue;
- }
- if (strcmp(long_option[i].name, "oc=") == 0){
- nkf_str_upcase((char *)p, codeset, 32);
- enc = nkf_enc_find(codeset);
- if (enc <= 0) continue;
- output_encoding = enc;
- continue;
- }
- if (strcmp(long_option[i].name, "guess=") == 0){
- if (p[0] == '0' || p[0] == '1') {
- guess_f = 1;
- } else {
- guess_f = 2;
- }
- continue;
- }
-#ifdef OVERWRITE
- if (strcmp(long_option[i].name, "overwrite") == 0){
- file_out_f = TRUE;
- overwrite_f = TRUE;
- preserve_time_f = TRUE;
- continue;
- }
- if (strcmp(long_option[i].name, "overwrite=") == 0){
- file_out_f = TRUE;
- overwrite_f = TRUE;
- preserve_time_f = TRUE;
- backup_f = TRUE;
- backup_suffix = malloc(strlen((char *) p) + 1);
- strcpy(backup_suffix, (char *) p);
- continue;
- }
- if (strcmp(long_option[i].name, "in-place") == 0){
- file_out_f = TRUE;
- overwrite_f = TRUE;
- preserve_time_f = FALSE;
- continue;
- }
- if (strcmp(long_option[i].name, "in-place=") == 0){
- file_out_f = TRUE;
- overwrite_f = TRUE;
- preserve_time_f = FALSE;
- backup_f = TRUE;
- backup_suffix = malloc(strlen((char *) p) + 1);
- strcpy(backup_suffix, (char *) p);
- continue;
- }
-#endif
-#ifdef INPUT_OPTION
- if (strcmp(long_option[i].name, "cap-input") == 0){
- cap_f = TRUE;
- continue;
- }
- if (strcmp(long_option[i].name, "url-input") == 0){
- url_f = TRUE;
- continue;
- }
+ p++;
+ }
+ }
+ return 0;
+}
+
+void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
+{
+#ifdef INPUT_CODE_FIX
+ if (f || !input_encoding)
#endif
-#ifdef NUMCHAR_OPTION
- if (strcmp(long_option[i].name, "numchar-input") == 0){
- numchar_f = TRUE;
- continue;
- }
+ if (estab_f != f){
+ estab_f = f;
+ }
+
+ if (iconv_func
+#ifdef INPUT_CODE_FIX
+ && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
#endif
+ ){
+ iconv = iconv_func;
+ }
#ifdef CHECK_OPTION
- if (strcmp(long_option[i].name, "no-output") == 0){
- noout_f = TRUE;
- continue;
- }
- if (strcmp(long_option[i].name, "debug") == 0){
- debug_f = TRUE;
- continue;
- }
-#endif
- if (strcmp(long_option[i].name, "cp932") == 0){
-#ifdef SHIFTJIS_CP932
- cp51932_f = TRUE;
- cp932inv_f = -TRUE;
-#endif
-#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = UCS_MAP_CP932;
-#endif
- continue;
- }
- if (strcmp(long_option[i].name, "no-cp932") == 0){
-#ifdef SHIFTJIS_CP932
- cp51932_f = FALSE;
- cp932inv_f = FALSE;
-#endif
-#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = UCS_MAP_ASCII;
-#endif
- continue;
- }
-#ifdef SHIFTJIS_CP932
- if (strcmp(long_option[i].name, "cp932inv") == 0){
- cp932inv_f = -TRUE;
- continue;
- }
+ if (estab_f && iconv_for_check != iconv){
+ struct input_code *p = find_inputcode_byfunc(iconv);
+ if (p){
+ set_input_codename(p->name);
+ debug(p->name);
+ }
+ iconv_for_check = iconv;
+ }
#endif
+}
#ifdef X0212_ENABLE
- if (strcmp(long_option[i].name, "x0212") == 0){
- x0212_f = TRUE;
- continue;
- }
-#endif
+nkf_char x0212_shift(nkf_char c)
+{
+ nkf_char ret = c;
+ c &= 0x7f;
+ if (is_eucg3(ret)){
+ if (0x75 <= c && c <= 0x7f){
+ ret = c + (0x109 - 0x75);
+ }
+ }else{
+ if (0x75 <= c && c <= 0x7f){
+ ret = c + (0x113 - 0x75);
+ }
+ }
+ return ret;
+}
-#ifdef EXEC_IO
- if (strcmp(long_option[i].name, "exec-in") == 0){
- exec_f = 1;
- return;
- }
- if (strcmp(long_option[i].name, "exec-out") == 0){
- exec_f = -1;
- return;
- }
+
+nkf_char x0212_unshift(nkf_char c)
+{
+ nkf_char ret = c;
+ if (0x7f <= c && c <= 0x88){
+ ret = c + (0x75 - 0x7f);
+ }else if (0x89 <= c && c <= 0x92){
+ ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
+ }
+ return ret;
+}
+#endif /* X0212_ENABLE */
+
+nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
+{
+ nkf_char ndx;
+ if (is_eucg3(c2)){
+ ndx = c2 & 0x7f;
+ if (x0213_f){
+ if((0x21 <= ndx && ndx <= 0x2F)){
+ if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
+ if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
+ return 0;
+ }else if(0x6E <= ndx && ndx <= 0x7E){
+ if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
+ if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
+ return 0;
+ }
+ return 1;
+ }
+#ifdef X0212_ENABLE
+ else if(nkf_isgraph(ndx)){
+ nkf_char val = 0;
+ const unsigned short *ptr;
+ ptr = x0212_shiftjis[ndx - 0x21];
+ if (ptr){
+ val = ptr[(c1 & 0x7f) - 0x21];
+ }
+ if (val){
+ c2 = val >> 8;
+ c1 = val & 0xff;
+ if (p2) *p2 = c2;
+ if (p1) *p1 = c1;
+ return 0;
+ }
+ c2 = x0212_shift(c2);
+ }
+#endif /* X0212_ENABLE */
+ }
+ if(0x7F < c2) return 1;
+ if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
+ if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
+ return 0;
+}
+
+nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
+{
+#if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
+ nkf_char val;
#endif
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
- if (strcmp(long_option[i].name, "no-cp932ext") == 0){
- no_cp932ext_f = TRUE;
- continue;
- }
- if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
- no_best_fit_chars_f = TRUE;
- continue;
- }
- if (strcmp(long_option[i].name, "fb-skip") == 0){
- encode_fallback = NULL;
- continue;
- }
- if (strcmp(long_option[i].name, "fb-html") == 0){
- encode_fallback = encode_fallback_html;
- continue;
- }
- if (strcmp(long_option[i].name, "fb-xml") == 0){
- encode_fallback = encode_fallback_xml;
- continue;
- }
- if (strcmp(long_option[i].name, "fb-java") == 0){
- encode_fallback = encode_fallback_java;
- continue;
- }
- if (strcmp(long_option[i].name, "fb-perl") == 0){
- encode_fallback = encode_fallback_perl;
- continue;
- }
- if (strcmp(long_option[i].name, "fb-subchar") == 0){
- encode_fallback = encode_fallback_subchar;
- continue;
- }
- if (strcmp(long_option[i].name, "fb-subchar=") == 0){
- encode_fallback = encode_fallback_subchar;
- unicode_subchar = 0;
- if (p[0] != '0'){
- /* decimal number */
- for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
- unicode_subchar *= 10;
- unicode_subchar += hex2bin(p[i]);
- }
- }else if(p[1] == 'x' || p[1] == 'X'){
- /* hexadecimal number */
- for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
- unicode_subchar <<= 4;
- unicode_subchar |= hex2bin(p[i]);
- }
- }else{
- /* octal number */
- for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
- unicode_subchar *= 8;
- unicode_subchar += hex2bin(p[i]);
- }
- }
- w16e_conv(unicode_subchar, &i, &j);
- unicode_subchar = i<<8 | j;
- continue;
- }
-#endif
-#ifdef UTF8_OUTPUT_ENABLE
- if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
- ms_ucs_map_f = UCS_MAP_MS;
- continue;
- }
-#endif
-#ifdef UNICODE_NORMALIZATION
- if (strcmp(long_option[i].name, "utf8mac-input") == 0){
- nfc_f = TRUE;
- continue;
- }
-#endif
- if (strcmp(long_option[i].name, "prefix=") == 0){
- if (nkf_isgraph(p[0])){
- for (i = 1; nkf_isgraph(p[i]); i++){
- prefix_table[p[i]] = p[0];
- }
- }
- continue;
- }
- }
- continue;
- case 'b': /* buffered mode */
- unbuf_f = FALSE;
- continue;
- case 'u': /* non bufferd mode */
- unbuf_f = TRUE;
- continue;
- case 't': /* transparent mode */
- if (*cp=='1') {
- /* alias of -t */
- cp++;
- nop_f = TRUE;
- } else if (*cp=='2') {
- /*
- * -t with put/get
- *
- * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
- *
- */
- cp++;
- nop_f = 2;
- } else
- nop_f = TRUE;
- continue;
- case 'j': /* JIS output */
- case 'n':
- output_encoding = nkf_enc_from_index(ISO_2022_JP);
- continue;
- case 'e': /* AT&T EUC output */
- output_encoding = nkf_enc_from_index(EUC_JP);
- continue;
- case 's': /* SJIS output */
- output_encoding = nkf_enc_from_index(WINDOWS_31J);
- continue;
- case 'l': /* ISO8859 Latin-1 support, no conversion */
- iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
- input_encoding = nkf_enc_from_index(ISO_8859_1);
- continue;
- case 'i': /* Kanji IN ESC-$-@/B */
- if (*cp=='@'||*cp=='B')
- kanji_intro = *cp++;
- continue;
- case 'o': /* ASCII IN ESC-(-J/B */
- if (*cp=='J'||*cp=='B'||*cp=='H')
- ascii_intro = *cp++;
- continue;
- case 'h':
- /*
- bit:1 katakana->hiragana
- bit:2 hiragana->katakana
- */
- if ('9'>= *cp && *cp>='0')
- hira_f |= (*cp++ -'0');
- else
- hira_f |= 1;
- continue;
- case 'r':
- rot_f = TRUE;
- continue;
-#if defined(MSDOS) || defined(__OS2__)
- case 'T':
- binmode_f = FALSE;
- continue;
-#endif
-#ifndef PERL_XS
- case 'V':
- show_configuration();
- exit(1);
- break;
- case 'v':
- usage();
- exit(1);
- break;
-#endif
-#ifdef UTF8_OUTPUT_ENABLE
- case 'w': /* UTF-8 output */
- if (cp[0] == '8') {
- cp++;
- if (cp[0] == '0'){
- cp++;
- output_encoding = nkf_enc_from_index(UTF_8N);
- } else {
- output_bom_f = TRUE;
- output_encoding = nkf_enc_from_index(UTF_8_BOM);
- }
- } else {
- int enc_idx;
- if ('1'== cp[0] && '6'==cp[1]) {
- cp += 2;
- enc_idx = UTF_16;
- } else if ('3'== cp[0] && '2'==cp[1]) {
- cp += 2;
- enc_idx = UTF_32;
- } else {
- output_encoding = nkf_enc_from_index(UTF_8);
- continue;
- }
- if (cp[0]=='L') {
- cp++;
- output_endian = ENDIAN_LITTLE;
- } else if (cp[0] == 'B') {
- cp++;
- } else {
- output_encoding = nkf_enc_from_index(enc_idx);
- continue;
- }
- if (cp[0] == '0'){
- cp++;
- enc_idx = enc_idx == UTF_16
- ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
- : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
- } else {
- output_bom_f = TRUE;
- enc_idx = enc_idx == UTF_16
- ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
- : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
- }
- output_encoding = nkf_enc_from_index(enc_idx);
- }
- continue;
-#endif
-#ifdef UTF8_INPUT_ENABLE
- case 'W': /* UTF input */
- if (cp[0] == '8') {
- cp++;
- input_encoding = nkf_enc_from_index(UTF_8);
- }else{
- int enc_idx;
- if ('1'== cp[0] && '6'==cp[1]) {
- cp += 2;
- input_endian = ENDIAN_BIG;
- enc_idx = UTF_16;
- } else if ('3'== cp[0] && '2'==cp[1]) {
- cp += 2;
- input_endian = ENDIAN_BIG;
- enc_idx = UTF_32;
- } else {
- input_encoding = nkf_enc_from_index(UTF_8);
- continue;
- }
- if (cp[0]=='L') {
- cp++;
- input_endian = ENDIAN_LITTLE;
- } else if (cp[0] == 'B') {
- cp++;
- input_endian = ENDIAN_BIG;
- }
- enc_idx = enc_idx == UTF_16
- ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
- : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
- input_encoding = nkf_enc_from_index(enc_idx);
- }
- continue;
-#endif
- /* Input code assumption */
- case 'J': /* ISO-2022-JP input */
- input_encoding = nkf_enc_from_index(ISO_2022_JP);
- continue;
- case 'E': /* EUC-JP input */
- input_encoding = nkf_enc_from_index(EUC_JP);
- continue;
- case 'S': /* Windows-31J input */
- input_encoding = nkf_enc_from_index(WINDOWS_31J);
- continue;
- case 'Z': /* Convert X0208 alphabet to asii */
- /* alpha_f
- bit:0 Convert JIS X 0208 Alphabet to ASCII
- bit:1 Convert Kankaku to one space
- bit:2 Convert Kankaku to two spaces
- bit:3 Convert HTML Entity
- bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
- */
- while ('0'<= *cp && *cp <='9') {
- alpha_f |= 1 << (*cp++ - '0');
- }
- if (!alpha_f) alpha_f = 1;
- continue;
- case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
- x0201_f = FALSE; /* No X0201->X0208 conversion */
- /* accept X0201
- ESC-(-I in JIS, EUC, MS Kanji
- SI/SO in JIS, EUC, MS Kanji
- SSO in EUC, JIS, not in MS Kanji
- MS Kanji (0xa0-0xdf)
- output X0201
- ESC-(-I in JIS (0x20-0x5f)
- SSO in EUC (0xa0-0xdf)
- 0xa0-0xd in MS Kanji (0xa0-0xdf)
- */
- continue;
- case 'X': /* Convert X0201 kana to X0208 */
- x0201_f = TRUE;
- continue;
- case 'F': /* prserve new lines */
- fold_preserve_f = TRUE;
- case 'f': /* folding -f60 or -f */
- fold_f = TRUE;
- fold_len = 0;
- while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
- fold_len *= 10;
- fold_len += *cp++ - '0';
- }
- if (!(0<fold_len && fold_len<BUFSIZ))
- fold_len = DEFAULT_FOLD;
- if (*cp=='-') {
- fold_margin = 0;
- cp++;
- while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
- fold_margin *= 10;
- fold_margin += *cp++ - '0';
- }
- }
- continue;
- case 'm': /* MIME support */
- /* mime_decode_f = TRUE; */ /* this has too large side effects... */
- if (*cp=='B'||*cp=='Q') {
- mime_decode_mode = *cp++;
- mimebuf_f = FIXED_MIME;
- } else if (*cp=='N') {
- mime_f = TRUE; cp++;
- } else if (*cp=='S') {
- mime_f = STRICT_MIME; cp++;
- } else if (*cp=='0') {
- mime_decode_f = FALSE;
- mime_f = FALSE; cp++;
- } else {
- mime_f = STRICT_MIME;
- }
- continue;
- case 'M': /* MIME output */
- if (*cp=='B') {
- mimeout_mode = 'B';
- mimeout_f = FIXED_MIME; cp++;
- } else if (*cp=='Q') {
- mimeout_mode = 'Q';
- mimeout_f = FIXED_MIME; cp++;
- } else {
- mimeout_f = TRUE;
- }
- continue;
- case 'B': /* Broken JIS support */
- /* bit:0 no ESC JIS
- bit:1 allow any x on ESC-(-x or ESC-$-x
- bit:2 reset to ascii on NL
- */
- if ('9'>= *cp && *cp>='0')
- broken_f |= 1<<(*cp++ -'0');
- else
- broken_f |= TRUE;
- continue;
-#ifndef PERL_XS
- case 'O':/* for Output file */
- file_out_f = TRUE;
- continue;
-#endif
- case 'c':/* add cr code */
- eolmode_f = CRLF;
- continue;
- case 'd':/* delete cr code */
- eolmode_f = LF;
- continue;
- case 'I': /* ISO-2022-JP output */
- iso2022jp_f = TRUE;
- continue;
- case 'L': /* line mode */
- if (*cp=='u') { /* unix */
- eolmode_f = LF; cp++;
- } else if (*cp=='m') { /* mac */
- eolmode_f = CR; cp++;
- } else if (*cp=='w') { /* windows */
- eolmode_f = CRLF; cp++;
- } else if (*cp=='0') { /* no conversion */
- eolmode_f = 0; cp++;
- }
- continue;
-#ifndef PERL_XS
- case 'g':
- if ('2' <= *cp && *cp <= '9') {
- guess_f = 2;
- cp++;
- } else if (*cp == '0' || *cp == '1') {
- guess_f = 1;
- cp++;
- } else {
- guess_f = 1;
- }
- continue;
-#endif
- case SP:
- /* module muliple options in a string are allowed for Perl moudle */
- while(*cp && *cp++!='-');
- continue;
- default:
- fprintf(stderr, "unknown option: -%c\n", *(cp-1));
- /* bogus option but ignored */
- continue;
- }
- }
-}
-
-#ifdef UTF8_INPUT_ENABLE
-nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
-{
- nkf_char c2;
- const unsigned short *p;
- unsigned short val;
-
- if (pp == 0) return 1;
-
- c1 -= 0x80;
- if (c1 < 0 || psize <= c1) return 1;
- p = pp[c1];
- if (p == 0) return 1;
-
- c0 -= 0x80;
- if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
- val = p[c0];
- if (val == 0) return 1;
- if (no_cp932ext_f && (
- (val>>8) == 0x2D || /* NEC special characters */
- val > NKF_INT32_C(0xF300) /* IBM extended characters */
- )) return 1;
-
- c2 = val >> 8;
- if (val > 0x7FFF){
- c2 &= 0x7f;
- c2 |= PREFIX_EUCG3;
- }
- if (c2 == SO) c2 = JIS_X_0201;
- c1 = val & 0x7f;
- if (p2) *p2 = c2;
- if (p1) *p1 = c1;
- return 0;
-}
-
-nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
-{
- const unsigned short *const *pp;
- const unsigned short *const *const *ppp;
- static const char no_best_fit_chars_table_C2[] =
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
- 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
- static const char no_best_fit_chars_table_C2_ms[] =
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
- 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
- static const char no_best_fit_chars_table_932_C2[] =
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
- 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
- static const char no_best_fit_chars_table_932_C3[] =
- {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
- nkf_char ret = 0;
-
- if(c2 < 0x80){
- *p2 = 0;
- *p1 = c2;
- }else if(c2 < 0xe0){
- if(no_best_fit_chars_f){
- if(ms_ucs_map_f == UCS_MAP_CP932){
- switch(c2){
- case 0xC2:
- if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
- break;
- case 0xC3:
- if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
- break;
- }
- }else if(!cp932inv_f){
- switch(c2){
- case 0xC2:
- if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
- break;
- case 0xC3:
- if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
- break;
- }
- }else if(ms_ucs_map_f == UCS_MAP_MS){
- if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
- }else if(ms_ucs_map_f == UCS_MAP_CP10001){
- switch(c2){
- case 0xC2:
- switch(c1){
- case 0xA2:
- case 0xA3:
- case 0xA5:
- case 0xA6:
- case 0xAC:
- case 0xAF:
- case 0xB8:
- return 1;
- }
- break;
- }
- }
- }
- pp =
- ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
- ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
- ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
- utf8_to_euc_2bytes;
- ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
- }else if(c0 < 0xF0){
- if(no_best_fit_chars_f){
- if(ms_ucs_map_f == UCS_MAP_CP932){
- if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
- }else if(ms_ucs_map_f == UCS_MAP_MS){
- switch(c2){
- case 0xE2:
- switch(c1){
- case 0x80:
- if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
- break;
- case 0x88:
- if(c0 == 0x92) return 1;
- break;
- }
- break;
- case 0xE3:
- if(c1 == 0x80 || c0 == 0x9C) return 1;
- break;
- }
- }else if(ms_ucs_map_f == UCS_MAP_CP10001){
- switch(c2){
- case 0xE3:
- switch(c1){
- case 0x82:
- if(c0 == 0x94) return 1;
- break;
- case 0x83:
- if(c0 == 0xBB) return 1;
- break;
- }
- break;
- }
- }else{
- switch(c2){
- case 0xE2:
- switch(c1){
- case 0x80:
- if(c0 == 0x95) return 1;
- break;
- case 0x88:
- if(c0 == 0xA5) return 1;
- break;
- }
- break;
- case 0xEF:
- switch(c1){
- case 0xBC:
- if(c0 == 0x8D) return 1;
- break;
- case 0xBD:
- if(c0 == 0x9E && !cp932inv_f) return 1;
- break;
- case 0xBF:
- if(0xA0 <= c0 && c0 <= 0xA5) return 1;
- break;
- }
- break;
- }
- }
- }
- ppp =
- ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
- ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
- ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
- utf8_to_euc_3bytes;
- ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
- }else return -1;
-#ifdef SHIFTJIS_CP932
- if (!ret && !cp932inv_f && is_eucg3(*p2)) {
- nkf_char s2, s1;
- if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
- s2e_conv(s2, s1, p2, p1);
- }else{
- ret = 1;
- }
- }
-#endif
- return ret;
-}
-
-nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
-{
- nkf_char ret = 0;
-
- if (!c1){
- *p2 = 0;
- *p1 = c2;
- }else if (0xc0 <= c2 && c2 <= 0xef) {
- ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
-#ifdef NUMCHAR_OPTION
- if (ret > 0){
- if (p2) *p2 = 0;
- if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
- ret = 0;
+ static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
+#ifdef SHIFTJIS_CP932
+ if (!cp932inv_f && is_ibmext_in_sjis(c2)){
+ val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
+ if (val){
+ c2 = val >> 8;
+ c1 = val & 0xff;
}
-#endif
- }
- return ret;
-}
-
-nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
-{
- nkf_char ret = 0;
- static const char w_iconv_utf8_1st_byte[] =
- { /* 0xC0 - 0xFF */
- 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
- 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
-
- if (c2 < 0 || 0xff < c2) {
- }else if (c2 == 0) { /* 0 : 1 byte*/
- c0 = 0;
- } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
- return 0;
- } else{
- switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
- case 21:
- if (c1 < 0x80 || 0xBF < c1) return 0;
- break;
- case 30:
- if (c0 == 0) return -1;
- if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
- return 0;
- break;
- case 31:
- case 33:
- if (c0 == 0) return -1;
- if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
- return 0;
- break;
- case 32:
- if (c0 == 0) return -1;
- if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
- return 0;
- break;
- case 40:
- if (c0 == 0) return -2;
- if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
- return 0;
- break;
- case 41:
- if (c0 == 0) return -2;
- if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
- return 0;
- break;
- case 42:
- if (c0 == 0) return -2;
- if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
- return 0;
- break;
- default:
- return 0;
- break;
- }
}
- if (c2 == 0 || c2 == EOF){
- } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
- c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
- c2 = 0;
- } else {
- ret = w2e_conv(c2, c1, c0, &c2, &c1);
- }
- if (ret == 0){
- (*oconv)(c2, c1);
- }
- return ret;
-}
-
-nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
-{
- nkf_char ret = 0;
- if ((c2==0 && c1 < 0x80) || c2==EOF) {
- (*oconv)(c2, c1);
- return 0;
- }else if (0xD8 <= c2 && c2 <= 0xDB) {
- if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
- return -2;
- c1 = CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
- c2 = 0;
- }else if ((c2>>3) == 27) { /* unpaired surrogate */
- /*
- return 2;
- */
- return 1;
- }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
- if (ret) return ret;
- (*oconv)(c2, c1);
- return 0;
-}
-
-nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
-{
- int ret = 0;
-
- if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
- } else if (is_unicode_bmp(c1)) {
- ret = w16e_conv(c1, &c2, &c1);
- } else {
- c2 = 0;
- c1 = CLASS_UNICODE | c1;
+ if (cp932inv_f
+ && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
+ nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
+ if (c){
+ c2 = c >> 8;
+ c1 = c & 0xff;
+ }
}
- if (ret) return ret;
- (*oconv)(c2, c1);
- return 0;
-}
-
-#endif
-
-struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
-{
- if (iconv_func){
- struct input_code *p = input_code_list;
- while (p->name){
- if (iconv_func == p->iconv_func){
- return p;
+#endif /* SHIFTJIS_CP932 */
+#ifdef X0212_ENABLE
+ if (!x0213_f && is_ibmext_in_sjis(c2)){
+ val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
+ if (val){
+ if (val > 0x7FFF){
+ c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
+ c1 = val & 0xff;
+ }else{
+ c2 = val >> 8;
+ c1 = val & 0xff;
}
- p++;
+ if (p2) *p2 = c2;
+ if (p1) *p1 = c1;
+ return 0;
}
}
- return 0;
-}
-
-void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
-{
-#ifdef INPUT_CODE_FIX
- if (f || !input_encoding)
-#endif
- if (estab_f != f){
- estab_f = f;
- }
-
- if (iconv_func
-#ifdef INPUT_CODE_FIX
- && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
#endif
- ){
- iconv = iconv_func;
- }
-#ifdef CHECK_OPTION
- if (estab_f && iconv_for_check != iconv){
- struct input_code *p = find_inputcode_byfunc(iconv);
- if (p){
- set_input_codename(p->name);
- debug(p->name);
- }
- iconv_for_check = iconv;
+ if(c2 >= 0x80){
+ if(x0213_f && c2 >= 0xF0){
+ if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
+ c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
+ }else{ /* 78<=k<=94 */
+ c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
+ if (0x9E < c1) c2++;
+ }
+ }else{
+#define SJ0162 0x00e1 /* 01 - 62 ku offset */
+#define SJ6394 0x0161 /* 63 - 94 ku offset */
+ c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
+ if (0x9E < c1) c2++;
+ }
+ if (c1 < 0x9F)
+ c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
+ else {
+ c1 = c1 - 0x7E;
+ }
}
+
+#ifdef X0212_ENABLE
+ c2 = x0212_unshift(c2);
#endif
+ if (p2) *p2 = c2;
+ if (p1) *p1 = c1;
+ return 0;
}
-#define SCORE_L2 (1) /* \e$BBh\e(B2\e$B?e=`4A;z\e(B */
-#define SCORE_KANA (SCORE_L2 << 1) /* \e$B$$$o$f$kH>3Q%+%J\e(B */
-#define SCORE_DEPEND (SCORE_KANA << 1) /* \e$B5!<o0MB8J8;z\e(B */
-#define SCORE_CP932 (SCORE_DEPEND << 1) /* CP932 \e$B$K$h$kFI$_49$(\e(B (IBM extended characters) */
-#define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
-#define SCORE_NO_EXIST (SCORE_X0212 << 1) /* \e$BB8:_$7$J$$J8;z\e(B */
-#define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME \e$B$K$h$k;XDj\e(B */
-#define SCORE_ERROR (SCORE_iMIME << 1) /* \e$B%(%i!<\e(B */
-
-#define SCORE_INIT (SCORE_iMIME)
-
-static const char score_table_A0[] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
- SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
-};
-
-static const char score_table_F0[] = {
- SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
- SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
- SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
- SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
-};
-
-void set_code_score(struct input_code *ptr, nkf_char score)
+#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+void nkf_unicode_to_utf8(nkf_char val, int *p1, int *p2, int *p3, int *p4)
{
- if (ptr){
- ptr->score |= score;
+ val &= VALUE_MASK;
+ if (val < 0x80){
+ *p1 = val;
+ *p2 = 0;
+ *p3 = 0;
+ *p4 = 0;
+ }else if (val < 0x800){
+ *p1 = 0xc0 | (val >> 6);
+ *p2 = 0x80 | (val & 0x3f);
+ *p3 = 0;
+ *p4 = 0;
+ } else if (nkf_char_unicode_bmp_p(val)) {
+ *p1 = 0xe0 | (val >> 12);
+ *p2 = 0x80 | ((val >> 6) & 0x3f);
+ *p3 = 0x80 | ( val & 0x3f);
+ *p4 = 0;
+ } else if (nkf_char_unicode_value_p(val)) {
+ *p1 = 0xe0 | (val >> 16);
+ *p2 = 0x80 | ((val >> 12) & 0x3f);
+ *p3 = 0x80 | ((val >> 6) & 0x3f);
+ *p4 = 0x80 | ( val & 0x3f);
+ } else {
+ *p1 = 0;
+ *p2 = 0;
+ *p3 = 0;
+ *p4 = 0;
}
}
-void clr_code_score(struct input_code *ptr, nkf_char score)
+nkf_char nkf_utf8_to_unicode(int c1, int c2, int c3, int c4)
{
- if (ptr){
- ptr->score &= ~score;
- }
+ nkf_char wc;
+ if (c1 <= 0x7F) {
+ /* single byte */
+ wc = c1;
+ }
+ else if (c1 <= 0xC3) {
+ /* trail byte or invalid */
+ return -1;
+ }
+ else if (c1 <= 0xDF) {
+ /* 2 bytes */
+ wc = (c1 & 0x1F) << 6;
+ wc |= (c2 & 0x3F);
+ }
+ else if (c1 <= 0xEF) {
+ /* 3 bytes */
+ wc = (c1 & 0x0F) << 12;
+ wc |= (c2 & 0x3F) << 6;
+ wc |= (c3 & 0x3F);
+ }
+ else if (c2 <= 0xF4) {
+ /* 4 bytes */
+ wc = (c1 & 0x0F) << 18;
+ wc |= (c2 & 0x3F) << 12;
+ wc |= (c3 & 0x3F) << 6;
+ wc |= (c4 & 0x3F);
+ }
+ else {
+ return -1;
+ }
+ return wc;
}
-
-void code_score(struct input_code *ptr)
-{
- nkf_char c2 = ptr->buf[0];
-#ifdef UTF8_OUTPUT_ENABLE
- nkf_char c1 = ptr->buf[1];
-#endif
- if (c2 < 0){
- set_code_score(ptr, SCORE_ERROR);
- }else if (c2 == SSO){
- set_code_score(ptr, SCORE_KANA);
- }else if (c2 == 0x8f){
- set_code_score(ptr, SCORE_X0212);
-#ifdef UTF8_OUTPUT_ENABLE
- }else if (!e2w_conv(c2, c1)){
- set_code_score(ptr, SCORE_NO_EXIST);
#endif
- }else if ((c2 & 0x70) == 0x20){
- set_code_score(ptr, score_table_A0[c2 & 0x0f]);
- }else if ((c2 & 0x70) == 0x70){
- set_code_score(ptr, score_table_F0[c2 & 0x0f]);
- }else if ((c2 & 0x70) >= 0x50){
- set_code_score(ptr, SCORE_L2);
- }
-}
-void status_disable(struct input_code *ptr)
+#ifdef UTF8_INPUT_ENABLE
+static int unicode_to_jis_common2(nkf_char c1, nkf_char c0,
+ const unsigned short *const *pp, nkf_char psize,
+ nkf_char *p2, nkf_char *p1)
{
- ptr->stat = -1;
- ptr->buf[0] = -1;
- code_score(ptr);
- if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
-}
+ nkf_char c2;
+ const unsigned short *p;
+ unsigned short val;
-void status_push_ch(struct input_code *ptr, nkf_char c)
-{
- ptr->buf[ptr->index++] = c;
-}
+ if (pp == 0) return 1;
-void status_clear(struct input_code *ptr)
-{
- ptr->stat = 0;
- ptr->index = 0;
-}
+ c1 -= 0x80;
+ if (c1 < 0 || psize <= c1) return 1;
+ p = pp[c1];
+ if (p == 0) return 1;
+
+ c0 -= 0x80;
+ if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
+ val = p[c0];
+ if (val == 0) return 1;
+ if (no_cp932ext_f && (
+ (val>>8) == 0x2D || /* NEC special characters */
+ val > NKF_INT32_C(0xF300) /* IBM extended characters */
+ )) return 1;
-void status_reset(struct input_code *ptr)
-{
- status_clear(ptr);
- ptr->score = SCORE_INIT;
+ c2 = val >> 8;
+ if (val > 0x7FFF){
+ c2 &= 0x7f;
+ c2 |= PREFIX_EUCG3;
+ }
+ if (c2 == SO) c2 = JIS_X_0201_1976_K;
+ c1 = val & 0xFF;
+ if (p2) *p2 = c2;
+ if (p1) *p1 = c1;
+ return 0;
}
-void status_reinit(struct input_code *ptr)
+static nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
{
- status_reset(ptr);
- ptr->_file_stat = 0;
-}
+ const unsigned short *const *pp;
+ const unsigned short *const *const *ppp;
+ static const char no_best_fit_chars_table_C2[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
+ 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
+ static const char no_best_fit_chars_table_C2_ms[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
+ static const char no_best_fit_chars_table_932_C2[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
+ static const char no_best_fit_chars_table_932_C3[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
+ nkf_char ret = 0;
-void status_check(struct input_code *ptr, nkf_char c)
-{
- if (c <= DEL && estab_f){
- status_reset(ptr);
+ if(c2 < 0x80){
+ *p2 = 0;
+ *p1 = c2;
+ }else if(c2 < 0xe0){
+ if(no_best_fit_chars_f){
+ if(ms_ucs_map_f == UCS_MAP_CP932){
+ switch(c2){
+ case 0xC2:
+ if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
+ break;
+ case 0xC3:
+ if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
+ break;
+ }
+ }else if(!cp932inv_f){
+ switch(c2){
+ case 0xC2:
+ if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
+ break;
+ case 0xC3:
+ if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
+ break;
+ }
+ }else if(ms_ucs_map_f == UCS_MAP_MS){
+ if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
+ switch(c2){
+ case 0xC2:
+ switch(c1){
+ case 0xA2:
+ case 0xA3:
+ case 0xA5:
+ case 0xA6:
+ case 0xAC:
+ case 0xAF:
+ case 0xB8:
+ return 1;
+ }
+ break;
+ }
+ }
+ }
+ pp =
+ ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
+ ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
+ utf8_to_euc_2bytes;
+ ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
+ }else if(c0 < 0xF0){
+ if(no_best_fit_chars_f){
+ if(ms_ucs_map_f == UCS_MAP_CP932){
+ if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
+ }else if(ms_ucs_map_f == UCS_MAP_MS){
+ switch(c2){
+ case 0xE2:
+ switch(c1){
+ case 0x80:
+ if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
+ break;
+ case 0x88:
+ if(c0 == 0x92) return 1;
+ break;
+ }
+ break;
+ case 0xE3:
+ if(c1 == 0x80 || c0 == 0x9C) return 1;
+ break;
+ }
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
+ switch(c2){
+ case 0xE3:
+ switch(c1){
+ case 0x82:
+ if(c0 == 0x94) return 1;
+ break;
+ case 0x83:
+ if(c0 == 0xBB) return 1;
+ break;
+ }
+ break;
+ }
+ }else{
+ switch(c2){
+ case 0xE2:
+ switch(c1){
+ case 0x80:
+ if(c0 == 0x95) return 1;
+ break;
+ case 0x88:
+ if(c0 == 0xA5) return 1;
+ break;
+ }
+ break;
+ case 0xEF:
+ switch(c1){
+ case 0xBC:
+ if(c0 == 0x8D) return 1;
+ break;
+ case 0xBD:
+ if(c0 == 0x9E && !cp932inv_f) return 1;
+ break;
+ case 0xBF:
+ if(0xA0 <= c0 && c0 <= 0xA5) return 1;
+ break;
+ }
+ break;
+ }
+ }
+ }
+ ppp =
+ ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
+ ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
+ utf8_to_euc_3bytes;
+ ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
+ }else return -1;
+#ifdef SHIFTJIS_CP932
+ if (!ret && !cp932inv_f && is_eucg3(*p2)) {
+ nkf_char s2, s1;
+ if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
+ s2e_conv(s2, s1, p2, p1);
+ }else{
+ ret = 1;
+ }
}
+#endif
+ return ret;
}
-void s_status(struct input_code *ptr, nkf_char c)
+#ifdef UTF8_OUTPUT_ENABLE
+nkf_char e2w_conv(nkf_char c2, nkf_char c1)
{
- switch(ptr->stat){
- case -1:
- status_check(ptr, c);
- break;
- case 0:
- if (c <= DEL){
- break;
-#ifdef NUMCHAR_OPTION
- }else if (is_unicode_capsule(c)){
- break;
-#endif
- }else if (0xa1 <= c && c <= 0xdf){
- status_push_ch(ptr, SSO);
- status_push_ch(ptr, c);
- code_score(ptr);
- status_clear(ptr);
- }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
- ptr->stat = 1;
- status_push_ch(ptr, c);
- }else if (0xed <= c && c <= 0xee){
- ptr->stat = 3;
- status_push_ch(ptr, c);
-#ifdef SHIFTJIS_CP932
- }else if (is_ibmext_in_sjis(c)){
- ptr->stat = 2;
- status_push_ch(ptr, c);
-#endif /* SHIFTJIS_CP932 */
-#ifdef X0212_ENABLE
- }else if (0xf0 <= c && c <= 0xfc){
- ptr->stat = 1;
- status_push_ch(ptr, c);
-#endif /* X0212_ENABLE */
- }else{
- status_disable(ptr);
- }
- break;
- case 1:
- if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
- status_push_ch(ptr, c);
- s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
- code_score(ptr);
- status_clear(ptr);
- }else{
- status_disable(ptr);
- }
- break;
- case 2:
-#ifdef SHIFTJIS_CP932
- if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
- status_push_ch(ptr, c);
- if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
- set_code_score(ptr, SCORE_CP932);
- status_clear(ptr);
- break;
+ const unsigned short *p;
+
+ if (c2 == JIS_X_0201_1976_K) {
+ if (ms_ucs_map_f == UCS_MAP_CP10001) {
+ switch (c1) {
+ case 0x20:
+ return 0xA0;
+ case 0x7D:
+ return 0xA9;
}
}
-#endif /* SHIFTJIS_CP932 */
- status_disable(ptr);
- break;
- case 3:
- if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
- status_push_ch(ptr, c);
- s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
- set_code_score(ptr, SCORE_CP932);
- status_clear(ptr);
- }else{
- status_disable(ptr);
- }
- break;
+ p = euc_to_utf8_1byte;
+#ifdef X0212_ENABLE
+ } else if (is_eucg3(c2)){
+ if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
+ return 0xA6;
+ }
+ c2 = (c2&0x7f) - 0x21;
+ if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
+ p = x0212_to_utf8_2bytes[c2];
+ else
+ return 0;
+#endif
+ } else {
+ c2 &= 0x7f;
+ c2 = (c2&0x7f) - 0x21;
+ if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
+ p =
+ ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
+ euc_to_utf8_2bytes_ms[c2];
+ else
+ return 0;
}
+ if (!p) return 0;
+ c1 = (c1 & 0x7f) - 0x21;
+ if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
+ return p[c1];
+ return 0;
}
+#endif
-void e_status(struct input_code *ptr, nkf_char c)
+nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
{
- switch (ptr->stat){
- case -1:
- status_check(ptr, c);
- break;
- case 0:
- if (c <= DEL){
- break;
+ nkf_char ret = 0;
+
+ if (!c1){
+ *p2 = 0;
+ *p1 = c2;
+ }else if (0xc0 <= c2 && c2 <= 0xef) {
+ ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
#ifdef NUMCHAR_OPTION
- }else if (is_unicode_capsule(c)){
- break;
+ if (ret > 0){
+ if (p2) *p2 = 0;
+ if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
+ ret = 0;
+ }
#endif
- }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
- ptr->stat = 1;
- status_push_ch(ptr, c);
-#ifdef X0212_ENABLE
- }else if (0x8f == c){
- ptr->stat = 2;
- status_push_ch(ptr, c);
-#endif /* X0212_ENABLE */
- }else{
- status_disable(ptr);
- }
- break;
- case 1:
- if (0xa1 <= c && c <= 0xfe){
- status_push_ch(ptr, c);
- code_score(ptr);
- status_clear(ptr);
- }else{
- status_disable(ptr);
- }
- break;
-#ifdef X0212_ENABLE
- case 2:
- if (0xa1 <= c && c <= 0xfe){
- ptr->stat = 1;
- status_push_ch(ptr, c);
- }else{
- status_disable(ptr);
- }
-#endif /* X0212_ENABLE */
}
+ return ret;
}
#ifdef UTF8_INPUT_ENABLE
-void w_status(struct input_code *ptr, nkf_char c)
+nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
{
- switch (ptr->stat){
- case -1:
- status_check(ptr, c);
- break;
- case 0:
- if (c <= DEL){
- break;
-#ifdef NUMCHAR_OPTION
- }else if (is_unicode_capsule(c)){
- break;
-#endif
- }else if (0xc0 <= c && c <= 0xdf){
- ptr->stat = 1;
- status_push_ch(ptr, c);
- }else if (0xe0 <= c && c <= 0xef){
- ptr->stat = 2;
- status_push_ch(ptr, c);
- }else if (0xf0 <= c && c <= 0xf4){
- ptr->stat = 3;
- status_push_ch(ptr, c);
- }else{
- status_disable(ptr);
- }
- break;
- case 1:
- case 2:
- if (0x80 <= c && c <= 0xbf){
- status_push_ch(ptr, c);
- if (ptr->index > ptr->stat){
- int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
- && ptr->buf[2] == 0xbf);
- w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
- &ptr->buf[0], &ptr->buf[1]);
- if (!bom){
- code_score(ptr);
- }
- status_clear(ptr);
- }
- }else{
- status_disable(ptr);
- }
- break;
- case 3:
- if (0x80 <= c && c <= 0xbf){
- if (ptr->index < ptr->stat){
- status_push_ch(ptr, c);
- } else {
- status_clear(ptr);
- }
- }else{
- status_disable(ptr);
- }
- break;
+ int c1, c2, c3, c4;
+ nkf_char ret = 0;
+ val &= VALUE_MASK;
+ if (val < 0x80) {
+ *p2 = 0;
+ *p1 = val;
+ }
+ else if (nkf_char_unicode_bmp_p(val)){
+ nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
+ ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
+ if (ret > 0){
+ *p2 = 0;
+ *p1 = nkf_char_unicode_new(val);
+ ret = 0;
+ }
+ }
+ else {
+ *p2 = 0;
+ *p1 = nkf_char_unicode_new(val);
}
+ return ret;
}
#endif
-void code_status(nkf_char c)
+nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
{
- int action_flag = 1;
- struct input_code *result = 0;
- struct input_code *p = input_code_list;
- while (p->name){
- if (!p->status_func) {
- ++p;
- continue;
+ if (c2 == JIS_X_0201_1976_K || c2 == SS2){
+ if (iso2022jp_f && !x0201_f) {
+ c2 = GETA1; c1 = GETA2;
+ } else {
+ c2 = JIS_X_0201_1976_K;
+ c1 &= 0x7f;
}
- if (!p->status_func)
- continue;
- (p->status_func)(p, c);
- if (p->stat > 0){
- action_flag = 0;
- }else if(p->stat == 0){
- if (result){
- action_flag = 0;
- }else{
- result = p;
- }
+#ifdef X0212_ENABLE
+ }else if (c2 == 0x8f){
+ if (c0 == 0){
+ return -1;
}
- ++p;
+ if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
+ /* encoding is eucJP-ms, so invert to Unicode Private User Area */
+ c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
+ c2 = 0;
+ } else {
+ c2 = (c2 << 8) | (c1 & 0x7f);
+ c1 = c0 & 0x7f;
+#ifdef SHIFTJIS_CP932
+ if (cp51932_f){
+ nkf_char s2, s1;
+ if (e2s_conv(c2, c1, &s2, &s1) == 0){
+ s2e_conv(s2, s1, &c2, &c1);
+ if (c2 < 0x100){
+ c1 &= 0x7f;
+ c2 &= 0x7f;
+ }
+ }
+ }
+#endif /* SHIFTJIS_CP932 */
+ }
+#endif /* X0212_ENABLE */
+ } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
+ /* NOP */
+ } else {
+ if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
+ /* encoding is eucJP-ms, so invert to Unicode Private User Area */
+ c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
+ c2 = 0;
+ } else {
+ c1 &= 0x7f;
+ c2 &= 0x7f;
+#ifdef SHIFTJIS_CP932
+ if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
+ nkf_char s2, s1;
+ if (e2s_conv(c2, c1, &s2, &s1) == 0){
+ s2e_conv(s2, s1, &c2, &c1);
+ if (c2 < 0x100){
+ c1 &= 0x7f;
+ c2 &= 0x7f;
+ }
+ }
+ }
+#endif /* SHIFTJIS_CP932 */
+ }
+ }
+ (*oconv)(c2, c1);
+ return 0;
+}
+
+nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
+{
+ if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
+ if (iso2022jp_f && !x0201_f) {
+ c2 = GETA1; c1 = GETA2;
+ } else {
+ c1 &= 0x7f;
+ }
+ } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
+ /* NOP */
+ } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
+ /* CP932 UDC */
+ if(c1 == 0x7F) return 0;
+ c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
+ c2 = 0;
+ } else {
+ nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
+ if (ret) return ret;
+ }
+ (*oconv)(c2, c1);
+ return 0;
+}
+
+nkf_char w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
+{
+ nkf_char ret = 0, c4 = 0;
+ static const char w_iconv_utf8_1st_byte[] =
+ { /* 0xC0 - 0xFF */
+ 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
+ 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
+
+ if (c3 > 0xFF) {
+ c4 = c3 & 0xFF;
+ c3 >>= 8;
+ }
+
+ if (c1 < 0 || 0xff < c1) {
+ }else if (c1 == 0) { /* 0 : 1 byte*/
+ c3 = 0;
+ } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
+ return 0;
+ } else{
+ switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
+ case 21:
+ if (c2 < 0x80 || 0xBF < c2) return 0;
+ break;
+ case 30:
+ if (c3 == 0) return -1;
+ if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
+ return 0;
+ break;
+ case 31:
+ case 33:
+ if (c3 == 0) return -1;
+ if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
+ return 0;
+ break;
+ case 32:
+ if (c3 == 0) return -1;
+ if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
+ return 0;
+ break;
+ case 40:
+ if (c3 == 0) return -2;
+ if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
+ return 0;
+ break;
+ case 41:
+ if (c3 == 0) return -2;
+ if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
+ return 0;
+ break;
+ case 42:
+ if (c3 == 0) return -2;
+ if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
+ return 0;
+ break;
+ default:
+ return 0;
+ break;
+ }
}
-
- if (action_flag){
- if (result && !estab_f){
- set_iconv(TRUE, result->iconv_func);
- }else if (c <= DEL){
- struct input_code *ptr = input_code_list;
- while (ptr->name){
- status_reset(ptr);
- ++ptr;
- }
- }
+ if (c1 == 0 || c1 == EOF){
+ } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
+ c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
+ c1 = 0;
+ } else {
+ ret = w2e_conv(c1, c2, c3, &c1, &c2);
+ }
+ if (ret == 0){
+ (*oconv)(c1, c2);
}
+ return ret;
}
-#ifndef WIN32DLL
-nkf_char std_getc(FILE *f)
+#define NKF_ICONV_INVALID_CODE_RANGE -13
+static size_t unicode_iconv(nkf_char wc)
{
- if (std_gc_ndx){
- return std_gc_buf[--std_gc_ndx];
+ nkf_char c1, c2;
+ int ret = 0;
+
+ if (wc < 0x80) {
+ c2 = 0;
+ c1 = wc;
+ }else if ((wc>>11) == 27) {
+ /* unpaired surrogate */
+ return NKF_ICONV_INVALID_CODE_RANGE;
+ }else if (wc < 0xFFFF) {
+ ret = w16e_conv(wc, &c2, &c1);
+ if (ret) return ret;
+ }else if (wc < 0x10FFFF) {
+ c2 = 0;
+ c1 = nkf_char_unicode_new(wc);
+ } else {
+ return NKF_ICONV_INVALID_CODE_RANGE;
}
- return getc(f);
+ (*oconv)(c2, c1);
+ return 0;
}
-#endif /*WIN32DLL*/
-nkf_char std_ungetc(nkf_char c, FILE *f)
+#define NKF_ICONV_NEED_ONE_MORE_BYTE -1
+#define NKF_ICONV_NEED_TWO_MORE_BYTES -2
+#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
+size_t nkf_iconv_utf_16(int c1, int c2, int c3, int c4)
{
- if (std_gc_ndx == STD_GC_BUFSIZE){
- return EOF;
+ nkf_char wc;
+
+ if (c1 == EOF) {
+ (*oconv)(EOF, 0);
+ return 0;
}
- std_gc_buf[std_gc_ndx++] = c;
- return c;
+
+ if (input_endian == ENDIAN_BIG) {
+ if (0xD8 <= c1 && c1 <= 0xDB) {
+ if (0xDC <= c3 && c3 <= 0xDF) {
+ wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
+ } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
+ } else {
+ wc = c1 << 8 | c2;
+ }
+ } else {
+ if (0xD8 <= c2 && c2 <= 0xDB) {
+ if (0xDC <= c4 && c4 <= 0xDF) {
+ wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
+ } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
+ } else {
+ wc = c2 << 8 | c1;
+ }
+ }
+
+ return (*unicode_iconv)(wc);
}
-#ifndef WIN32DLL
-void std_putc(nkf_char c)
+nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
{
- if(c!=EOF)
- putchar(c);
+ return 0;
}
-#endif /*WIN32DLL*/
-nkf_char
-h_conv(FILE *f, nkf_char c2, nkf_char c1)
+nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
{
- nkf_char ret, c3, c0;
- int hold_index;
-
-
- /** it must NOT be in the kanji shifte sequence */
- /** it must NOT be written in JIS7 */
- /** and it must be after 2 byte 8bit code */
+ return 0;
+}
- hold_count = 0;
- push_hold_buf(c2);
- push_hold_buf(c1);
+size_t nkf_iconv_utf_32(int c1, int c2, int c3, int c4)
+{
+ nkf_char wc;
- while ((c1 = (*i_getc)(f)) != EOF) {
- if (c1 == ESC){
- (*i_ungetc)(c1,f);
- break;
- }
- code_status(c1);
- if (push_hold_buf(c1) == EOF || estab_f){
- break;
- }
+ if (c1 == EOF) {
+ (*oconv)(EOF, 0);
+ return 0;
}
- if (!estab_f){
- struct input_code *p = input_code_list;
- struct input_code *result = p;
- if (c1 == EOF){
- code_status(c1);
- }
- while (p->name){
- if (p->status_func && p->score < result->score){
- result = p;
- }
- ++p;
- }
- set_iconv(TRUE, result->iconv_func);
+ switch(input_endian){
+ case ENDIAN_BIG:
+ wc = c2 << 16 | c3 << 8 | c4;
+ break;
+ case ENDIAN_LITTLE:
+ wc = c3 << 16 | c2 << 8 | c1;
+ break;
+ case ENDIAN_2143:
+ wc = c1 << 16 | c4 << 8 | c3;
+ break;
+ case ENDIAN_3412:
+ wc = c4 << 16 | c1 << 8 | c2;
+ break;
+ default:
+ return NKF_ICONV_INVALID_CODE_RANGE;
}
-
- /** now,
- ** 1) EOF is detected, or
- ** 2) Code is established, or
- ** 3) Buffer is FULL (but last word is pushed)
- **
- ** in 1) and 3) cases, we continue to use
- ** Kanji codes by oconv and leave estab_f unchanged.
- **/
-
- ret = c1;
- hold_index = 0;
- while (hold_index < hold_count){
- c2 = hold_buf[hold_index++];
- if (c2 <= DEL
-#ifdef NUMCHAR_OPTION
- || is_unicode_capsule(c2)
-#endif
- ){
- (*iconv)(0, c2, 0);
- continue;
- }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
- (*iconv)(JIS_X_0201, c2, 0);
- continue;
- }
- if (hold_index < hold_count){
- c1 = hold_buf[hold_index++];
- }else{
- c1 = (*i_getc)(f);
- if (c1 == EOF){
- c3 = EOF;
- break;
- }
- code_status(c1);
- }
- c0 = 0;
- switch ((*iconv)(c2, c1, 0)) { /* can be EUC/SJIS/UTF-8 */
- case -2:
- /* 4 bytes UTF-8 */
- if (hold_index < hold_count){
- c0 = hold_buf[hold_index++];
- } else if ((c0 = (*i_getc)(f)) == EOF) {
- ret = EOF;
- break;
- } else {
- code_status(c0);
- c0 <<= 8;
- if (hold_index < hold_count){
- c3 = hold_buf[hold_index++];
- } else if ((c3 = (*i_getc)(f)) == EOF) {
- c0 = ret = EOF;
- break;
- } else {
- code_status(c3);
- (*iconv)(c2, c1, c0|c3);
- }
- }
- break;
- case -1:
- /* 3 bytes EUC or UTF-8 */
- if (hold_index < hold_count){
- c0 = hold_buf[hold_index++];
- } else if ((c0 = (*i_getc)(f)) == EOF) {
- ret = EOF;
- break;
- } else {
- code_status(c0);
- }
- (*iconv)(c2, c1, c0);
- break;
- }
- if (c0 == EOF) break;
- }
- return ret;
+ return (*unicode_iconv)(wc);
}
+#endif
-/*
- * Check and Ignore BOM
- */
-void check_bom(FILE *f)
+#define output_ascii_escape_sequence(mode) do { \
+ if (output_mode != ASCII && output_mode != ISO_8859_1) { \
+ (*o_putc)(ESC); \
+ (*o_putc)('('); \
+ (*o_putc)(ascii_intro); \
+ output_mode = mode; \
+ } \
+} while (0)
+
+void output_escape_sequence(int mode)
{
- int c2;
- switch(c2 = (*i_getc)(f)){
- case 0x00:
- if((c2 = (*i_getc)(f)) == 0x00){
- if((c2 = (*i_getc)(f)) == 0xFE){
- if((c2 = (*i_getc)(f)) == 0xFF){
- if(!input_encoding){
- set_iconv(TRUE, w_iconv32);
- }
- if (iconv == w_iconv32) {
- input_endian = ENDIAN_BIG;
- return;
- }
- (*i_ungetc)(0xFF,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0xFE,f);
- }else if(c2 == 0xFF){
- if((c2 = (*i_getc)(f)) == 0xFE){
- if(!input_encoding){
- set_iconv(TRUE, w_iconv32);
- }
- if (iconv == w_iconv32) {
- input_endian = ENDIAN_2143;
- return;
- }
- (*i_ungetc)(0xFF,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0xFF,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0x00,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0x00,f);
- break;
- case 0xEF:
- if((c2 = (*i_getc)(f)) == 0xBB){
- if((c2 = (*i_getc)(f)) == 0xBF){
- if(!input_encoding){
- set_iconv(TRUE, w_iconv);
- }
- if (iconv == w_iconv) {
- return;
- }
- (*i_ungetc)(0xBF,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0xBB,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0xEF,f);
+ if (output_mode == mode)
+ return;
+ switch(mode) {
+ case ISO_8859_1:
+ (*o_putc)(ESC);
+ (*o_putc)('.');
+ (*o_putc)('A');
break;
- case 0xFE:
- if((c2 = (*i_getc)(f)) == 0xFF){
- if((c2 = (*i_getc)(f)) == 0x00){
- if((c2 = (*i_getc)(f)) == 0x00){
- if(!input_encoding){
- set_iconv(TRUE, w_iconv32);
- }
- if (iconv == w_iconv32) {
- input_endian = ENDIAN_3412;
- return;
- }
- (*i_ungetc)(0x00,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0x00,f);
- }else (*i_ungetc)(c2,f);
- if(!input_encoding){
- set_iconv(TRUE, w_iconv16);
- }
- if (iconv == w_iconv16) {
- input_endian = ENDIAN_BIG;
- return;
- }
- (*i_ungetc)(0xFF,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0xFE,f);
+ case JIS_X_0201_1976_K:
+ (*o_putc)(ESC);
+ (*o_putc)('(');
+ (*o_putc)('I');
break;
- case 0xFF:
- if((c2 = (*i_getc)(f)) == 0xFE){
- if((c2 = (*i_getc)(f)) == 0x00){
- if((c2 = (*i_getc)(f)) == 0x00){
- if(!input_encoding){
- set_iconv(TRUE, w_iconv32);
- }
- if (iconv == w_iconv32) {
- input_endian = ENDIAN_LITTLE;
- return;
- }
- (*i_ungetc)(0x00,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0x00,f);
- }else (*i_ungetc)(c2,f);
- if(!input_encoding){
- set_iconv(TRUE, w_iconv16);
- }
- if (iconv == w_iconv16) {
- input_endian = ENDIAN_LITTLE;
- return;
- }
- (*i_ungetc)(0xFE,f);
- }else (*i_ungetc)(c2,f);
- (*i_ungetc)(0xFF,f);
+ case JIS_X_0208:
+ (*o_putc)(ESC);
+ (*o_putc)('$');
+ (*o_putc)(kanji_intro);
break;
- default:
- (*i_ungetc)(c2,f);
+ case JIS_X_0212:
+ (*o_putc)(ESC);
+ (*o_putc)('$');
+ (*o_putc)('(');
+ (*o_putc)('D');
break;
+ case JIS_X_0213_1:
+ (*o_putc)(ESC);
+ (*o_putc)('$');
+ (*o_putc)('(');
+ (*o_putc)('Q');
+ break;
+ case JIS_X_0213_2:
+ (*o_putc)(ESC);
+ (*o_putc)('$');
+ (*o_putc)('(');
+ (*o_putc)('P');
+ break;
+ }
+ output_mode = mode;
+}
+
+void j_oconv(nkf_char c2, nkf_char c1)
+{
+#ifdef NUMCHAR_OPTION
+ if (c2 == 0 && nkf_char_unicode_p(c1)){
+ w16e_conv(c1, &c2, &c1);
+ if (c2 == 0 && nkf_char_unicode_p(c1)){
+ c2 = c1 & VALUE_MASK;
+ if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
+ /* CP5022x UDC */
+ c1 &= 0xFFF;
+ c2 = 0x7F + c1 / 94;
+ c1 = 0x21 + c1 % 94;
+ } else {
+ if (encode_fallback) (*encode_fallback)(c1);
+ return;
+ }
+ }
+ }
+#endif
+ if (c2 == 0) {
+ output_ascii_escape_sequence(ASCII);
+ (*o_putc)(c1);
+ }
+ else if (c2 == EOF) {
+ output_ascii_escape_sequence(ASCII);
+ (*o_putc)(EOF);
+ }
+ else if (c2 == ISO_8859_1) {
+ output_ascii_escape_sequence(ISO_8859_1);
+ (*o_putc)(c1|0x80);
+ }
+ else if (c2 == JIS_X_0201_1976_K) {
+ output_escape_sequence(JIS_X_0201_1976_K);
+ (*o_putc)(c1);
+#ifdef X0212_ENABLE
+ } else if (is_eucg3(c2)){
+ output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
+ (*o_putc)(c2 & 0x7f);
+ (*o_putc)(c1);
+#endif
+ } else {
+ if(ms_ucs_map_f
+ ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
+ : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
+ output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
+ (*o_putc)(c2);
+ (*o_putc)(c1);
}
}
-nkf_char push_hold_buf(nkf_char c2)
+void e_oconv(nkf_char c2, nkf_char c1)
{
- if (hold_count >= HOLD_SIZE*2)
- return (EOF);
- hold_buf[hold_count++] = (unsigned char)c2;
- return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
-}
+ if (c2 == 0 && nkf_char_unicode_p(c1)){
+ w16e_conv(c1, &c2, &c1);
+ if (c2 == 0 && nkf_char_unicode_p(c1)){
+ c2 = c1 & VALUE_MASK;
+ if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
+ /* eucJP-ms UDC */
+ c1 &= 0xFFF;
+ c2 = c1 / 94;
+ c2 += c2 < 10 ? 0x75 : 0x8FEB;
+ c1 = 0x21 + c1 % 94;
+ if (is_eucg3(c2)){
+ (*o_putc)(0x8f);
+ (*o_putc)((c2 & 0x7f) | 0x080);
+ (*o_putc)(c1 | 0x080);
+ }else{
+ (*o_putc)((c2 & 0x7f) | 0x080);
+ (*o_putc)(c1 | 0x080);
+ }
+ return;
+ } else {
+ if (encode_fallback) (*encode_fallback)(c1);
+ return;
+ }
+ }
+ }
+ if (c2 == EOF) {
+ (*o_putc)(EOF);
+ } else if (c2 == 0) {
+ output_mode = ASCII;
+ (*o_putc)(c1);
+ } else if (c2 == JIS_X_0201_1976_K) {
+ output_mode = EUC_JP;
+ (*o_putc)(SS2); (*o_putc)(c1|0x80);
+ } else if (c2 == ISO_8859_1) {
+ output_mode = ISO_8859_1;
+ (*o_putc)(c1 | 0x080);
#ifdef X0212_ENABLE
-nkf_char x0212_shift(nkf_char c)
-{
- nkf_char ret = c;
- c &= 0x7f;
- if (is_eucg3(ret)){
- if (0x75 <= c && c <= 0x7f){
- ret = c + (0x109 - 0x75);
+ } else if (is_eucg3(c2)){
+ output_mode = EUC_JP;
+#ifdef SHIFTJIS_CP932
+ if (!cp932inv_f){
+ nkf_char s2, s1;
+ if (e2s_conv(c2, c1, &s2, &s1) == 0){
+ s2e_conv(s2, s1, &c2, &c1);
+ }
}
- }else{
- if (0x75 <= c && c <= 0x7f){
- ret = c + (0x113 - 0x75);
+#endif
+ if (c2 == 0) {
+ output_mode = ASCII;
+ (*o_putc)(c1);
+ }else if (is_eucg3(c2)){
+ if (x0212_f){
+ (*o_putc)(0x8f);
+ (*o_putc)((c2 & 0x7f) | 0x080);
+ (*o_putc)(c1 | 0x080);
+ }
+ }else{
+ (*o_putc)((c2 & 0x7f) | 0x080);
+ (*o_putc)(c1 | 0x080);
+ }
+#endif
+ } else {
+ if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
+ set_iconv(FALSE, 0);
+ return; /* too late to rescue this char */
}
+ output_mode = EUC_JP;
+ (*o_putc)(c2 | 0x080);
+ (*o_putc)(c1 | 0x080);
}
- return ret;
}
-
-nkf_char x0212_unshift(nkf_char c)
+void s_oconv(nkf_char c2, nkf_char c1)
{
- nkf_char ret = c;
- if (0x7f <= c && c <= 0x88){
- ret = c + (0x75 - 0x7f);
- }else if (0x89 <= c && c <= 0x92){
- ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
+#ifdef NUMCHAR_OPTION
+ if (c2 == 0 && nkf_char_unicode_p(c1)){
+ w16e_conv(c1, &c2, &c1);
+ if (c2 == 0 && nkf_char_unicode_p(c1)){
+ c2 = c1 & VALUE_MASK;
+ if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
+ /* CP932 UDC */
+ c1 &= 0xFFF;
+ c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
+ c1 = c1 % 188;
+ c1 += 0x40 + (c1 > 0x3e);
+ (*o_putc)(c2);
+ (*o_putc)(c1);
+ return;
+ } else {
+ if(encode_fallback)(*encode_fallback)(c1);
+ return;
+ }
+ }
+ }
+#endif
+ if (c2 == EOF) {
+ (*o_putc)(EOF);
+ return;
+ } else if (c2 == 0) {
+ output_mode = ASCII;
+ (*o_putc)(c1);
+ } else if (c2 == JIS_X_0201_1976_K) {
+ output_mode = SHIFT_JIS;
+ (*o_putc)(c1|0x80);
+ } else if (c2 == ISO_8859_1) {
+ output_mode = ISO_8859_1;
+ (*o_putc)(c1 | 0x080);
+#ifdef X0212_ENABLE
+ } else if (is_eucg3(c2)){
+ output_mode = SHIFT_JIS;
+ if (e2s_conv(c2, c1, &c2, &c1) == 0){
+ (*o_putc)(c2);
+ (*o_putc)(c1);
+ }
+#endif
+ } else {
+ if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
+ set_iconv(FALSE, 0);
+ return; /* too late to rescue this char */
+ }
+ output_mode = SHIFT_JIS;
+ e2s_conv(c2, c1, &c2, &c1);
+
+#ifdef SHIFTJIS_CP932
+ if (cp932inv_f
+ && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
+ nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
+ if (c){
+ c2 = c >> 8;
+ c1 = c & 0xff;
+ }
+ }
+#endif /* SHIFTJIS_CP932 */
+
+ (*o_putc)(c2);
+ if (prefix_table[(unsigned char)c1]){
+ (*o_putc)(prefix_table[(unsigned char)c1]);
+ }
+ (*o_putc)(c1);
}
- return ret;
}
-#endif /* X0212_ENABLE */
-nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
+#ifdef UTF8_OUTPUT_ENABLE
+void w_oconv(nkf_char c2, nkf_char c1)
{
-#if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
+ int c3, c4;
nkf_char val;
-#endif
- static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
-#ifdef SHIFTJIS_CP932
- if (!cp932inv_f && is_ibmext_in_sjis(c2)){
- val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
+
+ if (output_bom_f) {
+ output_bom_f = FALSE;
+ (*o_putc)('\357');
+ (*o_putc)('\273');
+ (*o_putc)('\277');
+ }
+
+ if (c2 == EOF) {
+ (*o_putc)(EOF);
+ return;
+ }
+
+ if (c2 == 0 && nkf_char_unicode_p(c1)){
+ val = c1 & VALUE_MASK;
+ nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
+ (*o_putc)(c1);
+ if (c2) (*o_putc)(c2);
+ if (c3) (*o_putc)(c3);
+ if (c4) (*o_putc)(c4);
+ return;
+ }
+
+ if (c2 == 0) {
+ (*o_putc)(c1);
+ } else {
+ val = e2w_conv(c2, c1);
if (val){
- c2 = val >> 8;
- c1 = val & 0xff;
+ nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
+ (*o_putc)(c1);
+ if (c2) (*o_putc)(c2);
+ if (c3) (*o_putc)(c3);
+ if (c4) (*o_putc)(c4);
}
}
- if (cp932inv_f
- && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
- nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
- if (c){
- c2 = c >> 8;
- c1 = c & 0xff;
+}
+
+void w_oconv16(nkf_char c2, nkf_char c1)
+{
+ if (output_bom_f) {
+ output_bom_f = FALSE;
+ if (output_endian == ENDIAN_LITTLE){
+ (*o_putc)(0xFF);
+ (*o_putc)(0xFE);
+ }else{
+ (*o_putc)(0xFE);
+ (*o_putc)(0xFF);
}
}
-#endif /* SHIFTJIS_CP932 */
-#ifdef X0212_ENABLE
- if (!x0213_f && is_ibmext_in_sjis(c2)){
- val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
- if (val){
- if (val > 0x7FFF){
- c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
- c1 = val & 0xff;
- }else{
- c2 = val >> 8;
- c1 = val & 0xff;
+
+ if (c2 == EOF) {
+ (*o_putc)(EOF);
+ return;
+ }
+
+ if (c2 == 0 && nkf_char_unicode_p(c1)) {
+ if (nkf_char_unicode_bmp_p(c1)) {
+ c2 = (c1 >> 8) & 0xff;
+ c1 &= 0xff;
+ } else {
+ c1 &= VALUE_MASK;
+ if (c1 <= UNICODE_MAX) {
+ c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
+ c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
+ if (output_endian == ENDIAN_LITTLE){
+ (*o_putc)(c2 & 0xff);
+ (*o_putc)((c2 >> 8) & 0xff);
+ (*o_putc)(c1 & 0xff);
+ (*o_putc)((c1 >> 8) & 0xff);
+ }else{
+ (*o_putc)((c2 >> 8) & 0xff);
+ (*o_putc)(c2 & 0xff);
+ (*o_putc)((c1 >> 8) & 0xff);
+ (*o_putc)(c1 & 0xff);
+ }
}
- if (p2) *p2 = c2;
- if (p1) *p1 = c1;
- return 0;
+ return;
}
+ } else if (c2) {
+ nkf_char val = e2w_conv(c2, c1);
+ c2 = (val >> 8) & 0xff;
+ c1 = val & 0xff;
+ if (!val) return;
}
-#endif
- if(c2 >= 0x80){
- if(x0213_f && c2 >= 0xF0){
- if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
- c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
- }else{ /* 78<=k<=94 */
- c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
- if (0x9E < c1) c2++;
- }
- }else{
-#define SJ0162 0x00e1 /* 01 - 62 ku offset */
-#define SJ6394 0x0161 /* 63 - 94 ku offset */
- c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
- if (0x9E < c1) c2++;
- }
- if (c1 < 0x9F)
- c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
- else {
- c1 = c1 - 0x7E;
- }
+ if (output_endian == ENDIAN_LITTLE){
+ (*o_putc)(c1);
+ (*o_putc)(c2);
+ }else{
+ (*o_putc)(c2);
+ (*o_putc)(c1);
}
-
-#ifdef X0212_ENABLE
- c2 = x0212_unshift(c2);
-#endif
- if (p2) *p2 = c2;
- if (p1) *p1 = c1;
- return 0;
}
-nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
+void w_oconv32(nkf_char c2, nkf_char c1)
{
- if (c2 == JIS_X_0201) {
- c1 &= 0x7f;
- } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
- /* NOP */
- } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
- /* CP932 UDC */
- if(c1 == 0x7F) return 0;
- c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
- c2 = 0;
- } else {
- nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
- if (ret) return ret;
+ if (output_bom_f) {
+ output_bom_f = FALSE;
+ if (output_endian == ENDIAN_LITTLE){
+ (*o_putc)(0xFF);
+ (*o_putc)(0xFE);
+ (*o_putc)(0);
+ (*o_putc)(0);
+ }else{
+ (*o_putc)(0);
+ (*o_putc)(0);
+ (*o_putc)(0xFE);
+ (*o_putc)(0xFF);
+ }
}
- (*oconv)(c2, c1);
- return 0;
-}
-nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
-{
- if (c2 == JIS_X_0201) {
- c1 &= 0x7f;
-#ifdef X0212_ENABLE
- }else if (c2 == 0x8f){
- if (c0 == 0){
- return -1;
- }
- if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
- /* encoding is eucJP-ms, so invert to Unicode Private User Area */
- c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
- c2 = 0;
- } else {
- c2 = (c2 << 8) | (c1 & 0x7f);
- c1 = c0 & 0x7f;
-#ifdef SHIFTJIS_CP932
- if (cp51932_f){
- nkf_char s2, s1;
- if (e2s_conv(c2, c1, &s2, &s1) == 0){
- s2e_conv(s2, s1, &c2, &c1);
- if (c2 < 0x100){
- c1 &= 0x7f;
- c2 &= 0x7f;
- }
- }
- }
-#endif /* SHIFTJIS_CP932 */
- }
-#endif /* X0212_ENABLE */
- } else if (c2 == SSO){
- c2 = JIS_X_0201;
- c1 &= 0x7f;
- } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
- /* NOP */
- } else {
- if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
- /* encoding is eucJP-ms, so invert to Unicode Private User Area */
- c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
- c2 = 0;
- } else {
- c1 &= 0x7f;
- c2 &= 0x7f;
-#ifdef SHIFTJIS_CP932
- if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
- nkf_char s2, s1;
- if (e2s_conv(c2, c1, &s2, &s1) == 0){
- s2e_conv(s2, s1, &c2, &c1);
- if (c2 < 0x100){
- c1 &= 0x7f;
- c2 &= 0x7f;
- }
- }
- }
-#endif /* SHIFTJIS_CP932 */
- }
+ if (c2 == EOF) {
+ (*o_putc)(EOF);
+ return;
+ }
+
+ if (c2 == ISO_8859_1) {
+ c1 |= 0x80;
+ } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
+ c1 &= VALUE_MASK;
+ } else if (c2) {
+ c1 = e2w_conv(c2, c1);
+ if (!c1) return;
+ }
+ if (output_endian == ENDIAN_LITTLE){
+ (*o_putc)( c1 & 0xFF);
+ (*o_putc)((c1 >> 8) & 0xFF);
+ (*o_putc)((c1 >> 16) & 0xFF);
+ (*o_putc)(0);
+ }else{
+ (*o_putc)(0);
+ (*o_putc)((c1 >> 16) & 0xFF);
+ (*o_putc)((c1 >> 8) & 0xFF);
+ (*o_putc)( c1 & 0xFF);
}
- (*oconv)(c2, c1);
- return 0;
}
+#endif
-#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
-void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
+#define SCORE_L2 (1) /* \e$BBh\e(B2\e$B?e=`4A;z\e(B */
+#define SCORE_KANA (SCORE_L2 << 1) /* \e$B$$$o$f$kH>3Q%+%J\e(B */
+#define SCORE_DEPEND (SCORE_KANA << 1) /* \e$B5!<o0MB8J8;z\e(B */
+#define SCORE_CP932 (SCORE_DEPEND << 1) /* CP932 \e$B$K$h$kFI$_49$(\e(B (IBM extended characters) */
+#define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
+#define SCORE_NO_EXIST (SCORE_X0212 << 1) /* \e$BB8:_$7$J$$J8;z\e(B */
+#define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME \e$B$K$h$k;XDj\e(B */
+#define SCORE_ERROR (SCORE_iMIME << 1) /* \e$B%(%i!<\e(B */
+
+#define SCORE_INIT (SCORE_iMIME)
+
+static const char score_table_A0[] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
+ SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
+};
+
+static const char score_table_F0[] = {
+ SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
+ SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
+ SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
+ SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
+};
+
+void set_code_score(struct input_code *ptr, nkf_char score)
{
- val &= VALUE_MASK;
- if (val < 0x80){
- *p2 = val;
- *p1 = 0;
- *p0 = 0;
- }else if (val < 0x800){
- *p2 = 0xc0 | (val >> 6);
- *p1 = 0x80 | (val & 0x3f);
- *p0 = 0;
- } else if (val <= NKF_INT32_C(0xFFFF)) {
- *p2 = 0xe0 | (val >> 12);
- *p1 = 0x80 | ((val >> 6) & 0x3f);
- *p0 = 0x80 | (val & 0x3f);
- } else if (val <= NKF_INT32_C(0x10FFFF)) {
- *p2 = 0xe0 | (val >> 16);
- *p1 = 0x80 | ((val >> 12) & 0x3f);
- *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
- } else {
- *p2 = 0;
- *p1 = 0;
- *p0 = 0;
+ if (ptr){
+ ptr->score |= score;
}
}
-#endif
-#ifdef UTF8_INPUT_ENABLE
-nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
+void clr_code_score(struct input_code *ptr, nkf_char score)
{
- nkf_char val;
- if (c2 >= 0xf8) {
- val = -1;
- } else if (c2 >= 0xf0){
- /* c2: 1st, c1: 2nd, c0: 3rd/4th */
- val = (c2 & 0x0f) << 18;
- val |= (c1 & 0x3f) << 12;
- val |= (c0 & 0x3f00) >> 2;
- val |= (c0 & 0x3f);
- }else if (c2 >= 0xe0){
- val = (c2 & 0x0f) << 12;
- val |= (c1 & 0x3f) << 6;
- val |= (c0 & 0x3f);
- }else if (c2 >= 0xc0){
- val = (c2 & 0x1f) << 6;
- val |= (c1 & 0x3f);
- }else{
- val = c2;
+ if (ptr){
+ ptr->score &= ~score;
}
- return val;
}
-nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
+void code_score(struct input_code *ptr)
{
- nkf_char c2, c1, c0;
- nkf_char ret = 0;
- val &= VALUE_MASK;
- if (val < 0x80){
- *p2 = 0;
- *p1 = val;
- }else{
- w16w_conv(val, &c2, &c1, &c0);
- ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
-#ifdef NUMCHAR_OPTION
- if (ret > 0){
- *p2 = 0;
- *p1 = CLASS_UNICODE | val;
- ret = 0;
- }
+ nkf_char c2 = ptr->buf[0];
+#ifdef UTF8_OUTPUT_ENABLE
+ nkf_char c1 = ptr->buf[1];
+#endif
+ if (c2 < 0){
+ set_code_score(ptr, SCORE_ERROR);
+ }else if (c2 == SS2){
+ set_code_score(ptr, SCORE_KANA);
+ }else if (c2 == 0x8f){
+ set_code_score(ptr, SCORE_X0212);
+#ifdef UTF8_OUTPUT_ENABLE
+ }else if (!e2w_conv(c2, c1)){
+ set_code_score(ptr, SCORE_NO_EXIST);
#endif
+ }else if ((c2 & 0x70) == 0x20){
+ set_code_score(ptr, score_table_A0[c2 & 0x0f]);
+ }else if ((c2 & 0x70) == 0x70){
+ set_code_score(ptr, score_table_F0[c2 & 0x0f]);
+ }else if ((c2 & 0x70) >= 0x50){
+ set_code_score(ptr, SCORE_L2);
}
- return ret;
}
-#endif
-#ifdef UTF8_OUTPUT_ENABLE
-nkf_char e2w_conv(nkf_char c2, nkf_char c1)
+void status_disable(struct input_code *ptr)
{
- const unsigned short *p;
+ ptr->stat = -1;
+ ptr->buf[0] = -1;
+ code_score(ptr);
+ if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
+}
- if (c2 == JIS_X_0201) {
- if (ms_ucs_map_f == UCS_MAP_CP10001) {
- switch (c1) {
- case 0x20:
- return 0xA0;
- case 0x7D:
- return 0xA9;
- }
- }
- p = euc_to_utf8_1byte;
-#ifdef X0212_ENABLE
- } else if (is_eucg3(c2)){
- if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
- return 0xA6;
- }
- c2 = (c2&0x7f) - 0x21;
- if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
- p = x0212_to_utf8_2bytes[c2];
- else
- return 0;
-#endif
- } else {
- c2 &= 0x7f;
- c2 = (c2&0x7f) - 0x21;
- if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
- p =
- ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
- ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
- euc_to_utf8_2bytes_ms[c2];
- else
- return 0;
- }
- if (!p) return 0;
- c1 = (c1 & 0x7f) - 0x21;
- if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
- return p[c1];
- return 0;
+void status_push_ch(struct input_code *ptr, nkf_char c)
+{
+ ptr->buf[ptr->index++] = c;
}
-void w_oconv(nkf_char c2, nkf_char c1)
+void status_clear(struct input_code *ptr)
{
- nkf_char c0;
- nkf_char val;
+ ptr->stat = 0;
+ ptr->index = 0;
+}
- if (output_bom_f) {
- output_bom_f = FALSE;
- (*o_putc)('\357');
- (*o_putc)('\273');
- (*o_putc)('\277');
- }
+void status_reset(struct input_code *ptr)
+{
+ status_clear(ptr);
+ ptr->score = SCORE_INIT;
+}
- if (c2 == EOF) {
- (*o_putc)(EOF);
- return;
- }
+void status_reinit(struct input_code *ptr)
+{
+ status_reset(ptr);
+ ptr->_file_stat = 0;
+}
-#ifdef NUMCHAR_OPTION
- if (c2 == 0 && is_unicode_capsule(c1)){
- val = c1 & VALUE_MASK;
- if (val < 0x80){
- (*o_putc)(val);
- }else if (val < 0x800){
- (*o_putc)(0xC0 | (val >> 6));
- (*o_putc)(0x80 | (val & 0x3f));
- } else if (val <= NKF_INT32_C(0xFFFF)) {
- (*o_putc)(0xE0 | (val >> 12));
- (*o_putc)(0x80 | ((val >> 6) & 0x3f));
- (*o_putc)(0x80 | (val & 0x3f));
- } else if (val <= NKF_INT32_C(0x10FFFF)) {
- (*o_putc)(0xF0 | ( val>>18));
- (*o_putc)(0x80 | ((val>>12) & 0x3f));
- (*o_putc)(0x80 | ((val>> 6) & 0x3f));
- (*o_putc)(0x80 | ( val & 0x3f));
- }
- return;
+void status_check(struct input_code *ptr, nkf_char c)
+{
+ if (c <= DEL && estab_f){
+ status_reset(ptr);
}
-#endif
+}
- if (c2 == 0) {
- output_mode = ASCII;
- (*o_putc)(c1);
- } else if (c2 == ISO_8859_1) {
- output_mode = UTF_8;
- (*o_putc)(c1 | 0x080);
- } else {
- output_mode = UTF_8;
- val = e2w_conv(c2, c1);
- if (val){
- w16w_conv(val, &c2, &c1, &c0);
- (*o_putc)(c2);
- if (c1){
- (*o_putc)(c1);
- if (c0) (*o_putc)(c0);
- }
- }
+void s_status(struct input_code *ptr, nkf_char c)
+{
+ switch(ptr->stat){
+ case -1:
+ status_check(ptr, c);
+ break;
+ case 0:
+ if (c <= DEL){
+ break;
+ }else if (nkf_char_unicode_p(c)){
+ break;
+ }else if (0xa1 <= c && c <= 0xdf){
+ status_push_ch(ptr, SS2);
+ status_push_ch(ptr, c);
+ code_score(ptr);
+ status_clear(ptr);
+ }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
+ ptr->stat = 1;
+ status_push_ch(ptr, c);
+ }else if (0xed <= c && c <= 0xee){
+ ptr->stat = 3;
+ status_push_ch(ptr, c);
+#ifdef SHIFTJIS_CP932
+ }else if (is_ibmext_in_sjis(c)){
+ ptr->stat = 2;
+ status_push_ch(ptr, c);
+#endif /* SHIFTJIS_CP932 */
+#ifdef X0212_ENABLE
+ }else if (0xf0 <= c && c <= 0xfc){
+ ptr->stat = 1;
+ status_push_ch(ptr, c);
+#endif /* X0212_ENABLE */
+ }else{
+ status_disable(ptr);
+ }
+ break;
+ case 1:
+ if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
+ status_push_ch(ptr, c);
+ s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
+ code_score(ptr);
+ status_clear(ptr);
+ }else{
+ status_disable(ptr);
+ }
+ break;
+ case 2:
+#ifdef SHIFTJIS_CP932
+ if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
+ status_push_ch(ptr, c);
+ if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
+ set_code_score(ptr, SCORE_CP932);
+ status_clear(ptr);
+ break;
+ }
+ }
+#endif /* SHIFTJIS_CP932 */
+ status_disable(ptr);
+ break;
+ case 3:
+ if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
+ status_push_ch(ptr, c);
+ s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
+ set_code_score(ptr, SCORE_CP932);
+ status_clear(ptr);
+ }else{
+ status_disable(ptr);
+ }
+ break;
}
}
-void w_oconv16(nkf_char c2, nkf_char c1)
+void e_status(struct input_code *ptr, nkf_char c)
{
- if (output_bom_f) {
- output_bom_f = FALSE;
- if (output_endian == ENDIAN_LITTLE){
- (*o_putc)((unsigned char)'\377');
- (*o_putc)('\376');
- }else{
- (*o_putc)('\376');
- (*o_putc)((unsigned char)'\377');
- }
+ switch (ptr->stat){
+ case -1:
+ status_check(ptr, c);
+ break;
+ case 0:
+ if (c <= DEL){
+ break;
+ }else if (nkf_char_unicode_p(c)){
+ break;
+ }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
+ ptr->stat = 1;
+ status_push_ch(ptr, c);
+#ifdef X0212_ENABLE
+ }else if (0x8f == c){
+ ptr->stat = 2;
+ status_push_ch(ptr, c);
+#endif /* X0212_ENABLE */
+ }else{
+ status_disable(ptr);
+ }
+ break;
+ case 1:
+ if (0xa1 <= c && c <= 0xfe){
+ status_push_ch(ptr, c);
+ code_score(ptr);
+ status_clear(ptr);
+ }else{
+ status_disable(ptr);
+ }
+ break;
+#ifdef X0212_ENABLE
+ case 2:
+ if (0xa1 <= c && c <= 0xfe){
+ ptr->stat = 1;
+ status_push_ch(ptr, c);
+ }else{
+ status_disable(ptr);
+ }
+#endif /* X0212_ENABLE */
}
+}
- if (c2 == EOF) {
- (*o_putc)(EOF);
- return;
+#ifdef UTF8_INPUT_ENABLE
+void w_status(struct input_code *ptr, nkf_char c)
+{
+ switch (ptr->stat){
+ case -1:
+ status_check(ptr, c);
+ break;
+ case 0:
+ if (c <= DEL){
+ break;
+ }else if (nkf_char_unicode_p(c)){
+ break;
+ }else if (0xc0 <= c && c <= 0xdf){
+ ptr->stat = 1;
+ status_push_ch(ptr, c);
+ }else if (0xe0 <= c && c <= 0xef){
+ ptr->stat = 2;
+ status_push_ch(ptr, c);
+ }else if (0xf0 <= c && c <= 0xf4){
+ ptr->stat = 3;
+ status_push_ch(ptr, c);
+ }else{
+ status_disable(ptr);
+ }
+ break;
+ case 1:
+ case 2:
+ if (0x80 <= c && c <= 0xbf){
+ status_push_ch(ptr, c);
+ if (ptr->index > ptr->stat){
+ int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
+ && ptr->buf[2] == 0xbf);
+ w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
+ &ptr->buf[0], &ptr->buf[1]);
+ if (!bom){
+ code_score(ptr);
+ }
+ status_clear(ptr);
+ }
+ }else{
+ status_disable(ptr);
+ }
+ break;
+ case 3:
+ if (0x80 <= c && c <= 0xbf){
+ if (ptr->index < ptr->stat){
+ status_push_ch(ptr, c);
+ } else {
+ status_clear(ptr);
+ }
+ }else{
+ status_disable(ptr);
+ }
+ break;
}
+}
+#endif
- if (c2 == ISO_8859_1) {
- c2 = 0;
- c1 |= 0x80;
-#ifdef NUMCHAR_OPTION
- } else if (c2 == 0 && is_unicode_capsule(c1)) {
- if (is_unicode_bmp(c1)) {
- c2 = (c1 >> 8) & 0xff;
- c1 &= 0xff;
- } else {
- c1 &= VALUE_MASK;
- if (c1 <= UNICODE_MAX) {
- c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
- c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
- if (output_endian == ENDIAN_LITTLE){
- (*o_putc)(c2 & 0xff);
- (*o_putc)((c2 >> 8) & 0xff);
- (*o_putc)(c1 & 0xff);
- (*o_putc)((c1 >> 8) & 0xff);
- }else{
- (*o_putc)((c2 >> 8) & 0xff);
- (*o_putc)(c2 & 0xff);
- (*o_putc)((c1 >> 8) & 0xff);
- (*o_putc)(c1 & 0xff);
- }
+void code_status(nkf_char c)
+{
+ int action_flag = 1;
+ struct input_code *result = 0;
+ struct input_code *p = input_code_list;
+ while (p->name){
+ if (!p->status_func) {
+ ++p;
+ continue;
+ }
+ if (!p->status_func)
+ continue;
+ (p->status_func)(p, c);
+ if (p->stat > 0){
+ action_flag = 0;
+ }else if(p->stat == 0){
+ if (result){
+ action_flag = 0;
+ }else{
+ result = p;
}
- return;
}
-#endif
- } else if (c2) {
- nkf_char val = e2w_conv(c2, c1);
- c2 = (val >> 8) & 0xff;
- c1 = val & 0xff;
- if (!val) return;
- }
- if (output_endian == ENDIAN_LITTLE){
- (*o_putc)(c1);
- (*o_putc)(c2);
- }else{
- (*o_putc)(c2);
- (*o_putc)(c1);
+ ++p;
}
-}
-void w_oconv32(nkf_char c2, nkf_char c1)
-{
- if (output_bom_f) {
- output_bom_f = FALSE;
- if (output_endian == ENDIAN_LITTLE){
- (*o_putc)((unsigned char)'\377');
- (*o_putc)('\376');
- (*o_putc)('\000');
- (*o_putc)('\000');
- }else{
- (*o_putc)('\000');
- (*o_putc)('\000');
- (*o_putc)('\376');
- (*o_putc)((unsigned char)'\377');
+ if (action_flag){
+ if (result && !estab_f){
+ set_iconv(TRUE, result->iconv_func);
+ }else if (c <= DEL){
+ struct input_code *ptr = input_code_list;
+ while (ptr->name){
+ status_reset(ptr);
+ ++ptr;
+ }
}
}
+}
- if (c2 == EOF) {
- (*o_putc)(EOF);
- return;
+#ifndef WIN32DLL
+nkf_char std_getc(FILE *f)
+{
+ if (std_gc_ndx){
+ return std_gc_buf[--std_gc_ndx];
}
+ return getc(f);
+}
+#endif /*WIN32DLL*/
- if (c2 == ISO_8859_1) {
- c1 |= 0x80;
-#ifdef NUMCHAR_OPTION
- } else if (c2 == 0 && is_unicode_capsule(c1)) {
- c1 &= VALUE_MASK;
-#endif
- } else if (c2) {
- c1 = e2w_conv(c2, c1);
- if (!c1) return;
- }
- if (output_endian == ENDIAN_LITTLE){
- (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
- (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >> 8);
- (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
- (*o_putc)('\000');
- }else{
- (*o_putc)('\000');
- (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
- (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >> 8);
- (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
+nkf_char std_ungetc(nkf_char c, FILE *f)
+{
+ if (std_gc_ndx == STD_GC_BUFSIZE){
+ return EOF;
}
+ std_gc_buf[std_gc_ndx++] = c;
+ return c;
}
-#endif
-void e_oconv(nkf_char c2, nkf_char c1)
+#ifndef WIN32DLL
+void std_putc(nkf_char c)
{
-#ifdef NUMCHAR_OPTION
- if (c2 == 0 && is_unicode_capsule(c1)){
- w16e_conv(c1, &c2, &c1);
- if (c2 == 0 && is_unicode_capsule(c1)){
- c2 = c1 & VALUE_MASK;
- if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
- /* eucJP-ms UDC */
- c1 &= 0xFFF;
- c2 = c1 / 94;
- c2 += c2 < 10 ? 0x75 : 0x8FEB;
- c1 = 0x21 + c1 % 94;
- if (is_eucg3(c2)){
- (*o_putc)(0x8f);
- (*o_putc)((c2 & 0x7f) | 0x080);
- (*o_putc)(c1 | 0x080);
- }else{
- (*o_putc)((c2 & 0x7f) | 0x080);
- (*o_putc)(c1 | 0x080);
- }
- return;
- } else {
- if (encode_fallback) (*encode_fallback)(c1);
- return;
- }
- }
- }
-#endif
- if (c2 == EOF) {
- (*o_putc)(EOF);
- return;
- } else if (c2 == 0) {
- output_mode = ASCII;
- (*o_putc)(c1);
- } else if (c2 == JIS_X_0201) {
- output_mode = EUC_JP;
- (*o_putc)(SSO); (*o_putc)(c1|0x80);
- } else if (c2 == ISO_8859_1) {
- output_mode = ISO_8859_1;
- (*o_putc)(c1 | 0x080);
-#ifdef X0212_ENABLE
- } else if (is_eucg3(c2)){
- output_mode = EUC_JP;
-#ifdef SHIFTJIS_CP932
- if (!cp932inv_f){
- nkf_char s2, s1;
- if (e2s_conv(c2, c1, &s2, &s1) == 0){
- s2e_conv(s2, s1, &c2, &c1);
- }
- }
-#endif
- if (c2 == 0) {
- output_mode = ASCII;
- (*o_putc)(c1);
- }else if (is_eucg3(c2)){
- if (x0212_f){
- (*o_putc)(0x8f);
- (*o_putc)((c2 & 0x7f) | 0x080);
- (*o_putc)(c1 | 0x080);
- }
- }else{
- (*o_putc)((c2 & 0x7f) | 0x080);
- (*o_putc)(c1 | 0x080);
- }
-#endif
- } else {
- if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
- set_iconv(FALSE, 0);
- return; /* too late to rescue this char */
- }
- output_mode = EUC_JP;
- (*o_putc)(c2 | 0x080);
- (*o_putc)(c1 | 0x080);
- }
+ if(c!=EOF)
+ putchar(c);
}
+#endif /*WIN32DLL*/
-nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
+static unsigned char hold_buf[HOLD_SIZE*2];
+static int hold_count = 0;
+nkf_char push_hold_buf(nkf_char c2)
{
- nkf_char ndx;
- if (is_eucg3(c2)){
- ndx = c2 & 0x7f;
- if (x0213_f){
- if((0x21 <= ndx && ndx <= 0x2F)){
- if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
- if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
- return 0;
- }else if(0x6E <= ndx && ndx <= 0x7E){
- if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
- if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
- return 0;
- }
- return 1;
- }
-#ifdef X0212_ENABLE
- else if(nkf_isgraph(ndx)){
- nkf_char val = 0;
- const unsigned short *ptr;
- ptr = x0212_shiftjis[ndx - 0x21];
- if (ptr){
- val = ptr[(c1 & 0x7f) - 0x21];
- }
- if (val){
- c2 = val >> 8;
- c1 = val & 0xff;
- if (p2) *p2 = c2;
- if (p1) *p1 = c1;
- return 0;
- }
- c2 = x0212_shift(c2);
- }
-#endif /* X0212_ENABLE */
- }
- if(0x7F < c2) return 1;
- if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
- if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
- return 0;
+ if (hold_count >= HOLD_SIZE*2)
+ return (EOF);
+ hold_buf[hold_count++] = (unsigned char)c2;
+ return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
}
-void s_oconv(nkf_char c2, nkf_char c1)
+static int h_conv(FILE *f, int c1, int c2)
{
-#ifdef NUMCHAR_OPTION
- if (c2 == 0 && is_unicode_capsule(c1)){
- w16e_conv(c1, &c2, &c1);
- if (c2 == 0 && is_unicode_capsule(c1)){
- c2 = c1 & VALUE_MASK;
- if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
- /* CP932 UDC */
- c1 &= 0xFFF;
- c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
- c1 = c1 % 188;
- c1 += 0x40 + (c1 > 0x3e);
- (*o_putc)(c2);
- (*o_putc)(c1);
- return;
- } else {
- if(encode_fallback)(*encode_fallback)(c1);
- return;
- }
- }
+ int ret, c4, c3;
+ int hold_index;
+
+
+ /** it must NOT be in the kanji shifte sequence */
+ /** it must NOT be written in JIS7 */
+ /** and it must be after 2 byte 8bit code */
+
+ hold_count = 0;
+ push_hold_buf(c1);
+ push_hold_buf(c2);
+
+ while ((c2 = (*i_getc)(f)) != EOF) {
+ if (c2 == ESC){
+ (*i_ungetc)(c2,f);
+ break;
+ }
+ code_status(c2);
+ if (push_hold_buf(c2) == EOF || estab_f) {
+ break;
+ }
}
-#endif
- if (c2 == EOF) {
- (*o_putc)(EOF);
- return;
- } else if (c2 == 0) {
- output_mode = ASCII;
- (*o_putc)(c1);
- } else if (c2 == JIS_X_0201) {
- output_mode = SHIFT_JIS;
- (*o_putc)(c1|0x80);
- } else if (c2 == ISO_8859_1) {
- output_mode = ISO_8859_1;
- (*o_putc)(c1 | 0x080);
-#ifdef X0212_ENABLE
- } else if (is_eucg3(c2)){
- output_mode = SHIFT_JIS;
- if (e2s_conv(c2, c1, &c2, &c1) == 0){
- (*o_putc)(c2);
- (*o_putc)(c1);
+
+ if (!estab_f) {
+ struct input_code *p = input_code_list;
+ struct input_code *result = p;
+ if (c2 == EOF) {
+ code_status(c2);
}
-#endif
- } else {
- if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
- set_iconv(FALSE, 0);
- return; /* too late to rescue this char */
+ while (p->name) {
+ if (p->status_func && p->score < result->score) {
+ result = p;
+ }
+ p++;
}
- output_mode = SHIFT_JIS;
- e2s_conv(c2, c1, &c2, &c1);
+ set_iconv(TRUE, result->iconv_func);
+ }
+
+
+ /** now,
+ ** 1) EOF is detected, or
+ ** 2) Code is established, or
+ ** 3) Buffer is FULL (but last word is pushed)
+ **
+ ** in 1) and 3) cases, we continue to use
+ ** Kanji codes by oconv and leave estab_f unchanged.
+ **/
-#ifdef SHIFTJIS_CP932
- if (cp932inv_f
- && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
- nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
- if (c){
- c2 = c >> 8;
- c1 = c & 0xff;
+ ret = c2;
+ hold_index = 0;
+ while (hold_index < hold_count){
+ c1 = hold_buf[hold_index++];
+ if (c1 <= DEL){
+ (*iconv)(0, c1, 0);
+ continue;
+ }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
+ (*iconv)(JIS_X_0201_1976_K, c1, 0);
+ continue;
+ }
+ if (hold_index < hold_count){
+ c2 = hold_buf[hold_index++];
+ }else{
+ c2 = (*i_getc)(f);
+ if (c2 == EOF){
+ c4 = EOF;
+ break;
}
+ code_status(c2);
}
-#endif /* SHIFTJIS_CP932 */
-
- (*o_putc)(c2);
- if (prefix_table[(unsigned char)c1]){
- (*o_putc)(prefix_table[(unsigned char)c1]);
+ c3 = 0;
+ switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
+ case -2:
+ /* 4 bytes UTF-8 */
+ if (hold_index < hold_count){
+ c3 = hold_buf[hold_index++];
+ } else if ((c3 = (*i_getc)(f)) == EOF) {
+ ret = EOF;
+ break;
+ } else {
+ code_status(c3);
+ if (hold_index < hold_count){
+ c4 = hold_buf[hold_index++];
+ } else if ((c4 = (*i_getc)(f)) == EOF) {
+ c3 = ret = EOF;
+ break;
+ } else {
+ code_status(c4);
+ (*iconv)(c1, c2, (c3<<8)|c4);
+ }
+ }
+ break;
+ case -1:
+ /* 3 bytes EUC or UTF-8 */
+ if (hold_index < hold_count){
+ c3 = hold_buf[hold_index++];
+ } else if ((c3 = (*i_getc)(f)) == EOF) {
+ ret = EOF;
+ break;
+ } else {
+ code_status(c3);
+ }
+ (*iconv)(c1, c2, c3);
+ break;
}
- (*o_putc)(c1);
+ if (c3 == EOF) break;
}
+ return ret;
}
-void j_oconv(nkf_char c2, nkf_char c1)
+/*
+ * Check and Ignore BOM
+ */
+void check_bom(FILE *f)
{
-#ifdef NUMCHAR_OPTION
- if (c2 == 0 && is_unicode_capsule(c1)){
- w16e_conv(c1, &c2, &c1);
- if (c2 == 0 && is_unicode_capsule(c1)){
- c2 = c1 & VALUE_MASK;
- if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
- /* CP5022x UDC */
- c1 &= 0xFFF;
- c2 = 0x7F + c1 / 94;
- c1 = 0x21 + c1 % 94;
- } else {
- if (encode_fallback) (*encode_fallback)(c1);
- return;
+ int c2;
+ switch(c2 = (*i_getc)(f)){
+ case 0x00:
+ if((c2 = (*i_getc)(f)) == 0x00){
+ if((c2 = (*i_getc)(f)) == 0xFE){
+ if((c2 = (*i_getc)(f)) == 0xFF){
+ if(!input_encoding){
+ set_iconv(TRUE, w_iconv32);
+ }
+ if (iconv == w_iconv32) {
+ input_endian = ENDIAN_BIG;
+ return;
+ }
+ (*i_ungetc)(0xFF,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0xFE,f);
+ }else if(c2 == 0xFF){
+ if((c2 = (*i_getc)(f)) == 0xFE){
+ if(!input_encoding){
+ set_iconv(TRUE, w_iconv32);
+ }
+ if (iconv == w_iconv32) {
+ input_endian = ENDIAN_2143;
+ return;
+ }
+ (*i_ungetc)(0xFF,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0xFF,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0x00,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0x00,f);
+ break;
+ case 0xEF:
+ if((c2 = (*i_getc)(f)) == 0xBB){
+ if((c2 = (*i_getc)(f)) == 0xBF){
+ if(!input_encoding){
+ set_iconv(TRUE, w_iconv);
+ }
+ if (iconv == w_iconv) {
+ return;
+ }
+ (*i_ungetc)(0xBF,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0xBB,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0xEF,f);
+ break;
+ case 0xFE:
+ if((c2 = (*i_getc)(f)) == 0xFF){
+ if((c2 = (*i_getc)(f)) == 0x00){
+ if((c2 = (*i_getc)(f)) == 0x00){
+ if(!input_encoding){
+ set_iconv(TRUE, w_iconv32);
+ }
+ if (iconv == w_iconv32) {
+ input_endian = ENDIAN_3412;
+ return;
+ }
+ (*i_ungetc)(0x00,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0x00,f);
+ }else (*i_ungetc)(c2,f);
+ if(!input_encoding){
+ set_iconv(TRUE, w_iconv16);
}
- }
- }
-#endif
- if (c2 == EOF) {
- if (output_mode !=ASCII && output_mode!=ISO_8859_1) {
- (*o_putc)(ESC);
- (*o_putc)('(');
- (*o_putc)(ascii_intro);
- output_mode = ASCII;
- }
- (*o_putc)(EOF);
-#ifdef X0212_ENABLE
- } else if (is_eucg3(c2)){
- if(x0213_f){
- if(output_mode!=JIS_X_0213_2){
- output_mode = JIS_X_0213_2;
- (*o_putc)(ESC);
- (*o_putc)('$');
- (*o_putc)('(');
- (*o_putc)(0x50);
+ if (iconv == w_iconv16) {
+ input_endian = ENDIAN_BIG;
+ return;
}
- }else{
- if(output_mode!=JIS_X_0212){
- output_mode = JIS_X_0212;
- (*o_putc)(ESC);
- (*o_putc)('$');
- (*o_putc)('(');
- (*o_putc)(0x44);
+ (*i_ungetc)(0xFF,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0xFE,f);
+ break;
+ case 0xFF:
+ if((c2 = (*i_getc)(f)) == 0xFE){
+ if((c2 = (*i_getc)(f)) == 0x00){
+ if((c2 = (*i_getc)(f)) == 0x00){
+ if(!input_encoding){
+ set_iconv(TRUE, w_iconv32);
+ }
+ if (iconv == w_iconv32) {
+ input_endian = ENDIAN_LITTLE;
+ return;
+ }
+ (*i_ungetc)(0x00,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0x00,f);
+ }else (*i_ungetc)(c2,f);
+ if(!input_encoding){
+ set_iconv(TRUE, w_iconv16);
}
- }
- (*o_putc)(c2 & 0x7f);
- (*o_putc)(c1);
-#endif
- } else if (c2==JIS_X_0201) {
- if (output_mode!=JIS_X_0201) {
- output_mode = JIS_X_0201;
- (*o_putc)(ESC);
- (*o_putc)('(');
- (*o_putc)('I');
- }
- (*o_putc)(c1);
- } else if (c2==ISO_8859_1) {
- /* iso8859 introduction, or 8th bit on */
- /* Can we convert in 7bit form using ESC-'-'-A ?
- Is this popular? */
- output_mode = ISO_8859_1;
- (*o_putc)(c1|0x80);
- } else if (c2 == 0) {
- if (output_mode !=ASCII && output_mode!=ISO_8859_1) {
- (*o_putc)(ESC);
- (*o_putc)('(');
- (*o_putc)(ascii_intro);
- output_mode = ASCII;
- }
- (*o_putc)(c1);
- } else {
- if(ms_ucs_map_f
- ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
- : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
- if(x0213_f){
- if (output_mode!=JIS_X_0213_1) {
- output_mode = JIS_X_0213_1;
- (*o_putc)(ESC);
- (*o_putc)('$');
- (*o_putc)('(');
- (*o_putc)(0x4F);
+ if (iconv == w_iconv16) {
+ input_endian = ENDIAN_LITTLE;
+ return;
}
- }else if (output_mode != JIS_X_0208) {
- output_mode = JIS_X_0208;
- (*o_putc)(ESC);
- (*o_putc)('$');
- (*o_putc)(kanji_intro);
- }
- (*o_putc)(c2);
- (*o_putc)(c1);
+ (*i_ungetc)(0xFE,f);
+ }else (*i_ungetc)(c2,f);
+ (*i_ungetc)(0xFF,f);
+ break;
+ default:
+ (*i_ungetc)(c2,f);
+ break;
}
}
}
c = (*i_bgetc)(f);
if (c=='$' && broken_state.status != ESC
- && (input_mode==ASCII || input_mode==JIS_X_0201)) {
+ && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
c1= (*i_bgetc)(f);
broken_state.status = 0;
if (c1=='@'|| c1=='B') {
return c;
}
} else if (c=='(' && broken_state.status != ESC
- && (input_mode==JIS_X_0208 || input_mode==JIS_X_0201)) { /* ) */
+ && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
c1= (*i_bgetc)(f);
broken_state.status = 0;
if (c1=='J'|| c1=='B') {
} else {
prev0 = f_prev; /* we still need this one... , but almost done */
f_prev = c1;
- if (c2 || c2==JIS_X_0201)
+ if (c2 || c2 == JIS_X_0201_1976_K)
f_prev |= 0x80; /* this is Japanese */
f_line += char_size(c2,c1);
if (f_line<=fold_len) { /* normal case */
if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
f_line = char_size(c2,c1);
fold_state = LF; /* We can't wait, do fold now */
- } else if (c2==JIS_X_0201) {
+ } else if (c2 == JIS_X_0201_1976_K) {
/* simple kinsoku rules return 1 means no folding */
if (c1==(0xde&0x7f)) fold_state = 1; /* \e$B!+\e(B*/
else if (c1==(0xdf&0x7f)) fold_state = 1; /* \e$B!,\e(B*/
/* if (c2) c1 &= 0x7f; assertion */
- if (c2 == JIS_X_0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
+ if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
(*o_zconv)(c2,c1);
return;
}
if (x0201_f) {
- if (z_prev2 == JIS_X_0201) {
- if (c2 == JIS_X_0201) {
+ if (z_prev2 == JIS_X_0201_1976_K) {
+ if (c2 == JIS_X_0201_1976_K) {
if (c1 == (0xde&0x7f)) { /* \e$BByE@\e(B */
z_prev2 = 0;
(*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
z_prev2 = 0;
(*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
}
- if (c2 == JIS_X_0201) {
+ if (c2 == JIS_X_0201_1976_K) {
if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
/* wait for \e$BByE@\e(B or \e$BH>ByE@\e(B */
z_prev1 = c1;
break;
}
if (c) {
- (*o_zconv)(JIS_X_0201, c);
+ (*o_zconv)(JIS_X_0201_1976_K, c);
return;
}
} else if (c2 == 0x25) {
};
if (fullwidth_to_halfwidth[c1-0x20]){
c2 = fullwidth_to_halfwidth[c1-0x20];
- (*o_zconv)(JIS_X_0201, c2>>8);
+ (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
if (c2 & 0xFF) {
- (*o_zconv)(JIS_X_0201, c2&0xFF);
+ (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
}
return;
}
void rot_conv(nkf_char c2, nkf_char c1)
{
- if (c2==0 || c2==JIS_X_0201 || c2==ISO_8859_1) {
+ if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
c1 = rot13(c1);
} else if (c2) {
c1 = rot47(c1);
return;
} else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
c2 = 0;
- c1 = CLASS_UNICODE | 0x3094;
+ c1 = nkf_char_unicode_new(0x3094);
(*o_hira_conv)(c2,c1);
return;
}
}
}
if (hira_f & 2) {
- if (c2 == 0 && c1 == (CLASS_UNICODE | 0x3094)) {
+ if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
c2 = 0x25;
c1 = 0x74;
} else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
void iso2022jp_check_conv(nkf_char c2, nkf_char c1)
{
+#define RANGE_NUM_MAX 18
static const nkf_char range[RANGE_NUM_MAX][2] = {
{0x222f, 0x2239,},
{0x2242, 0x2249,},
};
static const nkf_char mime_encode[] = {
- EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201,
+ EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K,
#if defined(UTF8_INPUT_ENABLE)
UTF_8, UTF_8,
#endif
}
}
if (c != -1){
- return CLASS_UNICODE | c;
+ return nkf_char_unicode_new(c);
}
while (i > 0){
(*u)(buf[i], f);
i_mungetc_buf = std_ungetc;
output_mode = ASCII;
input_mode = ASCII;
- shift_mode = FALSE;
mime_decode_mode = FALSE;
file_out_f = FALSE;
eolmode_f = 0;
#endif /*WIN32DLL*/
}
-void module_connection(void)
+int module_connection(void)
{
if (input_encoding) set_input_encoding(input_encoding);
if (!output_encoding) {
output_encoding = nkf_default_encoding();
}
+ if (!output_encoding) {
+ if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
+ else return -1;
+ }
set_output_encoding(output_encoding);
oconv = nkf_enc_to_oconv(output_encoding);
o_putc = std_putc;
status_reinit(p++);
}
}
+ return 0;
}
/*
}
#endif
-nkf_char kanji_convert(FILE *f)
+int kanji_convert(FILE *f)
{
- nkf_char c3, c2=0, c1, c0=0;
+ nkf_char c1=0, c2=0, c3=0, c4=0;
+ int shift_mode = FALSE; /* TRUE or FALSE or JIS_X_0201_1976_K */
int is_8bit = FALSE;
if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
input_mode = ASCII;
output_mode = ASCII;
- shift_mode = FALSE;
-#define NEXT continue /* no output, get next */
-#define SEND ; /* output c1 and c2, get next */
-#define LAST break /* end of loop, go closing */
+#define NEXT continue /* no output, get next */
+#define SKIP c2=0;continue /* no output, get next */
+#define MORE c2=c1;continue /* need one more byte */
+#define SEND ; /* output c1 and c2, get next */
+#define LAST break /* end of loop, go closing */
- module_connection();
+ if (module_connection() < 0) {
+#if !defined(PERL_XS) && !defined(WIN32DLL)
+ fprintf(stderr, "no output encoding given\n");
+#endif
+ return -1;
+ }
check_bom(f);
+#ifdef UTF8_INPUT_ENABLE
+ if(iconv == w_iconv32){
+ while ((c1 = (*i_getc)(f)) != EOF &&
+ (c2 = (*i_getc)(f)) != EOF &&
+ (c3 = (*i_getc)(f)) != EOF &&
+ (c4 = (*i_getc)(f)) != EOF) {
+ nkf_iconv_utf_32(c1, c2, c3, c4);
+ }
+ (*i_ungetc)(EOF, f);
+ }
+ else if (iconv == w_iconv16) {
+ while ((c1 = (*i_getc)(f)) != EOF &&
+ (c2 = (*i_getc)(f)) != EOF) {
+ if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
+ (c3 = (*i_getc)(f)) != EOF &&
+ (c4 = (*i_getc)(f)) != EOF) {
+ nkf_iconv_utf_16(c1, c2, c3, c4);
+ }
+ }
+ (*i_ungetc)(EOF, f);
+ }
+#endif
+
while ((c1 = (*i_getc)(f)) != EOF) {
#ifdef INPUT_CODE_FIX
if (!input_encoding)
code_status(c1);
if (c2) {
/* second byte */
- if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
+ if (c2 > DEL) {
/* in case of 8th bit is on */
if (!estab_f&&!mime_decode_mode) {
/* in case of not established yet */
/* It is still ambiguious */
if (h_conv(f, c2, c1)==EOF)
LAST;
- else
- c2 = 0;
- NEXT;
- } else {
+ SKIP;
+ }
+ else {
/* in case of already established */
- if (c1 < AT) {
- /* ignore bogus code and not CP5022x UCD */
- c2 = 0;
- NEXT;
+ if (c1 < 0x40) {
+ /* ignore bogus code */
+ SKIP;
} else {
SEND;
}
}
- } else
- /* second byte, 7 bit code */
- /* it might be kanji shitfted */
- if ((c1 == DEL) || (c1 <= SP)) {
- /* ignore bogus first code */
- c2 = 0;
- NEXT;
- } else
- SEND;
- } else {
- /* first byte */
-#ifdef UTF8_INPUT_ENABLE
- if (iconv == w_iconv16) {
- if (input_endian == ENDIAN_BIG) {
- c2 = c1;
- if ((c1 = (*i_getc)(f)) != EOF) {
- if (0xD8 <= c2 && c2 <= 0xDB) {
- if ((c0 = (*i_getc)(f)) != EOF) {
- c0 <<= 8;
- if ((c3 = (*i_getc)(f)) != EOF) {
- c0 |= c3;
- } else c2 = EOF;
- } else c2 = EOF;
- }
- } else c2 = EOF;
- } else {
- if ((c2 = (*i_getc)(f)) != EOF) {
- if (0xD8 <= c2 && c2 <= 0xDB) {
- if ((c3 = (*i_getc)(f)) != EOF) {
- if ((c0 = (*i_getc)(f)) != EOF) {
- c0 <<= 8;
- c0 |= c3;
- } else c2 = EOF;
- } else c2 = EOF;
- }
- } else c2 = EOF;
- }
- SEND;
- } else if(iconv == w_iconv32){
- int c3 = c1;
- if((c2 = (*i_getc)(f)) != EOF &&
- (c1 = (*i_getc)(f)) != EOF &&
- (c0 = (*i_getc)(f)) != EOF){
- switch(input_endian){
- case ENDIAN_BIG:
- c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
- break;
- case ENDIAN_LITTLE:
- c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
- break;
- case ENDIAN_2143:
- c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
- break;
- case ENDIAN_3412:
- c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
- break;
- }
- c2 = 0;
- }else{
- c2 = EOF;
- }
- SEND;
- } else
-#endif
-#ifdef NUMCHAR_OPTION
- if (is_unicode_capsule(c1)){
- SEND;
- } else
-#endif
- if (c1 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
- /* 8 bit code */
- if (!estab_f && !iso8859_f) {
- /* not established yet */
- c2 = c1;
- NEXT;
+ }
+ /* 2nd byte of 7 bit code or SJIS */
+ SEND;
+ }
+ else {
+ /* first byte */
+ if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
+ /* CP5022x */
+ MORE;
+ } else if (c1 > DEL) {
+ /* 8 bit code */
+ if (!estab_f && !iso8859_f) {
+ /* not established yet */
+ MORE;
} else { /* estab_f==TRUE */
if (iso8859_f) {
c2 = ISO_8859_1;
c1 &= 0x7f;
SEND;
- } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
- /* SJIS X0201 Case... */
- if (iso2022jp_f && !x0201_f) {
- (*oconv)(GETA1, GETA2);
- NEXT;
- } else {
- c2 = JIS_X_0201;
- c1 &= 0x7f;
- SEND;
- }
- } else if (c1==SSO && iconv != s_iconv) {
- /* EUC X0201 Case */
- c1 = (*i_getc)(f); /* skip SSO */
- code_status(c1);
- if (SSP<=c1 && c1<0xe0) {
- if (iso2022jp_f && !x0201_f) {
- (*oconv)(GETA1, GETA2);
- NEXT;
- } else {
- c2 = JIS_X_0201;
- c1 &= 0x7f;
- SEND;
- }
- } else { /* bogus code, skip SSO and one byte */
- NEXT;
- }
- } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
- (c1 == 0xFD || c1 == 0xFE)) {
- /* CP10001 */
- c2 = JIS_X_0201;
+ }
+ else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
+ (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
+ /* JIS X 0201 */
+ c2 = JIS_X_0201_1976_K;
c1 &= 0x7f;
SEND;
- } else {
- /* already established */
- c2 = c1;
- NEXT;
- }
- }
- } else if ((c1 > SP) && (c1 != DEL)) {
- /* in case of Roman characters */
- if (shift_mode) {
- /* output 1 shifted byte */
- if (iso8859_f) {
- c2 = ISO_8859_1;
- SEND;
- } else if (SP <= c1 && c1 < (0xe0&0x7f)){
- /* output 1 shifted byte */
- if (iso2022jp_f && !x0201_f) {
- (*oconv)(GETA1, GETA2);
- NEXT;
- } else {
- c2 = JIS_X_0201;
- SEND;
- }
+ }
+ else {
+ /* already established */
+ MORE;
+ }
+ }
+ } else if (SP < c1 && c1 < DEL) {
+ /* in case of Roman characters */
+ if (shift_mode) {
+ /* output 1 shifted byte */
+ if (iso8859_f) {
+ c2 = ISO_8859_1;
+ SEND;
+ } else if (SP <= c1 && c1 < (0xE0&0x7F)){
+ /* output 1 shifted byte */
+ c2 = JIS_X_0201_1976_K;
+ SEND;
} else {
/* look like bogus code */
- NEXT;
+ SKIP;
}
} else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
/* in case of Kanji shifted */
- c2 = c1;
- NEXT;
+ MORE;
} else if (c1 == '=' && mime_f && !mime_decode_mode) {
/* Check MIME code */
if ((c1 = (*i_getc)(f)) == EOF) {
/* check in real detail */
if (mime_begin_strict(f) == EOF)
LAST;
- else
- NEXT;
+ SKIP;
} else if (mime_begin(f) == EOF)
- LAST;
- else
- NEXT;
- } else {
- (*oconv)(0, '=');
- (*i_ungetc)(c1,f);
- NEXT;
- }
- } else {
- /* normal ASCII code */
- SEND;
- }
- } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
- shift_mode = FALSE;
- NEXT;
- } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
- shift_mode = TRUE;
- NEXT;
+ LAST;
+ SKIP;
+ } else {
+ (*oconv)(0, '=');
+ (*i_ungetc)(c1,f);
+ SKIP;
+ }
+ } else {
+ /* normal ASCII code */
+ SEND;
+ }
+ } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
+ shift_mode = FALSE;
+ SKIP;
+ } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
+ shift_mode = TRUE;
+ SKIP;
} else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
if ((c1 = (*i_getc)(f)) == EOF) {
/* (*oconv)(0, ESC); don't send bogus code */
#ifdef CHECK_OPTION
debug("ISO-2022-JP");
#endif
- NEXT;
- } else if (c1 == '(') {
- if ((c1 = (*i_getc)(f)) == EOF) {
- /* don't send bogus code
- (*oconv)(0, ESC);
- (*oconv)(0, '$');
- (*oconv)(0, '(');
- */
- LAST;
- } else if (c1 == '@'|| c1 == 'B') {
- /* This is kanji introduction */
- input_mode = JIS_X_0208;
- shift_mode = FALSE;
- NEXT;
+ SKIP;
+ } else if (c1 == '(') {
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ /* don't send bogus code
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ (*oconv)(0, '(');
+ */
+ LAST;
+ } else if (c1 == '@'|| c1 == 'B') {
+ /* This is kanji introduction */
+ input_mode = JIS_X_0208;
+ shift_mode = FALSE;
+ SKIP;
#ifdef X0212_ENABLE
- } else if (c1 == 'D'){
- input_mode = JIS_X_0212;
- shift_mode = FALSE;
- NEXT;
+ } else if (c1 == 'D'){
+ input_mode = JIS_X_0212;
+ shift_mode = FALSE;
+ SKIP;
#endif /* X0212_ENABLE */
- } else if (c1 == 0x4F){
- input_mode = JIS_X_0213_1;
- shift_mode = FALSE;
- NEXT;
- } else if (c1 == 0x50){
- input_mode = JIS_X_0213_2;
- shift_mode = FALSE;
- NEXT;
- } else {
- /* could be some special code */
- (*oconv)(0, ESC);
- (*oconv)(0, '$');
- (*oconv)(0, '(');
- (*oconv)(0, c1);
- NEXT;
- }
- } else if (broken_f&0x2) {
- /* accept any ESC-(-x as broken code ... */
- input_mode = JIS_X_0208;
- shift_mode = FALSE;
- NEXT;
- } else {
- (*oconv)(0, ESC);
- (*oconv)(0, '$');
- (*oconv)(0, c1);
- NEXT;
- }
- } else if (c1 == '(') {
- if ((c1 = (*i_getc)(f)) == EOF) {
- /* don't send bogus code
- (*oconv)(0, ESC);
- (*oconv)(0, '('); */
- LAST;
- } else {
- if (c1 == 'I') {
- /* This is X0201 kana introduction */
- input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
- NEXT;
- } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
- /* This is X0208 kanji introduction */
- input_mode = ASCII; shift_mode = FALSE;
- NEXT;
- } else if (broken_f&0x2) {
- input_mode = ASCII; shift_mode = FALSE;
- NEXT;
- } else {
+ } else if (c1 == 'O' || c1 == 'Q'){
+ input_mode = JIS_X_0213_1;
+ shift_mode = FALSE;
+ SKIP;
+ } else if (c1 == 'P'){
+ input_mode = JIS_X_0213_2;
+ shift_mode = FALSE;
+ SKIP;
+ } else {
+ /* could be some special code */
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ (*oconv)(0, '(');
+ (*oconv)(0, c1);
+ SKIP;
+ }
+ } else if (broken_f&0x2) {
+ /* accept any ESC-(-x as broken code ... */
+ input_mode = JIS_X_0208;
+ shift_mode = FALSE;
+ SKIP;
+ } else {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ (*oconv)(0, c1);
+ SKIP;
+ }
+ } else if (c1 == '(') {
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ /* don't send bogus code
+ (*oconv)(0, ESC);
+ (*oconv)(0, '('); */
+ LAST;
+ } else {
+ if (c1 == 'I') {
+ /* This is X0201 kana introduction */
+ input_mode = JIS_X_0201_1976_K; shift_mode = JIS_X_0201_1976_K;
+ SKIP;
+ } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
+ /* This is X0208 kanji introduction */
+ input_mode = ASCII; shift_mode = FALSE;
+ SKIP;
+ } else if (broken_f&0x2) {
+ input_mode = ASCII; shift_mode = FALSE;
+ SKIP;
+ } else {
(*oconv)(0, ESC);
(*oconv)(0, '(');
/* maintain various input_mode here */
}
} else if ( c1 == 'N' || c1 == 'n'){
/* SS2 */
- c3 = (*i_getc)(f); /* skip SS2 */
- if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
- c1 = c3;
- c2 = JIS_X_0201;
+ c4 = (*i_getc)(f); /* skip SS2 */
+ if ( (SP<=c4 && c4 < 0x60) || (0xa0<=c4 && c4 < 0xe0)){
+ c1 = c4;
+ c2 = JIS_X_0201_1976_K;
SEND;
}else{
- (*i_ungetc)(c3, f);
+ (*i_ungetc)(c4, f);
/* lonely ESC */
(*oconv)(0, ESC);
SEND;
NUM : 2 0 3 4 5 X 1
*/
static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
- c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
+ c3 = nkf_char_unicode_new((jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000);
while ((c1 = (*i_getc)(f)) != EOF) {
if (SP <= c1 && c1 <= 'z') {
- (*oconv)(0, c1 + c0);
+ (*oconv)(0, c1 + c3);
} else break; /* c1 == SO */
}
}
}
if (c1 == EOF) LAST;
- NEXT;
+ SKIP;
} else {
/* lonely ESC */
(*oconv)(0, ESC);
} else {
i_ungetc(c1,f);
}
- c1 = CR;
- SEND;
+ c1 = CR;
+ SEND;
+ }
+ }
+ } else
+ SEND;
+ }
+ /* send: */
+ switch(input_mode){
+ case ASCII:
+ switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
+ case -2:
+ /* 4 bytes UTF-8 */
+ if ((c3 = (*i_getc)(f)) != EOF) {
+ code_status(c3);
+ c3 <<= 8;
+ if ((c4 = (*i_getc)(f)) != EOF) {
+ code_status(c4);
+ (*iconv)(c2, c1, c3|c4);
+ }
+ }
+ break;
+ case -1:
+ /* 3 bytes EUC or UTF-8 */
+ if ((c3 = (*i_getc)(f)) != EOF) {
+ code_status(c3);
+ (*iconv)(c2, c1, c3);
+ }
+ break;
+ }
+ break;
+ case JIS_X_0208:
+ case JIS_X_0213_1:
+ if (ms_ucs_map_f &&
+ 0x7F <= c2 && c2 <= 0x92 &&
+ 0x21 <= c1 && c1 <= 0x7E) {
+ /* CP932 UDC */
+ if(c1 == 0x7F)
+ SKIP;
+ c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
+ c2 = 0;
+ }
+ (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
+ break;
+#ifdef X0212_ENABLE
+ case JIS_X_0212:
+ (*oconv)(PREFIX_EUCG3 | c2, c1);
+ break;
+#endif /* X0212_ENABLE */
+ case JIS_X_0213_2:
+ (*oconv)(PREFIX_EUCG3 | c2, c1);
+ break;
+ default:
+ (*oconv)(input_mode, c1); /* other special case */
+ }
+
+ c2 = 0;
+ c3 = 0;
+ continue;
+ /* goto next_word */
+ }
+
+ /* epilogue */
+ (*iconv)(EOF, 0, 0);
+ if (!input_codename)
+ {
+ if (is_8bit) {
+ struct input_code *p = input_code_list;
+ struct input_code *result = p;
+ while (p->name){
+ if (p->score < result->score) result = p;
+ ++p;
+ }
+ set_input_codename(result->name);
+#ifdef CHECK_OPTION
+ debug(result->name);
+#endif
+ }
+ }
+ return 0;
+}
+
+/*
+ * int options(unsigned char *cp)
+ *
+ * return values:
+ * 0: success
+ * -1: ArgumentError
+ */
+int options(unsigned char *cp)
+{
+ nkf_char i, j;
+ unsigned char *p;
+ unsigned char *cp_back = NULL;
+ char codeset[32];
+ nkf_encoding *enc;
+
+ if (option_mode==1)
+ return 0;
+ while(*cp && *cp++!='-');
+ while (*cp || cp_back) {
+ if(!*cp){
+ cp = cp_back;
+ cp_back = NULL;
+ continue;
+ }
+ p = 0;
+ switch (*cp++) {
+ case '-': /* literal options */
+ if (!*cp || *cp == SP) { /* ignore the rest of arguments */
+ option_mode = 1;
+ return 0;
+ }
+ for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
+ p = (unsigned char *)long_option[i].name;
+ for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
+ if (*p == cp[j] || cp[j] == SP){
+ p = &cp[j] + 1;
+ break;
+ }
+ p = 0;
+ }
+ if (p == 0) {
+#if !defined(PERL_XS) && !defined(WIN32DLL)
+ fprintf(stderr, "unknown long option: --%s\n", cp);
+#endif
+ return -1;
+ }
+ while(*cp && *cp != SP && cp++);
+ if (long_option[i].alias[0]){
+ cp_back = cp;
+ cp = (unsigned char *)long_option[i].alias;
+ }else{
+ if (strcmp(long_option[i].name, "ic=") == 0){
+ nkf_str_upcase((char *)p, codeset, 32);
+ enc = nkf_enc_find(codeset);
+ if (!enc) continue;
+ input_encoding = enc;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "oc=") == 0){
+ nkf_str_upcase((char *)p, codeset, 32);
+ enc = nkf_enc_find(codeset);
+ if (enc <= 0) continue;
+ output_encoding = enc;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "guess=") == 0){
+ if (p[0] == '0' || p[0] == '1') {
+ guess_f = 1;
+ } else {
+ guess_f = 2;
+ }
+ continue;
+ }
+#ifdef OVERWRITE
+ if (strcmp(long_option[i].name, "overwrite") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "overwrite=") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = TRUE;
+ backup_f = TRUE;
+ backup_suffix = malloc(strlen((char *) p) + 1);
+ strcpy(backup_suffix, (char *) p);
+ continue;
+ }
+ if (strcmp(long_option[i].name, "in-place") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = FALSE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "in-place=") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = FALSE;
+ backup_f = TRUE;
+ backup_suffix = malloc(strlen((char *) p) + 1);
+ strcpy(backup_suffix, (char *) p);
+ continue;
+ }
+#endif
+#ifdef INPUT_OPTION
+ if (strcmp(long_option[i].name, "cap-input") == 0){
+ cap_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "url-input") == 0){
+ url_f = TRUE;
+ continue;
+ }
+#endif
+#ifdef NUMCHAR_OPTION
+ if (strcmp(long_option[i].name, "numchar-input") == 0){
+ numchar_f = TRUE;
+ continue;
+ }
+#endif
+#ifdef CHECK_OPTION
+ if (strcmp(long_option[i].name, "no-output") == 0){
+ noout_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "debug") == 0){
+ debug_f = TRUE;
+ continue;
+ }
+#endif
+ if (strcmp(long_option[i].name, "cp932") == 0){
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+ cp932inv_f = -TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP932;
+#endif
+ continue;
+ }
+ if (strcmp(long_option[i].name, "no-cp932") == 0){
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+ cp932inv_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
+ continue;
+ }
+#ifdef SHIFTJIS_CP932
+ if (strcmp(long_option[i].name, "cp932inv") == 0){
+ cp932inv_f = -TRUE;
+ continue;
+ }
+#endif
+
+#ifdef X0212_ENABLE
+ if (strcmp(long_option[i].name, "x0212") == 0){
+ x0212_f = TRUE;
+ continue;
+ }
+#endif
+
+#ifdef EXEC_IO
+ if (strcmp(long_option[i].name, "exec-in") == 0){
+ exec_f = 1;
+ return 0;
+ }
+ if (strcmp(long_option[i].name, "exec-out") == 0){
+ exec_f = -1;
+ return 0;
+ }
+#endif
+#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
+ if (strcmp(long_option[i].name, "no-cp932ext") == 0){
+ no_cp932ext_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
+ no_best_fit_chars_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-skip") == 0){
+ encode_fallback = NULL;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-html") == 0){
+ encode_fallback = encode_fallback_html;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-xml") == 0){
+ encode_fallback = encode_fallback_xml;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-java") == 0){
+ encode_fallback = encode_fallback_java;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-perl") == 0){
+ encode_fallback = encode_fallback_perl;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-subchar") == 0){
+ encode_fallback = encode_fallback_subchar;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-subchar=") == 0){
+ encode_fallback = encode_fallback_subchar;
+ unicode_subchar = 0;
+ if (p[0] != '0'){
+ /* decimal number */
+ for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
+ unicode_subchar *= 10;
+ unicode_subchar += hex2bin(p[i]);
+ }
+ }else if(p[1] == 'x' || p[1] == 'X'){
+ /* hexadecimal number */
+ for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
+ unicode_subchar <<= 4;
+ unicode_subchar |= hex2bin(p[i]);
+ }
+ }else{
+ /* octal number */
+ for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
+ unicode_subchar *= 8;
+ unicode_subchar += hex2bin(p[i]);
+ }
}
+ w16e_conv(unicode_subchar, &i, &j);
+ unicode_subchar = i<<8 | j;
+ continue;
}
- } else if (c1 == DEL && input_mode == JIS_X_0208) {
- /* CP5022x */
- c2 = c1;
- NEXT;
- } else
- SEND;
- }
- /* send: */
- switch(input_mode){
- case ASCII:
- switch ((*iconv)(c2, c1, c0)) { /* can be EUC / SJIS / UTF-8 / UTF-16 */
- case -2:
- /* 4 bytes UTF-8 */
- if ((c0 = (*i_getc)(f)) != EOF) {
- code_status(c0);
- c0 <<= 8;
- if ((c3 = (*i_getc)(f)) != EOF) {
- code_status(c3);
- (*iconv)(c2, c1, c0|c3);
- }
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
+ ms_ucs_map_f = UCS_MAP_MS;
+ continue;
}
- break;
- case -1:
- /* 3 bytes EUC or UTF-8 */
- if ((c0 = (*i_getc)(f)) != EOF) {
- code_status(c0);
- (*iconv)(c2, c1, c0);
+#endif
+#ifdef UNICODE_NORMALIZATION
+ if (strcmp(long_option[i].name, "utf8mac-input") == 0){
+ nfc_f = TRUE;
+ continue;
}
- break;
- }
- break;
- case JIS_X_0208:
- case JIS_X_0213_1:
- if (ms_ucs_map_f &&
- 0x7F <= c2 && c2 <= 0x92 &&
- 0x21 <= c1 && c1 <= 0x7E) {
- /* CP932 UDC */
- if(c1 == 0x7F) return 0;
- c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
- c2 = 0;
+#endif
+ if (strcmp(long_option[i].name, "prefix=") == 0){
+ if (nkf_isgraph(p[0])){
+ for (i = 1; nkf_isgraph(p[i]); i++){
+ prefix_table[p[i]] = p[0];
+ }
+ }
+ continue;
+ }
+#if !defined(PERL_XS) && !defined(WIN32DLL)
+ fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
+#endif
+ return -1;
}
- (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
- break;
-#ifdef X0212_ENABLE
- case JIS_X_0212:
- (*oconv)(PREFIX_EUCG3 | c2, c1);
+ continue;
+ case 'b': /* buffered mode */
+ unbuf_f = FALSE;
+ continue;
+ case 'u': /* non bufferd mode */
+ unbuf_f = TRUE;
+ continue;
+ case 't': /* transparent mode */
+ if (*cp=='1') {
+ /* alias of -t */
+ cp++;
+ nop_f = TRUE;
+ } else if (*cp=='2') {
+ /*
+ * -t with put/get
+ *
+ * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
+ *
+ */
+ cp++;
+ nop_f = 2;
+ } else
+ nop_f = TRUE;
+ continue;
+ case 'j': /* JIS output */
+ case 'n':
+ output_encoding = nkf_enc_from_index(ISO_2022_JP);
+ continue;
+ case 'e': /* AT&T EUC output */
+ output_encoding = nkf_enc_from_index(EUCJP_NKF);
+ continue;
+ case 's': /* SJIS output */
+ output_encoding = nkf_enc_from_index(WINDOWS_31J);
+ continue;
+ case 'l': /* ISO8859 Latin-1 support, no conversion */
+ iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
+ input_encoding = nkf_enc_from_index(ISO_8859_1);
+ continue;
+ case 'i': /* Kanji IN ESC-$-@/B */
+ if (*cp=='@'||*cp=='B')
+ kanji_intro = *cp++;
+ continue;
+ case 'o': /* ASCII IN ESC-(-J/B */
+ if (*cp=='J'||*cp=='B'||*cp=='H')
+ ascii_intro = *cp++;
+ continue;
+ case 'h':
+ /*
+ bit:1 katakana->hiragana
+ bit:2 hiragana->katakana
+ */
+ if ('9'>= *cp && *cp>='0')
+ hira_f |= (*cp++ -'0');
+ else
+ hira_f |= 1;
+ continue;
+ case 'r':
+ rot_f = TRUE;
+ continue;
+#if defined(MSDOS) || defined(__OS2__)
+ case 'T':
+ binmode_f = FALSE;
+ continue;
+#endif
+#ifndef PERL_XS
+ case 'V':
+ show_configuration();
+ exit(1);
break;
-#endif /* X0212_ENABLE */
- case JIS_X_0213_2:
- (*oconv)(PREFIX_EUCG3 | c2, c1);
+ case 'v':
+ usage();
+ exit(1);
break;
- default:
- (*oconv)(input_mode, c1); /* other special case */
- }
-
- c2 = 0;
- c0 = 0;
- continue;
- /* goto next_word */
- }
-
- /* epilogue */
- (*iconv)(EOF, 0, 0);
- if (!input_codename)
- {
- if (is_8bit) {
- struct input_code *p = input_code_list;
- struct input_code *result = p;
- while (p->name){
- if (p->score < result->score) result = p;
- ++p;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ case 'w': /* UTF-8 output */
+ if (cp[0] == '8') {
+ cp++;
+ if (cp[0] == '0'){
+ cp++;
+ output_encoding = nkf_enc_from_index(UTF_8N);
+ } else {
+ output_bom_f = TRUE;
+ output_encoding = nkf_enc_from_index(UTF_8_BOM);
+ }
+ } else {
+ int enc_idx;
+ if ('1'== cp[0] && '6'==cp[1]) {
+ cp += 2;
+ enc_idx = UTF_16;
+ } else if ('3'== cp[0] && '2'==cp[1]) {
+ cp += 2;
+ enc_idx = UTF_32;
+ } else {
+ output_encoding = nkf_enc_from_index(UTF_8);
+ continue;
+ }
+ if (cp[0]=='L') {
+ cp++;
+ output_endian = ENDIAN_LITTLE;
+ } else if (cp[0] == 'B') {
+ cp++;
+ } else {
+ output_encoding = nkf_enc_from_index(enc_idx);
+ continue;
+ }
+ if (cp[0] == '0'){
+ cp++;
+ enc_idx = enc_idx == UTF_16
+ ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
+ : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
+ } else {
+ output_bom_f = TRUE;
+ enc_idx = enc_idx == UTF_16
+ ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
+ : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
+ }
+ output_encoding = nkf_enc_from_index(enc_idx);
}
- set_input_codename(result->name);
-#ifdef CHECK_OPTION
- debug(result->name);
+ continue;
+#endif
+#ifdef UTF8_INPUT_ENABLE
+ case 'W': /* UTF input */
+ if (cp[0] == '8') {
+ cp++;
+ input_encoding = nkf_enc_from_index(UTF_8);
+ }else{
+ int enc_idx;
+ if ('1'== cp[0] && '6'==cp[1]) {
+ cp += 2;
+ input_endian = ENDIAN_BIG;
+ enc_idx = UTF_16;
+ } else if ('3'== cp[0] && '2'==cp[1]) {
+ cp += 2;
+ input_endian = ENDIAN_BIG;
+ enc_idx = UTF_32;
+ } else {
+ input_encoding = nkf_enc_from_index(UTF_8);
+ continue;
+ }
+ if (cp[0]=='L') {
+ cp++;
+ input_endian = ENDIAN_LITTLE;
+ } else if (cp[0] == 'B') {
+ cp++;
+ input_endian = ENDIAN_BIG;
+ }
+ enc_idx = enc_idx == UTF_16
+ ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
+ : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
+ input_encoding = nkf_enc_from_index(enc_idx);
+ }
+ continue;
+#endif
+ /* Input code assumption */
+ case 'J': /* ISO-2022-JP input */
+ input_encoding = nkf_enc_from_index(ISO_2022_JP);
+ continue;
+ case 'E': /* EUC-JP input */
+ input_encoding = nkf_enc_from_index(EUCJP_NKF);
+ continue;
+ case 'S': /* Windows-31J input */
+ input_encoding = nkf_enc_from_index(WINDOWS_31J);
+ continue;
+ case 'Z': /* Convert X0208 alphabet to asii */
+ /* alpha_f
+ bit:0 Convert JIS X 0208 Alphabet to ASCII
+ bit:1 Convert Kankaku to one space
+ bit:2 Convert Kankaku to two spaces
+ bit:3 Convert HTML Entity
+ bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
+ */
+ while ('0'<= *cp && *cp <='9') {
+ alpha_f |= 1 << (*cp++ - '0');
+ }
+ if (!alpha_f) alpha_f = 1;
+ continue;
+ case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
+ x0201_f = FALSE; /* No X0201->X0208 conversion */
+ /* accept X0201
+ ESC-(-I in JIS, EUC, MS Kanji
+ SI/SO in JIS, EUC, MS Kanji
+ SS2 in EUC, JIS, not in MS Kanji
+ MS Kanji (0xa0-0xdf)
+ output X0201
+ ESC-(-I in JIS (0x20-0x5f)
+ SS2 in EUC (0xa0-0xdf)
+ 0xa0-0xd in MS Kanji (0xa0-0xdf)
+ */
+ continue;
+ case 'X': /* Convert X0201 kana to X0208 */
+ x0201_f = TRUE;
+ continue;
+ case 'F': /* prserve new lines */
+ fold_preserve_f = TRUE;
+ case 'f': /* folding -f60 or -f */
+ fold_f = TRUE;
+ fold_len = 0;
+ while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
+ fold_len *= 10;
+ fold_len += *cp++ - '0';
+ }
+ if (!(0<fold_len && fold_len<BUFSIZ))
+ fold_len = DEFAULT_FOLD;
+ if (*cp=='-') {
+ fold_margin = 0;
+ cp++;
+ while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
+ fold_margin *= 10;
+ fold_margin += *cp++ - '0';
+ }
+ }
+ continue;
+ case 'm': /* MIME support */
+ /* mime_decode_f = TRUE; */ /* this has too large side effects... */
+ if (*cp=='B'||*cp=='Q') {
+ mime_decode_mode = *cp++;
+ mimebuf_f = FIXED_MIME;
+ } else if (*cp=='N') {
+ mime_f = TRUE; cp++;
+ } else if (*cp=='S') {
+ mime_f = STRICT_MIME; cp++;
+ } else if (*cp=='0') {
+ mime_decode_f = FALSE;
+ mime_f = FALSE; cp++;
+ } else {
+ mime_f = STRICT_MIME;
+ }
+ continue;
+ case 'M': /* MIME output */
+ if (*cp=='B') {
+ mimeout_mode = 'B';
+ mimeout_f = FIXED_MIME; cp++;
+ } else if (*cp=='Q') {
+ mimeout_mode = 'Q';
+ mimeout_f = FIXED_MIME; cp++;
+ } else {
+ mimeout_f = TRUE;
+ }
+ continue;
+ case 'B': /* Broken JIS support */
+ /* bit:0 no ESC JIS
+ bit:1 allow any x on ESC-(-x or ESC-$-x
+ bit:2 reset to ascii on NL
+ */
+ if ('9'>= *cp && *cp>='0')
+ broken_f |= 1<<(*cp++ -'0');
+ else
+ broken_f |= TRUE;
+ continue;
+#ifndef PERL_XS
+ case 'O':/* for Output file */
+ file_out_f = TRUE;
+ continue;
+#endif
+ case 'c':/* add cr code */
+ eolmode_f = CRLF;
+ continue;
+ case 'd':/* delete cr code */
+ eolmode_f = LF;
+ continue;
+ case 'I': /* ISO-2022-JP output */
+ iso2022jp_f = TRUE;
+ continue;
+ case 'L': /* line mode */
+ if (*cp=='u') { /* unix */
+ eolmode_f = LF; cp++;
+ } else if (*cp=='m') { /* mac */
+ eolmode_f = CR; cp++;
+ } else if (*cp=='w') { /* windows */
+ eolmode_f = CRLF; cp++;
+ } else if (*cp=='0') { /* no conversion */
+ eolmode_f = 0; cp++;
+ }
+ continue;
+#ifndef PERL_XS
+ case 'g':
+ if ('2' <= *cp && *cp <= '9') {
+ guess_f = 2;
+ cp++;
+ } else if (*cp == '0' || *cp == '1') {
+ guess_f = 1;
+ cp++;
+ } else {
+ guess_f = 1;
+ }
+ continue;
+#endif
+ case SP:
+ /* module muliple options in a string are allowed for Perl moudle */
+ while(*cp && *cp++!='-');
+ continue;
+ default:
+#if !defined(PERL_XS) && !defined(WIN32DLL)
+ fprintf(stderr, "unknown option: -%c\n", *(cp-1));
#endif
+ /* bogus option but ignored */
+ return -1;
}
}
- return 1;
+ return 0;
}
#ifdef WIN32DLL
#ifdef EASYWIN /*Easy Win */
_BufferSize.y = 400;/*Set Scroll Buffer Size*/
#endif
+#ifdef DEFAULT_CODE_LOCALE
setlocale(LC_CTYPE, "");
-
+#endif
for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
cp = (unsigned char *)*argv;
options(cp);
#ifdef EXEC_IO
exec_f = exec_f_back;
#endif
-#ifdef X0212_ENABLE
x0212_f = x0212_f_back;
-#endif
x0213_f = x0213_f_back;
}