* Add aliases x-iso2022jp-cp932, CP50220, CP50221, CP50222, and ISO-2022-JP-MS.
(nkf does not fully support these codesets yet.)
** E-Mail: furukawa@tcp-ip.or.jp
** まで御連絡をお願いします。
***********************************************************************/
** E-Mail: furukawa@tcp-ip.or.jp
** まで御連絡をお願いします。
***********************************************************************/
-/* $Id: nkf.c,v 1.94 2006/03/24 06:14:32 naruse Exp $ */
+/* $Id: nkf.c,v 1.95 2006/03/26 13:10:41 naruse Exp $ */
#define NKF_VERSION "2.0.6"
#define NKF_VERSION "2.0.6"
-#define NKF_RELEASE_DATE "2006-03-24"
+#define NKF_RELEASE_DATE "2006-03-26"
#include "config.h"
#define COPY_RIGHT \
#include "config.h"
#define COPY_RIGHT \
STATIC int ww16_conv PROTO((int c2, int c1, int c0));
STATIC int w16e_conv PROTO((unsigned short val,int *p2,int *p1));
#endif
STATIC int ww16_conv PROTO((int c2, int c1, int c0));
STATIC int w16e_conv PROTO((unsigned short val,int *p2,int *p1));
#endif
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
-STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
-#endif
#ifdef UTF8_OUTPUT_ENABLE
STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
#ifdef UTF8_OUTPUT_ENABLE
STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
#ifdef X0212_ENABLE
{"x0212", ""},
#endif
#ifdef X0212_ENABLE
{"x0212", ""},
#endif
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
- {"internal-unicode", ""},
-#endif
#ifdef UTF8_OUTPUT_ENABLE
{"utf8", "w"},
{"utf16", "w16"},
#ifdef UTF8_OUTPUT_ENABLE
{"utf8", "w"},
{"utf16", "w16"},
codeset[i] = nkf_toupper(p[i]);
}
codeset[i] = 0;
codeset[i] = nkf_toupper(p[i]);
}
codeset[i] = 0;
- if(strcmp(codeset, "ISO-2022-JP") == 0){
+ if(strcmp(codeset, "ISO-2022-JP") == 0 ||
+ strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
+ strcmp(codeset, "CP50220") == 0 ||
+ strcmp(codeset, "CP50221") == 0 ||
+ strcmp(codeset, "CP50222") == 0 ||
+ strcmp(codeset, "ISO-2022-JP-MS") == 0){
input_f = JIS_INPUT;
}else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
input_f = JIS_INPUT;
input_f = JIS_INPUT;
}else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
input_f = JIS_INPUT;
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_ASCII;
#endif
- }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){
+ }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
+ strcmp(codeset, "SHIFT_JIS-2004") == 0){
input_f = SJIS_INPUT;
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
input_f = SJIS_INPUT;
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
cp932inv_f = FALSE;
#endif
if (x0201_f==NO_X0201) x0201_f=TRUE;
cp932inv_f = FALSE;
#endif
if (x0201_f==NO_X0201) x0201_f=TRUE;
- }else if(strcmp(codeset, "EUC-JISX0213") == 0){
+ }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
+ strcmp(codeset, "EUC-JIS-2004") == 0){
input_f = JIS_INPUT;
x0201_f = FALSE;
x0213_f = TRUE;
input_f = JIS_INPUT;
x0201_f = FALSE;
x0213_f = TRUE;
codeset[i] = nkf_toupper(p[i]);
}
codeset[i] = 0;
codeset[i] = nkf_toupper(p[i]);
}
codeset[i] = 0;
- if(strcmp(codeset, "ISO-2022-JP") == 0){
+ if(strcmp(codeset, "ISO-2022-JP") == 0 ||
+ strcmp(codeset, "CP50220") == 0){
+ output_conv = j_oconv;
+ }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
+ output_conv = j_oconv;
+ no_cp932ext_f = TRUE;
+ }else if(strcmp(codeset, "CP50221") == 0 ||
+ strcmp(codeset, "ISO-2022-JP-MS") == 0){
}else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
output_conv = j_oconv;
#ifdef X0212_ENABLE
}else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
output_conv = j_oconv;
#ifdef X0212_ENABLE
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_ASCII;
#endif
- }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){
+ }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
+ strcmp(codeset, "SHIFT_JIS-2004") == 0){
output_conv = s_oconv;
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
cp932inv_f = FALSE;
#endif
output_conv = s_oconv;
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
cp932inv_f = FALSE;
#endif
- }else if(strcmp(codeset, "EUC-JISX0213") == 0){
+ }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
+ strcmp(codeset, "EUC-JIS-2004") == 0){
output_conv = e_oconv;
#ifdef X0212_ENABLE
x0212_f = TRUE;
output_conv = e_oconv;
#ifdef X0212_ENABLE
x0212_f = TRUE;
}
#endif
#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
}
#endif
#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
- if (strcmp(long_option[i].name, "internal-unicode") == 0){
- internal_unicode_f = TRUE;
- continue;
- }
if (strcmp(long_option[i].name, "no-cp932ext") == 0){
no_cp932ext_f = TRUE;
continue;
if (strcmp(long_option[i].name, "no-cp932ext") == 0){
no_cp932ext_f = TRUE;
continue;
}else return 0;
}
if (c2 == 0 || c2 == EOF){
}else return 0;
}
if (c2 == 0 || c2 == EOF){
-#ifdef UTF8_OUTPUT_ENABLE
- } else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
- unsigned short val = 0;
- if(c2 == 0){
- c2 = c1;
- c1 = 0;
- }
- val = ww16_conv(c2, c1, c0);
- c2 = (val >> 8) & 0xff;
- c1 = val & 0xff;
-#endif
} else {
ret = w2e_conv(c2, c1, c0, &c2, &c1);
}
} else {
ret = w2e_conv(c2, c1, c0, &c2, &c1);
}
return 0;
}else if((c2>>3)==27){ /* surrogate pair */
return 1;
return 0;
}else if((c2>>3)==27){ /* surrogate pair */
return 1;
-#ifdef UTF8_OUTPUT_ENABLE
- }else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
-#endif
}else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
if (ret) return ret;
(*oconv)(c2, c1);
}else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
if (ret) return ret;
(*oconv)(c2, c1);
(*o_putc)(c1 | 0x080);
} else {
output_mode = UTF8;
(*o_putc)(c1 | 0x080);
} else {
output_mode = UTF8;
-#ifdef UTF8_INPUT_ENABLE
- if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16))
- val = ((c2<<8)&0xff00) + c1;
- else
-#endif
- val = e2w_conv(c2, c1);
+ val = e2w_conv(c2, c1);
if (val){
w16w_conv(val, &c2, &c1, &c0);
(*o_putc)(c2);
if (val){
w16w_conv(val, &c2, &c1, &c0);
(*o_putc)(c2);
-#ifdef UTF8_INPUT_ENABLE
- if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16)){
- } else
-#endif
if (c2 == ISO8859_1) {
c2 = 0;
c1 |= 0x80;
if (c2 == ISO8859_1) {
c2 = 0;
c1 |= 0x80;
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
ms_ucs_map_f = UCS_MAP_ASCII;
#endif
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
- internal_unicode_f = FALSE;
-#endif
#ifdef UTF8_INPUT_ENABLE
no_cp932ext_f = FALSE;
ignore_zwnbsp_f = TRUE;
#ifdef UTF8_INPUT_ENABLE
no_cp932ext_f = FALSE;
ignore_zwnbsp_f = TRUE;