** E-Mail: furukawa@tcp-ip.or.jp
** \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
***********************************************************************/
-/* $Id: nkf.c,v 1.87 2006/01/05 08:45:32 naruse Exp $ */
-#define NKF_VERSION "2.0.5"
-#define NKF_RELEASE_DATE "2005-12-08"
+/* $Id: nkf.c,v 1.93 2006/03/14 15:55:58 naruse Exp $ */
+#define NKF_VERSION "2.0.6"
+#define NKF_RELEASE_DATE "2006-03-14"
#include "config.h"
#define COPY_RIGHT \
- "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse"
+ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
+ " 2002-2006 Kono, Furukawa, Naruse, mastodon"
/*
#define X0201 2
#define ISO8859_1 8
#define NO_X0201 3
-#define X0212 16
+/* Output-mode codes: high byte is '(' (0x28), low byte is the final byte
+ * of the "ESC $ ( F" designation, emitted via (output_mode & 0x7F). */
+#define X0212 0x2844   /* ESC $ ( D */
+#define X0213_1 0x284F /* ESC $ ( O -- must differ from X0213_2 */
+#define X0213_2 0x2850 /* ESC $ ( P */
/* Input Assumption */
#if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
-#define sizeof_euc_utf8 94
#define sizeof_euc_to_utf8_1byte 94
#define sizeof_euc_to_utf8_2bytes 94
#define sizeof_utf8_to_euc_C2 64
STATIC int s_iconv PROTO((int c2,int c1,int c0));
STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
STATIC int e_iconv PROTO((int c2,int c1,int c0));
+#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+/* UCS Mapping
+ * 0: Shift_JIS, eucJP-ascii
+ * 1: eucJP-ms
+ * 2: CP932, CP51932
+ */
+#define UCS_MAP_ASCII 0
+#define UCS_MAP_MS 1
+#define UCS_MAP_CP932 2
+STATIC int ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
#ifdef UTF8_INPUT_ENABLE
-/* don't convert characters when the mapping is not defined in the standard */
-STATIC int strict_mapping_f = TRUE;
-/* disable NEC special, NEC-selected IBM extended and IBM extended characters */
-STATIC int disable_cp932ext_f = FALSE;
+/* no NEC special, NEC-selected IBM extended and IBM extended characters */
+STATIC int no_cp932ext_f = FALSE;
/* ignore ZERO WIDTH NO-BREAK SPACE */
STATIC int ignore_zwnbsp_f = TRUE;
-/* don't convert characters that can't secure round trip convertion */
-STATIC int unicode_round_trip_f = FALSE;
+STATIC int no_best_fit_chars_f = FALSE;
+STATIC int unicode_subchar = '?'; /* the regular substitution character */
STATIC void encode_fallback_html PROTO((int c));
STATIC void encode_fallback_xml PROTO((int c));
STATIC void encode_fallback_java PROTO((int c));
STATIC int ww16_conv PROTO((int c2, int c1, int c0));
STATIC int w16e_conv PROTO((unsigned short val,int *p2,int *p1));
#endif
+#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
+STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
+#endif
#ifdef UTF8_OUTPUT_ENABLE
+STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
+STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
STATIC int e2w_conv PROTO((int c2,int c1));
STATIC void w_oconv PROTO((int c2,int c1));
STATIC void w_oconv16 PROTO((int c2,int c1));
STATIC int x0201_f = NO_X0201; /* Assume NO JISX0201 */
#endif
STATIC int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
-STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
-#endif
-#ifdef UTF8_OUTPUT_ENABLE
-STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
-STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
-STATIC int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
-STATIC int unicode_subchar = '?'; /* the regular substitution character */
-#endif
#ifdef UNICODE_NORMALIZATION
STATIC int nfc_f = FALSE;
#endif
#ifdef SHIFTJIS_CP932
-/* invert IBM extended characters to others
- and controls some UCS mapping for Microsoft Code Page */
+/* invert IBM extended characters to others */
STATIC int cp51932_f = TRUE;
#define CP932_TABLE_BEGIN (0xfa)
#define CP932_TABLE_END (0xfc)
STATIC int x0212_shift PROTO((int c));
STATIC int x0212_unshift PROTO((int c));
#endif
+STATIC int x0213_f = FALSE;
STATIC unsigned char prefix_table[256];
#define CRLF 1
-STATIC int file_out = FALSE;
+STATIC int file_out_f = FALSE;
#ifdef OVERWRITE
-STATIC int overwrite = FALSE;
+STATIC int overwrite_f = FALSE;
+STATIC int preserve_time_f = FALSE;
+STATIC int backup_f = FALSE;
+STATIC char *backup_suffix = "";
+STATIC char *get_backup_filename PROTO((const char *suffix, const char *filename));
#endif
STATIC int crmode_f = 0; /* CR, NL, CRLF */
#endif
/* reopen file for stdout */
- if (file_out == TRUE) {
+ if (file_out_f == TRUE) {
#ifdef OVERWRITE
- if (overwrite){
+ if (overwrite_f){
outfname = malloc(strlen(origfname)
+ strlen(".nkftmpXXXXXX")
+ 1);
}
strcat(outfname, "ntXXXXXX");
mktemp(outfname);
- fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
+ fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
S_IREAD | S_IWRITE);
#else
strcat(outfname, ".nkftmpXXXXXX");
}
fclose(fin);
#ifdef OVERWRITE
- if (overwrite) {
+ if (overwrite_f) {
struct stat sb;
#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
time_t tb[2];
}
/* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
+ if(preserve_time_f){
#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
- tb[0] = tb[1] = sb.st_mtime;
- if (utime(outfname, tb)) {
- fprintf(stderr, "Can't set timestamp %s\n", outfname);
- }
+ tb[0] = tb[1] = sb.st_mtime;
+ if (utime(outfname, tb)) {
+ fprintf(stderr, "Can't set timestamp %s\n", outfname);
+ }
#else
- tb.actime = sb.st_atime;
- tb.modtime = sb.st_mtime;
- if (utime(outfname, &tb)) {
- fprintf(stderr, "Can't set timestamp %s\n", outfname);
- }
+ tb.actime = sb.st_atime;
+ tb.modtime = sb.st_mtime;
+ if (utime(outfname, &tb)) {
+ fprintf(stderr, "Can't set timestamp %s\n", outfname);
+ }
#endif
+ }
+ if(backup_f){
+ char *backup_filename = get_backup_filename(backup_suffix, origfname);
#ifdef MSDOS
- if (unlink(origfname)){
- perror(origfname);
- }
+ unlink(backup_filename);
#endif
+ if (rename(origfname, backup_filename)) {
+ perror(backup_filename);
+ fprintf(stderr, "Can't rename %s to %s\n",
+ origfname, backup_filename);
+ }
+ }else{
+#ifdef MSDOS
+ if (unlink(origfname)){
+ perror(origfname);
+ }
+#endif
+ }
if (rename(outfname, origfname)) {
perror(origfname);
fprintf(stderr, "Can't rename %s to %s\n",
}
}
#ifdef EASYWIN /*Easy Win */
- if (file_out == FALSE)
+ if (file_out_f == FALSE)
scanf("%d",&end_check);
else
fclose(stdout);
#else /* for Other OS */
- if (file_out == TRUE)
+ if (file_out_f == TRUE)
fclose(stdout);
#endif /*Easy Win */
return (0);
}
#endif /* WIN32DLL */
+#ifdef OVERWRITE
+/*
+ * Build the name of the backup file for `filename`.
+ *
+ * If `suffix` contains one or more '*' characters, the result is `suffix`
+ * with every '*' replaced by `filename`.  Otherwise the result is
+ * `filename` followed by `suffix`.
+ *
+ * Returns a newly malloc'ed, NUL-terminated string owned by the caller,
+ * or NULL (after reporting through perror) when allocation fails.
+ */
+char *get_backup_filename(suffix, filename)
+    const char *suffix;
+    const char *filename;
+{
+    char *backup_filename = NULL;
+    int asterisk_count = 0;
+    int i, j;
+    int filename_length = strlen(filename);
+
+    for(i = 0; suffix[i]; i++){
+        if(suffix[i] == '*') asterisk_count++;
+    }
+
+    if(asterisk_count){
+        /* each '*' (1 char) grows to filename_length chars */
+        backup_filename = malloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
+        if (!backup_filename){
+            perror("Can't malloc backup filename.");
+            return NULL;
+        }
+
+        for(i = 0, j = 0; suffix[i];){
+            if(suffix[i] == '*'){
+                /* terminate what has been built so far so strncat appends at j */
+                backup_filename[j] = '\0';
+                strncat(backup_filename, filename, filename_length);
+                i++;
+                j += filename_length;
+            }else{
+                backup_filename[j++] = suffix[i++];
+            }
+        }
+        backup_filename[j] = '\0';
+    }else{
+        j = strlen(suffix) + filename_length;
+        /* room for filename + suffix + terminating NUL */
+        backup_filename = malloc(j + 1);
+        if (!backup_filename){
+            perror("Can't malloc backup filename.");
+            return NULL;
+        }
+        strcpy(backup_filename, filename);
+        strcat(backup_filename, suffix);
+    }
+    return backup_filename;
+}
+#endif
+
STATIC const
struct {
const char *name;
#ifdef UTF8_INPUT_ENABLE
{"utf8-input", "W"},
{"utf16-input", "W16"},
- {"disable-cp932ext", ""},
- {"strict-mapping", ""},
- {"enable-round-trip",""},
+ {"no-cp932ext", ""},
+ {"no-best-fit-chars",""},
#endif
#ifdef UNICODE_NORMALIZATION
{"utf8mac-input", ""},
#endif
#ifdef OVERWRITE
{"overwrite", ""},
+ {"overwrite=", ""},
+ {"in-place", ""},
+ {"in-place=", ""},
#endif
#ifdef INPUT_OPTION
{"cap-input", ""},
}else if(strcmp(codeset, "SHIFT_JIS") == 0){
input_f = SJIS_INPUT;
if (x0201_f==NO_X0201) x0201_f=TRUE;
- }else if(strcmp(codeset, "CP932") == 0){
+ }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
+ strcmp(codeset, "CSWINDOWS31J") == 0 ||
+ strcmp(codeset, "CP932") == 0 ||
+ strcmp(codeset, "MS932") == 0){
input_f = SJIS_INPUT;
x0201_f = FALSE;
#ifdef SHIFTJIS_CP932
cp51932_f = TRUE;
- cp932inv_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_CP932;
#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
x0201_f = FALSE;
#ifdef SHIFTJIS_CP932
cp51932_f = TRUE;
- cp932inv_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_CP932;
#endif
}else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
- strcmp(codeset, "EUCJP-MS") == 0){
+ strcmp(codeset, "EUCJP-MS") == 0 ||
+ strcmp(codeset, "EUCJPMS") == 0){
input_f = JIS_INPUT;
x0201_f = FALSE;
#ifdef SHIFTJIS_CP932
cp51932_f = FALSE;
- cp932inv_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_MS;
#endif
}else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
strcmp(codeset, "EUCJP-ASCII") == 0){
x0201_f = FALSE;
#ifdef SHIFTJIS_CP932
cp51932_f = FALSE;
- cp932inv_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = FALSE;
+ ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
+ }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){
+ input_f = SJIS_INPUT;
+ x0213_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+ cp932inv_f = FALSE;
+#endif
+ if (x0201_f==NO_X0201) x0201_f=TRUE;
+ }else if(strcmp(codeset, "EUC-JISX0213") == 0){
+ input_f = JIS_INPUT;
+ x0201_f = FALSE;
+ x0213_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+ cp932inv_f = FALSE;
#endif
#ifdef UTF8_INPUT_ENABLE
}else if(strcmp(codeset, "UTF-8") == 0 ||
output_conv = j_oconv;
}else if(strcmp(codeset, "SHIFT_JIS") == 0){
output_conv = s_oconv;
- }else if(strcmp(codeset, "CP932") == 0){
+ }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
+ strcmp(codeset, "CSWINDOWS31J") == 0 ||
+ strcmp(codeset, "CP932") == 0 ||
+ strcmp(codeset, "MS932") == 0){
output_conv = s_oconv;
x0201_f = FALSE;
#ifdef SHIFTJIS_CP932
- cp51932_f = TRUE;
- cp932inv_f = TRUE;
+ cp51932_f = TRUE;
+ cp932inv_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_CP932;
#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
output_conv = e_oconv;
x0201_f = FALSE;
#ifdef SHIFTJIS_CP932
- cp51932_f = TRUE;
- cp932inv_f = TRUE;
+ cp51932_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_CP932;
#endif
}else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
- strcmp(codeset, "EUCJP-MS") == 0){
+ strcmp(codeset, "EUCJP-MS") == 0 ||
+ strcmp(codeset, "EUCJPMS") == 0){
output_conv = e_oconv;
x0201_f = FALSE;
+#ifdef X0212_ENABLE
x0212_f = TRUE;
+#endif
#ifdef SHIFTJIS_CP932
- cp51932_f = FALSE;
+ cp51932_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_MS;
#endif
}else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
strcmp(codeset, "EUCJP-ASCII") == 0){
output_conv = e_oconv;
x0201_f = FALSE;
+#ifdef X0212_ENABLE
x0212_f = TRUE;
+#endif
#ifdef SHIFTJIS_CP932
- cp51932_f = FALSE;
+ cp51932_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = FALSE;
+ ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
+ }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){
+ output_conv = s_oconv;
+ x0213_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp932inv_f = FALSE;
+#endif
+ }else if(strcmp(codeset, "EUC-JISX0213") == 0){
+ output_conv = e_oconv;
+#ifdef X0212_ENABLE
+ x0212_f = TRUE;
+#endif
+ x0213_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
}else if(strcmp(codeset, "UTF-8") == 0){
}
#ifdef OVERWRITE
if (strcmp(long_option[i].name, "overwrite") == 0){
- file_out = TRUE;
- overwrite = TRUE;
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = TRUE;
continue;
}
+ if (strcmp(long_option[i].name, "overwrite=") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = TRUE;
+ backup_f = TRUE;
+ backup_suffix = malloc(strlen(p) + 1);
+ strcpy(backup_suffix, p);
+ continue;
+ }
+ if (strcmp(long_option[i].name, "in-place") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = FALSE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "in-place=") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = FALSE;
+ backup_f = TRUE;
+ backup_suffix = malloc(strlen(p) + 1);
+ strcpy(backup_suffix, p);
+ continue;
+ }
#endif
#ifdef INPUT_OPTION
if (strcmp(long_option[i].name, "cap-input") == 0){
cp932inv_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_CP932;
#endif
continue;
}
cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = FALSE;
+ ms_ucs_map_f = UCS_MAP_ASCII;
#endif
continue;
}
internal_unicode_f = TRUE;
continue;
}
- if (strcmp(long_option[i].name, "disable-cp932ext") == 0){
- disable_cp932ext_f = TRUE;
+ if (strcmp(long_option[i].name, "no-cp932ext") == 0){
+ no_cp932ext_f = TRUE;
continue;
}
- if (strcmp(long_option[i].name, "enable-round-trip") == 0){
- unicode_round_trip_f = TRUE;
+ if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
+ no_best_fit_chars_f = TRUE;
continue;
}
if (strcmp(long_option[i].name, "fb-skip") == 0){
#endif
#ifdef UTF8_OUTPUT_ENABLE
if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
- ms_ucs_map_f = TRUE;
+ ms_ucs_map_f = UCS_MAP_MS;
continue;
}
#endif
continue;
#ifndef PERL_XS
case 'O':/* for Output file */
- file_out = TRUE;
+ file_out_f = TRUE;
continue;
#endif
case 'c':/* add cr code */
#endif /* SHIFTJIS_CP932 */
#ifndef X0212_ENABLE
status_disable(ptr);
- break;
#endif
+ break;
}
}
if(input_f == SJIS_INPUT
#ifdef UTF8_INPUT_ENABLE
- || input_f == UTF8_INPUT || input_f == UTF16BE_INPUT
+ || input_f == UTF8_INPUT || input_f == UTF16BE_INPUT || input_f == UTF16LE_INPUT
#endif
){
is_8bit = TRUE;
#define LAST break /* end of loop, go closing */
while ((c1 = (*i_getc)(f)) != EOF) {
- code_status(c1);
+#ifdef INPUT_CODE_FIX
+ if (!input_f)
+#endif
+ code_status(c1);
if (c2) {
/* second byte */
if (c2 > DEL) {
return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
}
+const int shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
+
int s2e_conv(c2, c1, p2, p1)
int c2, c1;
int *p2, *p1;
}
#endif /* SHIFTJIS_CP932 */
#ifdef X0212_ENABLE
- if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
+ if (!x0213_f && x0212_f && 0xfa <= c2 && c2 <= 0xfc){
extern const unsigned short shiftjis_x0212[3][189];
val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
if (val){
}
#endif
if(c2 >= 0x80){
- c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
- if (c1 < 0x9f)
- c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
+ if(x0213_f && c2 >= 0xF0){
+ if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
+ c2 = 0x8F20 + shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
+ }else{ /* 78<=k<=94 */
+ c2 = 0x8F00 | (c2 * 2 - 0x17B);
+ if (0x9E < c1) c2++;
+ }
+ }else{
+ c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
+ if (0x9E < c1) c2++;
+ }
+ if (c1 < 0x9F)
+ c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
else {
- c1 = c1 - 0x7e;
- c2++;
+ c1 = c1 - 0x7E;
}
}
}else return 0;
}
if (c2 == 0 || c2 == EOF){
-#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
+#ifdef UTF8_OUTPUT_ENABLE
} else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
unsigned short val = 0;
if(c2 == 0){
}
return ret;
}
+#endif
+#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
void
w16w_conv(val, p2, p1, p0)
unsigned short val;
*p0 = 0x80 | (val & 0x3f);
}
}
+#endif
+#ifdef UTF8_INPUT_ENABLE
int
ww16_conv(c2, c1, c0)
int c2, c1, c0;
*p2 = 0;
*p1 = val;
}else{
- if(!ms_ucs_map_f){
- /* eucJP-ascii */
- switch(val){
- case 0x203E:
- *p2 = 0x21;
- *p1 = 0x31;
- return ret;
- break;
- case 0xFF5E:
- *p2 = 0x8F22;
- *p1 = 0x37;
- return ret;
- break;
- }
- }
w16w_conv(val, &c2, &c1, &c0);
ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
#ifdef NUMCHAR_OPTION
}
return ret;
}
+#endif
+#ifdef UTF8_INPUT_ENABLE
int
w_iconv16(c2, c1, c0)
int c2, c1,c0;
int *p2, *p1;
{
extern const unsigned short *const utf8_to_euc_2bytes[];
+ extern const unsigned short *const utf8_to_euc_2bytes_ms[];
+ extern const unsigned short *const utf8_to_euc_2bytes_932[];
extern const unsigned short *const *const utf8_to_euc_3bytes[];
+ extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
+ extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
+ const unsigned short *const *pp;
+ const unsigned short *const *const *ppp;
+ STATIC const int no_best_fit_chars_table_C2[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
+ 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0};
+ STATIC const int no_best_fit_chars_table_C2_ascii[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
+ STATIC const int no_best_fit_chars_table_932_C2[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
+ STATIC const int no_best_fit_chars_table_932_C3[] =
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
int ret = 0;
- if(c2 < 0xe0){
- if (ms_ucs_map_f && cp51932_f){
- /* CP932/CP51932: U+00A6 (BROKEN BAR) -> not 0x8fa2c3, but 0x7c */
- if(c2 == 0xC2){
- switch(c1){
- case 0xA5:
- if (p2) *p2 = 0;
- if (p1) *p1 = 0x5C;
- return 0;
- case 0xA6:
- if (p2) *p2 = 0;
- if (p1) *p1 = 0x7C;
- return 0;
- }
- }
- }else if(strict_mapping_f){
- switch(c2){
- case 0xC2:
- switch(c1){
- case 0xAB: case 0xAD: case 0xB2: case 0xB3:
- case 0xB5: case 0xB7: case 0xB9: case 0xBB:
- return 1;
- }
- break;
- case 0xC3:
- switch(c1){
- case 0x90:
- return 1;
- }
- break;
- }
- }
- ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
- if(!ret && !ms_ucs_map_f && !x0212_f){
- if(*p2 == 0 && *p1 < 0x80){
- return 1;
- }else if(*p2 > 0xFF){
- int s2, s1;
- if (e2s_conv(*p2, *p1, &s2, &s1) == 0){
- s2e_conv(s2, s1, p2, p1);
- if(*p2 == 0 && *p1 < 0x80)
- return 1;
- }else return 1;
- }
- }
- }else if(c0){
- if(unicode_round_trip_f){
- switch(c2){
- case 0xE2:
- switch(c1){
- case 0x80:
- if(c0 == 0x95) return 1;
+ if(c2 < 0x80){
+ *p2 = 0;
+ *p1 = c2;
+ }else if(c2 < 0xe0){
+ if(no_best_fit_chars_f){
+ if(ms_ucs_map_f == UCS_MAP_CP932){
+ switch(c2){
+ case 0xC2:
+ if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
break;
- case 0x88:
- if(c0 == 0xA5) return 1;
+ case 0xC3:
+ if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
break;
}
- break;
- case 0xEF:
- switch(c1){
- case 0xBB:
- if(c0 == 0xBF) return 1;
- break;
- case 0xBC:
- if(c0 == 0x8D) return 1;
- break;
- case 0xBF:
- if(0xA0 <= c0 && c0 <= 0xA5) return 1;
- break;
- }
- break;
- }
- }
- if(!ms_ucs_map_f){
- /* eucJP-ascii */
- if(c2 == 0xE2 && c1 == 0x80 && c0 == 0xBE){
- if (p2) *p2 = 0x21;
- if (p1) *p1 = 0x31;
- return ret;
- }else if(c2 == 0xEF && c1 == 0xBD && c0 == 0x9E){
- if (p2) *p2 = 0x8F22;
- if (p1) *p1 = 0x37;
- return ret;
+ }else if(cp51932_f){
+ if(c2 == 0xC2 && no_best_fit_chars_table_C2[c1&0x3F]) return 1;
+ }else{
+ if(c2 == 0xC2 && no_best_fit_chars_table_C2_ascii[c1&0x3F]) return 1;
}
}
- if(!strict_mapping_f);
- else if(ms_ucs_map_f && cp51932_f){
- /* Microsoft Code Page */
- switch(c2){
- case 0xE2:
- switch(c1){
- case 0x80:
- switch(c0){
- case 0x94: case 0x96: case 0xBE:
- return 1;
+ pp =
+ ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
+ ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
+ utf8_to_euc_2bytes;
+ ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
+ }else if(c0){
+ if(no_best_fit_chars_f){
+ if(ms_ucs_map_f == UCS_MAP_CP932){
+ if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
+ }else if(ms_ucs_map_f == UCS_MAP_MS){
+ switch(c2){
+ case 0xE2:
+ switch(c1){
+ case 0x80:
+ if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
+ break;
+ case 0x88:
+ if(c0 == 0x92) return 1;
+ break;
}
break;
- case 0x88:
- if(c0 == 0x92)
- return 1;
+ case 0xE3:
+ if(c1 == 0x80 || c0 == 0x9C) return 1;
break;
}
- break;
- case 0xE3:
- switch(c1){
- case 0x80:
- if(c0 == 0x9C)
- return 1;
+ }else{
+ switch(c2){
+ case 0xE2:
+ switch(c1){
+ case 0x80:
+ if(c0 == 0x95) return 1;
+ break;
+ case 0x88:
+ if(c0 == 0xA5) return 1;
+ break;
+ }
+ break;
+ case 0xEF:
+ switch(c1){
+ case 0xBC:
+ if(c0 == 0x8D) return 1;
+ break;
+ case 0xBD:
+ if(c0 == 0x9E && cp51932_f) return 1;
+ break;
+ case 0xBF:
+ if(0xA0 <= c0 && c0 <= 0xA5) return 1;
+ break;
+ }
break;
}
- break;
}
- }else{
- /* eucJP-open */
- if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94)
- return 1;
}
- ret = w_iconv_common(c1, c0, utf8_to_euc_3bytes[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
+ ppp =
+ ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
+ ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
+ utf8_to_euc_3bytes;
+ ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
}else return -1;
return ret;
}
if (p == 0) return 1;
c0 -= 0x80;
- if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
+ if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
val = p[c0];
if (val == 0) return 1;
- if (disable_cp932ext_f && (
- (val>>8) == 0x2D || /* disable NEC special characters */
- val > 0xF300 /* disable NEC special characters */
+ if (no_cp932ext_f && (
+ (val>>8) == 0x2D || /* NEC special characters */
+ val > 0xF300 /* NEC special characters */
)) return 1;
c2 = val >> 8;
return 0;
}
-#endif
-
-#ifdef UTF8_OUTPUT_ENABLE
void
nkf_each_char_to_hex(f, c)
void (*f)PROTO((int c2,int c1));
}
return;
}
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
int
e2w_conv(c2, c1)
int c2, c1;
p = euc_to_utf8_1byte;
#ifdef X0212_ENABLE
} else if (c2 >> 8 == 0x8f){
- if(!ms_ucs_map_f && c2 == 0x8F22 && c1 == 0x43){
+ if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == 0x8F22 && c1 == 0x43){
return 0xA6;
}
extern const unsigned short *const x0212_to_utf8_2bytes[];
c2 &= 0x7f;
c2 = (c2&0x7f) - 0x21;
if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
- p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
+ p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
else
return 0;
}
(*o_putc)(c1 | 0x080);
} else {
output_mode = UTF8;
+#ifdef UTF8_INPUT_ENABLE
if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16))
val = ((c2<<8)&0xff00) + c1;
- else val = e2w_conv(c2, c1);
+ else
+#endif
+ val = e2w_conv(c2, c1);
if (val){
w16w_conv(val, &c2, &c1, &c0);
(*o_putc)(c2);
unicode_bom_f=1;
}
+#ifdef UTF8_INPUT_ENABLE
if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16)){
- } else if (c2 == ISO8859_1) {
+ } else
+#endif
+ if (c2 == ISO8859_1) {
c2 = 0;
c1 |= 0x80;
#ifdef NUMCHAR_OPTION
e2s_conv(c2, c1, p2, p1)
int c2, c1, *p2, *p1;
{
-#ifdef X0212_ENABLE
- int val = 0;
- const unsigned short *ptr;
int ndx;
- extern const unsigned short *const x0212_shiftjis[];
if ((c2 & 0xff00) == 0x8f00){
- ndx = c2 & 0x7f;
- if (0x21 <= ndx && ndx <= 0x7e){
- ptr = x0212_shiftjis[ndx - 0x21];
- if (ptr){
- val = ptr[(c1 & 0x7f) - 0x21];
- }
- if (val){
- c2 = val >> 8;
- c1 = val & 0xff;
- if (p2) *p2 = c2;
- if (p1) *p1 = c1;
- return 0;
- }
- }
- c2 = x0212_shift(c2);
- }
+ ndx = c2 & 0xff;
+ if (x0213_f){
+ if((0x21 <= ndx && ndx <= 0x2F)){
+ if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
+ if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
+ return 0;
+ }else if(0x6E <= ndx && ndx <= 0x7E){
+ if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
+ if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
+ return 0;
+ }
+ return 1;
+ }
+#ifdef X0212_ENABLE
+ else if(0x21 <= ndx && ndx <= 0x7e){
+ int val = 0;
+ const unsigned short *ptr;
+ extern const unsigned short *const x0212_shiftjis[];
+ ndx = c2 & 0x7f;
+ ptr = x0212_shiftjis[ndx - 0x21];
+ if (ptr){
+ val = ptr[(c1 & 0x7f) - 0x21];
+ }
+ if (val){
+ c2 = val >> 8;
+ c1 = val & 0xff;
+ if (p2) *p2 = c2;
+ if (p1) *p1 = c1;
+ return 0;
+ }
+ c2 = x0212_shift(c2);
+ }
#endif /* X0212_ENABLE */
- if ((c2 & 0xff00) == 0x8f00){
- return 1;
}
+ if(0x7F < c2) return 1;
if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
return 0;
(*o_putc)(EOF);
#ifdef X0212_ENABLE
} else if ((c2 & 0xff00) >> 8 == 0x8f){
- if (output_mode!=X0212) {
- output_mode = X0212;
- (*o_putc)(ESC);
- (*o_putc)('$');
- (*o_putc)('(');
- (*o_putc)('D');
+ if(x0213_f){
+ if(output_mode!=X0213_2){
+ output_mode = X0213_2;
+ }
+ }else{
+ if(output_mode!=X0212){
+ output_mode = X0212;
+ }
}
+ (*o_putc)(ESC);
+ (*o_putc)('$');
+ (*o_putc)('(');
+ (*o_putc)(output_mode & 0x7F);
(*o_putc)(c2 & 0x7f);
(*o_putc)(c1);
#endif
}
(*o_putc)(c1);
} else {
- if (output_mode != X0208) {
+ if(x0213_f){
+ if (output_mode!=X0213_1) {
+ output_mode = X0213_1;
+ (*o_putc)(ESC);
+ (*o_putc)('$');
+ (*o_putc)('(');
+ (*o_putc)(output_mode & 0x7F);
+ }
+ }else if (output_mode != X0208) {
output_mode = X0208;
(*o_putc)(ESC);
(*o_putc)('$');
(const unsigned char *)"\075?ISO-8859-1?B?",
(const unsigned char *)"\075?ISO-2022-JP?B?",
(const unsigned char *)"\075?ISO-2022-JP?Q?",
-#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+#if defined(UTF8_INPUT_ENABLE)
(const unsigned char *)"\075?UTF-8?B?",
(const unsigned char *)"\075?UTF-8?Q?",
#endif
/* \e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u\e(B */
int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
e_iconv, s_iconv, 0, 0, 0, 0,
-#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+#if defined(UTF8_INPUT_ENABLE)
w_iconv, w_iconv,
#endif
0,
const int mime_encode[] = {
JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
-#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+#if defined(UTF8_INPUT_ENABLE)
UTF8, UTF8,
#endif
ASCII,
const int mime_encode_method[] = {
'B', 'B','Q', 'B', 'B', 'Q',
-#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+#if defined(UTF8_INPUT_ENABLE)
'B', 'Q',
#endif
'Q',
if (mime_decode_mode == 'Q') {
if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
restart_mime_q:
- if (c1=='_') return ' ';
+ if (c1=='_' && mimebuf_f != FIXED_MIME) return ' ';
if (c1<=' ' || DEL<=c1) {
mime_decode_mode = exit_mode; /* prepare for quit */
return c1;
}
- if (c1!='=' && c1!='?') {
+ if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
return c1;
}
{
switch(mimeout_mode) {
case 'Q':
- if(c==SPACE){
- (*o_mputc)('_');
- base64_count++;
- } else if (c==CR||c==NL) {
+ if (c==CR||c==NL) {
(*o_mputc)(c);
base64_count = 0;
- } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
+ } else if(!nkf_isalnum(c)) {
(*o_mputc)('=');
(*o_mputc)(itoh4(((c>>4)&0xf)));
(*o_mputc)(itoh4((c&0xf)));
x0201_f = NO_X0201;
#endif
iso2022jp_f = FALSE;
+#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+ ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
internal_unicode_f = FALSE;
#endif
-#ifdef UTF8_OUTPUT_ENABLE
- unicode_bom_f = 0;
- w_oconv16_LE = 0;
- ms_ucs_map_f = FALSE;
- strict_mapping_f = TRUE;
- disable_cp932ext_f = FALSE;
+#ifdef UTF8_INPUT_ENABLE
+ no_cp932ext_f = FALSE;
ignore_zwnbsp_f = TRUE;
- unicode_round_trip_f = FALSE;
+ no_best_fit_chars_f = FALSE;
encode_fallback = NULL;
unicode_subchar = '?';
#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ unicode_bom_f = 0;
+ w_oconv16_LE = 0;
+#endif
#ifdef UNICODE_NORMALIZATION
nfc_f = FALSE;
#endif
#endif
#ifdef X0212_ENABLE
x0212_f = FALSE;
+ x0213_f = FALSE;
#endif
{
int i;
input_mode = ASCII;
shift_mode = FALSE;
mime_decode_mode = FALSE;
- file_out = FALSE;
+ file_out_f = FALSE;
crmode_f = 0;
option_mode = 0;
broken_counter = 0;
fprintf(stderr,"Flags:\n");
fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
#ifdef DEFAULT_CODE_SJIS
- fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8N\n");
+ fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
#endif
#ifdef DEFAULT_CODE_JIS
- fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8N\n");
+ fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
#endif
#ifdef DEFAULT_CODE_EUC
- fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8N\n");
+ fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
#endif
#ifdef DEFAULT_CODE_UTF8
- fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8N (DEFAULT)\n");
+ fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
#endif
#ifdef UTF8_OUTPUT_ENABLE
- fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
+ fprintf(stderr," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
#endif
- fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
+ fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
#ifdef UTF8_INPUT_ENABLE
- fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
+ fprintf(stderr," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
#endif
fprintf(stderr,"t no conversion\n");
- fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
+ fprintf(stderr,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
+ fprintf(stderr,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n");
fprintf(stderr,"r {de/en}crypt ROT13/47\n");
fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
fprintf(stderr,"v Show this usage. V: show version\n");
fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
- fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
- fprintf(stderr," 3: Convert HTML Entity\n");
+ fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII\n");
+ fprintf(stderr," 1: Kankaku to 1 space 2: to 2 spaces 3: Convert to HTML Entity\n");
fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
#ifdef MSDOS
fprintf(stderr,"T Text mode output\n");
#endif
fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
- fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
+ fprintf(stderr,"d,c Convert line breaks -d: LF -c: CRLF\n");
fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
- fprintf(stderr,"long name options\n");
- fprintf(stderr," --ic=<input codeset> --oc=<output codeset> set the input or output codeset\n");
- fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
- fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
- fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
- fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
+ fprintf(stderr,"\n");
+ fprintf(stderr,"Long name options\n");
+ fprintf(stderr," --ic=<input codeset> --oc=<output codeset>\n");
+ fprintf(stderr," Specify the input or output codeset\n");
+ fprintf(stderr," --fj --unix --mac --windows\n");
+ fprintf(stderr," --jis --euc --sjis --utf8 --utf16 --mime --base64\n");
+ fprintf(stderr," Convert for the system or code\n");
+ fprintf(stderr," --hiragana --katakana --katakana-hiragana\n");
+ fprintf(stderr," To Hiragana/Katakana Conversion\n");
+ fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
#ifdef INPUT_OPTION
fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
#endif
#endif
#ifdef UTF8_INPUT_ENABLE
fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
- fprintf(stderr," set the way nkf handles unassigned characters\n");
+ fprintf(stderr," Specify how nkf handles unassigned characters\n");
#endif
#ifdef OVERWRITE
- fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
-#endif
- fprintf(stderr," -g, --guess Guess the input code\n");
- fprintf(stderr," --help,--version\n");
+ fprintf(stderr," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n");
+ fprintf(stderr," Overwrite original listed files by filtered result\n");
+ fprintf(stderr," --overwrite preserves timestamp of original files\n");
+#endif
+ fprintf(stderr," -g --guess Guess the input code\n");
+ fprintf(stderr," --help --version Show this help/the version\n");
+ fprintf(stderr," For more information, see also man nkf\n");
+ fprintf(stderr,"\n");
version();
}