man/nkf.1.pm

   1 # Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
   2 # Copyright (c) 1996-2018, The nkf Project.
   3 # All rights reserved.
   4 #
   5 # This software is provided 'as-is', without any express or implied
   6 # warranty. In no event will the authors be held liable for any damages
   7 # arising from the use of this software.
   8 #
   9 # Permission is granted to anyone to use this software for any purpose,
  10 # including commercial applications, and to alter it and redistribute it
  11 # freely, subject to the following restrictions:
  12 #
  13 # 1. The origin of this software must not be misrepresented; you must not
  14 # claim that you wrote the original software. If you use this software
  15 # in a product, an acknowledgment in the product documentation would be
  16 # appreciated but is not required.
  17 #
  18 # 2. Altered source versions must be plainly marked as such, and must not be
  19 # misrepresented as being the original software.
  20 #
  21 # 3. This notice may not be removed or altered from any source distribution.
  22
  23 package NKF;    # Perl-side loader for the NKF extension; the user manual is the POD after __END__
  24
  25 use strict;
  26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);    # package globals used below (@EXPORT_OK is declared but never assigned here)
  27
  28 require Exporter;      # provides the import() that honours @EXPORT
  29 require DynaLoader;    # provides the bootstrap method called below
  30
  31 @ISA = qw(Exporter DynaLoader);
  32 # Names exported into the caller's namespace by default.
  33 # None of them is defined as a Perl sub in this file; presumably they are
  34 # provided by the compiled code that bootstrap loads below -- TODO confirm.
  35 @EXPORT = qw(
  36         nkf     nkf_continue    inputcode
  37 );
  38 $VERSION = '2.15';
  39
  40 bootstrap NKF $VERSION;    # indirect-object method call, i.e. NKF->bootstrap($VERSION), inherited from DynaLoader
  41
  42 # No Perl-level subs are defined in this file.
  43
  44 # Everything after __END__ is POD documentation, not code.
  45
  46 1;    # a module file must end with a true value so that require/use succeeds
  47 __END__
  48
  49 #
  50 #
  51
  52 =head1 NAME
  53
  54
  55
  56 nkf - Network Kanji Filter
  57
  58
  59 =head1 SYNOPSIS
  60
  61
  62
  63 nkf B<[-butjnesliohrTVvwWJESZxXFfmMBOcdILg]> B<[>I<file ...>B<]>
  64
  65
  66 =head1 DESCRIPTION
  67
  68
  69 B<Nkf> is yet another kanji code converter among networks, hosts and terminals.
  70 It converts input kanji code to designated kanji code
  71 such as ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8, UTF-16 or UTF-32.
  72
  73 One of the most distinctive features of B<nkf> is that it guesses the input kanji encoding.
  74 It currently recognizes ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8, UTF-16 and UTF-32.
  75 So users needn't set the input kanji code explicitly.
  76
  77 By default, X0201 kana is converted into X0208 kana.
  78 For X0201 kana, SO/SI, SS2 and ESC-(-I methods are supported.
  79 For automatic code detection, nkf assumes no X0201 kana in Shift_JIS.
  80 To accept X0201 in Shift_JIS, use B<-X>, B<-x> or B<-S>.
  81
  82 Multiple options are specified as separate strings, such as
  83
  84   print nkf('--ic=UTF8-MAC', '-w', $string), "\n";
  85
  86 where every argument except the last one is an option.
  87
  88 =head1 OPTIONS
  89
  90 =over
  91
  92 =item B<-J -S -E -W -W16 -W32 -j -s -e -w -w16 -w32>
  93
  94 Specify input and output encodings. Upper case is input.
  95 cf. --ic and --oc.
  96
  97 =over
  98
  99 =item B<-J>
 100
 101 ISO-2022-JP (JIS code).
 102
 103 =item B<-S>
 104
 105 Shift_JIS and JIS X 0201 kana.
 106 EUC-JP is recognized as X0201 kana. Without the B<-x> flag,
 107 JIS X 0201 Katakana (a.k.a. halfwidth kana) is converted into JIS X 0208.
 108 If you use Windows, see Windows-31J (CP932).
 109
 110 =item B<-E>
 111
 112 EUC-JP.
 113
 114 =item B<-W>
 115
 116 UTF-8N.
 117
 118 =item B<-W16[BL][0]>
 119
 120 UTF-16.
 121 B or L specifies Big Endian or Little Endian.
 122 0 specifies whether to put a BOM or not.
 123
 124 =item B<-W32[BL][0]>
 125
 126 UTF-32.
 127 B or L specifies Big Endian or Little Endian.
 128 0 specifies whether to put a BOM or not.
 129
 130 =back
 131
 132 =item B<-b -u>
 133
 134 B<-b>: output is buffered (DEFAULT). B<-u>: output is unbuffered.
 135
 136 =item B<-t>
 137
 138 No conversion.
 139
 140 =item B<-i[@B]>
 141
 142 Specify the escape sequence for JIS X 0208.
 143
 144 =over
 145
 146 =item B<-i@>
 147
 148 Use ESC $ @. (JIS X 0208-1978)
 149
 150 =item B<-iB>
 151
 152 Use ESC $ B. (JIS X 0208-1983/1990 DEFAULT)
 153
 154 =back
 155
 156 =item B<-o[BJ]>
 157
 158 Specify the escape sequence for US-ASCII/JIS X 0201 Roman. (DEFAULT B)
 159
 160 =item B<-r>
 161
 162 {de/en}crypt ROT13/47
 163
 164 =item B<-h[123] --hiragana --katakana --katakana-hiragana>
 165
 166 =over
 167
 168 =item B<-h1 --hiragana>
 169
 170 Katakana to Hiragana conversion.
 171
 172 =item B<-h2 --katakana>
 173
 174 Hiragana to Katakana conversion.
 175
 176 =item B<-h3 --katakana-hiragana>
 177
 178 Katakana to Hiragana and Hiragana to Katakana conversion.
 179
 180 =back
 181
 182 =item B<-T>
 183
 184 Text mode output (MS-DOS)
 185
 186 =item B<-f[I<m> [- I<n>]]>
 187
 188 Fold lines at length I<m> with a margin of I<n>.
 189 When I<m> and I<n> are omitted, the fold length is 60 and the fold margin is 10.
 190
 191 =item B<-F>
 192
 193 New line preserving line folding.
 194
 195 =item B<-Z[0-3]>
 196
 197 Convert X0208 alphabet (Fullwidth Alphabets) to ASCII.
 198
 199 =over
 200
 201 =item B<-Z -Z0>
 202
 203 Convert X0208 alphabet to ASCII.
 204
 205 =item B<-Z1>
 206
 207 Convert X0208 kankaku (ideographic space) to a single ASCII space.
 208
 209 =item B<-Z2>
 210
 211 Convert X0208 kankaku (ideographic space) to two ASCII spaces.
 212
 213 =item B<-Z3>
 214
 215 Replace fullwidth >, <, ", & with '&gt;', '&lt;', '&quot;', '&amp;' as in HTML.
 216
 217 =back
 218
 219 =item B<-X -x>
 220
 221 With B<-X> or without this option, X0201 kana is converted into X0208 kana.
 222 With B<-x>, try to preserve X0201 kana and do not convert it to X0208.
 223 In JIS output, ESC-(-I is used. In EUC output, SS2 is used.
 224
 225 =item B<-B[0-2]>
 226
 227 Assume broken JIS-Kanji input, which lost ESC.
 228 Useful when your site is using old B-News Nihongo patch.
 229
 230 =over
 231
 232 =item B<-B1>
 233
 234 allows any chars after ESC-( or ESC-$.
 235
 236 =item B<-B2>
 237
 238 force ASCII after NL.
 239
 240 =back
 241
 242 =item B<-I>
 243
 244 Replace non-ISO-2022-JP characters with a geta character
 245 (substitute character in Japanese).
 246
 247 =item B<-m[BQN0]>
 248
 249 MIME ISO-2022-JP/ISO8859-1 decode. (DEFAULT)
 250 To see ISO8859-1 (Latin-1), B<-l> is necessary.
 251
 252 =over
 253
 254 =item B<-mB>
 255
 256 Decode MIME base64 encoded stream. Remove header or other part before
 257 conversion.
 258
 259 =item B<-mQ>
 260
 261 Decode MIME quoted stream. '_' in quoted stream is converted to space.
 262
 263 =item B<-mN>
 264
 265 Non-strict decoding.
 266 It allows line break in the middle of the base64 encoding.
 267
 268 =item B<-m0>
 269
 270 No MIME decode.
 271
 272 =back
 273
 274 =item B<-M>
 275
 276 MIME encode. Header style. All ASCII code and control characters are left intact.
 277
 278 =over
 279
 280 =item B<-MB>
 281
 282 MIME encode Base64 stream.
 283 Kanji conversion is performed before encoding, so this cannot be used as a picture encoder.
 284
 285 =item B<-MQ>
 286
 287 Perform quoted encoding.
 288
 289 =back
 290
 291 =item B<-l>
 292
 293 Input and output code is ISO8859-1 (Latin-1) and ISO-2022-JP.
 294 B<-s>, B<-e> and B<-x> are not compatible with this option.
 295
 296 =item B<-L[uwm] -d -c>
 297
 298 Convert line breaks.
 299
 300 =over
 301
 302 =item B<-Lu -d>
 303
 304 unix (LF)
 305
 306 =item B<-Lw -c>
 307
 308 windows (CRLF)
 309
 310 =item B<-Lm>
 311
 312 mac (CR)
 313
 314 Without this option, nkf doesn't convert line breaks.
 315
 316 =back
 317
 318 =item B<--fj --unix --mac --msdos --windows>
 319
 320 Convert for these systems.
 321
 322 =item B<--jis --euc --sjis --mime --base64>
 323
 324 Convert to named code.
 325
 326 =item B<--jis-input --euc-input --sjis-input --mime-input --base64-input>
 327
 328 Assume input system
 329
 330 =item B<--ic=I<input codeset> --oc=I<output codeset>>
 331
 332 Set the input or output codeset.
 333 NKF supports the following codesets; codeset names are case insensitive.
 334
 335 =over
 336
 337 =item ISO-2022-JP
 338
 339 a.k.a. RFC1468, 7bit JIS, JUNET
 340
 341 =item EUC-JP (eucJP-nkf)
 342
 343 a.k.a. AT&T JIS, Japanese EUC, UJIS
 344
 345 =item eucJP-ascii
 346
 347 =item eucJP-ms
 348
 349 =item CP51932
 350
 351 Microsoft Version of EUC-JP.
 352
 353 =item Shift_JIS
 354
 355 a.k.a. SJIS, MS_Kanji
 356
 357 =item Windows-31J
 358
 359 a.k.a. CP932
 360
 361 =item UTF-8
 362
 363 same as UTF-8N
 364
 365 =item UTF-8N
 366
 367 UTF-8 without BOM
 368
 369 =item UTF-8-BOM
 370
 371 UTF-8 with BOM
 372
 373 =item UTF8-MAC (input only)
 374
 375 decomposed UTF-8
 376
 377 =item UTF-16
 378
 379 same as UTF-16BE
 380
 381 =item UTF-16BE
 382
 383 UTF-16 Big Endian without BOM
 384
 385 =item UTF-16BE-BOM
 386
 387 UTF-16 Big Endian with BOM
 388
 389 =item UTF-16LE
 390
 391 UTF-16 Little Endian without BOM
 392
 393 =item UTF-16LE-BOM
 394
 395 UTF-16 Little Endian with BOM
 396
 397 =item UTF-32
 398
 399 same as UTF-32BE
 400
 401 =item UTF-32BE
 402
 403 UTF-32 Big Endian without BOM
 404
 405 =item UTF-32BE-BOM
 406
 407 UTF-32 Big Endian with BOM
 408
 409 =item UTF-32LE
 410
 411 UTF-32 Little Endian without BOM
 412
 413 =item UTF-32LE-BOM
 414
 415 UTF-32 Little Endian with BOM
 416
 417 =back
 418
 419 =item B<--fb-{skip, html, xml, perl, java, subchar}>
 420
 421 Specify the way that nkf handles unassigned characters.
 422 Without this option, --fb-skip is assumed.
 423
 424 =item B<--prefix=I<escape character>I<target character>..>
 425
 426 When nkf converts to Shift_JIS,
 427 nkf adds the specified escape character to the specified 2nd bytes of Shift_JIS characters.
 428 The 1st byte of the argument is the escape character and the following bytes are target characters.
 429
 430 =item B<--no-cp932ext>
 431
 432 Handle the characters extended in CP932 as unassigned characters.
 433
 434 =item B<--no-best-fit-chars>
 435
 436 When converting from Unicode to an encoded byte sequence,
 437 don't convert characters which are not round trip safe.
 438 When converting from Unicode to Unicode,
 439 with this and the B<-x> option, nkf can be used as a UTF converter.
 440 (In other words, without this and the B<-x> option, nkf doesn't preserve some characters.)
 441
 442 When nkf converts strings that relate to paths, you should use this option.
 443
 444 =item B<--cap-input>
 445
 446 Decode hex encoded characters.
 447
 448 =item B<--url-input>
 449
 450 Unescape percent escaped characters.
 451
 452 =item B<--numchar-input>
 453
 454 Decode character reference, such as "&#....;".
 455
 456
 457 =item B<--in-place[=>I<SUFFIX>B<]>  B<--overwrite[=>I<SUFFIX>B<]>
 458
 459 Overwrite the B<original> listed files with the filtered result.
 460
 461 B<Note>: B<--overwrite> preserves the timestamps of the original files.
 462
 463 =item B<--guess=[12]>
 464
 465 Print guessed encoding and newline. (2 is default, 1 is only encoding)
 466
 467 =item B<--help>
 468
 469 Print nkf's help.
 470
 471 =item B<--version>
 472
 473 Print nkf's version.
 474
 475
 476 =item B<-->
 477
 478 Ignore rest of -option.
 479
 480 =back
 481
 482 =head1 AUTHOR
 483
 484 Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
 485
 486 Copyright (c) 1996-2018, The nkf Project.
 487
 488
 489 =cut