2 # $Id: Jcode.pm,v 1.5 2007/03/11 11:46:05 takezoe Exp $
9 use vars qw($RCSID $VERSION $DEBUG);
11 $RCSID = q$Id: Jcode.pm,v 1.5 2007/03/11 11:46:05 takezoe Exp $;
12 $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
15 # we no longer use Exporter
16 use vars qw($USE_ENCODE);
17 $USE_ENCODE = ($] >= 5.008001);
20 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
22 @EXPORT = qw(jcode getcode);
23 @EXPORT_OK = qw($RCSID $VERSION $DEBUG);
24 %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );
27 q("") => sub { $_[0]->euc },
28 q(==) => sub { overload::StrVal($_[0]) eq overload::StrVal($_[1]) },
29 q(.=) => sub { $_[0]->append( $_[1] ) },
34 $DEBUG and warn "Using Encode";
35 my $data = join("", <DATA>);
39 $DEBUG and warn "Not Using Encode";
40 require Jcode::_Classic;
42 unshift @ISA, qw/Jcode::_Classic/;
43 for my $sub (qw/jcode getcode convert load_module/){
45 *{$sub} = \&{'Jcode::_Classic::' . $sub };
47 for my $enc (qw/sjis jis ucs2 utf8/){
49 *{"euc_" . $enc} = \&{"Jcode::_Classic::" . "euc_" . $enc};
50 *{$enc . "_euc"} = \&{"Jcode::_Classic::" . $enc . "_euc"};
57 # This idea was inspired by JEncode
58 # http://www.donzoko.net/cgi/jencode/
65 use Scalar::Util; # to resolve from_to() vs. 'constant' issue.
71 iso_2022_jp => 'iso-2022-jp',
75 my %ename2j = reverse %jname2e;
77 our $FALLBACK = Encode::LEAVE_SRC;
# Fallback constants re-exposed under the Jcode:: namespace.  Each sub takes
# no arguments (empty prototype) and simply returns the value of the Encode
# constant of the same name, so callers can write Jcode::FB_PERLQQ etc.
78 sub FB_PERLQQ() { Encode::FB_PERLQQ() };
79 sub FB_XMLCREF() { Encode::FB_XMLCREF() };
80 sub FB_HTMLCREF() { Encode::FB_HTMLCREF() };
81 #for my $fb (qw/FB_PERLQQ FB_XMLCREF FB_HTMLCREF/){
83 # *{$fb} = \&{"Encode::$fb"};
87 #######################################
89 #######################################
# Functional shortcut for the constructor: forwards all of its arguments to
# __PACKAGE__->new and returns whatever new() returns.
91 sub jcode { return __PACKAGE__->new(@_); }
94 # Used to be in Jcode::Constants
100 1990 => '\e&\@\e\$B',
103 ASCII => '[\x00-\x7f]',
104 BIN => '[\x00-\x06\x7f\xff]',
105 EUC_0212 => '\x8f[\xa1-\xfe][\xa1-\xfe]',
106 EUC_C => '[\xa1-\xfe][\xa1-\xfe]',
107 EUC_KANA => '\x8e[\xa1-\xdf]',
108 JIS_0208 => "$_0208{1978}|$_0208{1983}|$_0208{1990}",
109 JIS_0212 => "\e" . '\$\(D',
110 JIS_ASC => "\e" . '\([BJ]',
111 JIS_KANA => "\e" . '\(I',
112 SJIS_C => '[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]',
113 SJIS_KANA => '[\xa1-\xdf]',
114 UTF8 => '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
120 $result = $n if $n > $result;
127 my $r_str = ref $arg ? $arg : \$arg;
128 Encode::is_utf8($$r_str) and return 'utf8';
129 my ($code, $nmatch, $sjis, $euc, $utf8) = ("", 0, 0, 0, 0);
130 if ($$r_str =~ /$RE{BIN}/o) { # 'binary'
133 while $$r_str =~ /(\x00$RE{ASCII})+/go;
134 if ($ucs2){ # smells like raw unicode
135 ($code, $nmatch) = ('ucs2', $ucs2);
137 ($code, $nmatch) = ('binary', 0);
140 elsif ($$r_str !~ /[\e\x80-\xff]/o) { # not Japanese
141 ($code, $nmatch) = ('ascii', 1);
145 $RE{JIS_0208}|$RE{JIS_0212}|$RE{JIS_ASC}|$RE{JIS_KANA}
148 ($code, $nmatch) = ('jis', 1);
150 else { # should be euc|sjis|utf8
151 # use of (?:) by Hiroki Ohzaki <ohzaki@iod.ricoh.co.jp>
153 while $$r_str =~ /((?:$RE{SJIS_C})+)/go;
155 while $$r_str =~ /((?:$RE{EUC_C}|$RE{EUC_KANA}|$RE{EUC_0212})+)/go;
157 while $$r_str =~ /((?:$RE{UTF8})+)/go;
158 # $utf8 *= 1.5; # M. Takahashi's suggestion
159 $nmatch = _max($utf8, $sjis, $euc);
160 carp ">DEBUG:sjis = $sjis, euc = $euc, utf8 = $utf8" if $DEBUG >= 3;
162 ($euc > $sjis and $euc > $utf8) ? 'euc' :
163 ($sjis > $euc and $sjis > $utf8) ? 'sjis' :
164 ($utf8 > $euc and $utf8 > $sjis) ? 'utf8' : undef;
166 return wantarray ? ($code, $nmatch) : $code;
170 my $r_str = (ref $_[0]) ? $_[0] : \$_[0];
171 my (undef,$ocode,$icode,$opt) = @_;
172 Encode::is_utf8($$r_str) and utf8::encode($$r_str);
173 defined $icode or $icode = getcode($r_str) or return;
174 $icode eq 'binary' and return $$r_str;
176 $jname2e{$icode} and $icode = $jname2e{$icode};
177 $jname2e{$ocode} and $ocode = $jname2e{$ocode};
181 ? jcode($r_str, $icode)->h2z->$ocode
182 : jcode($r_str, $icode)->z2h->$ocode ;
185 if (Scalar::Util::readonly($$r_str)){
187 Encode::from_to($tmp, $icode, $ocode);
190 Encode::from_to($$r_str, $icode, $ocode);
196 #######################################
198 #######################################
203 bless $self => $class;
204 defined $_[0] or $_[0] = '';
211 my $r_str = (ref $str) ? $str : \$str;
212 my $code = $_[1] if(defined $_[1]);
213 my $icode = $code || getcode($r_str) || 'euc';
214 $self->{icode} = $jname2e{$icode} || $icode;
215 # binary and flagged utf8 are stored as-is
216 unless (Encode::is_utf8($$r_str) || $icode eq 'binary'){
217 $$r_str = decode($self->{icode}, $$r_str);
219 $self->{r_str} = $r_str;
221 $self->{method} = 'Encode';
222 $self->{fallback} = $FALLBACK;
229 my $r_str = (ref $str) ? $str : \$str;
230 my $code = $_[1] if(defined $_[1]);
231 my $icode = $code || getcode($r_str) || 'euc';
232 $self->{icode} = $jname2e{$icode} || $icode;
233 # binary and flagged utf8 are stored as-is
234 unless (Encode::is_utf8($$r_str) || $icode eq 'binary'){
235 $$r_str = decode($self->{icode}, $$r_str);
237 ${ $self->{r_str} } .= $$r_str;
239 $self->{method} = 'internal';
243 #######################################
245 #######################################
247 for my $method (qw/r_str icode nmatch error_m error_r error_tr/){
249 *{$method} = sub { $_[0]->{$method} };
254 @_ or return $self->{fallback};
255 $self->{fallback} = $_[0]|Encode::LEAVE_SRC;
259 #######################################
261 #######################################
# Returns the object's internal string (dereferenced from {r_str}) passed
# through encode_utf8 -- presumably Encode's, imported outside this view.
263 sub utf8 { encode_utf8( ${$_[0]->{r_str}} ) }
266 # Those supported in Jcode 0.x are defined as default
269 for my $enc (keys %jname2e){
271 my $name = $jname2e{$enc} || $enc;
272 my $e = find_encoding($name) or croak "$enc not supported";
274 my $r_str = $_[0]->{r_str};
275 Encode::is_utf8($$r_str) ?
276 $e->encode($$r_str, $_[0]->{fallback}) : $$r_str;
281 # The rest is defined on the fly
290 or confess "$self is not an object";
291 my $myname = $AUTOLOAD;
292 $myname =~ s/.*:://; # strip fully-qualified portion
293 $myname eq 'DESTROY' and return;
294 my $e = find_encoding($myname)
295 or confess __PACKAGE__, ": unknown encoding: $myname";
296 $DEBUG and warn ref($self), "->$myname defined";
300 my $str = ${ $_[0]->{r_str} };
301 Encode::is_utf8($str) ?
302 $e->encode($str, $_[0]->{fallback}) : $str;
307 #######################################
308 # Length, Translation and Fold
309 #######################################
312 length( ${$_[0]->{r_str}} );
317 my $str = ${$self->{r_str}};
318 my $from = Encode::is_utf8($_[0]) ? $_[0] : decode('euc-jp', $_[0]);
319 my $to = Encode::is_utf8($_[1]) ? $_[1] : decode('euc-jp', $_[1]);
320 my $opt = $_[2] || '';
321 $from =~ s,\\,\\\\,og; $from =~ s,/,\\/,og;
322 $to =~ s,\\,\\\\,og; $to =~ s,/,\\/,og;
323 $opt =~ s,[^a-z],,og;
324 my $match = eval qq{ \$str =~ tr/$from/$to/$opt };
326 $self->{error_tr} = $@;
329 $self->{r_str} = \$str;
330 $self->{nmatch} = $match || 0;
336 my $r_str = $self->{r_str};
337 my $bpl = shift || 72;
338 my $nl = shift || "\n";
343 my ($len, $i) = (0,0);
345 if( defined $kin and (ref $kin) eq 'ARRAY' ){
346 %kinsoku = map { my $k = Encode::is_utf8($_) ?
347 $_ : decode('euc-jp' => $_);
351 while($$r_str =~ m/(.)/sg){
353 # <UFF61> \xA1 |0 # HALFWIDTH IDEOGRAPHIC FULL STOP
354 # <UFF9F> \xDF |0 # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
355 my $ord = ord($char);
356 my $clen = $ord < 128 ? 1
358 : $ord <= 0xff9f ? 1 : 2;
359 if ($len + $clen > $bpl){
360 unless($kinsoku{$char}){
368 defined($lines[$i]) or pop @lines;
369 $$r_str = join($nl, @lines);
371 $self->{r_str} = $r_str;
372 my $e = find_encoding($self->{icode});
374 Encode::is_utf8($_) ? $e->encode($_, $self->{fallback}) : $_
377 return wantarray ? @lines : $self;
380 #######################################
382 #######################################
386 my $euc = $self->euc;
387 Encode::JP::H2Z::h2z(\$euc, @_);
388 $self->set($euc => 'euc');
394 my $euc = $self->euc;
395 Encode::JP::H2Z::z2h(\$euc, @_);
396 $self->set($euc => 'euc');
400 #######################################
402 #######################################
406 my $utf8 = Encode::decode('MIME-Header', $self->utf8);
407 $self->set($utf8 =>'utf8');
412 my $str = $self->euc;
414 my $lf = shift || "\n";
415 my $bpl = shift || 76;
416 my ($trailing_crlf) = ($$r_str =~ /(\n|\r|\x0d\x0a)$/o);
417 $str = _mime_unstructured_header($$r_str, $lf, $bpl);
418 not $trailing_crlf and $str =~ s/(\n|\r|\x0d\x0a)$//o;
423 # shamelessly stolen from
424 # http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
427 sub _add_encoded_word {
428 require MIME::Base64;
429 my($str, $line, $bpl) = @_;
431 while (length($str)) {
434 if (length($line) + 22 +
435 ($target =~ /^(?:$RE{EUC_0212}|$RE{EUC_C})/o) * 8 > $bpl) {
436 $line =~ s/[ \t\n\r]*$/\n/;
441 my $iso_2022_jp = jcode($target, 'euc')->iso_2022_jp;
442 if (my $count = ($iso_2022_jp =~ tr/\x80-\xff//d)){
443 $DEBUG and warn $count;
444 $target = jcode($iso_2022_jp, 'iso_2022_jp')->euc;
446 my $encoded = '=?ISO-2022-JP?B?' .
447 MIME::Base64::encode_base64($iso_2022_jp, '')
449 if (length($encoded) + length($line) > $bpl) {
451 s/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|$RE{ASCII})$//o;
459 return $result . $line;
462 sub _mime_unstructured_header {
463 my ($oldheader, $lf, $bpl) = @_;
464 my(@words, @wordstmp, $i);
466 $oldheader =~ s/\s+$//;
467 @wordstmp = split /\s+/, $oldheader;
468 for ($i = 0; $i < $#wordstmp; $i++) {
469 if ($wordstmp[$i] !~ /^[\x21-\x7E]+$/ and
470 $wordstmp[$i + 1] !~ /^[\x21-\x7E]+$/) {
471 $wordstmp[$i + 1] = "$wordstmp[$i] $wordstmp[$i + 1]";
473 push(@words, $wordstmp[$i]);
476 push(@words, $wordstmp[-1]);
477 for my $word (@words) {
478 if ($word =~ /^[\x21-\x7E]+$/) {
479 $header =~ /(?:.*\n)*(.*)/;
480 if (length($1) + length($word) > $bpl) {
481 $header .= "$lf $word";
486 $header = _add_encoded_word($word, $header, $bpl);
488 $header =~ /(?:.*\n)*(.*)/;
489 if (length($1) == $bpl) {
495 $header =~ s/\n? $/\n/;
499 #######################################
500 # Matching and Replacing
501 #######################################
503 no warnings 'uninitialized';
508 my $r_str = $self->{r_str};
509 my $pattern = Encode::is_utf8($_[0]) ? shift : decode("euc-jp" => shift);
510 my $opt = shift || '' ;
513 $pattern =~ s,\\,\\\\,og; $pattern =~ s,/,\\/,og;
514 $opt =~ s,[^a-z],,og;
516 eval qq{ \@match = (\$\$r_str =~ m/$pattern/$opt) };
518 $self->{error_m} = $@;
521 # print @match, "\n";
522 wantarray ? map {encode('euc-jp' => $_)} @match : scalar @match;
528 my $r_str = $self->{r_str};
529 my $pattern = Encode::is_utf8($_[0]) ? shift : decode("euc-jp" => shift);
530 my $replace = Encode::is_utf8($_[0]) ? shift : decode("euc-jp" => shift);
533 $pattern =~ s,\\,\\\\,og; $pattern =~ s,/,\\/,og;
534 $replace =~ s,\\,\\\\,og; $replace =~ s,/,\\/,og;
535 $opt =~ s,[^a-z],,og;
537 eval qq{ (\$\$r_str =~ s/$pattern/$replace/$opt) };
539 $self->{error_s} = $@;
549 Jcode - Japanese Charset Handler
556 Jcode::convert(\$str, $ocode, $icode, "z");
558 print Jcode->new($str)->h2z->tr($from, $to)->utf8;
564 B<<Japanese document is now available as L<Jcode::Nihongo>. >>
566 Jcode.pm supports both object and traditional approach.
567 With object approach, you can go like;
569 $iso_2022_jp = Jcode->new($str)->h2z->jis;
571 Which is more elegant than:
574 &jcode::convert(\$iso_2022_jp, 'jis', &jcode::getcode(\$str), "z");
576 For those unfamiliar with objects, Jcode.pm still supports C<getcode()>
579 If the perl version is 5.8.1 or later, Jcode acts as a wrapper to L<Encode>,
580 the standard charset handler module for Perl 5.8 or later.
584 Methods mentioned here all return Jcode object unless otherwise mentioned.
590 =item $j = Jcode-E<gt>new($str [, $icode])
592 Creates Jcode object $j from $str. Input code is automatically checked
593 unless you explicitly set $icode. For available charset, see L<getcode>
596 For perl 5.8.1 or better, C<$icode> can be I<any encoding name>
597 that L<Encode> understands.
599 $j = Jcode->new($european, 'iso-latin1');
601 When the object is stringified, it returns the EUC-converted string so
602 you can C<print $j> instead of C<< print $j->euc >>.
606 =item Passing Reference
608 Instead of a scalar value, you can use a reference as
612 This saves a little time, in exchange for the value of $str being
613 converted. (In a way, $str is now "tied" to the Jcode object.)
617 =item $j-E<gt>set($str [, $icode])
619 Sets $j's internal string to $str. Handy when you use Jcode object repeatedly
620 (saves time and memory to create object).
622 # converts mailbox to SJIS format
623 my $jconv = new Jcode;
626 print $jconv->set(\$_)->mime_decode->sjis;
629 =item $j-E<gt>append($str [, $icode]);
631 Appends $str to $j's internal string.
633 =item $j = jcode($str [, $icode]);
635 shortcut for Jcode->new() so you can go like;
639 =head2 Encoded Strings
641 In general, you can retrieve I<encoded> string as $j-E<gt>I<encoded>.
645 =item $sjis = jcode($str)->sjis
647 =item $euc = $j-E<gt>euc
649 =item $jis = $j-E<gt>jis
651 =item $sjis = $j-E<gt>sjis
653 =item $ucs2 = $j-E<gt>ucs2
655 =item $utf8 = $j-E<gt>utf8
657 What you code is what you get :)
659 =item $iso_2022_jp = $j-E<gt>iso_2022_jp
661 Same as C<< $j->h2z->jis >>.
662 Hankaku Kanas are forcibly converted to Zenkaku.
664 For perl 5.8.1 and better, you can also use any encoding names and
665 aliases that Encode supports. For example:
667 $european = $j->iso_latin1; # replace '-' with '_' for names.
669 B<FYI>: L<Encode::Encoder> uses similar trick.
673 =item $j-E<gt>fallback($fallback)
675 For perl 5.8.1 or better, Jcode stores the internal string in
676 UTF-8. Any character that does not map to I<< -E<gt>encoding >> is
677 replaced with a '?', which is the L<Encode> standard.
679 my $unistr = "\x{262f}"; # YIN YANG
680 my $j = jcode($unistr); # $j->euc is '?'
682 You can change this behavior by specifying fallback like L<Encode>.
683 Values are the same as L<Encode>. C<Jcode::FB_PERLQQ>,
684 C<Jcode::FB_XMLCREF>, C<Jcode::FB_HTMLCREF> are aliased to those
685 of L<Encode> for convenience.
687 print $j->fallback(Jcode::FB_PERLQQ)->euc; # '\x{262f}'
688 print $j->fallback(Jcode::FB_XMLCREF)->euc; # '&#x262f;'
689 print $j->fallback(Jcode::FB_HTMLCREF)->euc; # '&#9775;'
691 The global variable C<$Jcode::FALLBACK> stores the default fallback so you can override that by assigning the value.
693 $Jcode::FALLBACK = Jcode::FB_PERLQQ; # set default fallback scheme
697 =item [@lines =] $jcode-E<gt>jfold([$width, $newline_str, $kref])
699 folds lines in jcode string every $width (default: 72) where $width is
700 the number of "halfwidth" characters. Fullwidth characters are counted
703 with a newline string specified by $newline_str (default: "\n").
705 Rudimentary kinsoku support is now available for Perl 5.8.1 and better.
707 =item $length = $jcode-E<gt>jlength();
709 returns character length properly, rather than byte length.
713 =head2 Methods that use MIME::Base64
715 To use methods below, you need L<MIME::Base64>. To install, simply
717 perl -MCPAN -e 'CPAN::Shell->install("MIME::Base64")'
719 If your perl is 5.6 or better, there is no need since L<MIME::Base64>
724 =item $mime_header = $j-E<gt>mime_encode([$lf, $bpl])
726 Converts $str to MIME-Header documented in RFC1522.
727 When $lf is specified, it uses $lf to fold line (default: \n).
728 When $bpl is specified, it uses $bpl for the number of bytes (default: 76;
729 this number must not be greater than 76).
731 For Perl 5.8.1 or better, you can also encode MIME Header as:
733 $mime_header = $j->MIME_Header;
735 In which case the resulting C<$mime_header> is MIME-B-encoded UTF-8
736 whereas C<< $j->mime_encode() >> returns MIME-B-encoded ISO-2022-JP.
737 Most modern MUAs support both.
739 =item $j-E<gt>mime_decode;
741 Decodes MIME-Header in Jcode object. For perl 5.8.1 or better, you
742 can also do the same as:
744 Jcode->new($str, 'MIME-Header')
748 =head2 Hankaku vs. Zenkaku
752 =item $j-E<gt>h2z([$keep_dakuten])
754 Converts X201 kana (Hankaku) to X208 kana (Zenkaku).
755 When $keep_dakuten is set, it leaves dakuten as is
756 (That is, "ka + dakuten" is left as is instead of
757 being converted to "ga")
759 You can retrieve the number of matches via $j->nmatch;
763 Converts X208 kana (Zenkaku) to X201 kana (Hankaku).
765 You can retrieve the number of matches via $j->nmatch;
769 =head2 Regexp emulators
771 To use C<< -E<gt>m() >> and C<< -E<gt>s() >>, you need perl 5.8.1 or
776 =item $j-E<gt>tr($from, $to, $opt);
778 Applies C<tr/$from/$to/> on Jcode object where $from and $to are
779 EUC-JP strings. On perl 5.8.1 or better, $from and $to can
780 also be flagged UTF-8 strings.
782 If C<$opt> is set, C<tr/$from/$to/$opt> is applied. C<$opt> must
783 be 'c', 'd' or the combination thereof.
785 You can retrieve the number of matches via $j->nmatch;
787 The following methods are available only for perl 5.8.1 or better.
789 =item $j-E<gt>s($pattern, $replace, $opt);
791 Applies C<s/$pattern/$replace/$opt>. C<$pattern> and C<$replace> must
792 be in EUC-JP or flagged UTF-8. C<$opt> is the same as regexp options.
793 See L<perlre> for regexp options.
795 Like C<< $j->tr() >>, C<< $j->s() >> returns the object itself so
796 you can nest the operation as follows;
798 $j->tr("a-z", "A-Z")->s("foo", "bar");
800 =item [@match = ] $j-E<gt>m($pattern, $opt);
802 Applies C<m/$pattern/$opt>. Note that this method DOES NOT RETURN
803 AN OBJECT so you can't chain the method like C<< $j->s() >>.
807 =head2 Instance Variables
809 If you need to access instance variables of Jcode object, use access
810 methods below instead of directly accessing them (That's what OOP
813 FYI, Jcode uses a ref to array instead of ref to hash (common way) to
814 optimize speed (Actually you don't have to know as long as you use
815 access methods instead; Once again, that's OOP)
821 Reference to the EUC-coded String.
825 Input charcode in recent operation.
829 Number of matches (Used in $j->tr, etc.)
839 =item ($code, [$nmatch]) = getcode($str)
841 Returns char code of $str. Return codes are as follows
843 ascii Ascii (Contains no Japanese Code)
844 binary Binary (Not Text File)
847 jis JIS (ISO-2022-JP)
848 ucs2 UCS2 (Raw Unicode)
851 When array context is used instead of scalar, it also returns how many
852 character codes are found. As mentioned above, $str can be \$str
855 B<jcode.pl Users:> This function is 100% upward-compatible with
856 jcode::getcode() -- well, almost;
858 * When its return value is an array, the order is the opposite;
859 jcode::getcode() returns $nmatch first.
861 * jcode::getcode() returns 'undef' when the number of EUC characters
862 is equal to that of SJIS. Jcode::getcode() returns EUC. for
863 Jcode.pm there is no in-betweens.
865 =item Jcode::convert($str, [$ocode, $icode, $opt])
867 Converts $str to char code specified by $ocode. When $icode is specified
868 also, it assumes $icode for input string instead of the one checked by
869 getcode(). As mentioned above, $str can be \$str instead.
871 B<jcode.pl Users:> This function is 100% upward-compatible with
878 For perl 5.8.1 or later, Jcode acts as a wrapper to L<Encode>.
879 Meaning Jcode is subject to bugs therein.
881 =head1 ACKNOWLEDGEMENTS
883 This package owes a lot in motivation, design, and code, to the jcode.pl
884 for Perl4 by Kazumasa Utashiro <utashiro@iij.ad.jp>.
886 Hiroki Ohzaki <ohzaki@iod.ricoh.co.jp> has helped me polish regexp from the
887 very first stage of development.
889 JEncode by makamaka@donzoko.net has inspired me to integrate Encode to
890 Jcode. He has also contributed Japanese POD.
892 And folks at Jcode Mailing list <jcode5@ring.gr.jp>. Without them, I
893 couldn't have coded this far.
901 L<http://www.iana.org/assignments/character-sets>
905 Copyright 1999-2005 Dan Kogai <dankogai@dan.co.jp>
907 This library is free software; you can redistribute it
908 and/or modify it under the same terms as Perl itself.