# Upper bound on the length of a reading ("phonate"); the check that uses it
# compares it against the character count of the UTF-8-decoded string.
10 our $MAX_PHONATE = 40;
# Length check: decode the reading from UTF-8 so that length() counts
# characters, and warn on STDERR (with the current input line number $.)
# when it exceeds $MAX_PHONATE.
16 if (length(Encode::decode('utf-8', $phonate)) > $MAX_PHONATE) {
17 print STDERR "Warning: $.: too long phonate `$phonate'\n";
# Character check: warn when the reading contains anything outside the
# listed set (hiragana, including small and voiced forms, plus the two
# extra characters at the end of the class: katakana VU and the long-vowel mark).
# NOTE(review): unlike the length check above, $phonate is matched here
# without being decoded. Whether this class is applied per character or per
# byte depends on `use utf8` / input layers that are not visible in this
# excerpt -- confirm.
19 if ($phonate =~ /[^あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもらりるれろがぎぐげござじずぜぞだぢづでどばびぶべぼぁぃぅぇぉっょゃゅゎぱぴぷぺぽやゆよわをんヴー]/) {
# NOTE(review): "ilegal" in the message is a misspelling of "illegal"; it is
# runtime output, so it is left untouched here.
20 print STDERR "Warning: $.: ilegal character in `$phonate'\n";
# Length check: warn on STDERR when the UTF-8-decoded word has more
# characters than $MAX_WORD (declared outside this excerpt).
27 if (length(Encode::decode('utf-8', $word)) > $MAX_WORD) {
28 print STDERR "Warning: $.: too long word `$word'\n";
# Character check: warn when the word contains a space, a tab, a double
# quote, a comma, or a '#'.
30 if ($word =~ /[ \t",#]/) {
# NOTE(review): "ilegal" in the message is a misspelling of "illegal"; it is
# runtime output, so it is left untouched here.
31 print STDERR "Warning: $.: ilegal character in `$word'\n";
# Convert UTF-8 bytes to EUC-JP bytes: decode the value obtained via shift
# as UTF-8, then return it re-encoded as 'euc-jp'.
# (The enclosing sub header is outside this excerpt; presumably shift takes
# the sub's first argument -- confirm.)
36 my $utf8_string = Encode::decode('utf-8', shift);
37 return Encode::encode('euc-jp', $utf8_string);
# Convert UTF-8 bytes to Shift_JIS bytes: decode the value obtained via shift
# as UTF-8, then return it re-encoded as 'shift_jis'.
# (The enclosing sub header is outside this excerpt; presumably shift takes
# the sub's first argument -- confirm.)
41 my $utf8_string = Encode::decode('utf-8', shift);
42 # 'cp932' is deliberately not used: it goes wrong when converting the tilde character.
43 return Encode::encode('shift_jis', $utf8_string);
51 ODIC - 沖縄辞書 <http://www.zukeran.org/o-dic/> フォーマット向けの共通関数
# Per-line record handling, operating on $_.
58 next if (/^\s*$|^\s*\#.*$/); # skip blank lines and comment-only lines
# Form 1: three whitespace-separated fields followed by '#' and a comment.
# $4 captures the leading run of non-control characters after the '#';
# anything from the first control character onward is discarded by the
# trailing .* of the pattern.
60 if (/^(\S+)\s+(\S+)\s+(\S+)\s+#\s*([[:^cntrl:]]*).*$/) {
61 my $phonate = $1; # reading
# (the lines that assign $word and $class are outside this excerpt)
64 my $comment = $4; # comment
# Both checks only print warnings in the visible code; the record is
# still processed afterwards.
65 ODIC::check_phonate($phonate);
66 ODIC::check_word($word);
# Form 2: three whitespace-separated fields with no trailing comment.
69 } elsif (/^(\S+)\s+(\S+)\s+(\S+)/) {
70 my $phonate = $1; # reading
# (the lines that assign $word and $class are outside this excerpt)
73 my $comment = ''; # comment (none on this line)
74 ODIC::check_phonate($phonate);
75 ODIC::check_word($word);
# Presumably the fallback branch when neither pattern matched (its `else`
# is outside this excerpt): report the offending line and its number on STDERR.
79 print STDERR "Error: $.: too few field number `$_'\n";
# Emit one record as four tab-separated columns: reading, word, class, comment.
86 print "$phonate\t$word\t$class\t$comment\n";