From dc95919f41955987bc28c0f922f3949f0f0ed575 Mon Sep 17 00:00:00 2001 From: "YAMASHIRO, Jun" Date: Sun, 11 May 2014 13:57:44 +0900 Subject: [PATCH] =?utf8?q?1be894d798e=20=E3=81=A7=E5=BF=98=E3=82=8C?= =?utf8?q?=E3=81=A6=E3=81=84=E3=81=9F=20wcount.pl=20=E3=81=AE=20UTF-8=20?= =?utf8?q?=E5=AF=BE=E5=BF=9C=E3=82=92=20commit=20=E3=81=97=E3=81=9F?= =?utf8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- script/wcount.pl | 91 ++++++++++++++++++++++---------------------------------- 1 file changed, 35 insertions(+), 56 deletions(-) diff --git a/script/wcount.pl b/script/wcount.pl index e8d4375..633953c 100755 --- a/script/wcount.pl +++ b/script/wcount.pl @@ -1,28 +1,29 @@ #!/usr/bin/perl # -# wcount.pl - ²­Æì¼­½ñ¤Îñ¸ì¿ô¤ò¥Õ¥¡¥¤¥ë¤´¤È¤Ë½¸·× +# wcount.pl - 沖縄辞書の単語数をファイルごとに集計 # # $Id: wcount.pl,v 1.5 2002/06/16 04:31:52 void Exp $ # -# # foreach i (*.dic) -# nkf -e $i | ./wcount.pl | wc -l +# # foreach i (../*.dic) +# cat $i | ./wcount.pl | wc -l # echo $i # end require 5.6.0; +require 'ODIC.pm'; use strict; our $phonate; our $word; our $class; while (<>) { - s/#.*$//; # `#'°Ü¹Ô¤ò¼è¤êµî¤ë - next if (/^\s*$/); # ¤½¤Î·ë²Ì¶õ¹Ô¤Ë¤Ê¤Ã¤¿¹Ô¤ÏÆɤßÈô¤Ð¤¹¡£ + s/#.*$//; # `#'以降を取り去る + next if (/^\s*$/); # その結果空行になった行は読み飛ばす。 if (/(\S+)\s+(\S+)\s+(\S+)/) { - $phonate = $1; # ÆÉ¤ß - $word = $2; # ñ¸ì - $class = $3; # ÉÊ»ì - &check_phonate; - &check_word; + $phonate = $1; # 読み + $word = $2; # 単語 + $class = $3; # 品詞 + ODIC::check_phonate($phonate); + ODIC::check_word($word); &check_class; } else { @@ -33,99 +34,77 @@ while (<>) { exit 0; -sub check_phonate -{ - if (length($phonate) > 40) { - print STDERR "Warning: $.: too long phonate `$phonate'\n"; - } - if ($phonate =~ /[^¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³¤µ¤·¤¹¤»¤½¤¿¤Á¤Ä¤Æ¤È¤Ê¤Ë¤Ì¤Í¤Î¤Ï¤Ò¤Õ¤Ø¤Û¤Þ¤ß¤à¤á¤â¤é¤ê¤ë¤ì¤í¤¬¤®¤°¤²¤´¤¶¤¸¤º¤¼¤¾¤À¤Â¤Å¤Ç¤É¤Ð¤Ó¤Ö¤Ù¤Ü¤¡¤£¤¥¤§¤©¤Ã¤ç¤ã¤å¤î¤Ñ¤Ô¤×¤Ú¤Ý¤ä¤æ¤è¤ï¤ò¤ó¥ô¡¼]/) { - print STDERR "Warning: $.: ilegal character in `$phonate'\n"; - } -} - - -sub check_word -{ - if (length($word) > 64) { - print STDERR "Warning: $.: too long word `$word'\n"; - } - if ($word =~ /[ \t",#]/) { - print STDERR "Warning: $.: ilegal character in `$word'\n"; - } -} - - sub check_class { - if ($class eq "ÉáÄÌ̾»ì") { + if ($class eq "普通名詞") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "¥µÊÑ̾»ì") { + elsif ($class eq "サ変名詞") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "·Áư̾»ì") { + elsif ($class eq "形動名詞") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "À«") { + elsif ($class eq "姓") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "̾") { + elsif ($class eq "名") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "¤½¤Î¾¤Î¿Í̾") { + elsif ($class eq "その他の人名") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "ñ½ãÃÏ̾") { + elsif ($class eq "単純地名") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "ÀÜÈø¸ìÉÕ¤­ÃÏ̾") { + elsif ($class eq "接尾語付き地名") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "ÁÈ¿¥Ì¾") { + elsif ($class eq "組織名") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "¤½¤Î¾¸Çͭ̾»ì") { + elsif ($class eq "その他固有名詞") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "Éû»ì") { + elsif ($class eq "副詞") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "Àܳ»ì") { + elsif ($class eq "接続詞") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "´¶Æ°»ì") { + elsif ($class eq "感動詞") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "·ÁÍÆ»ì") { + elsif ($class eq "形容詞") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "·ÁÍÆÆ°»ì") { + elsif ($class eq "形容動詞") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "ÀÜƬ¸ì") { + elsif ($class eq "接頭語") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "¿ô»úÎóÀÜƬ¸ì") { + elsif ($class eq "数字列接頭語") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "ÀÜÈø¸ì") { + elsif ($class eq "接尾語") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "¿Í̾ÀÜÈø¸ì") { + elsif ($class eq "人名接尾語") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "ÃÏ̾ÀÜÈø¸ì") { + elsif ($class eq "地名接尾語") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "ÁÈ¿¥Ì¾ÀÜÈø¸ì") { + elsif ($class eq "組織名接尾語") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "¿ô»úÎóÀÜÈø¸ì") { + elsif ($class eq "数字列接尾語") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "À®¶ç") { + elsif ($class eq "成句") { print "$phonate\t$word\tclass\n"; } - elsif ($class eq "̵ÉÊ»ì") { + elsif ($class eq "無品詞") { print "$phonate\t$word\tclass\n"; } else { -- 2.11.0