From ec28dd57e17c1d1006d03c0b86679978d66b8ca0 Mon Sep 17 00:00:00 2001
From: void
Date: Mon, 21 Apr 2003 18:20:44 +0000
Subject: [PATCH] add script for chasen

---
 doc/README.1ST    |   3 +-
 script/oki2cha.pl | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+), 1 deletion(-)
 create mode 100755 script/oki2cha.pl

diff --git a/doc/README.1ST b/doc/README.1ST
index 99fbb8e..60f76b1 100644
--- a/doc/README.1ST
+++ b/doc/README.1ST
@@ -1,7 +1,7 @@
 #
 # ²­Æì¼­½ñ¤Ë´Ø¤¹¤ë¥É¥­¥å¥á¥ó¥È
 #
-# $Id: README.1ST,v 1.6 2003/04/06 05:38:46 void Exp $
+# $Id: README.1ST,v 1.7 2003/04/21 18:20:44 void Exp $
 #
 
 1. ²­Æì¼­½ñ¤È¤Ï?
@@ -49,6 +49,7 @@
  oki2kotoeri.pl ¤³¤È¤¨¤ê¤Î°ì³çÅÐÏ¿Íѥƥ­¥¹¥È¤Ø¤ÎÊÑ´¹
  oki2jis.pl JIS X 4062·Á¼°¤Î¥Æ¥­¥¹¥È¤Ø¤ÎÊÑ´¹
  oki2atk.pl ATOK13·Á¼°¤Î¥Æ¥­¥¹¥È¤Ø¤ÎÊÑ´¹
+ oki2cha.pl Ãã䥤ǻȤ¨¤ë·Á¼°¤Î¥Æ¥­¥¹¥È¤ËÊÑ´¹¤¹¤ë
 
 ²­Æì¼­½ñ¤Î¥Ç¡¼¥¿·Á¼°¤Ë¤Ä¤¤¤Æ¤Ï¡¢
 format.txt ¤ò»²¾È¤·¤Æ¤¯¤À¤µ¤¤¡£
diff --git a/script/oki2cha.pl b/script/oki2cha.pl
new file mode 100755
index 0000000..55cd648
--- /dev/null
+++ b/script/oki2cha.pl
@@ -0,0 +1,135 @@
+#!/usr/bin/perl
+#
+# oki2cha.pl - convert Okinawa dictionary files into text usable by ChaSen
+#
+# $Id: oki2cha.pl,v 1.1 2003/04/21 18:20:45 void Exp $
+#
+# Use this script to generate "okinawa.dic" as follows:
+# $ nkf -e *.dic | ./oki2cha.pl > okinawa.dic
+# $ /usr/local/libexec/chasen/makeint okinawa.dic > okinawa.txt
+# $ /usr/local/libexec/chasen/sortdic okinawa.txt > okinawa.int
+# $ /usr/local/libexec/chasen/pattool -F okinawa
+# $ rm okinawa.txt
+#
+# Each input line that is not blank after removing a `#' comment must hold
+# three whitespace-separated fields: reading, word, part-of-speech class.
+# Converted entries go to STDOUT, diagnostics to STDERR.
+require 5.6.0;
+use strict;
+use warnings;
+
+# Input class name => ChaSen part-of-speech list.  Declared ahead of the
+# main loop so the table is populated before convert_class() first runs.
+my %pos_for = (
+    "ÉáÄÌ̾»ì" => "̾»ì °ìÈÌ",
+    "¥µÊÑ̾»ì" => "̾»ì ¥µÊÑÀܳ",
+    "·Áư̾»ì" => "̾»ì ·ÁÍÆÆ°»ì¸ì´´",
+    "À«" => "̾»ì ¸Çͭ̾»ì ¿Í̾ À«",
+    "̾" => "̾»ì ¸Çͭ̾»ì ¿Í̾ ̾",
+    "¤½¤Î¾¤Î¿Í̾" => "̾»ì ¸Çͭ̾»ì ¿Í̾ °ìÈÌ",
+    "ñ½ãÃÏ̾" => "̾»ì ¸Çͭ̾»ì ÃÏ°è °ìÈÌ",
+    "ÀÜÈø¸ìÉÕ¤­ÃÏ̾" => "̾»ì ¸Çͭ̾»ì ÃÏ°è °ìÈÌ",
+    "ÁÈ¿¥Ì¾" => "̾»ì ¸Çͭ̾»ì ÁÈ¿¥",
+    "¤½¤Î¾¸Çͭ̾»ì" => "̾»ì ¸Çͭ̾»ì °ìÈÌ",
+    "Éû»ì" => "Éû»ì °ìÈÌ",
+    "Àܳ»ì" => "Àܳ»ì",
+    "´¶Æ°»ì" => "´¶Æ°»ì",
+    "·ÁÍÆ»ì" => "·ÁÍÆ»ì ¼«Î©",
+    "·ÁÍÆÆ°»ì" => "̾»ì ·ÁÍÆÆ°»ì¸ì´´",
+    "ÀÜƬ¸ì" => "ÀÜƬ»ì ̾»ìÀܳ",    # marked `!!!' by the original author
+    "¿ô»úÎóÀÜƬ¸ì" => "ÀÜƬ»ì ¿ôÀܳ",
+    "ÀÜÈø¸ì" => "̾»ì ÀÜÈø °ìÈÌ",
+    "¿Í̾ÀÜÈø¸ì" => "̾»ì ÀÜÈø ¿Í̾",
+    "ÃÏ̾ÀÜÈø¸ì" => "̾»ì ÀÜÈø ÃÏ°è",
+    "ÁÈ¿¥Ì¾ÀÜÈø¸ì" => "̾»ì ÀÜÈø °ìÈÌ",
+    "¿ô»úÎóÀÜÈø¸ì" => "̾»ì ÀÜÈø ½õ¿ô»ì",
+    "À®¶ç" => "̾»ì °úÍÑʸ»úÎó",
+    "̵ÉÊ»ì" => "̾»ì °úÍÑʸ»úÎó",
+);
+
+header();
+while (my $line = <>) {
+    $line =~ s/#.*$//;              # strip everything from `#' onward
+    next if $line =~ /^\s*$/;       # skip lines left empty by that
+    if ($line =~ /(\S+)\s+(\S+)\s+(\S+)/) {
+        my ($phonate, $word, $class) = ($1, $2, $3);    # reading, word, class
+        check_phonate($phonate);
+        check_word($word);
+        convert_class($phonate, $word, $class);
+    }
+    else {
+        # Quote the line without its newline so the message stays on one line.
+        (my $shown = $line) =~ s/\n\z//;
+        print STDERR "Error: $.: too few field number `$shown'\n";
+        print $line;
+    }
+}
+version();
+exit 0;
+
+
+# check_phonate($phonate)
+# Warn on STDERR when length() of the reading exceeds 40 or when it holds
+# anything outside the kana listed in the character class below.
+# NOTE(review): no encoding layer is set up, so both checks presumably act
+# on the raw bytes produced by `nkf -e' -- confirm that this is intended.
+sub check_phonate {
+    my ($phonate) = @_;
+    if (length($phonate) > 40) {
+        print STDERR "Warning: $.: too long phonate `$phonate'\n";
+    }
+    if ($phonate =~ /[^¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³¤µ¤·¤¹¤»¤½¤¿¤Á¤Ä¤Æ¤È¤Ê¤Ë¤Ì¤Í¤Î¤Ï¤Ò¤Õ¤Ø¤Û¤Þ¤ß¤à¤á¤â¤é¤ê¤ë¤ì¤í¤¬¤®¤°¤²¤´¤¶¤¸¤º¤¼¤¾¤À¤Â¤Å¤Ç¤É¤Ð¤Ó¤Ö¤Ù¤Ü¤¡¤£¤¥¤§¤©¤Ã¤ç¤ã¤å¤î¤Ñ¤Ô¤×¤Ú¤Ý¤ä¤æ¤è¤ï¤ò¤ó¥ô¡¼]/) {
+        print STDERR "Warning: $.: ilegal character in `$phonate'\n";
+    }
+}
+
+
+# check_word($word)
+# Warn on STDERR when length() of the word exceeds 64 or when it holds a
+# blank, tab, double quote, comma, `#' or a parenthesis.  Parentheses are
+# included because they would unbalance the parenthesized entry we print.
+sub check_word {
+    my ($word) = @_;
+    if (length($word) > 64) {
+        print STDERR "Warning: $.: too long word `$word'\n";
+    }
+    if ($word =~ /[ \t",#()]/) {
+        print STDERR "Warning: $.: ilegal character in `$word'\n";
+    }
+}
+
+
+# convert_class($phonate, $word, $class)
+# Print one dictionary entry for the record, using %pos_for to translate
+# the input class.  For a class missing from the table, report it on STDERR
+# and print the three fields comma-separated instead.
+sub convert_class {
+    my ($phonate, $word, $class) = @_;
+    my $pos = $pos_for{$class};
+    if (defined $pos) {
+        print "(ÉÊ»ì ($pos)) ((¸«½Ð¤·¸ì ($word 2000)) (ÆÉ¤ß $phonate))\n";
+    }
+    else {
+        print STDERR "Error: $.: unknown class `$class': $phonate\t$word\n";
+        print "$phonate,$word,$class\n";
+    }
+}
+
+
+# header()
+# Prints nothing.  The body that used to be here was entirely commented
+# out (it printed JIS X 4062 style header records); kept as a call hook.
+sub header {
+    return;
+}
+
+
+# version()
+# Print a final entry whose headword embeds today's date (year/month/day,
+# not zero-padded), so the build date of the dictionary can be looked up.
+sub version {
+    my ($mday, $mon, $year) = (localtime(time()))[3, 4, 5];
+    $year += 1900;                  # localtime years count from 1900
+    $mon++;                         # localtime months are 0-based
+    print "(ÉÊ»ì (̾»ì °úÍÑʸ»úÎó)) ((¸«½Ð¤·¸ì ($year/$mon/$mday²­Æì¼­½ñ¤ÎÆüÉÕ¤± 2000)) (ÆÉ¤ß ¤ª¤­¤Ê¤ï¤¸¤·¤ç¤Î¤Ò¤Å¤±))\n";
+}
-- 
2.11.0