# -*- mode: perl; coding: utf-8 -*-
# keitairc/lib/plugins/33distinct
# based on 'Add link to google maps japan on address-like texts'
# by twk, from http://nonn-et-twk.net/twk/node/51
#
# $Id: 33distinct,v 1.1 2008-08-03 02:36:48 morimoto Exp $
# $Source: /home/ishikawa/work/keitairc/tmp/keitairc/lib/plugins/33distinct,v $

# The "# line" directive below must give the number of the line that follows
# it and this file's real name (see perlsyn, "Plain Old Comments (Not!)").
# line 12 "keitairc/lib/plugins/33distinct"

use Encode;
use utf8;

# Plugin that recognises Tokyo street addresses inside a message.
#
# It defines $plugin, a hash reference with three entries:
#   name                            - the plugin name, 'distinct'
#   message_replace_regexp          - pattern string (one capture group) that
#                                     matches an address-like run of text
#   message_replace_imprementation  - callback ($session_id, $src) returning
#                                     the replacement text
#
# All patterns are kept as plain strings and assembled by concatenation, so
# each fragment below must be a self-contained piece of regex source.
#
# Non-ASCII punctuation is written as \x{...} escapes on purpose: the ASCII
# and fullwidth forms then stay distinct even if this file is ever passed
# through a tool that folds fullwidth characters to ASCII.

# Special wards (-ku) of Tokyo, as one alternation.
my $tokyo_special_wards = '(?:千代田区|中央区|港区|新宿区|文京区|台東区|墨田区|江東区|品川区|目黒区|大田区|世田谷区|渋谷区|中野区|杉並区|豊島区|北区|荒川区|板橋区|練馬区|足立区|葛飾区|江戸川区)';

# Cities (-shi) of Tokyo, as one alternation.
my $tokyo_cities = '(?:昭島市|あきる野市|稲城市|青梅市|清瀬市|国立市|小金井市|国分寺市|小平市|狛江市|立川市|多摩市|調布市|西東京市|八王子市|羽村市|東久留米市|東村山市|東大和市|日野市|府中市|福生市|町田市|三鷹市|武蔵野市|武蔵村山市)';

# One digit.  Under Unicode matching rules \d already covers the fullwidth
# digits U+FF10..U+FF19; they are listed anyway to make the intent visible.
my $DIGIT_ex     = '[\d\x{FF10}-\x{FF19}]';
my $NON_DIGIT_ex = '[^\d\x{FF10}-\x{FF19}]';

# Separator between number groups: hyphen-minus, horizontal bar (U+2015),
# prolonged sound mark (U+30FC) and fullwidth hyphen-minus (U+FF0D).  Each is
# listed as a single character; the earlier spelling "[-―-ー]" formed an
# unintended range from U+2015 to U+30FC that also took in all kana.
my $HYPHEN_ex = '[-―ー\x{FF0D}]';

# Block / lot / number suffixes that may follow a number group.
my $UNIT_ex = '(?:丁目|番地?|号)';

# Optional blank run: space, tab or ideographic space (U+3000).
my $SEPARATOR_ex = '[ \t\x{3000}]*';

# One character that can be part of a place name: anything except whitespace,
# colons, parentheses, square brackets (ASCII and fullwidth forms), corner
# brackets and the Japanese comma / full stop.  The whole set must sit inside
# ONE bracketed class; an unescaped "[]" pair inside it would close the class
# early and turn the remaining characters into required literal text.
my $NO_WHITESPACE_ex =
    '[^\s\x{3000}:\x{FF1A}()\x{FF08}\x{FF09}\[\]\x{FF3B}\x{FF3D}「」、。]';

my $SHICHOSON_ex = '(?:' . $tokyo_special_wards . '|' . $tokyo_cities . ')';
my $AZA_ex       = $NO_WHITESPACE_ex . '{1,20}';

# NOTE(review): the original carried a trailing note listing kanji numerals
# (一..十), 東西南北, 無 and ABC here -- presumably further characters that
# could be accepted in a number group but are not yet; confirm before adding.
my $BANCHI_ex = $DIGIT_ex . '{1,8}';

# Leading "name + number(-number)+" part of an address; whatever follows it
# is dropped by the callback.
my $BUILDING1_re = '^' . $NON_DIGIT_ex . '+' . $DIGIT_ex . '+'
    . '(?:(?:' . $HYPHEN_ex . '|丁目|番地?|号)' . $DIGIT_ex . '+)+';

# Address ending in a unit suffix (captured as $1) followed by a run of
# non-digit text that ends in ビル or at end of string.
my $BUILDING2_re = '^(.*(?:丁目|番地?|号))[^地\d\x{FF10}-\x{FF19}]+(?:ビル|$)';

my $re = {
    # ward/city, optional blanks, place name, optional blanks, then one to
    # four number groups, each optionally followed by a suffix and a hyphen
    general => '(?:' . $SHICHOSON_ex . ')'
        . $SEPARATOR_ex
        . '(?:' . $AZA_ex . $SEPARATOR_ex
        . '(?:' . $BANCHI_ex . $UNIT_ex . '?' . $HYPHEN_ex . '?)' . '{1,4}'
        . ')',
    building1 => $BUILDING1_re,
    building2 => $BUILDING2_re,
};

# $plugin is intentionally assigned without a declaration; presumably the
# plugin loader reads it after evaluating this file -- verify against the
# loader before adding "use strict" here.
$plugin = {
    name => 'distinct',

    message_replace_regexp => '(' . $re->{general} . ')',

    # The key spelling ("imprementation") is kept as is, since it is part of
    # the interface this file exposes.
    #
    # Arguments: $session_id - passed through to the formatted result
    #            $src        - text containing the address
    # Returns:   the string produced by the sprintf below.
    message_replace_imprementation => sub {
        my ($session_id, $src) = @_;

        my $general   = $re->{general};
        my $building1 = $re->{building1};
        my $building2 = $re->{building2};

        # Reduce the text to the address itself.
        my $s = $src;
        $s =~ s/.*($general).*/$1/;

        # remove building names since google maps rarely recognize them
        $s =~ s/($building1).*/$1/;
        $s =~ s/$building2.*/$1/;

        # NOTE(review): the format holds a single %s but four arguments are
        # supplied, so only web_root() reaches the result and the session id,
        # escaped address and original text are discarded.  The link template
        # appears to have been lost; restore it from the upstream file rather
        # than guessing at the URL layout.
        return sprintf('%s', $::cf->web_root(), $session_id, URI::Escape::uri_escape_utf8($s), $src);
    },
};

1;