mave_base.rb

   1 require 'kconv'
   2
   3 #===============================================================================
   4 #
   5 #       Integer
   6 #
   7 class Integer
   8
   9         #-----------------------------------------------------------
  10         #
  11         #       数値を人間に読みやすい表現形式で返す
  12         #
  13         def to_h(dot = false)
  14                 unless((full = self.to_s).length > 3)
  15                         full
  16                 else
  17                         case full.length % 3
  18                                 when 1 then top = full[0, 1] + (dot ? '.' + full[1, 1] : '')
  19                                 when 2 then top = full[0, 2]
  20                                 when 0 then top = full[0, 3]
  21                         end
  22                         top + '....KKKMMMGGGTTTPPP'[full.length].chr
  23                 end
  24         end
  25 end
  26
  27 #===============================================================================
  28 #
  29 #       String
  30 #
  31 class String
  32
  33         #-----------------------------------------------------------
  34         #
  35         #       キャラクタセットの定義
  36         #
  37         #               http://www.iana.org/assignments/character-sets
  38         #
  39         @@charsets = {
  40                 'ISO-2022-JP'   => Kconv::JIS,
  41                 'SHIFT_JIS'             => Kconv::SJIS,
  42                 'EUC-JP'                => Kconv::EUC,
  43                 'UTF-8'                 => Kconv::UTF8,
  44         }
  45
  46         def self.charset(charset)
  47                 @@charsets[charset]
  48         end
  49
  50         #-----------------------------------------------------------
  51         #
  52         #       メッセージヘッダのデコーダ
  53         #
  54         #               http://tools.ietf.org/html/rfc2047
  55         #
  56         @@decoders = {}
  57         @@current_decode_charset = 'UTF-8'
  58
  59         def self.bind_decoder(code)
  60                 @@decoders[code.upcase] = Proc.new
  61         end
  62
  63         def self.set_decode_charset(charset)
  64                 @@current_decode_charset = charset
  65         end
  66
  67         def decode_mh                                                                                           # decode message header
  68                 gsub(/=\?([^?]+)\?(B|Q)\?([^?]+)\?=/i) {
  69                         $3.decode_ec($2).decode_cs(@@current_decode_charset, $1)
  70                 }.gsub(/[\x00-\x1F]/, '^x')
  71         end
  72
  73         def decode_ec(code)                                                                                     # decode encodings
  74                 (it = @@decoders[code.upcase]) ? it.call(self) : self
  75         end
  76
  77         def decode_cs(out_code, in_code)                                                        # decode character sets
  78                 (it = @@decoders[in_code.upcase]) ? it.call(self, out_code) : self
  79         end
  80
  81         #-----------------------------------------------------------
  82         #
  83         #       文字列を指定の長さに切り詰める
  84         #
  85         @@snippers = {}
  86         @@each_snippers = {}
  87         @@current_snip_charset = 'UTF-8'
  88 #       @@current_snip_charset = 'EUC-JP'
  89 #       @@current_snip_charset = 'SHIFT_JIS'
  90
  91         def self.bind_snipper(charset)
  92                 @@snippers[charset] = Proc.new
  93         end
  94
  95         def self.bind_each_snipper(charset)
  96                 @@each_snippers[charset] = Proc.new
  97         end
  98
  99         def self.set_snip_charset(charset)
 100                 @@current_snip_charset = charset
 101         end
 102
 103         def snip(n)
 104                 @@snippers[@@current_snip_charset].call(self, n)
 105         end
 106
 107         def each_snip(n, max = 9999)
 108                 @@each_snippers[@@current_snip_charset].call(self, n, max, Proc.new)
 109         end
 110
 111         #-----------------------------------------------------------
 112         #
 113         #       端末の UTF-8 対応の不備(記号の幅)を補う
 114         #
 115         #               http://ja.wikipedia.org/wiki/UTF-8
 116         #
 117         def enspc
 118 #               return(self)                                                                                    # UTF-8 以外ならコメントを生かす
 119                 self.gsub(/[\xC0-\xE2][\x80-\xBF]+/) {|c|                               #### for UTF-8 いーかげん
 120                         c + '_'
 121                 }
 122         end
 123
 124         #-----------------------------------------------------------
 125         #
 126         #       Re: をまとめる
 127         #
 128         def group_re(level = 0, re = 'Re')
 129                 base = self.dup
 130                 while(base =~ /^\s*#{re}\^?\d*:/i)
 131                         base.sub!(/^\s*#{re}\^?(\d*):\s*/i) {
 132                                 level += ($1.to_i > 1 ? $1.to_i : 1)
 133                                 ''
 134                         }
 135                 end
 136                 (level < 1 ? '' : "#{re}: ") + base
 137 #               (level < 1 ? '' : "#{re}#{level < 2 ? '' : "^#{level}"}: ") + base  # Re^3 表記
 138         end
 139
 140         #-----------------------------------------------------------
 141         #
 142         #       Fw: をまとめる
 143         #
 144         def group_fw(level = 0)
 145                 group_re(level, 'Fw')
 146         end
 147 end
 148
 149 #===============================================================================
 150 #
 151 #       多言語対応クラス
 152 #
 153 #               http://www.gnu.org/software/gettext/gettext.html
 154 #
 155 class Intl
 156
 157         @@domains = {}
 158         @@domains[@@current_domain = 'default'] = {}
 159
 160         def self.bind_text_domain(domain, dirname = '')
 161                 load "#{dirname}#{domain}.pmo"
 162                 @@domains[domain] = @@catalog
 163         end
 164
 165         def self.set_text_domain(domain)
 166                 @@current_domain = domain
 167         end
 168
 169         def self.get_text(msgid)
 170                 @@domains[@@current_domain][msgid] || msgid
 171         end
 172 end
 173
 174 #===============================================================================
 175 #
 176 #       多言語対応
 177 #
 178 def _(msgid)
 179         Intl.get_text(msgid)
 180 end
 181
 182 #===============================================================================
 183 #
 184 #       その他
 185 #
 186 def yap(arg = 'done.')
 187         @yap = 0 unless(@yap)
 188         print "#{@yap += 1}: #{arg.inspect}\n"
 189 end
 190
 191 def debug(log = 'log.', obj = self)
 192         @debug = File.new('debug.log', 'a') and @debug.write('-' * 76 + "\n") unless(@debug)
 193         @debug.write(obj.to_s + ': ' + log.to_s + "\n")
 194 end
 195
 196 #===============================================================================
 197 #
 198 #       各種デコーダを登録
 199 #
 200 String.bind_decoder('7BIT') {|str|                                                              # 7bit decoder
 201         str
 202 }
 203 String.bind_decoder('8BIT') {|str|                                                              # 8bit decoder
 204         str
 205 }
 206 String.bind_decoder('BINARY') {|str|                                                    # binary decoder?
 207         '-- binary --'
 208 }
 209 String.bind_decoder('BASE64') {|str|                                                    # Base64 decoder
 210         str.unpack('m')[0]
 211 }
 212 String.bind_decoder('QUOTED-PRINTABLE') {|str|                                  # Quoted Printable decoder
 213         str.unpack('M')[0]
 214 }
 215 String.bind_decoder('B') {|str|                                                                 # Base64 decoder
 216         str.unpack('m')[0]
 217 }
 218 String.bind_decoder('Q') {|str|                                                                 # Quoted Printable decoder
 219         str.unpack('M')[0]
 220 }
 221 String.bind_decoder('US-ASCII') {|str, out_code|                                # us-ascii decoder
 222         str
 223 }
 224 String.bind_decoder('ISO-2022-JP') {|str, out_code|                             # iso-2022-jp decoder
 225         str.kconv(String.charset(out_code), Kconv::JIS)
 226 }
 227 String.bind_decoder('SHIFT_JIS') {|str, out_code|                               # shift_jis decoder
 228         str.kconv(String.charset(out_code), Kconv::SJIS)
 229 }
 230 String.bind_decoder('EUC-JP') {|str, out_code|                                  # euc-jp decoder
 231         str.kconv(String.charset(out_code), Kconv::EUC)
 232 }
 233 String.bind_decoder('UTF-8') {|str, out_code|                                   # utf-8 decoder
 234         str.kconv(String.charset(out_code), Kconv::UTF8)
 235 }
 236 #       http://tools.ietf.org/html/rfc2152
 237 String.bind_decoder('UTF-7') {|str, out_code|                                   # utf-7 decoder
 238         str.gsub(%r|\+([A-Za-z0-9+/]+)-?|) {|p|
 239                 ($1 + '==').unpack('m')[0].kconv(Kconv::UTF8, Kconv::UTF16)
 240         }.gsub(/\+-/, '+').kconv(String.charset(out_code), Kconv::UTF8)
 241 }
 242
 243 #### String.wsize                                                                                               # 表示幅を得る
 244
 245 #### String.center                                                                                              # センタリングする
 246
 247 # 11bit: 0xC0-0xDF 0x80-0xBF
 248 # 16bit: 0xE0-0xEF 0x80-0xBF 0x80-0xBF
 249 # 21bit: 0xF0-0xF7 0x80-0xBF 0x80-0xBF 0x80-0xBF
 250 # 26bit: 0xF8-0xFB 0x80-0xBF 0x80-0xBF 0x80-0xBF 0x80-0xBF
 251 # 31bit: 0xFC-0xFD 0x80-0xBF 0x80-0xBF 0x80-0xBF 0x80-0xBF 0x80-0xBF
 252
 253 String.bind_snipper('UTF-8') {|str, n|                                                  # 指定の長さに切り詰める
 254         ws = str[0, n * 2].gsub(/[\xC0-\xFD][\x80-\xBF]+/, "\xFF\xFF")[0, n]
 255         wc = ws.count("\xFF")
 256         str.slice(0, n + wc / 2 - wc % 2) + ' ' * (n - ws.length + wc % 2)
 257 }
 258
 259 String.bind_each_snipper('UTF-8') {|str, n, max, proc|                  # 指定の長さに切り詰め、順に行を渡す
 260         p = 0; while(p <= str.length)                                                           # '<': 改行文字のみの行は省略
 261                 break if((max -= 1) < 0)
 262                 ws = str[p, n * 2].gsub(/[\xC0-\xFD][\x80-\xBF]+/, "\xFF\xFF")[0, n]
 263                 wc = ws.count("\xFF")
 264                 proc.call(str.slice(p, nn = n + wc / 2 - wc % 2) + ' ' * (n - ws.length + wc % 2))
 265                 p += nn
 266         end
 267 }
 268
 269 #     漢字: 0xA1-0xFE 0xA1-0xFE
 270 # 半角カナ: 0x8E 0xA1-0xDF
 271 # 補助漢字: 0x8F 0xA1-0xFE 0xA1-0xFE
 272
 273 String.bind_snipper('EUC-JP') {|str, n|                                                 #### 指定の長さに切り詰める
 274         ws = str[0, n * 2].gsub(/[\xA1-\xFE][\xA1-\xFE]/, "\xFF\xFF")[0, n]
 275         wc = ws.count("\xFF")
 276         str.slice(0, n - wc % 2) + ' ' * (n - ws.length + wc % 2)
 277 }
 278
 279 String.bind_each_snipper('EUC-JP') {|str, n, max, proc|                 #### 指定の長さに切り詰め、順に行を渡す
 280         p = 0; while(p <= str.length)                                                           # '<': 改行文字のみの行は省略
 281                 break if((max -= 1) < 0)
 282                 ws = str[p, n * 2].gsub(/[\xA1-\xFE][\xA1-\xFE]/, "\xFF\xFF")[0, n]
 283                 wc = ws.count("\xFF")
 284                 proc.call(str.slice(p, nn = n - wc % 2) + ' ' * (n - ws.length + wc % 2))
 285                 p += nn
 286         end
 287 }
 288
 289 #### TAB 対応
 290
 291 __END__
 292