mave_base.rb

   1 require 'kconv'
   2
   3 #===============================================================================
   4 #
   5 #       Integer
   6 #
   7 class Integer
   8
   9         #-----------------------------------------------------------
  10         #
  11         #       数値を人間に読みやすい表現形式で返す
  12         #
  13         def to_h(dot = false)
  14                 unless((full = self.to_s).length > 3)
  15                         full
  16                 else
  17                         case full.length % 3
  18                                 when 1 then top = full[0, 1] + (dot ? '.' + full[1, 1] : '')
  19                                 when 2 then top = full[0, 2]
  20                                 when 0 then top = full[0, 3]
  21                         end
  22                         top + '....KKKMMMGGGTTTPPP'[full.length].chr
  23                 end
  24         end
  25 end
  26
  27 #===============================================================================
  28 #
  29 #       String
  30 #
  31 class String
  32
  33         #-----------------------------------------------------------
  34         #
  35         #       キャラクタセットの定義
  36         #
  37         #               http://www.iana.org/assignments/character-sets
  38         #
  39         @@charsets = {
  40                 'ISO-2022-JP'   => Kconv::JIS,
  41                 'SHIFT_JIS'             => Kconv::SJIS,
  42                 'EUC-JP'                => Kconv::EUC,
  43                 'UTF-8'                 => Kconv::UTF8,
  44         }
  45
  46         def self.charset(charset)
  47                 @@charsets[charset]
  48         end
  49
  50         #-----------------------------------------------------------
  51         #
  52         #       メッセージヘッダのデコーダ
  53         #
  54         #               http://tools.ietf.org/html/rfc2047
  55         #
  56         @@decoders = {}
  57         @@current_decode_charset = 'UTF-8'
  58
  59         def self.bind_decoder(code)
  60                 @@decoders[code.upcase] = Proc.new
  61         end
  62
  63         def self.set_decode_charset(charset)
  64                 @@current_decode_charset = charset
  65         end
  66
  67         def decode_mh                                                                                           # decode message header
  68                 gsub(/=\?([^?]+)\?(B|Q)\?([^?]+)\?=/i) {
  69                         $3.decode_ec($2).decode_cs(@@current_decode_charset, $1)
  70                 }.gsub(/[\x00-\x1F]/, '^x')
  71         end
  72
  73         def decode_ec(code)                                                                                     # decode encodings
  74                 (it = @@decoders[code.upcase]) ? it.call(self) : self
  75         end
  76
  77         def decode_cs(out_code, in_code)                                                        # decode character sets
  78                 (it = @@decoders[in_code.upcase]) ? it.call(self, out_code) : self
  79         end
  80
  81         #-----------------------------------------------------------
  82         #
  83         #       メッセージヘッダのエンコーダ
  84         #
  85         #               http://tools.ietf.org/html/rfc2047
  86         #
  87         @@encoders = {}
  88         @@current_encode_charset = 'ISO-2022-JP'
  89 #       @@current_encode_charset = 'UTF-8'
  90         @@current_encode_encoding = 'B'
  91 #       @@current_encode_encoding = 'Q'
  92
  93         def self.bind_encoder(code)
  94                 @@encoders[code.upcase] = Proc.new
  95         end
  96
  97         def self.set_encode_charset(charset)
  98                 @@current_encode_charset = charset
  99         end
 100         def encode_mh_multi(field_name)                                                         # encode message header multi line
 101                 begin
 102                         (it = @@encoders[('<%s><%s><%s>' % ['MULTI', @@current_encode_charset, @@current_encode_encoding]).upcase]) ? it.call(self, field_name, Proc.new) : raise
 103                 rescue
 104                         field_name + ': ' + self.encode_mh
 105                 end
 106         end
 107         def encode_mh                                                                                           # encode message header
 108                 "=?%s?%s?%s?=" % [@@current_encode_charset, @@current_encode_encoding,
 109                         self.encode_cs(@@current_encode_charset, 'UTF-8').encode_ec(@@current_encode_encoding).chomp]
 110         end
 111
 112         def encode_ec(code)                                                                                     # encode encodings
 113                 (it = @@encoders[code.upcase]) ? it.call(self) : self
 114         end
 115
 116         def encode_cs(out_code, in_code)                                                        # encode character sets
 117                 decode_cs(out_code, in_code)
 118         end
 119
 120         def encode_body(code)                                                                           # encode message body
 121                 encode_cs(code, @@current_decode_charset)
 122         end
 123
 124         #-----------------------------------------------------------
 125         #
 126         #       RFC 2231 拡張表現のエンコーダ
 127         #
 128         #               http://tools.ietf.org/html/rfc2231
 129         #
 130         @@rfc2231_encoders = {}
 131 #       @@current_rfc2231_encode_charset = 'UTF-8'
 132         @@current_rfc2231_encode_charset = '<LEGACY><ISO-2022-JP><B>'
 133
 134         def self.bind_rfc2231_encoder(code)
 135                 @@rfc2231_encoders[code.upcase] = Proc.new
 136         end
 137
 138         def self.set_rfc2231_encode_charset(charset)
 139                 @@current_rfc2231_encode_charset = charset
 140         end
 141
 142         def rfc2231_encode(attr, n = 78)
 143                 @@rfc2231_encoders[@@current_rfc2231_encode_charset].call(self, attr, n, Proc.new)
 144         end
 145
 146         #-----------------------------------------------------------
 147         #
 148         #       value のエンコーダ
 149         #
 150         def value_encode
 151                 self =~ /[^-.0-9A-Z_]/i ? "\"#{self.gsub(/"/, '\"')}\"" : self
 152         end
 153
 154         #-----------------------------------------------------------
 155         #
 156         #       ext-octet のデコーダ、エンコーダ
 157         #
 158         def ext_decode
 159                 self.gsub(/%([0-9A-F]{2})/i) { $1.to_i(16).chr }
 160         end
 161         def ext_encode
 162                 self.gsub(/[^-.0-9A-Z_]/i) {|c| '%%%02X' % c[0] }
 163         end
 164
 165         #-----------------------------------------------------------
 166         #
 167         #       文字列を指定の長さに切り詰める
 168         #
 169         @@wsizer = {}
 170         @@centerer = {}
 171         @@snippers = {}
 172         @@each_snippers = {}
 173         @@current_snip_charset = 'UTF-8'
 174 #       @@current_snip_charset = 'EUC-JP'
 175 #       @@current_snip_charset = 'SHIFT_JIS'
 176
 177         def self.bind_wsizer(charset)
 178                 @@wsizer[charset] = Proc.new
 179         end
 180
 181         def self.bind_centerer(charset)
 182                 @@centerer[charset] = Proc.new
 183         end
 184
 185         def self.bind_snipper(charset)
 186                 @@snippers[charset] = Proc.new
 187         end
 188
 189         def self.bind_each_snipper(charset)
 190                 @@each_snippers[charset] = Proc.new
 191         end
 192
 193         def self.set_snip_charset(charset)
 194                 @@current_snip_charset = charset
 195         end
 196
 197         def wsize
 198                 @@wsizer[@@current_snip_charset].call(self)
 199         end
 200
 201         def center(n, padding = ' ')
 202                 @@centerer[@@current_snip_charset].call(self, n, padding)
 203         end
 204
 205         def snip(n, charset = @@current_snip_charset)
 206                 @@snippers[charset].call(self, n)
 207         end
 208
 209         def each_snip(n, max = 9999)
 210                 @@each_snippers[@@current_snip_charset].call(self, n, max, Proc.new)
 211         end
 212
 213         #-----------------------------------------------------------
 214         #
 215         #       端末の UTF-8 対応の不備(記号の幅)を補う
 216         #
 217         #               http://ja.wikipedia.org/wiki/UTF-8
 218         #
 219         def enspc
 220 #               return(self)                                                                                    # UTF-8 以外ならコメントを生かす
 221                 self.gsub(/[\xC0-\xE2][\x80-\xBF]+/) {|c|                               #### for UTF-8 いーかげん
 222                         c + ' '
 223                 }
 224         end
 225
 226         #-----------------------------------------------------------
 227         #
 228         #       Re: をまとめる
 229         #
 230         def group_re(level = 0, re = 'Re')
 231                 base = self.dup
 232                 while(base =~ /^\s*#{re}\^?\d*:/i)
 233                         base.sub!(/^\s*#{re}\^?(\d*):\s*/i) {
 234                                 level += ($1.to_i > 1 ? $1.to_i : 1)
 235                                 ''
 236                         }
 237                 end
 238                 (level < 1 ? '' : "#{re}: ") + base
 239 #               (level < 1 ? '' : "#{re}#{level < 2 ? '' : "^#{level}"}: ") + base  # Re^3 表記
 240         end
 241
 242         #-----------------------------------------------------------
 243         #
 244         #       Fw: をまとめる
 245         #
 246         def group_fw(level = 0)
 247                 group_re(level, 'Fw')
 248         end
 249 end
 250
 251 #===============================================================================
 252 #
 253 #       多言語対応クラス
 254 #
 255 #               http://www.gnu.org/software/gettext/gettext.html
 256 #
 257 class Intl
 258
 259         @@domains = {}
 260         @@domains[@@current_domain = 'default'] = {}
 261
 262         def self.bind_text_domain(domain, dirname = '')
 263                 load "#{dirname}#{domain}.pmo"
 264                 @@domains[domain] = @@catalog
 265         end
 266
 267         def self.set_text_domain(domain)
 268                 @@current_domain = domain
 269         end
 270
 271         def self.get_text(msgid)
 272                 @@domains[@@current_domain][msgid] || msgid
 273         end
 274 end
 275
 276 #===============================================================================
 277 #
 278 #       多言語対応
 279 #
 280 def _(msgid)
 281         Intl.get_text(msgid)
 282 end
 283
 284 #===============================================================================
 285 #
 286 #       その他
 287 #
 288 def yap(arg = 'done.')
 289         @yap = 0 unless(@yap)
 290         print "#{@yap += 1}: #{arg.inspect}\n"
 291 end
 292
 293 def debug(log = 'log.', obj = self)
 294         @debug = File.new('debug.log', 'a') and @debug.write('-' * 76 + "\n") unless(@debug)
 295         @debug.write(obj.to_s + ': ' + log.to_s + "\n")
 296 end
 297
 298 #===============================================================================
 299 #
 300 #       各種デコーダ/エンコーダを登録
 301 #
 302 String.bind_decoder('7BIT') {|str|                                                              # 7bit decoder
 303         str
 304 }
 305 String.bind_decoder('8BIT') {|str|                                                              # 8bit decoder
 306         str
 307 }
 308 String.bind_decoder('BINARY') {|str, out_code|                                  # binary decoder?
 309 #       '-- binary --'
 310         str.inspect
 311 }
 312 String.bind_decoder('BASE64') {|str|                                                    # Base64 decoder
 313         str.unpack('m')[0]
 314 }
 315 String.bind_decoder('QUOTED-PRINTABLE') {|str|                                  # Quoted Printable decoder
 316         str.unpack('M')[0]
 317 }
 318 String.bind_decoder('B') {|str|                                                                 # Base64 decoder
 319         str.unpack('m')[0]
 320 }
 321 String.bind_encoder('B') {|str|                                                                 # Base64 encoder
 322         [str].pack('m999')
 323 }
 324 String.bind_decoder('Q') {|str|                                                                 # Quoted Printable decoder
 325         str.unpack('M')[0]
 326 }
 327 String.bind_encoder('Q') {|str|                                                                 # Quoted Printable encoder
 328         [str].pack('M999')
 329 }
  # Multi-line header encoder for ISO-2022-JP / "B".
  #
  # Builds "field_name: value" and hands it to proc one physical line at a
  # time.  Runs of printable ASCII are copied as is; runs of multibyte
  # characters are turned into encoded-words with String#encode_mh.  When
  # neither kind of run fits in what is left of the current line, the line
  # is handed to proc and a continuation line starting with a tab is begun.
  #
  #   str        - header value (the patterns below match UTF-8 lead and
  #                continuation bytes, so this presumably expects UTF-8
  #                text handled byte-wise -- TODO confirm)
  #   field_name - header field name, without the colon
  #   proc       - called with each finished line
  #
  # NOTE(review): after a flush `line` is "\t", never '', so the
  # `line == '' and raise` guard cannot fire.  If the head of src matches
  # neither pattern even on a fresh line (e.g. a control character, or more
  # than 76 ASCII characters with no space), the else branch appears to
  # repeat forever -- confirm and guard.
  330 String.bind_encoder('<MULTI><ISO-2022-JP><B>') {|str, field_name, proc| # encode message header multi line
  331         src = str + ' '; single_max = (max_length = 76) - (line = field_name + ': ').length     # trailing ' ' is a sentinel; single_max = columns left on this line
  332         while(src.length > 1)
  333                 multi_max = ((single_max - 18) / 4 * 3 - 6) / 2     # 18 = length of "=?ISO-2022-JP?B?" + "?="; /4*3 undoes Base64 growth; "- 6" and "/ 2" presumably cover the escape sequences and 2 bytes per character -- TODO confirm
  334                 if(single_max > 0 and src =~ /^([\x20-\x7E]{1,#{single_max}})[\x20\xC0-\xFD]/)     # ASCII run that fits and is followed by a space or a multibyte lead byte
  335                         line += src.slice!(0, $1.length)
  336                         single_max -= $1.length
  337                 elsif(multi_max > 0 and src =~ /^(([\xC0-\xFD][\x80-\xBF]+){1,#{multi_max}})/)     # up to multi_max multibyte characters
  338                         line += (line0 = src.slice!(0, $1.length).encode_mh)
  339                         single_max -= line0.length
  340                 else
  341                         line == '' and raise
  342                         proc.call(line.gsub(/^\t\s/, "\s")); line = "\t"; single_max = max_length     # flush; a leading tab + whitespace is reduced to one space
  343                 end
  344         end
  345         proc.call(line.gsub(/^\t\s/, "\s"))     # flush the last line
  346 }
  # Multi-line header encoder for UTF-8 / "B".
  #
  # Builds "field_name: value" and hands it to proc one physical line at a
  # time.  Runs of printable ASCII are copied as is; runs of bytes in
  # 0x80-0xFD are turned into encoded-words with String#encode_mh.  When
  # neither kind of run fits in what is left of the current line, the line
  # is handed to proc and a continuation line starting with a tab is begun.
  #
  #   str        - header value (presumably UTF-8 text handled byte-wise
  #                -- TODO confirm)
  #   field_name - header field name, without the colon
  #   proc       - called with each finished line
  #
  # NOTE(review): after a flush `line` is "\t", never '', so the
  # `line == '' and raise` guard cannot fire.  If the head of src matches
  # neither pattern even on a fresh line (e.g. a control character, or more
  # than 76 ASCII characters with no space), the else branch appears to
  # repeat forever -- confirm and guard.
  347 String.bind_encoder('<MULTI><UTF-8><B>') {|str, field_name, proc|       # encode message header multi line
  348         src = str + ' '; single_max = (max_length = 76) - (line = field_name + ': ').length     # trailing ' ' is a sentinel; single_max = columns left on this line
  349         while(src.length > 1)
  350                 multi_max = (single_max - 12) / 4 * 3     # 12 = length of "=?UTF-8?B?" + "?="; /4*3 undoes Base64 growth, giving a byte budget
  351                 if(single_max > 0 and src =~ /^([\x20-\x7E]{1,#{single_max}})[\x20\xC0-\xFD]/)     # ASCII run that fits and is followed by a space or a multibyte lead byte
  352                         line += src.slice!(0, $1.length)
  353                         single_max -= $1.length
  354                 elsif(multi_max > 0 and src =~ /^([\x80-\xFD]{1,#{multi_max}})[\x20-\x7E\xC0-\xFD]/)     # up to multi_max non-ASCII bytes, ending just before an ASCII byte or a lead byte
  355                         line += (line0 = src.slice!(0, $1.length).encode_mh)
  356                         single_max -= line0.length
  357                 else
  358                         line == '' and raise
  359                         proc.call(line.gsub(/^\t\s/, "\s")); line = "\t"; single_max = max_length     # flush; a leading tab + whitespace is reduced to one space
  360                 end
  361         end
  362         proc.call(line.gsub(/^\t\s/, "\s"))     # flush the last line
  363 }
 364 String.bind_decoder('US-ASCII') {|str, out_code|                                # us-ascii decoder
 365         str
 366 }
 367 String.bind_decoder('ISO-2022-JP') {|str, out_code|                             # iso-2022-jp decoder
 368         str.kconv(String.charset(out_code), Kconv::JIS)
 369 }
 370 String.bind_decoder('ISO-2022-JP-1') {|str, out_code|                   # iso-2022-jp-1 decoder
 371         str.kconv(String.charset(out_code), Kconv::JIS)
 372 }
 373 String.bind_decoder('ISO-2022-JP-2') {|str, out_code|                   # iso-2022-jp-2 decoder
 374         str.kconv(String.charset(out_code), Kconv::JIS)
 375 }
 376 String.bind_decoder('SHIFT_JIS') {|str, out_code|                               # shift_jis decoder
 377         str.kconv(String.charset(out_code), Kconv::SJIS)
 378 }
 379 String.bind_decoder('EUC-JP') {|str, out_code|                                  # euc-jp decoder
 380         str.kconv(String.charset(out_code), Kconv::EUC)
 381 }
 382 String.bind_decoder('UTF-8') {|str, out_code|                                   # utf-8 decoder
 383         str.kconv(String.charset(out_code), Kconv::UTF8)
 384 }
 385 #       http://tools.ietf.org/html/rfc2152
 386 String.bind_decoder('UTF-7') {|str, out_code|                                   # utf-7 decoder
 387         str.gsub(%r|\+([A-Za-z0-9+/]+)-?|) {|p|
 388                 ($1 + '==').unpack('m')[0].kconv(Kconv::UTF8, Kconv::UTF16)
 389         }.gsub(/\+-/, '+').kconv(String.charset(out_code), Kconv::UTF8)
 390 }
 391
 392 # 11bit: 0xC0-0xDF 0x80-0xBF
 393 # 16bit: 0xE0-0xEF 0x80-0xBF 0x80-0xBF
 394 # 21bit: 0xF0-0xF7 0x80-0xBF 0x80-0xBF 0x80-0xBF
 395 # 26bit: 0xF8-0xFB 0x80-0xBF 0x80-0xBF 0x80-0xBF 0x80-0xBF
 396 # 31bit: 0xFC-0xFD 0x80-0xBF 0x80-0xBF 0x80-0xBF 0x80-0xBF 0x80-0xBF
 397
  # Display-width helpers registered for the 'UTF-8' snip charset.
  #
  # All four substitute the two bytes "\xFF\xFF" for every multibyte
  # sequence (lead byte 0xC0-0xFD plus continuation bytes), so that each
  # such character counts as two columns and every other byte as one.
  # The arithmetic relies on String#length, String#[] and String#slice
  # working on bytes (presumably Ruby 1.8 strings -- TODO confirm).
  398 String.bind_wsizer('UTF-8') {|str|                                                              # get the display width
  399         str.gsub(/[\xC0-\xFD][\x80-\xBF]+/, "\xFF\xFF").length
  400 }
  401
  # Pads str with (n - width) / 2 copies of padding on the left and
  # (n - width) copies on the right, then cuts the result to n with snip.
  402 String.bind_centerer('UTF-8') {|str, n, padding|                                # center the string
  403         w = n - str.wsize
  404         w = 0 if(w < 0)
  405         (padding * (w >> 1) + str + padding * w).snip(n)
  406 }
  407
  # ws is the column image of the first n columns; wc counts its "\xFF"
  # placeholders, so an odd wc means a two-column character straddles
  # column n.  The slice length n + wc / 2 - wc % 2 adds one byte per
  # two-column character (presumably assuming 3-byte characters -- TODO
  # confirm) and drops the straddling character; the result is padded
  # with spaces to n columns.
  408 String.bind_snipper('UTF-8') {|str, n|                                                  # truncate to the given length
  409         ws = str[0, n * 2].gsub(/[\xC0-\xFD][\x80-\xBF]+/, "\xFF\xFF")[0, n]
  410         wc = ws.count("\xFF")
  411         str.slice(0, n + wc / 2 - wc % 2) + ' ' * (n - ws.length + wc % 2)
  412 }
  413
  # Same cut as the snipper above, repeated from offset p until the end
  # of str or until max lines have been produced; each line goes to proc.
  414 String.bind_each_snipper('UTF-8') {|str, n, max, proc|                  # truncate to the given length and pass the lines on in order
  415         p = 0; while(p <= str.length)                                                           # with '<' instead, lines holding only a newline character are omitted
  416                 break if((max -= 1) < 0)
  417                 ws = str[p, n * 2].gsub(/[\xC0-\xFD][\x80-\xBF]+/, "\xFF\xFF")[0, n]
  418                 wc = ws.count("\xFF")
  419                 proc.call(str.slice(p, nn = n + wc / 2 - wc % 2) + ' ' * (n - ws.length + wc % 2))
  420                 p += nn
  421         end
  422 }
 423
  # RFC 2231 parameter encoder registered as 'UTF-8'.
  #
  #   str  - parameter value
  #   attr - parameter name
  #   n    - maximum line width
  #   proc - called with each output line (each starts with a tab)
  #
  # Walks str one character at a time, keeping the raw text of the current
  # segment in nl and its percent-encoded form in el.  When adding a
  # character makes the encoded form longer than the width budget w, the
  # segment as it was before that character is emitted as a numbered
  # continuation ("attr*N=...;" or "attr*N*=...;") and a new segment starts
  # with that character.  The last segment is emitted without a trailing
  # ';', and without "*N" when no split happened.  Segments flagged multi
  # use the encoded form (the first one prefixed with head); the others use
  # the raw text passed through value_encode.
  #
  # NOTE(review): multi is set from the current character before the
  # length check and reset to false right after a split, so a segment that
  # begins with a multibyte character and contains no further one looks
  # like it is emitted in the unencoded form -- confirm.
  424 String.bind_rfc2231_encoder('UTF-8') {|str, attr, n, proc|              # encode into RFC 2231 extended notation (for attachment file names)
  425         multi = false; head = ''; str =~ /[\xC0-\xFD]/ and multi = true and head = "utf8''"     # any multibyte lead byte switches the first segment to the extended form
  426         w = n - attr.size - "\t*n*=;".size - head.size     # room left for the value on one line
  427         nth = -1; nl = ''; el = ''; str.gsub(/./u) {|nc|     # gsub is used only to iterate over the characters
  428                 multi = true if(!multi and nc =~ /[\xC0-\xFD]/)
  429                 if(lnl = nl and lel = el and nl += nc and (el += (ec = nc.ext_encode)).size > w)     # remember the segment before nc, append nc, then test the encoded length
  430                         proc.call("\t%s*%d%s=%s%s;" % [attr, nth += 1, multi ? '*' : '', head, multi ? lel : lnl.value_encode])
  431                         multi = false; head = ''
  432                         w = n - attr.size - "\t*n*=;".size - head.size
  433                         nl = nc; el = ec
  434                 end
  435         }
  436         proc.call("\t%s%s%s=%s%s" % [attr, nth == -1 ? '' : "*#{(nth += 1).to_s}", multi ? '*': '', head, multi ? el : nl.value_encode])
  437 }
  438
  # Legacy encoder: passes proc a single line holding attr="<encoded-word>",
  # where the encoded-word is str converted from UTF-8 to ISO-2022-JP and
  # Base64 encoded.  n is not used.
  439 String.bind_rfc2231_encoder('<LEGACY><ISO-2022-JP><B>') {|str, attr, n, proc|   # violates the RFC, but encodes with the "B" encoding (for attachment file names)
  440         proc.call("\t%s=\"=?%s?%s?%s?=\"" % [attr, 'ISO-2022-JP', 'B', str.encode_cs('ISO-2022-JP', 'UTF-8').encode_ec('B').chomp])
  441 }
 442
  443 #            Kanji: 0xA1-0xFE 0xA1-0xFE
  444 #  Half-width kana: 0x8E 0xA1-0xDF
  445 # Supplementary kanji: 0x8F 0xA1-0xFE 0xA1-0xFE
  446
  # Display-width helpers registered for the 'EUC-JP' snip charset.
  #
  # All four substitute "\xFF\xFF" for every pair of bytes in 0xA1-0xFE,
  # so that each two-byte character counts as two columns.  The 0x8E and
  # 0x8F lead bytes listed above are not part of the pattern.  The
  # arithmetic relies on String#length, String#[] and String#slice working
  # on bytes (presumably Ruby 1.8 strings -- TODO confirm).
  447 String.bind_wsizer('EUC-JP') {|str|                                                             # get the display width
  448         str.gsub(/[\xA1-\xFE][\xA1-\xFE]/, "\xFF\xFF").length
  449 }
  450
  # Pads str with (n - width) / 2 copies of padding on the left and
  # (n - width) copies on the right, then cuts the result to n with snip.
  451 String.bind_centerer('EUC-JP') {|str, n, padding|                               # center the string
  452         w = n - str.wsize
  453         w = 0 if(w < 0)
  454         (padding * (w >> 1) + str + padding * w).snip(n)
  455 }
  456
  # ws is the column image of the first n columns; wc counts its "\xFF"
  # placeholders, so an odd wc means a two-byte character straddles
  # column n.  That character is dropped (n - wc % 2 bytes are kept) and
  # the result is padded with spaces to n columns.
  457 String.bind_snipper('EUC-JP') {|str, n|                                                 #### truncate to the given length
  458         ws = str[0, n * 2].gsub(/[\xA1-\xFE][\xA1-\xFE]/, "\xFF\xFF")[0, n]
  459         wc = ws.count("\xFF")
  460         str.slice(0, n - wc % 2) + ' ' * (n - ws.length + wc % 2)
  461 }
  462
  # Same cut as the snipper above, repeated from offset p until the end
  # of str or until max lines have been produced; each line goes to proc.
  463 String.bind_each_snipper('EUC-JP') {|str, n, max, proc|                 #### truncate to the given length and pass the lines on in order
  464         p = 0; while(p <= str.length)                                                           # with '<' instead, lines holding only a newline character are omitted
  465                 break if((max -= 1) < 0)
  466                 ws = str[p, n * 2].gsub(/[\xA1-\xFE][\xA1-\xFE]/, "\xFF\xFF")[0, n]
  467                 wc = ws.count("\xFF")
  468                 proc.call(str.slice(p, nn = n - wc % 2) + ' ' * (n - ws.length + wc % 2))
  469                 p += nn
  470         end
  471 }
  472
  473 #### TODO: TAB support
 474
 475 __END__
 476