3 #===============================================================================
9 #-----------------------------------------------------------
14 unless((full = self.to_s).length > 3)
18 when 1 then top = full[0, 1] + (dot ? '.' + full[1, 1] : '')
19 when 2 then top = full[0, 2]
20 when 0 then top = full[0, 3]
22 top + '....KKKMMMGGGTTTPPP'[full.length].chr
27 #===============================================================================
33 #-----------------------------------------------------------
37 # http://www.iana.org/assignments/character-sets
40 'ISO-2022-JP' => Kconv::JIS,
41 'SHIFT_JIS' => Kconv::SJIS,
42 'EUC-JP' => Kconv::EUC,
43 'UTF-8' => Kconv::UTF8,
46 def self.charset(charset)
50 #-----------------------------------------------------------
54 # http://tools.ietf.org/html/rfc2047
57 @@current_decode_charset = 'UTF-8'
# Registers a decoder in @@decoders under the upcased `code` key.
# The decoder is the block given by the caller: a blockless `Proc.new` inside a
# method captures the block that was passed to that method.
# NOTE(review): blockless `Proc.new` raises ArgumentError on Ruby 3.0+ — confirm the targeted Ruby version.
59 def self.bind_decoder(code)
60 @@decoders[code.upcase] = Proc.new
# Sets the class-wide @@current_decode_charset. decode_mh uses it as the output
# charset and encode_body uses it as the input charset.
63 def self.set_decode_charset(charset)
64 @@current_decode_charset = charset
# Decodes every RFC 2047 encoded-word ("=?charset?B|Q?text?=") found in the receiver.
# Returns a new string; text outside encoded-words is kept as it is.
67 def decode_mh # decode message header
# $1 = declared charset, $2 = encoding letter (B or Q, case-insensitive), $3 = encoded text.
68 gsub(/=\?([^?]+)\?(B|Q)\?([^?]+)\?=/i) {
# First undo the transfer encoding, then convert from the declared charset to @@current_decode_charset.
69 $3.decode_ec($2).decode_cs(@@current_decode_charset, $1)
# Every character in 0x00-0x1F (this range includes tab, CR and LF) becomes the literal text '^x'.
70 }.gsub(/[\x00-\x1F]/, '^x')
# Applies the decoder registered under the upcased `code` to the receiver.
# Returns the receiver itself, unchanged, when no decoder is registered for `code`.
73 def decode_ec(code) # decode encodings
74 (it = @@decoders[code.upcase]) ? it.call(self) : self
# Converts the receiver from charset `in_code` to charset `out_code`.
# The decoder is looked up by the upcased `in_code` and called with (receiver, out_code).
# Returns the receiver itself, unchanged, when no decoder is registered for `in_code`.
77 def decode_cs(out_code, in_code) # decode character sets
78 (it = @@decoders[in_code.upcase]) ? it.call(self, out_code) : self
81 #-----------------------------------------------------------
85 # http://tools.ietf.org/html/rfc2047
88 @@current_encode_charset = 'ISO-2022-JP'
89 # @@current_encode_charset = 'UTF-8'
90 @@current_encode_encoding = 'B'
91 # @@current_encode_encoding = 'Q'
# Registers an encoder in @@encoders under the upcased `code` key.
# The encoder is the block given by the caller, captured by the blockless `Proc.new`.
# NOTE(review): blockless `Proc.new` raises ArgumentError on Ruby 3.0+ — confirm the targeted Ruby version.
93 def self.bind_encoder(code)
94 @@encoders[code.upcase] = Proc.new
# Sets the class-wide @@current_encode_charset used by encode_mh and encode_mh_multi.
97 def self.set_encode_charset(charset)
98 @@current_encode_charset = charset
# Encodes the receiver as the value of header `field_name`.
# NOTE(review): two forms appear below; presumably the multi-line form is used when a block is given — confirm.
100 def encode_mh_multi(field_name) # encode message header multi line
# Multi-line form: look up the encoder keyed "<MULTI><charset><encoding>" (upcased) and call it with
# the receiver, the field name and the caller's block (captured by the blockless Proc.new).
# When no such encoder is registered, the bare `raise` raises RuntimeError (or re-raises $! if one is set).
102 (it = @@encoders[('<%s><%s><%s>' % ['MULTI', @@current_encode_charset, @@current_encode_encoding]).upcase]) ? it.call(self, field_name, Proc.new) : raise
# Single-line form: "<field_name>: " followed by the whole receiver as one encoded-word.
104 field_name + ': ' + self.encode_mh
# Builds a single RFC 2047 encoded-word "=?charset?encoding?text?=" from the receiver.
# The receiver is treated as UTF-8, converted to @@current_encode_charset, passed through the
# encoder registered for @@current_encode_encoding, and a trailing newline is removed with chomp.
107 def encode_mh # encode message header
108 "=?%s?%s?%s?=" % [@@current_encode_charset, @@current_encode_encoding,
109 self.encode_cs(@@current_encode_charset, 'UTF-8').encode_ec(@@current_encode_encoding).chomp]
# Applies the encoder registered under the upcased `code` to the receiver.
# Returns the receiver itself, unchanged, when no encoder is registered for `code`.
112 def encode_ec(code) # encode encodings
113 (it = @@encoders[code.upcase]) ? it.call(self) : self
# Converts the receiver from charset `in_code` to charset `out_code`.
# Charset conversion is the same operation in both directions, so this delegates to decode_cs.
116 def encode_cs(out_code, in_code) # encode character sets
117 decode_cs(out_code, in_code)
# Converts the receiver from @@current_decode_charset to the charset `code`.
120 def encode_body(code) # encode message body
121 encode_cs(code, @@current_decode_charset)
124 #-----------------------------------------------------------
126 # RFC 2231 拡張表現のエンコーダ
128 # http://tools.ietf.org/html/rfc2231
130 @@rfc2231_encoders = {}
131 # @@current_rfc2231_encode_charset = 'UTF-8'
132 @@current_rfc2231_encode_charset = '<LEGACY><ISO-2022-JP><B>'
# Registers an RFC 2231 encoder in @@rfc2231_encoders under the upcased `code` key.
# The encoder is the block given by the caller, captured by the blockless `Proc.new`.
# NOTE(review): blockless `Proc.new` raises ArgumentError on Ruby 3.0+ — confirm the targeted Ruby version.
134 def self.bind_rfc2231_encoder(code)
135 @@rfc2231_encoders[code.upcase] = Proc.new
# Sets the key that rfc2231_encode uses to select an encoder.
# NOTE(review): the value is stored as given, while bind_rfc2231_encoder upcases its keys and
# rfc2231_encode looks up without upcasing — callers presumably must pass an upper-case key; confirm.
138 def self.set_rfc2231_encode_charset(charset)
139 @@current_rfc2231_encode_charset = charset
# Encodes the receiver as the value of MIME parameter `attr` using the encoder selected by
# @@current_rfc2231_encode_charset. The encoder receives (receiver, attr, n, caller's block);
# `n` defaults to 78 and the registered encoders use it as a line-width limit.
# NOTE(review): the lookup key is not upcased here although keys are stored upcased; an unregistered
# key yields nil and `.call` then raises NoMethodError — confirm this is acceptable.
# NOTE(review): blockless `Proc.new` raises ArgumentError on Ruby 3.0+ — confirm the targeted Ruby version.
142 def rfc2231_encode(attr, n = 78)
143 @@rfc2231_encoders[@@current_rfc2231_encode_charset].call(self, attr, n, Proc.new)
146 #-----------------------------------------------------------
151 self =~ /[^-.0-9A-Z_]/i ? "\"#{self.gsub(/"/, '\"')}\"" : self
154 #-----------------------------------------------------------
156 # ext-octet のデコーダ、エンコーダ
159 self.gsub(/%([0-9A-F]{2})/i) { $1.to_i(16).chr }
162 self.gsub(/[^-.0-9A-Z_]/i) {|c| '%%%02X' % c[0] }
165 #-----------------------------------------------------------
173 @@current_snip_charset = 'UTF-8'
174 # @@current_snip_charset = 'EUC-JP'
175 # @@current_snip_charset = 'SHIFT_JIS'
# Registers the caller's block as the display-width calculator for `charset`.
# The key is stored exactly as given (no upcasing).
# NOTE(review): blockless `Proc.new` raises ArgumentError on Ruby 3.0+ — confirm the targeted Ruby version.
177 def self.bind_wsizer(charset)
178 @@wsizer[charset] = Proc.new
# Registers the caller's block as the centering routine for `charset`.
# The key is stored exactly as given (no upcasing).
# NOTE(review): blockless `Proc.new` raises ArgumentError on Ruby 3.0+ — confirm the targeted Ruby version.
181 def self.bind_centerer(charset)
182 @@centerer[charset] = Proc.new
# Registers the caller's block as the truncation routine ("snipper") for `charset`.
# The key is stored exactly as given (no upcasing).
# NOTE(review): blockless `Proc.new` raises ArgumentError on Ruby 3.0+ — confirm the targeted Ruby version.
185 def self.bind_snipper(charset)
186 @@snippers[charset] = Proc.new
# Registers the caller's block as the line-by-line truncation routine for `charset`.
# The key is stored exactly as given (no upcasing).
# NOTE(review): blockless `Proc.new` raises ArgumentError on Ruby 3.0+ — confirm the targeted Ruby version.
189 def self.bind_each_snipper(charset)
190 @@each_snippers[charset] = Proc.new
# Sets @@current_snip_charset, the key used to pick the width, centering and snipping routines.
193 def self.set_snip_charset(charset)
194 @@current_snip_charset = charset
198 @@wsizer[@@current_snip_charset].call(self)
# Centers the receiver within `n` columns using the routine registered for @@current_snip_charset.
# NOTE(review): presumably defined on String (the registrations call String.bind_*), in which case
# this replaces the built-in String#center — confirm.
201 def center(n, padding = ' ')
202 @@centerer[@@current_snip_charset].call(self, n, padding)
# Truncates (and pads) the receiver to `n` columns using the snipper registered for `charset`,
# which defaults to @@current_snip_charset. An unregistered charset yields nil and `.call` fails.
205 def snip(n, charset = @@current_snip_charset)
206 @@snippers[charset].call(self, n)
# Passes the receiver to the each-snipper registered for @@current_snip_charset together with the
# column width `n`, the line limit `max` (default 9999) and the caller's block.
# NOTE(review): blockless `Proc.new` raises ArgumentError on Ruby 3.0+ — confirm the targeted Ruby version.
209 def each_snip(n, max = 9999)
210 @@each_snippers[@@current_snip_charset].call(self, n, max, Proc.new)
213 #-----------------------------------------------------------
215 # 端末の UTF-8 対応の不備(記号の幅)を補う
217 # http://ja.wikipedia.org/wiki/UTF-8
220 # return(self) # UTF-8 以外ならコメントを生かす
221 self.gsub(/[\xC0-\xE2][\x80-\xBF]+/) {|c| #### for UTF-8 いーかげん
226 #-----------------------------------------------------------
# Collapses repeated reply prefixes at the start of a subject ("Re:", "Re^2:", "Re2:", ...) into one "Re: ".
# `level` is the starting prefix count; `re` is the prefix word and is interpolated into the
# patterns without escaping.
# NOTE(review): `base` is presumably a working copy of the receiver — confirm.
230 def group_re(level = 0, re = 'Re')
# Keep stripping while the text still begins with a prefix (case-insensitive).
232 while(base =~ /^\s*#{re}\^?\d*:/i)
# Remove one prefix in place, capturing its optional number.
233 base.sub!(/^\s*#{re}\^?(\d*):\s*/i) {
# A number greater than 1 adds that many levels; a missing number, 0 or 1 adds one.
234 level += ($1.to_i > 1 ? $1.to_i : 1)
# A single "<re>: " is prepended when at least one level was counted, otherwise nothing.
238 (level < 1 ? '' : "#{re}: ") + base
239 # (level < 1 ? '' : "#{re}#{level < 2 ? '' : "^#{level}"}: ") + base # Re^3 表記
242 #-----------------------------------------------------------
# Same as group_re but for the forward prefix "Fw".
246 def group_fw(level = 0)
247 group_re(level, 'Fw')
251 #===============================================================================
255 # http://www.gnu.org/software/gettext/gettext.html
260 @@domains[@@current_domain = 'default'] = {}
# Loads the message catalog file "<dirname><domain>.pmo" and registers it under `domain`.
# `dirname` is concatenated directly, so it has to end with a path separator when non-empty.
# The file is executed as Ruby code by `load`, so it must come from a trusted location.
# NOTE(review): presumably the loaded file assigns @@catalog — confirm.
262 def self.bind_text_domain(domain, dirname = '')
263 load "#{dirname}#{domain}.pmo"
264 @@domains[domain] = @@catalog
# Selects the text domain that get_text reads from.
267 def self.set_text_domain(domain)
268 @@current_domain = domain
# Returns the translation of `msgid` in the current domain, or `msgid` itself when there is none.
# NOTE(review): if the current domain was never bound, the lookup presumably hits nil and raises
# NoMethodError — confirm how @@domains is initialised.
271 def self.get_text(msgid)
272 @@domains[@@current_domain][msgid] || msgid
276 #===============================================================================
284 #===============================================================================
# Debug helper: prints "<counter>: <arg.inspect>" followed by a newline to standard output.
# The counter is the instance variable @yap of the receiver; it starts at 0 and is incremented on each call.
288 def yap(arg = 'done.')
289 @yap = 0 unless(@yap)
290 print "#{@yap += 1}: #{arg.inspect}\n"
# Debug helper: appends "<obj>: <log>" plus a newline to the file 'debug.log' in the current directory.
293 def debug(log = 'log.', obj = self)
# On first use, open the log in append mode and write a separator line of 76 dashes.
# The handle is kept in @debug; these lines neither close nor flush it.
294 @debug = File.new('debug.log', 'a') and @debug.write('-' * 76 + "\n") unless(@debug)
295 @debug.write(obj.to_s + ': ' + log.to_s + "\n")
298 #===============================================================================
302 String.bind_decoder('7BIT') {|str| # 7bit decoder
305 String.bind_decoder('8BIT') {|str| # 8bit decoder
308 String.bind_decoder('BINARY') {|str, out_code| # binary decoder?
312 String.bind_decoder('BASE64') {|str| # Base64 decoder
315 String.bind_decoder('QUOTED-PRINTABLE') {|str| # Quoted Printable decoder
318 String.bind_decoder('B') {|str| # Base64 decoder
321 String.bind_encoder('B') {|str| # Base64 encoder
324 String.bind_decoder('Q') {|str| # Quoted Printable decoder
327 String.bind_encoder('Q') {|str| # Quoted Printable encoder
# Registers the multi-line header encoder for ISO-2022-JP with B encoding.
# It walks through `str`, copies printable-ASCII runs verbatim, turns multibyte runs into
# encoded-words via encode_mh, and hands every finished physical line to `proc`.
# NOTE(review): the byte-range patterns assume byte-wise matching on UTF-8 input — confirm.
330 String.bind_encoder('<MULTI><ISO-2022-JP><B>') {|str, field_name, proc| # encode message header multi line
# A space is appended so the one-character lookahead in the first pattern can match at the end of the text.
# max_length is 76; single_max is the room left on the current line after "<field_name>: ".
331 src = str + ' '; single_max = (max_length = 76) - (line = field_name + ': ').length
# Loop while more than one character remains (the appended space is the last one).
332 while(src.length > 1)
# Number of multibyte characters that may go into one encoded-word on this line.
# Presumably 18 = length of "=?ISO-2022-JP?B?" plus "?=", /4*3 = Base64 ratio,
# 6 = escape sequences, /2 = two bytes per character — TODO confirm.
333 multi_max = ((single_max - 18) / 4 * 3 - 6) / 2
# Printable ASCII run (at most single_max long) followed by a space or a multibyte lead byte: copy as it is.
334 if(single_max > 0 and src =~ /^([\x20-\x7E]{1,#{single_max}})[\x20\xC0-\xFD]/)
335 line += src.slice!(0, $1.length)
336 single_max -= $1.length
# Run of 1..multi_max multibyte characters: cut it off `src` and append it as one encoded-word.
337 elsif(multi_max > 0 and src =~ /^(([\xC0-\xFD][\x80-\xBF]+){1,#{multi_max}})/)
338 line += (line0 = src.slice!(0, $1.length).encode_mh)
339 single_max -= line0.length
# Emit the current line (a leading tab followed by whitespace is collapsed to one space),
# then start a continuation line with a tab and the full width available again.
# NOTE(review): presumably the fallback taken when neither pattern fits the remaining width — confirm.
342 proc.call(line.gsub(/^\t\s/, "\s")); line = "\t"; single_max = max_length
# Emit the final line.
345 proc.call(line.gsub(/^\t\s/, "\s"))
# Registers the multi-line header encoder for UTF-8 with B encoding.
# It walks through `str`, copies printable-ASCII runs verbatim, turns non-ASCII runs into
# encoded-words via encode_mh, and hands every finished physical line to `proc`.
# NOTE(review): the byte-range patterns assume byte-wise matching on UTF-8 input — confirm.
347 String.bind_encoder('<MULTI><UTF-8><B>') {|str, field_name, proc| # encode message header multi line
# A space is appended so the one-character lookahead in the patterns can match at the end of the text.
# max_length is 76; single_max is the room left on the current line after "<field_name>: ".
348 src = str + ' '; single_max = (max_length = 76) - (line = field_name + ': ').length
# Loop while more than one character remains (the appended space is the last one).
349 while(src.length > 1)
# Number of raw bytes that may go into one encoded-word on this line.
# Presumably 12 = length of "=?UTF-8?B?" plus "?=", and /4*3 = Base64 ratio — TODO confirm.
350 multi_max = (single_max - 12) / 4 * 3
# Printable ASCII run (at most single_max long) followed by a space or a multibyte lead byte: copy as it is.
351 if(single_max > 0 and src =~ /^([\x20-\x7E]{1,#{single_max}})[\x20\xC0-\xFD]/)
352 line += src.slice!(0, $1.length)
353 single_max -= $1.length
# Up to multi_max non-ASCII bytes that are followed by printable ASCII or a lead byte, so the cut
# does not fall in the middle of a character: cut them off `src` and append as one encoded-word.
354 elsif(multi_max > 0 and src =~ /^([\x80-\xFD]{1,#{multi_max}})[\x20-\x7E\xC0-\xFD]/)
355 line += (line0 = src.slice!(0, $1.length).encode_mh)
356 single_max -= line0.length
# Emit the current line (a leading tab followed by whitespace is collapsed to one space),
# then start a continuation line with a tab and the full width available again.
# NOTE(review): presumably the fallback taken when neither pattern fits the remaining width — confirm.
359 proc.call(line.gsub(/^\t\s/, "\s")); line = "\t"; single_max = max_length
# Emit the final line.
362 proc.call(line.gsub(/^\t\s/, "\s"))
364 String.bind_decoder('US-ASCII') {|str, out_code| # us-ascii decoder
# Charset decoders. Each one converts `str` from the named charset to the charset named by
# `out_code` with String#kconv(to, from).
# NOTE(review): String.charset presumably maps a charset name to its Kconv constant — confirm.
367 String.bind_decoder('ISO-2022-JP') {|str, out_code| # iso-2022-jp decoder
368 str.kconv(String.charset(out_code), Kconv::JIS)
# ISO-2022-JP-1 and ISO-2022-JP-2 are decoded with the same Kconv::JIS source encoding.
370 String.bind_decoder('ISO-2022-JP-1') {|str, out_code| # iso-2022-jp-1 decoder
371 str.kconv(String.charset(out_code), Kconv::JIS)
373 String.bind_decoder('ISO-2022-JP-2') {|str, out_code| # iso-2022-jp-2 decoder
374 str.kconv(String.charset(out_code), Kconv::JIS)
376 String.bind_decoder('SHIFT_JIS') {|str, out_code| # shift_jis decoder
377 str.kconv(String.charset(out_code), Kconv::SJIS)
379 String.bind_decoder('EUC-JP') {|str, out_code| # euc-jp decoder
380 str.kconv(String.charset(out_code), Kconv::EUC)
382 String.bind_decoder('UTF-8') {|str, out_code| # utf-8 decoder
383 str.kconv(String.charset(out_code), Kconv::UTF8)
385 # http://tools.ietf.org/html/rfc2152
# UTF-7 decoder: converts `str` from UTF-7 to the charset named by `out_code`.
386 String.bind_decoder('UTF-7') {|str, out_code| # utf-7 decoder
# A shifted section is '+' followed by Base64 characters and an optional closing '-'.
# The block parameter is unused; the Base64 text is taken from $1.
387 str.gsub(%r|\+([A-Za-z0-9+/]+)-?|) {|p|
# '==' is appended before decoding with unpack('m'), presumably so unpadded Base64 is accepted — confirm.
# The decoded bytes are then converted from UTF-16 to UTF-8.
388 ($1 + '==').unpack('m')[0].kconv(Kconv::UTF8, Kconv::UTF16)
# '+-' stands for a literal '+'; finally convert the UTF-8 result to the requested charset.
389 }.gsub(/\+-/, '+').kconv(String.charset(out_code), Kconv::UTF8)
392 # 11bit: 0xC0-0xDF 0x80-0xBF
393 # 16bit: 0xE0-0xEF 0x80-0xBF 0x80-0xBF
394 # 21bit: 0xF0-0xF7 0x80-0xBF 0x80-0xBF 0x80-0xBF
395 # 26bit: 0xF8-0xFB 0x80-0xBF 0x80-0xBF 0x80-0xBF 0x80-0xBF
396 # 31bit: 0xFC-0xFD 0x80-0xBF 0x80-0xBF 0x80-0xBF 0x80-0xBF 0x80-0xBF
# Display width for UTF-8 text: every multibyte sequence (lead byte 0xC0-0xFD plus continuation
# bytes) is replaced by two placeholder bytes, so it counts as 2 and every other byte as 1.
# NOTE(review): this relies on `length` counting bytes — confirm for the targeted Ruby version.
398 String.bind_wsizer('UTF-8') {|str| # get the display width
399 str.gsub(/[\xC0-\xFD][\x80-\xBF]+/, "\xFF\xFF").length
# Centering routine for UTF-8 text.
402 String.bind_centerer('UTF-8') {|str, n, padding| # center
# Put w/2 paddings on the left and w paddings on the right, then cut the result to n columns with snip.
# NOTE(review): `w` is presumably the number of columns still free (n minus the display width) — confirm.
405 (padding * (w >> 1) + str + padding * w).snip(n)
# Truncates UTF-8 text to exactly `n` display columns, padding with spaces where needed.
# NOTE(review): the byte arithmetic appears to assume every multibyte character is 3 bytes long and
# 2 columns wide; 2-byte or 4-byte characters would be sliced at the wrong offset — confirm.
408 String.bind_snipper('UTF-8') {|str, n| # truncate to the given length
# Column map of the first n columns: each multibyte character becomes two 0xFF placeholders.
409 ws = str[0, n * 2].gsub(/[\xC0-\xFD][\x80-\xBF]+/, "\xFF\xFF")[0, n]
# Number of placeholder bytes; an odd count means a wide character was cut in half at column n.
410 wc = ws.count("\xFF")
# Bytes to keep: n columns plus one extra byte per complete wide character, minus the half-cut one.
# Pad with spaces for a string shorter than n and for the dropped half character.
411 str.slice(0, n + wc / 2 - wc % 2) + ' ' * (n - ws.length + wc % 2)
# Cuts UTF-8 text into pieces of `n` display columns and passes each padded piece to `proc`,
# producing at most `max` pieces.
# NOTE(review): the byte arithmetic appears to assume 3-byte, 2-column multibyte characters — confirm.
414 String.bind_each_snipper('UTF-8') {|str, n, max, proc| # truncate to the given length and pass the lines on in order
# p is the byte offset of the current piece.
415 p = 0; while(p <= str.length) # '<': lines consisting only of a newline character are omitted
# Stop once the line budget is used up.
416 break if((max -= 1) < 0)
# Column map of the next n columns: each multibyte character becomes two 0xFF placeholders.
417 ws = str[p, n * 2].gsub(/[\xC0-\xFD][\x80-\xBF]+/, "\xFF\xFF")[0, n]
# An odd placeholder count means a wide character was cut in half at the column limit.
418 wc = ws.count("\xFF")
# nn is the number of bytes taken for this piece; the piece is padded with spaces to n columns.
# NOTE(review): presumably p is advanced by nn afterwards — confirm.
419 proc.call(str.slice(p, nn = n + wc / 2 - wc % 2) + ' ' * (n - ws.length + wc % 2))
# RFC 2231 parameter encoder for UTF-8 values. Each parameter line is passed to `proc`:
# a single "attr=value" line when everything fits, otherwise numbered continuations "attr*0=...;", "attr*1=...;".
# Segments containing multibyte characters are percent-encoded and marked with a trailing '*'.
424 String.bind_rfc2231_encoder('UTF-8') {|str, attr, n, proc| # encode into RFC 2231 extended notation and return it (for attachment file names)
# `multi` tells whether the current segment needs the extended (percent-encoded) form.
# `head` is the charset/language prefix, used for the first segment only.
# NOTE(review): the prefix uses the token "utf8"; the registered charset name is "UTF-8" — confirm receivers accept it.
425 multi = false; head = ''; str =~ /[\xC0-\xFD]/ and multi = true and head = "utf8''"
# w = room for the value on one line: n minus the attribute name, the fixed decoration and the prefix.
426 w = n - attr.size - "\t*n*=;".size - head.size
# nth = index of the last emitted segment (-1: none yet); nl = plain text and el = percent-encoded
# text of the current segment. gsub with /./u is used only to iterate over the characters.
427 nth = -1; nl = ''; el = ''; str.gsub(/./u) {|nc|
428 multi = true if(!multi and nc =~ /[\xC0-\xFD]/)
# Remember the segment as it was (lnl/lel), append the character to both forms, and check whether
# the encoded form has grown beyond w.
429 if(lnl = nl and lel = el and nl += nc and (el += (ec = nc.ext_encode)).size > w)
# Overflow: emit the segment as it was before this character was added.
430 proc.call("\t%s*%d%s=%s%s;" % [attr, nth += 1, multi ? '*' : '', head, multi ? lel : lnl.value_encode])
# Later segments start as plain and carry no charset prefix; recompute the width accordingly.
# NOTE(review): presumably nl and el are then restarted from the current character — confirm.
431 multi = false; head = ''
432 w = n - attr.size - "\t*n*=;".size - head.size
# Emit the last segment; it gets no index when nothing was split off and no trailing ';'.
436 proc.call("\t%s%s%s=%s%s" % [attr, nth == -1 ? '' : "*#{(nth += 1).to_s}", multi ? '*': '', head, multi ? el : nl.value_encode])
# Legacy parameter encoder: passes `proc` one line of the form attr="=?ISO-2022-JP?B?...?=".
# `str` is treated as UTF-8, converted to ISO-2022-JP and encoded with the 'B' encoder.
# The width limit `n` is not used.
439 String.bind_rfc2231_encoder('<LEGACY><ISO-2022-JP><B>') {|str, attr, n, proc| # violates the RFC, but encodes with B encoding and returns it (for attachment file names)
440 proc.call("\t%s=\"=?%s?%s?%s?=\"" % [attr, 'ISO-2022-JP', 'B', str.encode_cs('ISO-2022-JP', 'UTF-8').encode_ec('B').chomp])
443 # 漢字: 0xA1-0xFE 0xA1-0xFE
444 # 半角カナ: 0x8E 0xA1-0xDF
445 # 補助漢字: 0x8F 0xA1-0xFE 0xA1-0xFE
# Display width for EUC-JP text: each two-byte pair in 0xA1-0xFE is replaced by two placeholder
# bytes, so the resulting length equals the original byte length.
# NOTE(review): this relies on `length` counting bytes; sequences starting with 0x8E or 0x8F are not
# matched by the pattern — confirm they are handled as intended.
447 String.bind_wsizer('EUC-JP') {|str| # get the display width
448 str.gsub(/[\xA1-\xFE][\xA1-\xFE]/, "\xFF\xFF").length
# Centering routine for EUC-JP text.
451 String.bind_centerer('EUC-JP') {|str, n, padding| # center
# Put w/2 paddings on the left and w paddings on the right, then cut the result with snip(n).
# NOTE(review): `w` is presumably the number of columns still free (n minus the display width) — confirm.
# NOTE(review): snip(n) uses @@current_snip_charset, not necessarily 'EUC-JP' — confirm.
454 (padding * (w >> 1) + str + padding * w).snip(n)
# Truncates EUC-JP text to exactly `n` display columns, padding with spaces where needed.
# A two-byte character takes two columns, so byte count and column count are the same here.
457 String.bind_snipper('EUC-JP') {|str, n| #### truncate to the given length
# Column map of the first n columns: each two-byte pair becomes two 0xFF placeholders.
458 ws = str[0, n * 2].gsub(/[\xA1-\xFE][\xA1-\xFE]/, "\xFF\xFF")[0, n]
# An odd placeholder count means a two-byte character was cut in half at column n.
459 wc = ws.count("\xFF")
# Drop the half-cut character and pad with spaces up to n columns.
460 str.slice(0, n - wc % 2) + ' ' * (n - ws.length + wc % 2)
# Cuts EUC-JP text into pieces of `n` display columns and passes each padded piece to `proc`,
# producing at most `max` pieces.
463 String.bind_each_snipper('EUC-JP') {|str, n, max, proc| #### truncate to the given length and pass the lines on in order
# p is the byte offset of the current piece.
464 p = 0; while(p <= str.length) # '<': lines consisting only of a newline character are omitted
# Stop once the line budget is used up.
465 break if((max -= 1) < 0)
# Column map of the next n columns: each two-byte pair becomes two 0xFF placeholders.
466 ws = str[p, n * 2].gsub(/[\xA1-\xFE][\xA1-\xFE]/, "\xFF\xFF")[0, n]
# An odd placeholder count means a two-byte character was cut in half at the column limit.
467 wc = ws.count("\xFF")
# nn is the number of bytes taken for this piece; the piece is padded with spaces to n columns.
# NOTE(review): presumably p is advanced by nn afterwards — confirm.
468 proc.call(str.slice(p, nn = n - wc % 2) + ' ' * (n - ws.length + wc % 2))