--- /dev/null
+# Library for working with Unicode
+# Source encoding: UTF-8
+
+# Textual code point notation: "U+" followed by one or more hex digits.
+# Anchored with \A and \z (whole string) rather than ^ and $ (per line), so a
+# multi-line string such as "U+4E00\nfoo" is not accepted as valid.
+UCS_FORMAT = /\AU\+[0-9a-fA-F]+\z/
+
+# Each range below is an inclusive [first, last] pair of integer code points.
+CJK_UNIFIED = [0x4E00, 0x9FFF].freeze
+# NOTE(review): the Unicode block "CJK Unified Ideographs Extension A" ends at
+# U+4DBF (the commented-out value); U+4DC0..U+4DFF is the Yijing Hexagram
+# Symbols block. The wider end makes this range contiguous with CJK_UNIFIED --
+# confirm that this is intended.
+#CJK_UNIFIED_EXT_A = [0x3400, 0x4DBF]
+CJK_UNIFIED_EXT_A = [0x3400, 0x4DFF].freeze
+CJK_UNIFIED_EXT_B = [0x20000, 0x2A6DF].freeze
+CJK_UNIFIED_EXT_C = [0x2A700, 0x2B73F].freeze
+# CJK_UNIFIED_EXT_D = [0x?, 0x?]
+
+# Requirement: the ranges must be listed in ascending code point order.
+CODESPACE = [
+  CJK_UNIFIED_EXT_A,
+  CJK_UNIFIED,
+  CJK_UNIFIED_EXT_B,
+  CJK_UNIFIED_EXT_C,
+].freeze
+
+# Helpers for converting between characters, integer code points and "U+XXXX"
+# notation, and for stepping through the code point ranges in CODESPACE.
+# None of the methods read or write instance state.
+class UnicodeUtility
+  # Largest code point in the Unicode codespace (0..10FFFF hex).
+  MAX_CODEPOINT = 0x10FFFF
+
+  # Returns the code point that follows `codepoint` within CODESPACE.
+  #
+  # CODESPACE is scanned in order, so it must be sorted ascending:
+  # * a value below a range's start yields that range's first code point
+  #   (this also covers values in a gap and the last value of a range);
+  # * a value inside a range, but not its last, yields codepoint + 1;
+  # * a value at or beyond the end of the final range yields nil.
+  def nextCodepoint(codepoint)
+    CODESPACE.each do |scode, ecode|
+      return scode if codepoint < scode
+      return codepoint + 1 if codepoint < ecode
+      # codepoint >= ecode: the successor, if any, is in a later range.
+    end
+    nil
+  end
+
+  # Converts "U+XXXX" notation to a one-character UTF-8 string.
+  # Returns nil when `ucs` does not match UCS_FORMAT or names a value above
+  # MAX_CODEPOINT (very large values would otherwise raise RangeError in pack).
+  def ucs2char(ucs)
+    return nil unless ucs =~ UCS_FORMAT
+    codepoint = ucs[2..-1].hex
+    return nil if codepoint > MAX_CODEPOINT
+    [codepoint].pack("U*")
+  end
+
+  # Converts the first character of `char` to "U+XXXX" notation, zero-padded
+  # to at least four hex digits.
+  # An empty string yields "U+0000": unpack returns no code point for it, and
+  # passing nil to format would raise TypeError.
+  # TODO: Check char in codespace
+  # Unicode codespace is a range of integers from 0 to 10FFFF_hex
+  def char2ucs(char)
+    codepoint = char.unpack("U").first || 0
+    format("U+%04X", codepoint)
+  end
+
+  # Formats an integer code point as "U+XXXX" (at least four hex digits).
+  def codepoint2ucs(codepoint)
+    format("U+%04X", codepoint)
+  end
+
+  # Converts "U+XXXX" notation to an integer code point.
+  # The input is not validated against UCS_FORMAT: the first two characters
+  # are dropped and the remainder is parsed with String#hex.
+  def ucs2codepoint(ucs)
+    ucs[2..-1].hex
+  end
+end
--- /dev/null
+require File.join(File.dirname(__FILE__), '..', 'lib', 'unicode')
+require 'test/unit'
+# "U+XXXX" notation for code point 0x4E00, shared by the assertions below.
+ONE_UCS = "U+4E00"
+
+# Unit tests for UnicodeUtility: stepping through the CJK ranges and the
+# conversions between characters, integer code points and "U+XXXX" notation.
+class UnicodeUtilityTest < Test::Unit::TestCase
+  def setup
+    @unicode = UnicodeUtility.new
+  end
+
+  def test_next_codepoint
+    first_a, last_a = CJK_UNIFIED_EXT_A
+    first_cjk, last_cjk = CJK_UNIFIED
+    first_b = CJK_UNIFIED_EXT_B[0]
+    last_c = CJK_UNIFIED_EXT_C[1]
+
+    # Inside a range the successor is simply the next integer.
+    assert_equal first_a + 1, @unicode.nextCodepoint(first_a)
+    assert_equal first_cjk + 1, @unicode.nextCodepoint(first_cjk)
+    # The last code point of a range jumps to the start of the next range.
+    assert_equal first_cjk, @unicode.nextCodepoint(last_a)
+    assert_equal first_b, @unicode.nextCodepoint(last_cjk)
+    # Values before the first range or inside a gap snap to the next range.
+    assert_equal first_a, @unicode.nextCodepoint(first_a - 1)
+    assert_equal first_b, @unicode.nextCodepoint(last_cjk + 1)
+    # Nothing follows the end of the final range.
+    assert_nil @unicode.nextCodepoint(last_c)
+    assert_nil @unicode.nextCodepoint(last_c + 1)
+  end
+
+  def test_ucs2char
+    assert_equal "一", @unicode.ucs2char(ONE_UCS)
+    assert_equal "丁", @unicode.ucs2char("U+4E01")
+    # Input that is not in "U+XXXX" notation yields nil.
+    assert_nil @unicode.ucs2char("4E01")
+    assert_nil @unicode.ucs2char("")
+  end
+
+  def test_char2ucs
+    assert_equal ONE_UCS, @unicode.char2ucs("一")
+    assert_equal "U+4E01", @unicode.char2ucs("丁")
+    assert_equal "U+20000", @unicode.char2ucs("𠀀")
+    # TODO: codespace(a range of integers from 0 to 10FFFF_hex)
+    assert_equal "U+0000", @unicode.char2ucs("")
+  end
+
+  def test_codepoint2ucs
+    assert_equal ONE_UCS, @unicode.codepoint2ucs(0x4E00)
+  end
+
+  def test_ucs2codepoint2
+    assert_equal 0x4E00, @unicode.ucs2codepoint(ONE_UCS)
+  end
+end