1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
9 // cjk returns an implicit collation element for a CJK rune.
10 func cjk(r rune) []rawCE {
11 // A CJK character C is represented in the DUCET as
12 // [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
13 // Where AAAA is the most significant 15 bits plus a base value.
14 // Any base value will work for the test, so we pick the common value of FB40.
17 {w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}},
18 {w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}},
22 func pCE(p int) []rawCE {
23 return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
26 func pqCE(p, q int) []rawCE {
27 return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
30 func ptCE(p, t int) []rawCE {
31 return mkCE([]int{p, defaultSecondary, t, 0}, 0)
34 func ptcCE(p, t int, ccc uint8) []rawCE {
35 return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
38 func sCE(s int) []rawCE {
39 return mkCE([]int{0, s, defaultTertiary, 0}, 0)
42 func stCE(s, t int) []rawCE {
43 return mkCE([]int{0, s, t, 0}, 0)
46 func scCE(s int, ccc uint8) []rawCE {
47 return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
50 func mkCE(w []int, ccc uint8) []rawCE {
51 return []rawCE{rawCE{w, ccc}}
54 // ducetElem is used to define test data that is used to generate a table.
// The tests in this file read a string key from its str field and a list of
// raw collation elements from its ces field.
55 type ducetElem struct {
// newBuilder passes every element of ducet to b.Add and checks the error
// that Add returns.
// NOTE(review): the construction of b and the return statement are not
// visible here; presumably b is a fresh *Builder that is returned. Confirm.
60 func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
62 for _, e := range ducet {
// Only the weight slice (w) of each raw collation element is collected.
64 for _, ce := range e.ces {
65 ces = append(ces, ce.w)
// The key is handed to Add as runes; the third argument is nil.
67 if err := b.Add([]rune(e.str), ces, nil); err != nil {
// convertTest is the element type of convLargeTests; TestConvertLarge reads
// its in and out fields.
76 type convertTest struct {
81 var convLargeTests = []convertTest{
82 {pCE(0xFB39), pCE(0xFB39), false},
83 {cjk(0x2F9B2), pqCE(0x3F9B2, 0x2F9B2), false},
84 {pCE(0xFB40), pCE(0), true},
85 {append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
86 {pCE(0xFFFE), pCE(illegalOffset), false},
87 {pCE(0xFFFF), pCE(illegalOffset + 1), false},
// TestConvertLarge runs convertLargeWeights on the input of every
// convLargeTests case and checks the returned error and elements.
90 func TestConvertLarge(t *testing.T) {
91 for i, tt := range convLargeTests {
// Rebuild the input as e.elems, converting each raw element with makeRawCE.
93 for _, ce := range tt.in {
94 e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc))
96 elems, err := convertLargeWeights(e.elems)
// NOTE(review): the conditions guarding the next report are not visible
// here; presumably it fires when the case expects an error and err is nil.
99 t.Errorf("%d: expected error; none found", i)
102 } else if err != nil {
103 t.Errorf("%d: unexpected error: %v", i, err)
// The converted elements must equal the expected output of the case.
105 if !equalCEArrays(elems, tt.out) {
106 t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out)
111 // Collation element table for simplify tests.
// TestSimplify builds from the whole table; TestGenColElems builds from
// simplifyTest[:5].
112 var simplifyTest = []ducetElem{
113 {"\u0300", sCE(30)}, // grave
114 {"\u030C", sCE(40)}, // caron
120 {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])},
122 {"\u00C0", append(ptCE(100, 8), sCE(30)...)}, // A with grave, can be removed
123 {"\u00C8", append(ptCE(105, 8), sCE(30)...)}, // E with grave
124 {"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])}, // eliminated by NFD
125 {"\u00C8\u0302", ptCE(106, 8)}, // block previous from simplifying
126 {"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD
127 // no removal: tertiary value of third element is not maxTertiary
128 {"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])},
131 var genColTests = []ducetElem{
132 {"\uFA70", pqCE(0x1FA70, 0xFA70)},
133 {"A\u0300", append(ptCE(100, 8), sCE(30)...)},
134 {"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])},
135 {"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])},
138 func TestGenColElems(t *testing.T) {
139 b := newBuilder(t, simplifyTest[:5])
141 for i, tt := range genColTests {
142 res := b.root.genColElems(tt.str)
143 if !equalCEArrays(tt.ces, res) {
144 t.Errorf("%d: result %X; want %X", i, res, tt.ces)
// strArray is a slice of strings; it carries the contains method used by
// the simplify tests.
type strArray []string
// contains walks the elements of sa and returns a bool.
// NOTE(review): the comparison and return statements are not visible here;
// presumably it reports whether s equals one of the elements. Confirm.
151 func (sa strArray) contains(s string) bool {
152 for _, e := range sa {
160 var simplifyRemoved = strArray{"\u00C0", "\uFB1F"}
161 var simplifyMarked = strArray{"\u01C5"}
// TestSimplify builds from simplifyTest and checks which entries are still
// present and which entries have decompose set.
163 func TestSimplify(t *testing.T) {
164 b := newBuilder(t, simplifyTest)
168 for i, tt := range simplifyTest {
// NOTE(review): the body of this check is not visible here; presumably
// table rows listed in simplifyRemoved are skipped. Confirm.
169 if simplifyRemoved.contains(tt.str) {
// The current entry must match the table row in both string and elements.
173 if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) {
174 t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces)
// Second pass over the entries: decompose must be set exactly for the
// strings listed in simplifyMarked.
179 for e := o.front(); e != nil; e, _ = e.nextIndexed() {
180 gold := simplifyMarked.contains(e.str)
184 if gold != e.decompose {
185 t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold)
// NOTE(review): k is updated on lines not visible here; presumably it counts
// the marked entries that were encountered. Confirm.
189 if k != len(simplifyMarked) {
190 t.Errorf(" an entry that should be marked as decompose was deleted")
194 var expandTest = []ducetElem{
195 {"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
196 {"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
197 {"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
198 {"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
199 {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
200 {"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
// TestExpand runs processExpansions on a builder made from expandTest and
// checks the contents and the total length of b.t.ExpandElem.
203 func TestExpand(t *testing.T) {
// Expected final length of b.t.ExpandElem.
// NOTE(review): totalExpansions is defined on a line not visible here, and
// the meaning of the individual terms is presumed (per-expansion sizes).
206 totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
208 b := newBuilder(t, expandTest)
210 b.processExpansions(o)
213 for _, tt := range expandTest {
// The first value at the entry's expansion index is compared with the
// number of expected elements.
214 exp := b.t.ExpandElem[e.expansionIndex:]
215 if int(exp[0]) != len(tt.ces) {
216 t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces))
// Each expected element is encoded with makeCE and compared with exp[j].
219 for j, w := range tt.ces {
220 if ce, _ := makeCE(w); exp[j] != ce {
221 t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce)
// Advance to the next indexed entry.
224 e, _ = e.nextIndexed()
227 if len(b.t.ExpandElem) != totalElements {
228 t.Errorf("len(expandElem)==%d; want %d", len(b.t.ExpandElem), totalElements)
// contractTest is the element table from which TestContract builds and over
// which it iterates.
232 var contractTest = []ducetElem{
242 // shares suffixes with a*
248 // starter to be ignored
// TestContract runs processContractions on a builder made from contractTest
// and checks the contraction handles, the lookups, and b.t.ContractElem.
252 func TestContract(t *testing.T) {
// Expected final length of b.t.ContractElem.
254 totalElements = 5 + 5 + 4
256 b := newBuilder(t, contractTest)
258 b.processContractions(o)
// indexMap collects the distinct contractionHandle.index values; handleMap
// maps the first rune of an entry to that entry.
260 indexMap := make(map[int]bool)
261 handleMap := make(map[rune]*entry)
262 for e := o.front(); e != nil; e, _ = e.nextIndexed() {
// Only entries whose contraction handle has n > 0 are recorded.
263 if e.contractionHandle.n > 0 {
264 handleMap[e.runes[0]] = e
265 indexMap[e.contractionHandle.index] = true
// Exactly two distinct handle indexes are expected.
269 if len(indexMap) != 2 {
270 t.Errorf("number of tries is %d; want %d", len(indexMap), 2)
272 for _, tt := range contractTest {
// Find the recorded entry that starts with the same rune as tt.str.
273 e, ok := handleMap[[]rune(tt.str)[0]]
// lookup yields an offset and the number of bytes consumed from str.
// NOTE(review): str is defined on a line not visible here; presumably it is
// the part of tt.str after its first rune. Confirm.
278 offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str))
280 t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str))
// The element at offset, relative to the entry's contractionIndex, must equal
// the makeCE encoding of the first expected element.
282 ce := b.t.ContractElem[offset+e.contractionIndex]
283 if want, _ := makeCE(tt.ces[0]); want != ce {
284 t.Errorf("%s: element %X; want %X", tt.str, ce, want)
// NOTE(review): the message below says expandElem although the value checked
// is len(b.t.ContractElem); this looks like a copy-paste slip and the text
// should probably read contractElem.
287 if len(b.t.ContractElem) != totalElements {
288 t.Errorf("len(expandElem)==%d; want %d", len(b.t.ContractElem), totalElements)