1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
11 "golang.org/x/text/internal/colltab"
12 "golang.org/x/text/language"
// weightsTest is one processWeights test case; the processTests literals
// populate its opt, in and out fields.
15 type weightsTest struct {
29 // ignore returns an initialized boolean array based on the given Level.
30 // A negative value means using the default setting of quaternary.
// Entry i of the result is true exactly when level < colltab.Level(i), so
// only the entries for levels above the requested one are set.
31 func ignore(level colltab.Level) (ignore [colltab.NumLevels]bool) {
33 level = colltab.Quaternary
// The named result shadows the function name inside the body.
35 for i := range ignore {
36 ignore[i] = level < colltab.Level(i)
// makeCE builds a colltab.Elem by passing w[0], w[1], w[2] and uint8(w[3])
// to colltab.MakeElem, so w needs at least four entries.
// NOTE(review): how err is handled is not visible in this excerpt.
41 func makeCE(w []int) colltab.Elem {
42 ce, err := colltab.MakeElem(w[0], w[1], w[2], uint8(w[3]))
// collator builds a *Collator from the test options o: the ignore array is
// derived from o.lev, and backwards, caseLevel and top are copied over.
49 func (o opts) collator() *Collator {
// An unset lev (0) yields level -1, which the ignore helper documents as
// selecting the default of quaternary (assuming lev is a signed integer —
// TODO confirm against the opts declaration).
52 ignore: ignore(colltab.Level(o.lev - 1)),
54 backwards: o.backwards,
55 caseLevel: o.caseLevel,
56 variableTop: uint32(o.top),
// wpq returns W(p, defaults.Secondary, defaults.Tertiary, q): weights with
// the given first (primary) and fourth (quaternary) components and the
// default secondary and tertiary components.
66 func wpq(p, q int) Weights {
67 return W(p, defaults.Secondary, defaults.Tertiary, q)
// wsq returns W(0, s, defaults.Tertiary, q): weights with a zero first
// (primary) component, second component s, the default tertiary component
// and fourth component q.
70 func wsq(s, q int) Weights {
71 return W(0, s, defaults.Tertiary, q)
// wq builds Weights from q alone.
// NOTE(review): the body is not visible in this excerpt; presumably only the
// quaternary weight is set — confirm.
74 func wq(q int) Weights {
// zero is W(0, 0, 0, 0); the test tables use it as the all-zero element.
78 var zero = W(0, 0, 0, 0)
// processTests lists input elements and the expected result of processWeights
// for each alternate setting used below (altShifted, altShiftTrimmed,
// altBlanked and altNonIgnorable). Judging by the expected outputs, elements
// with a nonzero primary weight below top are the ones handled as variable.
80 var processTests = []weightsTest{
// Shifted
82 { // simple sequence of non-variables
83 opt: opts{alt: altShifted, top: 100},
84 in: ColElems{W(200), W(300), W(400)},
85 out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
87 { // first is a variable
88 opt: opts{alt: altShifted, top: 250},
89 in: ColElems{W(200), W(300), W(400)},
90 out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
92 { // all but first are variable
93 opt: opts{alt: altShifted, top: 999},
94 in: ColElems{W(1000), W(200), W(300), W(400)},
95 out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
97 { // first is a modifier
98 opt: opts{alt: altShifted, top: 999},
99 in: ColElems{W(0, 10), W(1000)},
100 out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
// The ignorable after the variable W(200) is expected to become zero, while
// the one after the non-variable W(300) keeps its weight.
102 { // primary ignorables
103 opt: opts{alt: altShifted, top: 250},
104 in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
105 out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
107 { // secondary ignorables
108 opt: opts{alt: altShifted, top: 250},
109 in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
110 out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
112 { // tertiary ignorables, no change
113 opt: opts{alt: altShifted, top: 250},
114 in: ColElems{W(200), zero, W(300), zero, W(400)},
115 out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
118 // ShiftTrimmed (same as Shifted)
119 { // simple sequence of non-variables
120 opt: opts{alt: altShiftTrimmed, top: 100},
121 in: ColElems{W(200), W(300), W(400)},
122 out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
124 { // first is a variable
125 opt: opts{alt: altShiftTrimmed, top: 250},
126 in: ColElems{W(200), W(300), W(400)},
127 out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
129 { // all but first are variable
130 opt: opts{alt: altShiftTrimmed, top: 999},
131 in: ColElems{W(1000), W(200), W(300), W(400)},
132 out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
134 { // first is a modifier
135 opt: opts{alt: altShiftTrimmed, top: 999},
136 in: ColElems{W(0, 10), W(1000)},
137 out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
139 { // primary ignorables
140 opt: opts{alt: altShiftTrimmed, top: 250},
141 in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
142 out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
144 { // secondary ignorables
145 opt: opts{alt: altShiftTrimmed, top: 250},
146 in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
147 out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
149 { // tertiary ignorables, no change
150 opt: opts{alt: altShiftTrimmed, top: 250},
151 in: ColElems{W(200), zero, W(300), zero, W(400)},
152 out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
// Blanked: variable elements, and ignorables that follow them, are expected
// to become zero; all other elements are expected unchanged.
156 { // simple sequence of non-variables
157 opt: opts{alt: altBlanked, top: 100},
158 in: ColElems{W(200), W(300), W(400)},
159 out: ColElems{W(200), W(300), W(400)},
161 { // first is a variable
162 opt: opts{alt: altBlanked, top: 250},
163 in: ColElems{W(200), W(300), W(400)},
164 out: ColElems{zero, W(300), W(400)},
166 { // all but first are variable
167 opt: opts{alt: altBlanked, top: 999},
168 in: ColElems{W(1000), W(200), W(300), W(400)},
169 out: ColElems{W(1000), zero, zero, zero},
171 { // first is a modifier
172 opt: opts{alt: altBlanked, top: 999},
173 in: ColElems{W(0, 10), W(1000)},
174 out: ColElems{W(0, 10), W(1000)},
176 { // primary ignorables
177 opt: opts{alt: altBlanked, top: 250},
178 in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
179 out: ColElems{zero, zero, W(300), W(0, 15), W(400)},
181 { // secondary ignorables
182 opt: opts{alt: altBlanked, top: 250},
183 in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
184 out: ColElems{zero, zero, W(300), W(0, 0, 15), W(400)},
186 { // tertiary ignorables, no change
187 opt: opts{alt: altBlanked, top: 250},
188 in: ColElems{W(200), zero, W(300), zero, W(400)},
189 out: ColElems{zero, zero, W(300), zero, W(400)},
192 // Non-ignorable: input is always equal to output.
193 { // all but first are variable
194 opt: opts{alt: altNonIgnorable, top: 999},
195 in: ColElems{W(1000), W(200), W(300), W(400)},
196 out: ColElems{W(1000), W(200), W(300), W(400)},
198 { // primary ignorables
199 opt: opts{alt: altNonIgnorable, top: 250},
200 in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
201 out: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
203 { // secondary ignorables
204 opt: opts{alt: altNonIgnorable, top: 250},
205 in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
206 out: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
208 { // tertiary ignorables, no change
209 opt: opts{alt: altNonIgnorable, top: 250},
210 in: ColElems{W(200), zero, W(300), zero, W(400)},
211 out: ColElems{W(200), zero, W(300), zero, W(400)},
// TestProcessWeights applies processWeights to every processTests input and
// reports resulting elements together with the expected out[j].
215 func TestProcessWeights(t *testing.T) {
216 for i, tt := range processTests {
217 in := convertFromWeights(tt.in)
218 out := convertFromWeights(tt.out)
// The call's result is not captured; the loop below inspects in afterwards,
// while out stays as the reference.
219 processWeights(tt.opt.alt, uint32(tt.opt.top), in)
220 for j, w := range in {
// NOTE(review): the comparison guarding this report is not visible in this
// excerpt.
222 t.Errorf("%d: Weights %d was %v; want %v", i, j, w, out[j])
// keyFromElemTest is one keyFromElems case: options, input elements and the
// expected key bytes (read as tt.opt, tt.in and tt.out in TestKeyFromElems).
228 type keyFromElemTest struct {
// defS and defT are the default secondary and tertiary weights converted to
// bytes; the expected keys in keyFromElemTests are spelled with them.
234 var defS = byte(defaults.Secondary)
235 var defT = byte(defaults.Tertiary)
// sep appears between the per-level sections of the expected keys.
237 const sep = 0 // separator byte
// keyFromElemTests lists expected sort-key bytes for keyFromElems. Each
// expected key is written level by level, as the trailing comments indicate
// (primary, secondary, tertiary, quaternary).
239 var keyFromElemTests = []keyFromElemTest{
240 { // simple primary and secondary weights.
241 opts{alt: altShifted},
242 ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
243 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
244 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
245 sep, sep, defT, defT, defT, defT, // tertiary
246 sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
249 { // same as first, but with zero elements that need to be removed
250 opts{alt: altShifted},
251 ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
252 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
253 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
254 sep, sep, defT, defT, defT, defT, // tertiary
255 sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
// Primaries 0x8000 and 0x12345 are expected as three bytes each
// (0x80 0x80 0x00 and 0x81 0x23 0x45) instead of two.
258 { // same as first, with large primary values
259 opts{alt: altShifted},
260 ColElems{W(0x200), W(0x8000), W(0, 0x30), W(0x12345)},
261 []byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary
262 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
263 sep, sep, defT, defT, defT, defT, // tertiary
264 sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
267 { // same as first, but with the secondary level backwards
268 opts{alt: altShifted, backwards: true},
269 ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
270 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
271 sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary
272 sep, sep, defT, defT, defT, defT, // tertiary
273 sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
276 { // same as first, ignoring quaternary level
277 opts{alt: altShifted, lev: 3},
278 ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
279 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
280 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
281 sep, sep, defT, defT, defT, defT, // tertiary
284 { // same as first, ignoring tertiary level
285 opts{alt: altShifted, lev: 2},
286 ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
287 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
288 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
291 { // same as first, ignoring secondary level
292 opts{alt: altShifted, lev: 1},
293 ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
294 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00},
296 { // simple primary and secondary weights, shift-trimmed with top set to 0x250.
297 opts{alt: altShiftTrimmed, top: 0x250},
298 ColElems{W(0x300), W(0x200), W(0x7FFF), W(0, 0x30), W(0x800)},
299 []byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary
300 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
301 sep, sep, defT, defT, defT, defT, // tertiary
302 sep, 0xFF, 0x2, 0, // quaternary
305 { // as first, primary with case level enabled
306 opts{alt: altShifted, lev: 1, caseLevel: true},
307 ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
308 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
309 sep, sep, // secondary
310 sep, sep, defT, defT, defT, defT, // tertiary
// TestKeyFromElems runs processWeights on each keyFromElemTests input, builds
// a key with keyFromElems on a collator made from the case's options, and
// reports length and per-byte mismatches against tt.out.
315 func TestKeyFromElems(t *testing.T) {
317 for i, tt := range keyFromElemTests {
319 in := convertFromWeights(tt.in)
320 processWeights(tt.opt.alt, uint32(tt.opt.top), in)
321 tt.opt.collator().keyFromElems(&buf, in)
323 if len(res) != len(tt.out) {
324 t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(res), len(tt.out), res, tt.out)
// Only the first n bytes of res are walked.
// NOTE(review): the definitions of buf, res and n are not visible in this
// excerpt.
330 for j, c := range res[:n] {
332 t.Errorf("%d: byte %d was %X; want %X", i, j, c, tt.out[j])
// TestGetColElems checks getColElems against the checks of every
// appendNextTests table, plus one long input per table built by repeating
// those checks until it is at least 3000 bytes long.
338 func TestGetColElems(t *testing.T) {
339 for i, tt := range appendNextTests {
340 c, err := makeTable(tt.in)
342 // error is reported in TestAppendNext
345 // Create one large test per table
346 str := make([]byte, 0, 4000)
348 for len(str) < 3000 {
349 for _, chk := range tt.chk {
350 str = append(str, chk.in[:chk.n]...)
351 out = append(out, chk.out...)
// The combined input is appended as one extra check after the table's own.
354 for j, chk := range append(tt.chk, check{string(str), len(str), out}) {
355 out := convertFromWeights(chk.out)
356 ce := c.getColElems([]byte(chk.in)[:chk.n])
357 if len(ce) != len(out) {
358 t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ce), len(out))
362 for k, w := range ce {
// Rebuild each element with a zero fourth argument before it is reported
// against out[k]; the error from MakeElem is discarded.
363 w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
365 t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
// keyTest pairs an input string with its expected sort-key bytes (read as
// tt.in and tt.out in TestKey).
376 type keyTest struct {
// keyTests lists the inputs and expected keys checked by TestKey.
381 var keyTests = []keyTest{
383 []byte{0, 100, 0, 200, 1, 44, 0, 0, 0, 32, 0, 32, 0, 32, 0, 0, 2, 2, 2, 0, 255, 255, 255},
386 []byte{0, 102, 0, 0, 0, 32, 0, 0, 2, 0, 255},
389 []byte{0, 100, 0, 100, 0, 100, 0, 100, 0, 100, 0, 0,
390 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 0,
392 255, 255, 255, 255, 255,
395 // Issue 16391: incomplete rune at end of UTF-8 sequence.
396 {"\xc2", []byte{133, 255, 253, 0, 0, 0, 32, 0, 0, 2, 0, 255}},
397 {"\xc2a", []byte{133, 255, 253, 0, 100, 0, 0, 0, 32, 0, 32, 0, 0, 2, 2, 0, 255, 255}},
// TestKey generates keys for all keyTests inputs with both Key and
// KeyFromString, using a table built from appendNextTests[4].in with
// alternate set to altShifted and ignore set from colltab.Quaternary, and then
// compares them with the expected bytes.
400 func TestKey(t *testing.T) {
// NOTE(review): the error returned by makeTable is discarded here.
401 c, _ := makeTable(appendNextTests[4].in)
402 c.alternate = altShifted
403 c.ignore = ignore(colltab.Quaternary)
// Both calls share the same buf; all keys are collected before any is checked.
407 for _, tt := range keyTests {
408 keys1 = append(keys1, c.Key(&buf, []byte(tt.in)))
409 keys2 = append(keys2, c.KeyFromString(&buf, tt.in))
411 // Separate generation from testing to ensure buffers are not overwritten.
412 for i, tt := range keyTests {
413 if !bytes.Equal(keys1[i], tt.out) {
414 t.Errorf("%d: Key(%q) = %d; want %d", i, tt.in, keys1[i], tt.out)
416 if !bytes.Equal(keys2[i], tt.out) {
417 t.Errorf("%d: KeyFromString(%q) = %d; want %d", i, tt.in, keys2[i], tt.out)
// compareTest holds two strings to compare (read as tt.a and tt.b in
// TestCompare) and the expected result.
422 type compareTest struct {
424 res int // comparison result
// compareTests lists string pairs and the result expected from both Compare
// and CompareString in TestCompare.
427 var compareTests = []compareTest{
429 {"a\u0301b", "ab", 1},
430 {"a", "a\u0301", -1},
431 {"ab", "a\u0301b", -1},
432 {"bc", "a\u0301c", 1},
434 {"a\u0301", "a\u0301", 0},
436 // Only clip prefixes of whole runes.
437 {"\u302E", "\u302F", 1},
438 // Don't clip prefixes when last rune of prefix may be part of contraction.
439 {"a\u035E", "a\u0301\u035F", -1},
440 {"a\u0301\u035Fb", "a\u0301\u035F", -1},
// TestCompare checks that Compare (on byte slices) and CompareString both
// return the expected result for every compareTests pair, using a table built
// from appendNextTests[4].in.
443 func TestCompare(t *testing.T) {
// NOTE(review): the error returned by makeTable is discarded here.
444 c, _ := makeTable(appendNextTests[4].in)
445 for i, tt := range compareTests {
446 if res := c.Compare([]byte(tt.a), []byte(tt.b)); res != tt.res {
447 t.Errorf("%d: Compare(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
449 if res := c.CompareString(tt.a, tt.b); res != tt.res {
450 t.Errorf("%d: CompareString(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
// TestNumeric checks CompareString on a collator created with
// New(language.English, Loose, Numeric) against a table of string pairs.
455 func TestNumeric(t *testing.T) {
456 c := New(language.English, Loose, Numeric)
458 for i, tt := range []struct {
// NOTE(review): the comment on the next case mentions fullwidth digits, but
// the literals appear as ASCII in this excerpt — confirm the source encoding.
464 {"2", "12", -1}, // Fullwidth is sorted as usual.
465 {"₂", "₁₂", 1}, // Subscript is not sorted as numbers.
466 {"②", "①②", 1}, // Circled is not sorted as numbers.
467 { // Imperial Aramaic is not sorted as a number.
469 "\U00010858\U00010859",
// Expected to compare equal even though the digit runs differ in leading zeros.
476 {"A-0001", "A-1", 0},
478 if got := c.CompareString(tt.a, tt.b); got != tt.want {
479 t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want)