// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package build import "testing" // cjk returns an implicit collation element for a CJK rune. func cjk(r rune) []rawCE { // A CJK character C is represented in the DUCET as // [.AAAA.0020.0002.C][.BBBB.0000.0000.C] // Where AAAA is the most significant 15 bits plus a base value. // Any base value will work for the test, so we pick the common value of FB40. const base = 0xFB40 return []rawCE{ {w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}}, {w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}}, } } func pCE(p int) []rawCE { return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0) } func pqCE(p, q int) []rawCE { return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0) } func ptCE(p, t int) []rawCE { return mkCE([]int{p, defaultSecondary, t, 0}, 0) } func ptcCE(p, t int, ccc uint8) []rawCE { return mkCE([]int{p, defaultSecondary, t, 0}, ccc) } func sCE(s int) []rawCE { return mkCE([]int{0, s, defaultTertiary, 0}, 0) } func stCE(s, t int) []rawCE { return mkCE([]int{0, s, t, 0}, 0) } func scCE(s int, ccc uint8) []rawCE { return mkCE([]int{0, s, defaultTertiary, 0}, ccc) } func mkCE(w []int, ccc uint8) []rawCE { return []rawCE{rawCE{w, ccc}} } // ducetElem is used to define test data that is used to generate a table. type ducetElem struct { str string ces []rawCE } func newBuilder(t *testing.T, ducet []ducetElem) *Builder { b := NewBuilder() for _, e := range ducet { ces := [][]int{} for _, ce := range e.ces { ces = append(ces, ce.w) } if err := b.Add([]rune(e.str), ces, nil); err != nil { t.Errorf(err.Error()) } } b.t = &table{} b.root.sort() return b } type convertTest struct { in, out []rawCE err bool } var convLargeTests = []convertTest{ {pCE(0xFB39), pCE(0xFB39), false}, {cjk(0x2F9B2), pqCE(0x3F9B2, 0x2F9B2), false}, {pCE(0xFB40), pCE(0), true}, {append(pCE(0xFB40), pCE(0)[0]), pCE(0), true}, {pCE(0xFFFE), pCE(illegalOffset), false}, {pCE(0xFFFF), pCE(illegalOffset + 1), false}, } func TestConvertLarge(t *testing.T) { for i, tt := range convLargeTests { e := new(entry) for _, ce := range tt.in { e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc)) } elems, err := convertLargeWeights(e.elems) if tt.err { if err == nil { t.Errorf("%d: expected error; none found", i) } continue } else if err != nil { t.Errorf("%d: unexpected error: %v", i, err) } if !equalCEArrays(elems, tt.out) { t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out) } } } // Collation element table for simplify tests. var simplifyTest = []ducetElem{ {"\u0300", sCE(30)}, // grave {"\u030C", sCE(40)}, // caron {"A", ptCE(100, 8)}, {"D", ptCE(104, 8)}, {"E", ptCE(105, 8)}, {"I", ptCE(110, 8)}, {"z", ptCE(130, 8)}, {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])}, {"\u05B7", sCE(80)}, {"\u00C0", append(ptCE(100, 8), sCE(30)...)}, // A with grave, can be removed {"\u00C8", append(ptCE(105, 8), sCE(30)...)}, // E with grave {"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])}, // eliminated by NFD {"\u00C8\u0302", ptCE(106, 8)}, // block previous from simplifying {"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD // no removal: tertiary value of third element is not maxTertiary {"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])}, } var genColTests = []ducetElem{ {"\uFA70", pqCE(0x1FA70, 0xFA70)}, {"A\u0300", append(ptCE(100, 8), sCE(30)...)}, {"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])}, {"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])}, } func TestGenColElems(t *testing.T) { b := newBuilder(t, simplifyTest[:5]) for i, tt := range genColTests { res := b.root.genColElems(tt.str) if !equalCEArrays(tt.ces, res) { t.Errorf("%d: result %X; want %X", i, res, tt.ces) } } } type strArray []string func (sa strArray) contains(s string) bool { for _, e := range sa { if e == s { return true } } return false } var simplifyRemoved = strArray{"\u00C0", "\uFB1F"} var simplifyMarked = strArray{"\u01C5"} func TestSimplify(t *testing.T) { b := newBuilder(t, simplifyTest) o := &b.root simplify(o) for i, tt := range simplifyTest { if simplifyRemoved.contains(tt.str) { continue } e := o.find(tt.str) if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) { t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces) break } } var i, k int for e := o.front(); e != nil; e, _ = e.nextIndexed() { gold := simplifyMarked.contains(e.str) if gold { k++ } if gold != e.decompose { t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold) } i++ } if k != len(simplifyMarked) { t.Errorf(" an entry that should be marked as decompose was deleted") } } var expandTest = []ducetElem{ {"\u0300", append(scCE(29, 230), scCE(30, 230)...)}, {"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)}, {"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)}, {"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])}, {"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])}, } func TestExpand(t *testing.T) { const ( totalExpansions = 5 totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions ) b := newBuilder(t, expandTest) o := &b.root b.processExpansions(o) e := o.front() for _, tt := range expandTest { exp := b.t.ExpandElem[e.expansionIndex:] if int(exp[0]) != len(tt.ces) { t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces)) } exp = exp[1:] for j, w := range tt.ces { if ce, _ := makeCE(w); exp[j] != ce { t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce) } } e, _ = e.nextIndexed() } // Verify uniquing. if len(b.t.ExpandElem) != totalElements { t.Errorf("len(expandElem)==%d; want %d", len(b.t.ExpandElem), totalElements) } } var contractTest = []ducetElem{ {"abc", pCE(102)}, {"abd", pCE(103)}, {"a", pCE(100)}, {"ab", pCE(101)}, {"ac", pCE(104)}, {"bcd", pCE(202)}, {"b", pCE(200)}, {"bc", pCE(201)}, {"bd", pCE(203)}, // shares suffixes with a* {"Ab", pCE(301)}, {"A", pCE(300)}, {"Ac", pCE(304)}, {"Abc", pCE(302)}, {"Abd", pCE(303)}, // starter to be ignored {"z", pCE(1000)}, } func TestContract(t *testing.T) { const ( totalElements = 5 + 5 + 4 ) b := newBuilder(t, contractTest) o := &b.root b.processContractions(o) indexMap := make(map[int]bool) handleMap := make(map[rune]*entry) for e := o.front(); e != nil; e, _ = e.nextIndexed() { if e.contractionHandle.n > 0 { handleMap[e.runes[0]] = e indexMap[e.contractionHandle.index] = true } } // Verify uniquing. if len(indexMap) != 2 { t.Errorf("number of tries is %d; want %d", len(indexMap), 2) } for _, tt := range contractTest { e, ok := handleMap[[]rune(tt.str)[0]] if !ok { continue } str := tt.str[1:] offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str)) if len(str) != n { t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str)) } ce := b.t.ContractElem[offset+e.contractionIndex] if want, _ := makeCE(tt.ces[0]); want != ce { t.Errorf("%s: element %X; want %X", tt.str, ce, want) } } if len(b.t.ContractElem) != totalElements { t.Errorf("len(expandElem)==%d; want %d", len(b.t.ContractElem), totalElements) } }