OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / collate / table_test.go
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package collate
6
7 import (
8         "testing"
9
10         "golang.org/x/text/collate/build"
11         "golang.org/x/text/internal/colltab"
12         "golang.org/x/text/unicode/norm"
13 )
14
// ColElems is a list of Weights. The tests use it to spell out the collation
// elements a lookup is expected to produce (see check.out).
type ColElems []Weights
16
// input is a single entry for the table under test.
type input struct {
	// str is the string being defined; makeTable passes it to the builder as
	// a rune slice.
	str string
	// ces holds the collation elements for str, one []int per element.
	ces [][]int
}
21
// check is a single lookup expectation evaluated by TestAppendNext.
type check struct {
	// in is the string whose bytes are handed to AppendNext.
	in string
	// n is the number of bytes AppendNext is expected to report as consumed.
	n int
	// out is the list of collation elements AppendNext is expected to return.
	out ColElems
}
27
// tableTest pairs the entries used to build one table with the checks that
// are run against that table.
type tableTest struct {
	// in lists the entries passed to makeTable.
	in []input
	// chk lists the lookups verified against the resulting table.
	chk []check
}
32
// w is a short alias for W, which keeps the test tables below compact.
func w(ce ...int) Weights {
	return W(ce...)
}
36
// defaults is the Weights value returned by w(0); pt reads its Secondary
// field. Presumably this is the default secondary weight W fills in when only
// a primary is given -- confirm against W.
var defaults = w(0)
38
39 func pt(p, t int) []int {
40         return []int{p, defaults.Secondary, t}
41 }
42
43 func makeTable(in []input) (*Collator, error) {
44         b := build.NewBuilder()
45         for _, r := range in {
46                 if e := b.Add([]rune(r.str), r.ces, nil); e != nil {
47                         panic(e)
48                 }
49         }
50         t, err := b.Build()
51         if err != nil {
52                 return nil, err
53         }
54         return NewFromTable(t), nil
55 }
56
// modSeq holds a sequence of modifiers in increasing order of CCC long enough
// to cause a segment overflow if not handled correctly. The last rune in this
// list has a CCC of 214.
var modSeq = []rune{
	0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BB,
	0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0xFB1E, 0x064B, 0x064C, 0x064D, 0x064E,
	0x064F, 0x0650, 0x0651, 0x0652, 0x0670, 0x0711, 0x0C55, 0x0C56, 0x0E38, 0x0E48,
	0x0EB8, 0x0EC8, 0x0F71, 0x0F72, 0x0F74, 0x0321, 0x1DCE,
}
66
67 var mods []input
68 var modW = func() ColElems {
69         ws := ColElems{}
70         for _, r := range modSeq {
71                 rune := norm.NFC.PropertiesString(string(r))
72                 ws = append(ws, w(0, int(rune.CCC())))
73                 mods = append(mods, input{string(r), [][]int{{0, int(rune.CCC())}}})
74         }
75         return ws
76 }()
77
// appendNextTests is the table driving TestAppendNext. Every element lists
// the entries used to build a table (in) followed by the lookups to verify
// against it (chk). Each check gives the input string, the number of bytes
// AppendNext is expected to consume, and the collation elements it is
// expected to return.
var appendNextTests = []tableTest{
	{ // test getWeights
		[]input{
			{"a", [][]int{{100}}},
			{"b", [][]int{{105}}},
			{"c", [][]int{{110}}},
			{"ß", [][]int{{120}}},
		},
		[]check{
			{"a", 1, ColElems{w(100)}},
			{"b", 1, ColElems{w(105)}},
			{"c", 1, ColElems{w(110)}},
			{"d", 1, ColElems{w(0x50064)}},
			{"ab", 1, ColElems{w(100)}},
			{"bc", 1, ColElems{w(105)}},
			{"dd", 1, ColElems{w(0x50064)}},
			{"ß", 2, ColElems{w(120)}},
		},
	},
	{ // test expansion
		[]input{
			{"u", [][]int{{100}}},
			{"U", [][]int{{100}, {0, 25}}},
			{"w", [][]int{{100}, {100}}},
			{"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}},
		},
		[]check{
			{"u", 1, ColElems{w(100)}},
			{"U", 1, ColElems{w(100), w(0, 25)}},
			{"w", 1, ColElems{w(100), w(100)}},
			{"W", 1, ColElems{w(100), w(0, 25), w(100), w(0, 25)}},
		},
	},
	{ // test decompose
		[]input{
			{"D", [][]int{pt(104, 8)}},
			{"z", [][]int{pt(130, 8)}},
			{"\u030C", [][]int{{0, 40}}},                               // Caron
			{"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron
		},
		[]check{
			{"\u01C5", 2, ColElems{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}},
		},
	},
	{ // test basic contraction
		[]input{
			{"a", [][]int{{100}}},
			{"ab", [][]int{{101}}},
			{"aab", [][]int{{101}, {101}}},
			{"abc", [][]int{{102}}},
			{"b", [][]int{{200}}},
			{"c", [][]int{{300}}},
			{"d", [][]int{{400}}},
		},
		[]check{
			{"a", 1, ColElems{w(100)}},
			{"aa", 1, ColElems{w(100)}},
			{"aac", 1, ColElems{w(100)}},
			{"d", 1, ColElems{w(400)}},
			{"ab", 2, ColElems{w(101)}},
			{"abb", 2, ColElems{w(101)}},
			{"aab", 3, ColElems{w(101), w(101)}},
			{"aaba", 3, ColElems{w(101), w(101)}},
			{"abc", 3, ColElems{w(102)}},
			{"abcd", 3, ColElems{w(102)}},
		},
	},
	{ // test discontinuous contraction
		// The entries for the modSeq modifiers (collected in mods while modW
		// is initialized) come first, followed by the entries listed here.
		append(mods, []input{
			// modifiers; secondary weight equals ccc
			{"\u0316", [][]int{{0, 220}}},
			{"\u0317", [][]int{{0, 220}, {0, 220}}},
			{"\u302D", [][]int{{0, 222}}},
			{"\u302E", [][]int{{0, 225}}}, // used as starter
			{"\u302F", [][]int{{0, 224}}}, // used as starter
			{"\u18A9", [][]int{{0, 228}}},
			{"\u0300", [][]int{{0, 230}}},
			{"\u0301", [][]int{{0, 230}}},
			{"\u0315", [][]int{{0, 232}}},
			{"\u031A", [][]int{{0, 232}}},
			{"\u035C", [][]int{{0, 233}}},
			{"\u035F", [][]int{{0, 233}}},
			{"\u035D", [][]int{{0, 234}}},
			{"\u035E", [][]int{{0, 234}}},
			{"\u0345", [][]int{{0, 240}}},

			// starters
			{"a", [][]int{{100}}},
			{"b", [][]int{{200}}},
			{"c", [][]int{{300}}},
			{"\u03B1", [][]int{{900}}},
			{"\x01", [][]int{{0, 0, 0, 0}}},

			// contractions
			{"a\u0300", [][]int{{101}}},
			{"a\u0301", [][]int{{102}}},
			{"a\u035E", [][]int{{110}}},
			{"a\u035Eb\u035E", [][]int{{115}}},
			{"ac\u035Eaca\u035E", [][]int{{116}}},
			{"a\u035Db\u035D", [][]int{{117}}},
			{"a\u0301\u035Db", [][]int{{120}}},
			{"a\u0301\u035F", [][]int{{121}}},
			{"a\u0301\u035Fb", [][]int{{119}}},
			{"\u03B1\u0345", [][]int{{901}, {902}}},
			{"\u302E\u302F", [][]int{{0, 131}, {0, 131}}},
			{"\u302F\u18A9", [][]int{{0, 130}}},
		}...),
		[]check{
			{"a\x01\u0300", 1, ColElems{w(100)}},
			{"ab", 1, ColElems{w(100)}},                              // closing segment
			{"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}},       // closing segment
			{"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}},        // no closing segment
			{"a\u0316\u0300\u035Cb", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
			{"a\u0316\u0300\u035C", 5, ColElems{w(101), w(0, 220)}},  // completes before segment end

			{"a\u0316\u0301b", 5, ColElems{w(102), w(0, 220)}},       // closing segment
			{"a\u0316\u0301", 5, ColElems{w(102), w(0, 220)}},        // no closing segment
			{"a\u0316\u0301\u035Cb", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
			{"a\u0316\u0301\u035C", 5, ColElems{w(102), w(0, 220)}},  // completes before segment end

			// match blocked by modifier with same ccc
			{"a\u0301\u0315\u031A\u035Fb", 3, ColElems{w(102)}},

			// multiple gaps
			{"a\u0301\u035Db", 6, ColElems{w(120)}},
			{"a\u0301\u035F", 5, ColElems{w(121)}},
			{"a\u0301\u035Fb", 6, ColElems{w(119)}},
			{"a\u0316\u0301\u035F", 7, ColElems{w(121), w(0, 220)}},
			{"a\u0301\u0315\u035Fb", 7, ColElems{w(121), w(0, 232)}},
			{"a\u0316\u0301\u0315\u035Db", 5, ColElems{w(102), w(0, 220)}},
			{"a\u0316\u0301\u0315\u035F", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
			{"a\u0316\u0301\u0315\u035Fb", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
			{"a\u0316\u0301\u0315\u035F\u035D", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
			{"a\u0316\u0301\u0315\u035F\u035Db", 9, ColElems{w(121), w(0, 220), w(0, 232)}},

			// handling of segment overflow
			{ // just fits within segment
				"a" + string(modSeq[:30]) + "\u0301",
				3 + len(string(modSeq[:30])),
				append(ColElems{w(102)}, modW[:30]...),
			},
			{"a" + string(modSeq[:31]) + "\u0301", 1, ColElems{w(100)}}, // overflow
			{"a" + string(modSeq) + "\u0301", 1, ColElems{w(100)}},
			{ // just fits within segment with two interstitial runes
				"a" + string(modSeq[:28]) + "\u0301\u0315\u035F",
				7 + len(string(modSeq[:28])),
				append(append(ColElems{w(121)}, modW[:28]...), w(0, 232)),
			},
			{ // second half does not fit within segment
				"a" + string(modSeq[:29]) + "\u0301\u0315\u035F",
				3 + len(string(modSeq[:29])),
				append(ColElems{w(102)}, modW[:29]...),
			},

			// discontinuity can only occur in last normalization segment
			{"a\u035Eb\u035E", 6, ColElems{w(115)}},
			{"a\u0316\u035Eb\u035E", 5, ColElems{w(110), w(0, 220)}},
			{"a\u035Db\u035D", 6, ColElems{w(117)}},
			{"a\u0316\u035Db\u035D", 1, ColElems{w(100)}},
			{"a\u035Eb\u0316\u035E", 8, ColElems{w(115), w(0, 220)}},
			{"a\u035Db\u0316\u035D", 8, ColElems{w(117), w(0, 220)}},
			{"ac\u035Eaca\u035E", 9, ColElems{w(116)}},
			{"a\u0316c\u035Eaca\u035E", 1, ColElems{w(100)}},
			{"ac\u035Eac\u0316a\u035E", 1, ColElems{w(100)}},

			// expanding contraction
			{"\u03B1\u0345", 4, ColElems{w(901), w(902)}},

			// Theoretical possibilities
			// contraction within a gap
			{"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}},
			// expansion within a gap
			{"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}},
			// repeating CCC blocks last modifier
			{"a\u302E\u302F\u0301", 1, ColElems{w(100)}},
			// The trailing combining characters (with lower CCC) should block the first one.
			// TODO: make the following pass.
			// {"a\u035E\u0316\u0316", 1, ColElems{w(100)}},
			{"a\u035F\u035Eb", 5, ColElems{w(110), w(0, 233)}},
			// Last combiner should match after normalization.
			// TODO: make the following pass.
			// {"a\u035D\u0301", 3, ColElems{w(102), w(0, 234)}},
			// The first combiner is blocking the second one as they have the same CCC.
			{"a\u035D\u035Eb", 1, ColElems{w(100)}},
		},
	},
}
265
266 func TestAppendNext(t *testing.T) {
267         for i, tt := range appendNextTests {
268                 c, err := makeTable(tt.in)
269                 if err != nil {
270                         t.Errorf("%d: error creating table: %v", i, err)
271                         continue
272                 }
273                 for j, chk := range tt.chk {
274                         ws, n := c.t.AppendNext(nil, []byte(chk.in))
275                         if n != chk.n {
276                                 t.Errorf("%d:%d: bytes consumed was %d; want %d", i, j, n, chk.n)
277                         }
278                         out := convertFromWeights(chk.out)
279                         if len(ws) != len(out) {
280                                 t.Errorf("%d:%d: len(ws) was %d; want %d (%X vs %X)\n%X", i, j, len(ws), len(out), ws, out, chk.in)
281                                 continue
282                         }
283                         for k, w := range ws {
284                                 w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
285                                 if w != out[k] {
286                                         t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
287                                 }
288                         }
289                 }
290         }
291 }