OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / search / pattern_test.go
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package search
6
7 import (
8         "reflect"
9         "strings"
10         "testing"
11
12         "golang.org/x/text/language"
13 )
14
15 func TestCompile(t *testing.T) {
16         for i, tc := range []struct {
17                 desc    string
18                 pattern string
19                 options []Option
20                 n       int
21         }{{
22                 desc:    "empty",
23                 pattern: "",
24                 n:       0,
25         }, {
26                 desc:    "single",
27                 pattern: "a",
28                 n:       1,
29         }, {
30                 desc:    "keep modifier",
31                 pattern: "a\u0300", // U+0300: COMBINING GRAVE ACCENT
32                 n:       2,
33         }, {
34                 desc:    "remove modifier",
35                 pattern: "a\u0300", // U+0300: COMBINING GRAVE ACCENT
36                 options: []Option{IgnoreDiacritics},
37                 n:       1,
38         }, {
39                 desc:    "single with double collation element",
40                 pattern: "ä",
41                 n:       2,
42         }, {
43                 desc:    "leading variable",
44                 pattern: " a",
45                 n:       2,
46         }, {
47                 desc:    "trailing variable",
48                 pattern: "aa ",
49                 n:       3,
50         }, {
51                 desc:    "leading and trailing variable",
52                 pattern: " äb ",
53                 n:       5,
54         }, {
55                 desc:    "keep interior variable",
56                 pattern: " ä b ",
57                 n:       6,
58         }, {
59                 desc:    "keep interior variables",
60                 pattern: " b  ä ",
61                 n:       7,
62         }, {
63                 desc:    "remove ignoreables (zero-weights across the board)",
64                 pattern: "\u009Db\u009Dä\u009D", // U+009D: OPERATING SYSTEM COMMAND
65                 n:       3,
66         }} {
67                 m := New(language.Und, tc.options...)
68                 p := m.CompileString(tc.pattern)
69                 if len(p.ce) != tc.n {
70                         t.Errorf("%d:%s: Compile(%+q): got %d; want %d", i, tc.desc, tc.pattern, len(p.ce), tc.n)
71                 }
72         }
73 }
74
75 func TestNorm(t *testing.T) {
76         // U+0300: COMBINING GRAVE ACCENT (CCC=230)
77         // U+031B: COMBINING HORN (CCC=216)
78         for _, tc := range []struct {
79                 desc string
80                 a    string
81                 b    string
82                 want bool // a and b compile into the same pattern?
83         }{{
84                 "simple",
85                 "eee\u0300\u031b",
86                 "eee\u031b\u0300",
87                 true,
88         }, {
89                 "large number of modifiers in pattern",
90                 strings.Repeat("\u0300", 29) + "\u0318",
91                 "\u0318" + strings.Repeat("\u0300", 29),
92                 true,
93         }, {
94                 "modifier overflow in pattern",
95                 strings.Repeat("\u0300", 30) + "\u0318",
96                 "\u0318" + strings.Repeat("\u0300", 30),
97                 false,
98         }} {
99                 m := New(language.Und)
100                 a := m.CompileString(tc.a)
101                 b := m.CompileString(tc.b)
102                 if got := reflect.DeepEqual(a, b); got != tc.want {
103                         t.Errorf("Compile(a) == Compile(b) == %v; want %v", got, tc.want)
104                 }
105         }
106 }
107
108 func TestForwardSearch(t *testing.T) {
109         for i, tc := range []struct {
110                 desc    string
111                 tag     string
112                 options []Option
113                 pattern string
114                 text    string
115                 want    []int
116         }{{
117                 // The semantics of an empty search is to match nothing.
118                 // TODO: change this to be in line with strings.Index? It is quite a
119                 // different beast, so not sure yet.
120
121                 desc:    "empty pattern and text",
122                 tag:     "und",
123                 pattern: "",
124                 text:    "",
125                 want:    nil, // TODO: consider: []int{0, 0},
126         }, {
127                 desc:    "non-empty pattern and empty text",
128                 tag:     "und",
129                 pattern: " ",
130                 text:    "",
131                 want:    nil,
132         }, {
133                 desc:    "empty pattern and non-empty text",
134                 tag:     "und",
135                 pattern: "",
136                 text:    "abc",
137                 want:    nil, // TODO: consider: []int{0, 0, 1, 1, 2, 2, 3, 3},
138         }, {
139                 // Variable-only patterns. We don't support variables at the moment,
140                 // but verify that, given this, the behavior is indeed as expected.
141
142                 desc:    "exact match of variable",
143                 tag:     "und",
144                 pattern: " ",
145                 text:    " ",
146                 want:    []int{0, 1},
147         }, {
148                 desc:    "variables not handled by default",
149                 tag:     "und",
150                 pattern: "- ",
151                 text:    " -",
152                 want:    nil, // Would be (1, 2) for a median match with variable}.
153         }, {
154                 desc:    "multiple subsequent identical variables",
155                 tag:     "und",
156                 pattern: " ",
157                 text:    "    ",
158                 want:    []int{0, 1, 1, 2, 2, 3, 3, 4},
159         }, {
160                 desc:    "text with variables",
161                 tag:     "und",
162                 options: []Option{IgnoreDiacritics},
163                 pattern: "abc",
164                 text:    "3 abc 3",
165                 want:    []int{2, 5},
166         }, {
167                 desc:    "pattern with interior variables",
168                 tag:     "und",
169                 options: []Option{IgnoreDiacritics},
170                 pattern: "a b c",
171                 text:    "3 a b c abc a  b  c 3",
172                 want:    []int{2, 7}, // Would have 3 matches using variable.
173
174                 // TODO: Different variable handling settings.
175         }, {
176                 // Options.
177
178                 desc:    "match all levels",
179                 tag:     "und",
180                 pattern: "Abc",
181                 text:    "abcAbcABCÁbcábc",
182                 want:    []int{3, 6},
183         }, {
184                 desc:    "ignore diacritics in text",
185                 tag:     "und",
186                 options: []Option{IgnoreDiacritics},
187                 pattern: "Abc",
188                 text:    "Ábc",
189                 want:    []int{0, 4},
190         }, {
191                 desc:    "ignore diacritics in pattern",
192                 tag:     "und",
193                 options: []Option{IgnoreDiacritics},
194                 pattern: "Ábc",
195                 text:    "Abc",
196                 want:    []int{0, 3},
197         }, {
198                 desc:    "ignore diacritics",
199                 tag:     "und",
200                 options: []Option{IgnoreDiacritics},
201                 pattern: "Abc",
202                 text:    "abcAbcABCÁbcábc",
203                 want:    []int{3, 6, 9, 13},
204         }, {
205                 desc:    "ignore case",
206                 tag:     "und",
207                 options: []Option{IgnoreCase},
208                 pattern: "Abc",
209                 text:    "abcAbcABCÁbcábc",
210                 want:    []int{0, 3, 3, 6, 6, 9},
211         }, {
212                 desc:    "ignore case and diacritics",
213                 tag:     "und",
214                 options: []Option{IgnoreCase, IgnoreDiacritics},
215                 pattern: "Abc",
216                 text:    "abcAbcABCÁbcábc",
217                 want:    []int{0, 3, 3, 6, 6, 9, 9, 13, 13, 17},
218         }, {
219                 desc:    "ignore width to fullwidth",
220                 tag:     "und",
221                 options: []Option{IgnoreWidth},
222                 pattern: "abc",
223                 text:    "123 \uFF41\uFF42\uFF43 123", // U+FF41-3: FULLWIDTH LATIN SMALL LETTER A-C
224                 want:    []int{4, 13},
225         }, {
226                 // TODO: distinguish between case and width.
227                 desc:    "don't ignore width to fullwidth, ignoring only case",
228                 tag:     "und",
229                 options: []Option{IgnoreCase},
230                 pattern: "abc",
231                 text:    "123 \uFF41\uFF42\uFF43 123", // U+FF41-3: FULLWIDTH LATIN SMALL LETTER A-C
232                 want:    []int{4, 13},
233         }, {
234                 desc:    "ignore width to fullwidth and diacritics",
235                 tag:     "und",
236                 options: []Option{IgnoreWidth, IgnoreDiacritics},
237                 pattern: "abc",
238                 text:    "123 \uFF41\uFF42\uFF43 123", // U+FF41-3: FULLWIDTH LATIN SMALL LETTER A-C
239                 want:    []int{4, 13},
240         }, {
241                 desc:    "whole grapheme, single rune",
242                 tag:     "und",
243                 pattern: "eee",
244                 text:    "123 eeé 123",
245                 want:    nil,
246         }, {
247                 // Note: rules on when to apply contractions may, for certain languages,
248                 // differ between search and collation. For example, "ch" is not
249                 // considered a contraction for the purpose of searching in Spanish.
250                 // Therefore, be careful picking this test.
251                 desc:    "whole grapheme, contractions",
252                 tag:     "da",
253                 pattern: "aba",
254                 // Fails at the primary level, because "aa" is a contraction.
255                 text: "123 abaa 123",
256                 want: []int{},
257         }, {
258                 desc:    "whole grapheme, trailing modifier",
259                 tag:     "und",
260                 pattern: "eee",
261                 text:    "123 eee\u0300 123", // U+0300: COMBINING GRAVE ACCENT
262                 want:    nil,
263         }, {
264                 // Language-specific matching.
265
266                 desc:    "",
267                 tag:     "da",
268                 options: []Option{IgnoreCase},
269                 pattern: "Århus",
270                 text:    "AarhusÅrhus  Århus  ",
271                 want:    []int{0, 6, 6, 12, 14, 20},
272         }, {
273                 desc:    "",
274                 tag:     "da",
275                 options: []Option{IgnoreCase},
276                 pattern: "Aarhus",
277                 text:    "Århus Aarhus",
278                 want:    []int{0, 6, 7, 13},
279         }, {
280                 desc:    "",
281                 tag:     "en", // Å does not match A for English.
282                 options: []Option{IgnoreCase},
283                 pattern: "Aarhus",
284                 text:    "Århus",
285                 want:    nil,
286         }, {
287                 desc:    "ignore modifier in text",
288                 options: []Option{IgnoreDiacritics},
289                 tag:     "und",
290                 pattern: "eee",
291                 text:    "123 eee\u0300 123", // U+0300: COMBINING GRAVE ACCENT
292                 want:    []int{4, 9},         // Matches on grapheme boundary.
293         }, {
294                 desc:    "ignore multiple modifiers in text",
295                 options: []Option{IgnoreDiacritics},
296                 tag:     "und",
297                 pattern: "eee",
298                 text:    "123 eee\u0300\u0300 123", // U+0300: COMBINING GRAVE ACCENT
299                 want:    []int{4, 11},              // Matches on grapheme boundary.
300         }, {
301                 desc:    "ignore modifier in pattern",
302                 options: []Option{IgnoreDiacritics},
303                 tag:     "und",
304                 pattern: "eee\u0300", // U+0300: COMBINING GRAVE ACCENT
305                 text:    "123 eee 123",
306                 want:    []int{4, 7},
307         }, {
308                 desc:    "ignore multiple modifiers in pattern",
309                 options: []Option{IgnoreDiacritics},
310                 tag:     "und",
311                 pattern: "eee\u0300\u0300", // U+0300: COMBINING GRAVE ACCENT
312                 text:    "123 eee 123",
313                 want:    []int{4, 7},
314         }, {
315                 desc: "match non-normalized pattern",
316                 tag:  "und",
317                 // U+0300: COMBINING GRAVE ACCENT (CCC=230)
318                 // U+031B: COMBINING HORN (CCC=216)
319                 pattern: "eee\u0300\u031b",
320                 text:    "123 eee\u031b\u0300 123",
321                 want:    []int{4, 11},
322         }, {
323                 desc: "match non-normalized text",
324                 tag:  "und",
325                 // U+0300: COMBINING GRAVE ACCENT (CCC=230)
326                 // U+031B: COMBINING HORN (CCC=216)
327                 pattern: "eee\u031b\u0300",
328                 text:    "123 eee\u0300\u031b 123",
329                 want:    []int{4, 11},
330         }} {
331                 m := New(language.MustParse(tc.tag), tc.options...)
332                 p := m.CompileString(tc.pattern)
333                 for j := 0; j < len(tc.text); {
334                         start, end := p.IndexString(tc.text[j:])
335                         if start == -1 && end == -1 {
336                                 j++
337                                 continue
338                         }
339                         start += j
340                         end += j
341                         j = end
342                         if len(tc.want) == 0 {
343                                 t.Errorf("%d:%s: found unexpected result [%d %d]", i, tc.desc, start, end)
344                                 break
345                         }
346                         if tc.want[0] != start || tc.want[1] != end {
347                                 t.Errorf("%d:%s: got [%d %d]; want %v", i, tc.desc, start, end, tc.want[:2])
348                                 tc.want = tc.want[2:]
349                                 break
350                         }
351                         tc.want = tc.want[2:]
352                 }
353                 if len(tc.want) != 0 {
354                         t.Errorf("%d:%s: %d extra results", i, tc.desc, len(tc.want)/2)
355                 }
356         }
357 }