OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / secure / precis / enforce_test.go
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package precis
6
7 import (
8         "bytes"
9         "fmt"
10         "reflect"
11         "testing"
12
13         "golang.org/x/text/internal/testtext"
14         "golang.org/x/text/secure/bidirule"
15         "golang.org/x/text/transform"
16 )
17
// testCase is a single enforcement expectation: the string handed to a
// profile, the string expected back, and the error expected alongside it.
//
// The field order matters: the test table builds values with positional
// composite literals ({input, output, err}).
type testCase struct {
	// input is the raw string passed to the profile under test.
	input string
	// output is the expected result.
	output string
	// err is the expected error; nil means success is expected.
	err error
}
23
24 var enforceTestCases = []struct {
25         name  string
26         p     *Profile
27         cases []testCase
28 }{
29         {"Basic", NewFreeform(), []testCase{
30                 {"e\u0301\u031f", "\u00e9\u031f", nil}, // normalize
31         }},
32
33         {"Context Rule 1", NewFreeform(), []testCase{
34                 // Rule 1: zero-width non-joiner (U+200C)
35                 // From RFC:
36                 //   False
37                 //   If Canonical_Combining_Class(Before(cp)) .eq.  Virama Then True;
38                 //   If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
39                 //          (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
40                 //
41                 // Example runes for different joining types:
42                 // Join L: U+A872; PHAGS-PA SUPERFIXED LETTER RA
43                 // Join D: U+062C; HAH WITH DOT BELOW
44                 // Join T: U+0610; ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM
45                 // Join R: U+0627; ALEF
46                 // Virama: U+0A4D; GURMUKHI SIGN VIRAMA
47                 // Virama and Join T: U+0ACD; GUJARATI SIGN VIRAMA
48                 {"\u200c", "", errContext},
49                 {"\u200ca", "", errContext},
50                 {"a\u200c", "", errContext},
51                 {"\u200c\u0627", "", errContext},             // missing JoinStart
52                 {"\u062c\u200c", "", errContext},             // missing JoinEnd
53                 {"\u0610\u200c\u0610\u0627", "", errContext}, // missing JoinStart
54                 {"\u062c\u0610\u200c\u0610", "", errContext}, // missing JoinEnd
55
56                 // Variants of: D T* U+200c T* R
57                 {"\u062c\u200c\u0627", "\u062c\u200c\u0627", nil},
58                 {"\u062c\u0610\u200c\u0610\u0627", "\u062c\u0610\u200c\u0610\u0627", nil},
59                 {"\u062c\u0610\u0610\u200c\u0610\u0610\u0627", "\u062c\u0610\u0610\u200c\u0610\u0610\u0627", nil},
60                 {"\u062c\u0610\u200c\u0627", "\u062c\u0610\u200c\u0627", nil},
61                 {"\u062c\u200c\u0610\u0627", "\u062c\u200c\u0610\u0627", nil},
62
63                 // Variants of: L T* U+200c T* D
64                 {"\ua872\u200c\u062c", "\ua872\u200c\u062c", nil},
65                 {"\ua872\u0610\u200c\u0610\u062c", "\ua872\u0610\u200c\u0610\u062c", nil},
66                 {"\ua872\u0610\u0610\u200c\u0610\u0610\u062c", "\ua872\u0610\u0610\u200c\u0610\u0610\u062c", nil},
67                 {"\ua872\u0610\u200c\u062c", "\ua872\u0610\u200c\u062c", nil},
68                 {"\ua872\u200c\u0610\u062c", "\ua872\u200c\u0610\u062c", nil},
69
70                 // Virama
71                 {"\u0a4d\u200c", "\u0a4d\u200c", nil},
72                 {"\ua872\u0a4d\u200c", "\ua872\u0a4d\u200c", nil},
73                 {"\ua872\u0a4d\u0610\u200c", "", errContext},
74                 {"\ua872\u0a4d\u0610\u200c", "", errContext},
75
76                 {"\u0acd\u200c", "\u0acd\u200c", nil},
77                 {"\ua872\u0acd\u200c", "\ua872\u0acd\u200c", nil},
78                 {"\ua872\u0acd\u0610\u200c", "", errContext},
79                 {"\ua872\u0acd\u0610\u200c", "", errContext},
80
81                 // Using Virama as join T
82                 {"\ua872\u0acd\u200c\u062c", "\ua872\u0acd\u200c\u062c", nil},
83                 {"\ua872\u200c\u0acd\u062c", "\ua872\u200c\u0acd\u062c", nil},
84         }},
85
86         {"Context Rule 2", NewFreeform(), []testCase{
87                 // Rule 2: zero-width joiner (U+200D)
88                 {"\u200d", "", errContext},
89                 {"\u200da", "", errContext},
90                 {"a\u200d", "", errContext},
91
92                 {"\u0a4d\u200d", "\u0a4d\u200d", nil},
93                 {"\ua872\u0a4d\u200d", "\ua872\u0a4d\u200d", nil},
94                 {"\u0a4da\u200d", "", errContext},
95         }},
96
97         {"Context Rule 3", NewFreeform(), []testCase{
98                 // Rule 3: middle dot
99                 {"·", "", errContext},
100                 {"l·", "", errContext},
101                 {"·l", "", errContext},
102                 {"a·", "", errContext},
103                 {"l·a", "", errContext},
104                 {"a·a", "", errContext},
105                 {"l·l", "l·l", nil},
106                 {"al·la", "al·la", nil},
107         }},
108
109         {"Context Rule 4", NewFreeform(), []testCase{
110                 // Rule 4: Greek lower numeral U+0375
111                 {"͵", "", errContext},
112                 {"͵a", "", errContext},
113                 {"α͵", "", errContext},
114                 {"͵α", "͵α", nil},
115                 {"α͵α", "α͵α", nil},
116                 {"͵͵α", "͵͵α", nil}, // The numeric sign is itself Greek.
117                 {"α͵͵α", "α͵͵α", nil},
118                 {"α͵͵", "", errContext},
119                 {"α͵͵a", "", errContext},
120         }},
121
122         {"Context Rule 5+6", NewFreeform(), []testCase{
123                 // Rule 5+6: Hebrew preceding
124                 // U+05f3: Geresh
125                 {"׳", "", errContext},
126                 {"׳ה", "", errContext},
127                 {"a׳b", "", errContext},
128                 {"ש׳", "ש׳", nil},     // U+05e9 U+05f3
129                 {"ש׳׳׳", "ש׳׳׳", nil}, // U+05e9 U+05f3
130
131                 // U+05f4: Gershayim
132                 {"״", "", errContext},
133                 {"״ה", "", errContext},
134                 {"a״b", "", errContext},
135                 {"ש״", "ש״", nil},       // U+05e9 U+05f4
136                 {"ש״״״", "ש״״״", nil},   // U+05e9 U+05f4
137                 {"aש״״״", "aש״״״", nil}, // U+05e9 U+05f4
138         }},
139
140         {"Context Rule 7", NewFreeform(), []testCase{
141                 // Rule 7: Katakana middle Dot
142                 {"・", "", errContext},
143                 {"abc・", "", errContext},
144                 {"・def", "", errContext},
145                 {"abc・def", "", errContext},
146                 {"aヅc・def", "aヅc・def", nil},
147                 {"abc・dぶf", "abc・dぶf", nil},
148                 {"⺐bc・def", "⺐bc・def", nil},
149         }},
150
151         {"Context Rule 8+9", NewFreeform(), []testCase{
152                 // Rule 8+9: Arabic Indic Digit
153                 {"١٢٣٤٥۶", "", errContext},
154                 {"۱۲۳۴۵٦", "", errContext},
155                 {"١٢٣٤٥", "١٢٣٤٥", nil},
156                 {"۱۲۳۴۵", "۱۲۳۴۵", nil},
157         }},
158
159         {"Nickname", Nickname, []testCase{
160                 {"  Swan  of   Avon   ", "Swan of Avon", nil},
161                 {"", "", errEmptyString},
162                 {" ", "", errEmptyString},
163                 {"  ", "", errEmptyString},
164                 {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil},
165                 {"Foo", "Foo", nil},
166                 {"foo", "foo", nil},
167                 {"Foo Bar", "Foo Bar", nil},
168                 {"foo bar", "foo bar", nil},
169                 {"\u03A3", "\u03A3", nil},
170                 {"\u03C3", "\u03C3", nil},
171                 // Greek final sigma is left as is (do not fold!)
172                 {"\u03C2", "\u03C2", nil},
173                 {"\u265A", "♚", nil},
174                 {"Richard \u2163", "Richard IV", nil},
175                 {"\u212B", "Å", nil},
176                 {"\uFB00", "ff", nil}, // because of NFKC
177                 {"שa", "שa", nil},     // no bidi rule
178                 {"동일조건변경허락", "동일조건변경허락", nil},
179         }},
180         {"OpaqueString", OpaqueString, []testCase{
181                 {"  Swan  of   Avon   ", "  Swan  of   Avon   ", nil},
182                 {"", "", errEmptyString},
183                 {" ", " ", nil},
184                 {"  ", "  ", nil},
185                 {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil},
186                 {"Foo", "Foo", nil},
187                 {"foo", "foo", nil},
188                 {"Foo Bar", "Foo Bar", nil},
189                 {"foo bar", "foo bar", nil},
190                 {"\u03C3", "\u03C3", nil},
191                 {"Richard \u2163", "Richard \u2163", nil},
192                 {"\u212B", "Å", nil},
193                 {"Jack of \u2666s", "Jack of \u2666s", nil},
194                 {"my cat is a \u0009by", "", errDisallowedRune},
195                 {"שa", "שa", nil}, // no bidi rule
196         }},
197         {"UsernameCaseMapped", UsernameCaseMapped, []testCase{
198                 // TODO: Should this work?
199                 // {UsernameCaseMapped, "", "", errDisallowedRune},
200                 {"juliet@example.com", "juliet@example.com", nil},
201                 {"fussball", "fussball", nil},
202                 {"fu\u00DFball", "fu\u00DFball", nil},
203                 {"\u03C0", "\u03C0", nil},
204                 {"\u03A3", "\u03C3", nil},
205                 {"\u03C3", "\u03C3", nil},
206                 // Greek final sigma is left as is (do not fold!)
207                 {"\u03C2", "\u03C2", nil},
208                 {"\u0049", "\u0069", nil},
209                 {"\u0049", "\u0069", nil},
210                 {"\u03D2", "", errDisallowedRune},
211                 {"\u03B0", "\u03B0", nil},
212                 {"foo bar", "", errDisallowedRune},
213                 {"♚", "", errDisallowedRune},
214                 {"\u007E", "~", nil},
215                 {"a", "a", nil},
216                 {"!", "!", nil},
217                 {"²", "", errDisallowedRune},
218                 {"\t", "", errDisallowedRune},
219                 {"\n", "", errDisallowedRune},
220                 {"\u26D6", "", errDisallowedRune},
221                 {"\u26FF", "", errDisallowedRune},
222                 {"\uFB00", "", errDisallowedRune},
223                 {"\u1680", "", errDisallowedRune},
224                 {" ", "", errDisallowedRune},
225                 {"  ", "", errDisallowedRune},
226                 {"\u01C5", "", errDisallowedRune},
227                 {"\u16EE", "", errDisallowedRune}, // Nl RUNIC ARLAUG SYMBOL
228                 {"\u0488", "", errDisallowedRune}, // Me COMBINING CYRILLIC HUNDRED THOUSANDS SIGN
229                 {"\u212B", "\u00e5", nil},         // Angstrom sign, NFC -> U+00E5
230                 {"A\u030A", "å", nil},             // A + ring
231                 {"\u00C5", "å", nil},              // A with ring
232                 {"\u00E7", "ç", nil},              // c cedille
233                 {"\u0063\u0327", "ç", nil},        // c + cedille
234                 {"\u0158", "ř", nil},
235                 {"\u0052\u030C", "ř", nil},
236
237                 {"\u1E61", "\u1E61", nil}, // LATIN SMALL LETTER S WITH DOT ABOVE
238
239                 // Confusable characters ARE allowed and should NOT be mapped.
240                 {"\u0410", "\u0430", nil}, // CYRILLIC CAPITAL LETTER A
241
242                 // Full width should be mapped to the canonical decomposition.
243                 {"AB", "ab", nil},
244                 {"שc", "", bidirule.ErrInvalid}, // bidi rule
245
246         }},
247         {"UsernameCasePreserved", UsernameCasePreserved, []testCase{
248                 {"ABC", "ABC", nil},
249                 {"AB", "AB", nil},
250                 {"שc", "", bidirule.ErrInvalid}, // bidi rule
251                 {"\uFB00", "", errDisallowedRune},
252                 {"\u212B", "\u00c5", nil},    // Angstrom sign, NFC -> U+00E5
253                 {"ẛ", "", errDisallowedRune}, // LATIN SMALL LETTER LONG S WITH DOT ABOVE
254         }},
255 }
256
257 func doTests(t *testing.T, fn func(t *testing.T, p *Profile, tc testCase)) {
258         for _, g := range enforceTestCases {
259                 for i, tc := range g.cases {
260                         name := fmt.Sprintf("%s:%d:%+q", g.name, i, tc.input)
261                         testtext.Run(t, name, func(t *testing.T) {
262                                 fn(t, g.p, tc)
263                         })
264                 }
265         }
266 }
267
268 func TestString(t *testing.T) {
269         doTests(t, func(t *testing.T, p *Profile, tc testCase) {
270                 if e, err := p.String(tc.input); tc.err != err || e != tc.output {
271                         t.Errorf("got %+q (err: %v); want %+q (err: %v)", e, err, tc.output, tc.err)
272                 }
273         })
274 }
275
276 func TestBytes(t *testing.T) {
277         doTests(t, func(t *testing.T, p *Profile, tc testCase) {
278                 if e, err := p.Bytes([]byte(tc.input)); tc.err != err || string(e) != tc.output {
279                         t.Errorf("got %+q (err: %v); want %+q (err: %v)", string(e), err, tc.output, tc.err)
280                 }
281         })
282         // Test that calling Bytes with something that doesn't transform returns a
283         // copy.
284         orig := []byte("hello")
285         b, _ := NewFreeform().Bytes(orig)
286         if reflect.ValueOf(b).Pointer() == reflect.ValueOf(orig).Pointer() {
287                 t.Error("original and result are the same slice; should be a copy")
288         }
289 }
290
291 func TestAppend(t *testing.T) {
292         doTests(t, func(t *testing.T, p *Profile, tc testCase) {
293                 if e, err := p.Append(nil, []byte(tc.input)); tc.err != err || string(e) != tc.output {
294                         t.Errorf("got %+q (err: %v); want %+q (err: %v)", string(e), err, tc.output, tc.err)
295                 }
296         })
297 }
298
299 func TestStringMallocs(t *testing.T) {
300         if n := testtext.AllocsPerRun(100, func() { UsernameCaseMapped.String("helloworld") }); n > 0 {
301                 // TODO: reduce this to 0.
302                 t.Skipf("got %f allocs, want 0", n)
303         }
304 }
305
306 func TestAppendMallocs(t *testing.T) {
307         str := []byte("helloworld")
308         out := make([]byte, 0, len(str))
309         if n := testtext.AllocsPerRun(100, func() { UsernameCaseMapped.Append(out, str) }); n > 0 {
310                 t.Errorf("got %f allocs, want 0", n)
311         }
312 }
313
314 func TestTransformMallocs(t *testing.T) {
315         str := []byte("helloworld")
316         out := make([]byte, 0, len(str))
317         tr := UsernameCaseMapped.NewTransformer()
318         if n := testtext.AllocsPerRun(100, func() {
319                 tr.Reset()
320                 tr.Transform(out, str, true)
321         }); n > 0 {
322                 t.Errorf("got %f allocs, want 0", n)
323         }
324 }
325
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
332
333 // TestTransformerShortBuffers tests that the precis.Transformer implements the
334 // spirit, not just the letter (the method signatures), of the
335 // transform.Transformer interface.
336 //
337 // In particular, it tests that, if one or both of the dst or src buffers are
338 // short, so that multiple Transform calls are required to complete the overall
339 // transformation, the end result is identical to one Transform call with
340 // sufficiently long buffers.
341 func TestTransformerShortBuffers(t *testing.T) {
342         srcUnit := []byte("a\u0300cce\u0301nts") // NFD normalization form.
343         wantUnit := []byte("àccénts")            // NFC normalization form.
344         src := bytes.Repeat(srcUnit, 16)
345         want := bytes.Repeat(wantUnit, 16)
346         const long = 4096
347         dst := make([]byte, long)
348
349         // 5, 7, 9, 11, 13, 16 and 17 are all pair-wise co-prime, which means that
350         // slicing the dst and src buffers into 5, 7, 13 and 17 byte chunks will
351         // fall at different places inside the repeated srcUnit's and wantUnit's.
352         if len(srcUnit) != 11 || len(wantUnit) != 9 || len(src) > long || len(want) > long {
353                 t.Fatal("inconsistent lengths")
354         }
355
356         tr := NewFreeform().NewTransformer()
357         for _, deltaD := range []int{5, 7, 13, 17, long} {
358         loop:
359                 for _, deltaS := range []int{5, 7, 13, 17, long} {
360                         tr.Reset()
361                         d0 := 0
362                         s0 := 0
363                         for {
364                                 d1 := min(len(dst), d0+deltaD)
365                                 s1 := min(len(src), s0+deltaS)
366                                 nDst, nSrc, err := tr.Transform(dst[d0:d1:d1], src[s0:s1:s1], s1 == len(src))
367                                 d0 += nDst
368                                 s0 += nSrc
369                                 if err == nil {
370                                         break
371                                 }
372                                 if err == transform.ErrShortDst || err == transform.ErrShortSrc {
373                                         continue
374                                 }
375                                 t.Errorf("deltaD=%d, deltaS=%d: %v", deltaD, deltaS, err)
376                                 continue loop
377                         }
378                         if s0 != len(src) {
379                                 t.Errorf("deltaD=%d, deltaS=%d: s0: got %d, want %d", deltaD, deltaS, s0, len(src))
380                                 continue
381                         }
382                         if d0 != len(want) {
383                                 t.Errorf("deltaD=%d, deltaS=%d: d0: got %d, want %d", deltaD, deltaS, d0, len(want))
384                                 continue
385                         }
386                         got := dst[:d0]
387                         if !bytes.Equal(got, want) {
388                                 t.Errorf("deltaD=%d, deltaS=%d:\ngot  %q\nwant %q", deltaD, deltaS, got, want)
389                                 continue
390                         }
391                 }
392         }
393 }