1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
13 "golang.org/x/text/internal/gen"
14 "golang.org/x/text/internal/testtext"
15 "golang.org/x/text/internal/ucd"
// TestAllocToUnicode passes a closure calling ToUnicode("www.golang.org") to
// testtext.AllocsPerRun with a run count of 1000 and reports the resulting
// average when the check fails; the failure message states that 0 is wanted.
// NOTE(review): the condition guarding t.Errorf is not visible in this
// excerpt; confirm the exact comparison against the full file.
18 func TestAllocToUnicode(t *testing.T) {
19 avg := testtext.AllocsPerRun(1000, func() {
20 ToUnicode("www.golang.org")
23 t.Errorf("got %f; want 0", avg)
// TestAllocToASCII passes a closure calling ToASCII("www.golang.org") to
// testtext.AllocsPerRun with a run count of 1000 and reports the resulting
// average when the check fails; the failure message states that 0 is wanted.
// NOTE(review): the condition guarding t.Errorf is not visible in this
// excerpt; confirm the exact comparison against the full file.
27 func TestAllocToASCII(t *testing.T) {
28 avg := testtext.AllocsPerRun(1000, func() {
29 ToASCII("www.golang.org")
32 t.Errorf("got %f; want 0", avg)
// TestProfiles pairs each predefined profile (punycode, registration, lookup,
// display) with a profile built explicitly through New and a list of options,
// and compares the two via their fmt "%#v" renderings.
// NOTE(review): the struct field declarations are not visible in this excerpt;
// the loop below reads tc.name, tc.want and tc.got.
36 func TestProfiles(t *testing.T) {
37 testCases := []struct {
41 {"Punycode", punycode, New()},
42 {"Registration", registration, New(ValidateForRegistration())},
// NOTE(review): this case reuses the name "Registration" from the case above,
// so the name printed on failure does not identify which of the two failed.
43 {"Registration", registration, New(
44 ValidateForRegistration(),
45 VerifyDNSLength(true),
48 {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(true))},
49 {"Display", display, New(MapForLookup(), BidiRule())},
51 for _, tc := range testCases {
52 // Functions are not comparable, but the printed version will include
// Both sides are rendered to strings so they can be compared as plain values.
54 got := fmt.Sprintf("%#v", tc.got)
55 want := fmt.Sprintf("%#v", tc.want)
// NOTE(review): got and want are already "%#v"-formatted strings; formatting
// them with "%#v" again prints them as quoted Go string literals.
57 t.Errorf("%s: \ngot %#v,\nwant %#v", tc.name, got, want)
62 // doTest performs a single test f(input) and verifies that the output matches
63 // out and that the returned error is expected. The errors string contains
64 // all allowed error codes as categorized in
65 // http://www.unicode.org/Public/idna/9.0.0/IdnaTest.txt:
71 func doTest(t *testing.T, f func(string) (string, error), name, input, want, errors string) {
// Strip any leading and trailing '[' or ']' characters from the list of
// expected error codes.
72 errors = strings.Trim(errors, "[]")
// NOTE(review): the declaration of test and the condition around this
// assignment are not visible in this excerpt.
75 test = "err:" + errors
77 // Replace some of the escape sequences to make it easier to single out
78 // tests on the command line.
// QuoteToASCII escapes non-ASCII runes; the surrounding quotes are trimmed and
// the `\u` / `\U` escape prefixes are replaced by "#".
79 in := strings.Trim(strconv.QuoteToASCII(input), `"`)
80 in = strings.Replace(in, `\u`, "#", -1)
81 in = strings.Replace(in, `\U`, "#", -1)
// The subtest name combines the caller's name, the escaped input and test.
82 name = fmt.Sprintf("%s/%s/%s", name, in, test)
84 testtext.Run(t, name, func(t *testing.T) {
// The error is type-asserted to an anonymous interface to obtain its code.
// NOTE(review): the interface's method set is not visible in this excerpt.
88 code := err.(interface {
// The code is accepted when it occurs anywhere in errors as a substring.
91 if strings.Index(errors, code) == -1 {
92 t.Errorf("error %q not in set of expected errors {%v}", code, errors)
// This branch reports the case where errors is non-empty but the preceding
// condition (presumably an error check; not visible here) did not hold.
94 } else if errors != "" {
95 t.Errorf("no errors; want error in {%v}", errors)
// The output string is only compared when the caller supplied a non-empty want.
98 if want != "" && got != want {
99 t.Errorf(`string: got %+q; want %+q`, got, want)
104 // TestLabelErrors tests strings returned in case of error. All results should
105 // be identical to the reference implementation and can be verified at
106 // http://unicode.org/cldr/utility/idna.jsp. The reference implementation,
107 // however, seems to not display Bidi and ContextJ errors.
109 // In some cases the behavior of browsers is added as a comment. In all cases,
110 // whenever a resolve search returns an error here, Chrome will treat the input
111 // string as a search string (including those for Bidi and Context J errors),
112 // unless noted otherwise.
113 func TestLabelErrors(t *testing.T) {
// The local encode wraps the package-level encode (which is what the name
// refers to inside the closure body), passing acePrefix and discarding the
// returned error.
114 encode := func(s string) string { s, _ = encode(acePrefix, s); return s }
117 f func(string) (string, error)
// Each kind pairs a label with the conversion function under test.
// NOTE(review): the kind type declaration is only partly visible here.
119 punyA := kind{"PunycodeA", punycode.ToASCII}
120 resolve := kind{"ResolveA", Lookup.ToASCII}
121 display := kind{"ToUnicode", Display.ToUnicode}
122 p := New(VerifyDNSLength(true), MapForLookup(), BidiRule())
123 lengthU := kind{"CheckLengthU", p.ToUnicode}
124 lengthA := kind{"CheckLengthA", p.ToASCII}
// p is reassigned below; lengthU and lengthA keep the method values that were
// bound to the previous profile when they were evaluated.
125 p = New(MapForLookup(), StrictDomainName(false))
126 std3 := kind{"STD3", p.ToASCII}
// Each row is: kind, input, wanted output, expected error codes.
// NOTE(review): the struct field declarations are not visible in this excerpt;
// the loop at the end reads tc.f, tc.name, tc.input, tc.want and tc.wantErr.
128 testCases := []struct {
134 {lengthU, "", "", "A4"}, // From UTS 46 conformance test.
135 {lengthA, "", "", "A4"},
137 {lengthU, "xn--", "", "A4"},
138 {lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
139 {lengthU, "xn--.foo", ".foo", "A4"},
140 {lengthU, "foo.xn--.bar", "foo..bar", "A4"},
142 {display, "xn--", "", ""},
143 {display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
144 {display, "xn--.foo", ".foo", ""},
145 {display, "foo.xn--.bar", "foo..bar", ""},
147 {lengthA, "a..b", "a..b", "A4"},
148 {punyA, ".b", ".b", ""},
149 // For backwards compatibility, the Punycode profile does not map runes.
150 {punyA, "\u3002b", "xn--b-83t", ""},
151 {punyA, "..b", "..b", ""},
152 // Only strip leading empty labels for certain profiles. Stripping
153 // leading empty labels here but not for "empty" punycode above seems
154 // inconsistent, but seems to be applied by both the conformance test
155 // and Chrome. So we turn it off by default, support it as an option,
156 // and enable it in profiles where it seems commonplace.
157 {lengthA, ".b", "b", ""},
158 {lengthA, "\u3002b", "b", ""},
159 {lengthA, "..b", "b", ""},
160 {lengthA, "b..", "b..", ""},
162 {resolve, "a..b", "a..b", ""},
163 {resolve, ".b", "b", ""},
164 {resolve, "\u3002b", "b", ""},
165 {resolve, "..b", "b", ""},
166 {resolve, "b..", "b..", ""},
170 {punyA, "*.foo.com", "*.foo.com", ""},
171 {punyA, "Foo.com", "Foo.com", ""},
174 {display, "*.foo.com", "*.foo.com", "P1"},
175 {std3, "*.foo.com", "*.foo.com", ""},
177 // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
178 // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
180 {resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
181 {display, "lab⒐be", "lab⒐be", "P1"},
183 {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
184 {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},
186 // Chrome 54.0 recognizes the error and treats this input verbatim as a
188 // Safari 10.0 (not conforming to the spec) decomposes "⒈" and computes the
189 // punycode on the result using transitional mapping.
190 // Firefox 49.0.1 goes haywire on this string and prints a bunch of what
191 // seems to be nested punycode encodings.
192 {resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
193 {display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},
195 {resolve, "a\u200Cb", "ab", ""},
196 {display, "a\u200Cb", "a\u200Cb", "C"},
198 {resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
// NOTE(review): this row is identical to the display row two entries above.
199 {display, "a\u200Cb", "a\u200Cb", "C"},
201 {resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
203 // Notice how the string gets transformed, even with an error.
204 // Chrome will use the original string if it finds an error, so not
205 // the transformed one.
207 "gr\ufecb\ufeae\ufe91\ufef2.de",
208 "gr\u0639\u0631\u0628\u064a.de",
212 {resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
213 {display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},
216 {resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
217 {display, "a\u0323\u0322", "\u1ea1\u0322", ""},
219 // Non-normalized strings are not normalized when they originate from
220 // punycode. Despite the error, Chrome, Safari and Firefox will attempt
221 // to look up the input punycode.
222 {resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
223 {display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
// Every row is run through doTest as its own named subtest.
226 for _, tc := range testCases {
227 doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr)
// TestConformance reads the Unicode data file IdnaTest.txt and, for every
// record and every selected profile, runs doTest once for ToUnicode and once
// for ToASCII. It calls testtext.SkipIfNotLong first, so it presumably only
// runs when long tests are enabled.
231 func TestConformance(t *testing.T) {
232 testtext.SkipIfNotLong(t)
234 r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt")
// The comment handler records the first space-separated word of a comment,
// lower-cased, as the current section; it is used in subtest names below.
// NOTE(review): any condition selecting which comments update section is not
// visible in this excerpt.
239 p := ucd.New(r, ucd.CommentHandler(func(s string) {
241 section = strings.ToLower(strings.Split(s, " ")[0])
// The two profiles differ only in the Transitional(true) option.
244 transitional := New(Transitional(true), VerifyDNSLength(true), BidiRule(), MapForLookup())
245 nonTransitional := New(VerifyDNSLength(true), BidiRule(), MapForLookup())
// NOTE(review): the conditions choosing between the appends below are not
// visible in this excerpt; presumably they depend on a field of the record.
250 profiles := []*Profile{}
253 profiles = append(profiles, transitional)
255 profiles = append(profiles, nonTransitional)
257 profiles = append(profiles, transitional)
258 profiles = append(profiles, nonTransitional)
// Field 1 is the source string, field 2 the ToUnicode expectation and field 3
// the ToASCII expectation; all are passed through unescape.
261 src := unescape(p.String(1))
263 wantToUnicode := unescape(p.String(2))
// NOTE(review): the fallback used for an empty ToUnicode field is not visible.
264 if wantToUnicode == "" {
267 wantToASCII := unescape(p.String(3))
// An empty ToASCII field falls back to the ToUnicode expectation.
268 if wantToASCII == "" {
269 wantToASCII = wantToUnicode
// A value starting with "[" is treated as a list of error codes rather than
// an expected output string.
271 wantErrToUnicode := ""
272 if strings.HasPrefix(wantToUnicode, "[") {
273 wantErrToUnicode = wantToUnicode
277 if strings.HasPrefix(wantToASCII, "[") {
278 wantErrToASCII = wantToASCII
282 // TODO: also do IDNA tests.
283 // invalidInIDNA2008 := p.String(4) == "NV8"
// NOTE(review): the loop variable p shadows the parser p declared above for
// the duration of this loop.
285 for _, p := range profiles {
286 name := fmt.Sprintf("%s:%s", section, p)
287 doTest(t, p.ToUnicode, name+":ToUnicode", src, wantToUnicode, wantErrToUnicode)
288 doTest(t, p.ToASCII, name+":ToASCII", src, wantToASCII, wantErrToASCII)
293 func unescape(s string) string {
294 s, err := strconv.Unquote(`"` + s + `"`)