vendor/golang.org/x/text/internal/export/idna/idna_test.go

   1 // Copyright 2016 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package idna
   6
   7 import (
   8         "fmt"
   9         "strconv"
  10         "strings"
  11         "testing"
  12
  13         "golang.org/x/text/internal/gen"
  14         "golang.org/x/text/internal/testtext"
  15         "golang.org/x/text/internal/ucd"
  16 )
  17
  18 func TestAllocToUnicode(t *testing.T) {
  19         avg := testtext.AllocsPerRun(1000, func() {
  20                 ToUnicode("www.golang.org")
  21         })
  22         if avg > 0 {
  23                 t.Errorf("got %f; want 0", avg)
  24         }
  25 }
  26
  27 func TestAllocToASCII(t *testing.T) {
  28         avg := testtext.AllocsPerRun(1000, func() {
  29                 ToASCII("www.golang.org")
  30         })
  31         if avg > 0 {
  32                 t.Errorf("got %f; want 0", avg)
  33         }
  34 }
  35
  36 func TestProfiles(t *testing.T) {
  37         testCases := []struct {
  38                 name      string
  39                 want, got *Profile
  40         }{
  41                 {"Punycode", punycode, New()},
  42                 {"Registration", registration, New(ValidateForRegistration())},
  43                 {"Registration", registration, New(
  44                         ValidateForRegistration(),
  45                         VerifyDNSLength(true),
  46                         BidiRule(),
  47                 )},
  48                 {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(true))},
  49                 {"Display", display, New(MapForLookup(), BidiRule())},
  50         }
  51         for _, tc := range testCases {
  52                 // Functions are not comparable, but the printed version will include
  53                 // their pointers.
  54                 got := fmt.Sprintf("%#v", tc.got)
  55                 want := fmt.Sprintf("%#v", tc.want)
  56                 if got != want {
  57                         t.Errorf("%s: \ngot  %#v,\nwant %#v", tc.name, got, want)
  58                 }
  59         }
  60 }
  61
  62 // doTest performs a single test f(input) and verifies that the output matches
  63 // out and that the returned error is expected. The errors string contains
  64 // all allowed error codes as categorized in
  65 // http://www.unicode.org/Public/idna/9.0.0/IdnaTest.txt:
  66 // P: Processing
  67 // V: Validity
  68 // A: to ASCII
  69 // B: Bidi
  70 // C: Context J
  71 func doTest(t *testing.T, f func(string) (string, error), name, input, want, errors string) {
  72         errors = strings.Trim(errors, "[]")
  73         test := "ok"
  74         if errors != "" {
  75                 test = "err:" + errors
  76         }
  77         // Replace some of the escape sequences to make it easier to single out
  78         // tests on the command name.
  79         in := strings.Trim(strconv.QuoteToASCII(input), `"`)
  80         in = strings.Replace(in, `\u`, "#", -1)
  81         in = strings.Replace(in, `\U`, "#", -1)
  82         name = fmt.Sprintf("%s/%s/%s", name, in, test)
  83
  84         testtext.Run(t, name, func(t *testing.T) {
  85                 got, err := f(input)
  86
  87                 if err != nil {
  88                         code := err.(interface {
  89                                 code() string
  90                         }).code()
  91                         if strings.Index(errors, code) == -1 {
  92                                 t.Errorf("error %q not in set of expected errors {%v}", code, errors)
  93                         }
  94                 } else if errors != "" {
  95                         t.Errorf("no errors; want error in {%v}", errors)
  96                 }
  97
  98                 if want != "" && got != want {
  99                         t.Errorf(`string: got %+q; want %+q`, got, want)
 100                 }
 101         })
 102 }
 103
// TestLabelErrors tests strings returned in case of error. All results should
// be identical to the reference implementation and can be verified at
// http://unicode.org/cldr/utility/idna.jsp. The reference implementation,
// however, seems to not display Bidi and ContextJ errors.
//
// In some cases the behavior of browsers is added as a comment. In all cases,
// whenever a resolve search returns an error here, Chrome will treat the input
// string as a search string (including those for Bidi and Context J errors),
// unless noted otherwise.
//
// Each table row names the conversion to run, the input, the expected output
// and the expected error code (empty when no error is expected); rows are
// executed through doTest.
func TestLabelErrors(t *testing.T) {
	// Local wrapper around the package-level encode that always uses
	// acePrefix and discards the error result.
	encode := func(s string) string { s, _ = encode(acePrefix, s); return s }
	// kind pairs a conversion function with the label used in subtest names.
	type kind struct {
		name string                       // prefix of the subtest name
		f    func(string) (string, error) // conversion under test
	}
	punyA := kind{"PunycodeA", punycode.ToASCII}
	resolve := kind{"ResolveA", Lookup.ToASCII}
	display := kind{"ToUnicode", Display.ToUnicode}
	p := New(VerifyDNSLength(true), MapForLookup(), BidiRule())
	lengthU := kind{"CheckLengthU", p.ToUnicode}
	lengthA := kind{"CheckLengthA", p.ToASCII}
	// p is reassigned here; lengthU and lengthA keep the method values that
	// were bound to the previous profile.
	p = New(MapForLookup(), StrictDomainName(false))
	std3 := kind{"STD3", p.ToASCII}

	testCases := []struct {
		kind
		input   string
		want    string
		wantErr string
	}{
		{lengthU, "", "", "A4"}, // From UTS 46 conformance test.
		{lengthA, "", "", "A4"},

		{lengthU, "xn--", "", "A4"},
		{lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
		{lengthU, "xn--.foo", ".foo", "A4"},
		{lengthU, "foo.xn--.bar", "foo..bar", "A4"},

		{display, "xn--", "", ""},
		{display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
		{display, "xn--.foo", ".foo", ""},
		{display, "foo.xn--.bar", "foo..bar", ""},

		{lengthA, "a..b", "a..b", "A4"},
		{punyA, ".b", ".b", ""},
		// For backwards compatibility, the Punycode profile does not map runes.
		{punyA, "\u3002b", "xn--b-83t", ""},
		{punyA, "..b", "..b", ""},
		// Only strip leading empty labels for certain profiles. Stripping
		// leading empty labels here but not for "empty" punycode above seems
		// inconsistent, but seems to be applied by both the conformance test
		// and Chrome. So we turn it off by default, support it as an option,
		// and enable it in profiles where it seems commonplace.
		{lengthA, ".b", "b", ""},
		{lengthA, "\u3002b", "b", ""},
		{lengthA, "..b", "b", ""},
		{lengthA, "b..", "b..", ""},

		{resolve, "a..b", "a..b", ""},
		{resolve, ".b", "b", ""},
		{resolve, "\u3002b", "b", ""},
		{resolve, "..b", "b", ""},
		{resolve, "b..", "b..", ""},

		// Raw punycode
		{punyA, "", "", ""},
		{punyA, "*.foo.com", "*.foo.com", ""},
		{punyA, "Foo.com", "Foo.com", ""},

		// STD3 rules
		{display, "*.foo.com", "*.foo.com", "P1"},
		{std3, "*.foo.com", "*.foo.com", ""},

		// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
		// Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
		// lab9.be.
		{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
		{display, "lab⒐be", "lab⒐be", "P1"},

		{resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
		{display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},

		// Chrome 54.0 recognizes the error and treats this input verbatim as a
		// search string.
		// Safari 10.0 (non-conform spec) decomposes "⒈" and computes the
		// punycode on the result using transitional mapping.
		// Firefox 49.0.1 goes haywire on this string and prints a bunch of what
		// seems to be nested punycode encodings.
		{resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
		{display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},

		// U+200C (ZERO WIDTH NON-JOINER) in raw and punycode-encoded form.
		{resolve, "a\u200Cb", "ab", ""},
		{display, "a\u200Cb", "a\u200Cb", "C"},

		{resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
		// NOTE(review): this row is identical to the display row two entries
		// above; possibly the input was meant to be encode("a\u200Cb") to
		// mirror the resolve row - confirm before changing.
		{display, "a\u200Cb", "a\u200Cb", "C"},

		{resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
		{
			// Notice how the string gets transformed, even with an error.
			// Chrome will use the original string if it finds an error, so not
			// the transformed one.
			display,
			"gr\ufecb\ufeae\ufe91\ufef2.de",
			"gr\u0639\u0631\u0628\u064a.de",
			"B",
		},

		{resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
		{display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},

		// normalize input
		{resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
		{display, "a\u0323\u0322", "\u1ea1\u0322", ""},

		// Non-normalized strings are not normalized when they originate from
		// punycode. Despite the error, Chrome, Safari and Firefox will attempt
		// to look up the input punycode.
		{resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
		{display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
	}

	// tc.f and tc.name are promoted from the embedded kind.
	for _, tc := range testCases {
		doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr)
	}
}
 230
 231 func TestConformance(t *testing.T) {
 232         testtext.SkipIfNotLong(t)
 233
 234         r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt")
 235         defer r.Close()
 236
 237         section := "main"
 238         started := false
 239         p := ucd.New(r, ucd.CommentHandler(func(s string) {
 240                 if started {
 241                         section = strings.ToLower(strings.Split(s, " ")[0])
 242                 }
 243         }))
 244         transitional := New(Transitional(true), VerifyDNSLength(true), BidiRule(), MapForLookup())
 245         nonTransitional := New(VerifyDNSLength(true), BidiRule(), MapForLookup())
 246         for p.Next() {
 247                 started = true
 248
 249                 // What to test
 250                 profiles := []*Profile{}
 251                 switch p.String(0) {
 252                 case "T":
 253                         profiles = append(profiles, transitional)
 254                 case "N":
 255                         profiles = append(profiles, nonTransitional)
 256                 case "B":
 257                         profiles = append(profiles, transitional)
 258                         profiles = append(profiles, nonTransitional)
 259                 }
 260
 261                 src := unescape(p.String(1))
 262
 263                 wantToUnicode := unescape(p.String(2))
 264                 if wantToUnicode == "" {
 265                         wantToUnicode = src
 266                 }
 267                 wantToASCII := unescape(p.String(3))
 268                 if wantToASCII == "" {
 269                         wantToASCII = wantToUnicode
 270                 }
 271                 wantErrToUnicode := ""
 272                 if strings.HasPrefix(wantToUnicode, "[") {
 273                         wantErrToUnicode = wantToUnicode
 274                         wantToUnicode = ""
 275                 }
 276                 wantErrToASCII := ""
 277                 if strings.HasPrefix(wantToASCII, "[") {
 278                         wantErrToASCII = wantToASCII
 279                         wantToASCII = ""
 280                 }
 281
 282                 // TODO: also do IDNA tests.
 283                 // invalidInIDNA2008 := p.String(4) == "NV8"
 284
 285                 for _, p := range profiles {
 286                         name := fmt.Sprintf("%s:%s", section, p)
 287                         doTest(t, p.ToUnicode, name+":ToUnicode", src, wantToUnicode, wantErrToUnicode)
 288                         doTest(t, p.ToASCII, name+":ToASCII", src, wantToASCII, wantErrToASCII)
 289                 }
 290         }
 291 }
 292
 293 func unescape(s string) string {
 294         s, err := strconv.Unquote(`"` + s + `"`)
 295         if err != nil {
 296                 panic(err)
 297         }
 298         return s
 299 }