vendor/golang.org/x/text/unicode/norm/ucd_test.go

   1 // Copyright 2011 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package norm
   6
   7 import (
   8         "bufio"
   9         "bytes"
  10         "fmt"
  11         "regexp"
  12         "runtime"
  13         "strconv"
  14         "strings"
  15         "sync"
  16         "testing"
  17         "time"
  18         "unicode/utf8"
  19
  20         "golang.org/x/text/internal/gen"
  21         "golang.org/x/text/internal/testtext"
  22 )
  23
  24 var once sync.Once
  25
  26 func skipShort(t *testing.T) {
  27         testtext.SkipIfNotLong(t)
  28
  29         once.Do(func() { loadTestData(t) })
  30 }
  31
  32 // This regression test runs the test set in NormalizationTest.txt
  33 // (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/).
  34 //
  35 // NormalizationTest.txt has form:
  36 // @Part0 # Specific cases
  37 // #
  38 // 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
  39 // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
  40 //
  41 // Each test has 5 columns (c1, c2, c3, c4, c5), where
  42 // (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
  43 //
  44 // CONFORMANCE:
  45 // 1. The following invariants must be true for all conformant implementations
  46 //
  47 //    NFC
  48 //      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
  49 //      c4 ==  NFC(c4) ==  NFC(c5)
  50 //
  51 //    NFD
  52 //      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
  53 //      c5 ==  NFD(c4) ==  NFD(c5)
  54 //
  55 //    NFKC
  56 //      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
  57 //
  58 //    NFKD
  59 //      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
  60 //
  61 // 2. For every code point X assigned in this version of Unicode that is not
  62 //    specifically listed in Part 1, the following invariants must be true
  63 //    for all conformant implementations:
  64 //
  65 //      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
  66 //
  67
  68 // Column types.
  69 const (
  70         cRaw = iota
  71         cNFC
  72         cNFD
  73         cNFKC
  74         cNFKD
  75         cMaxColumns
  76 )
  77
  78 // Holds data from NormalizationTest.txt
  79 var part []Part
  80
  81 type Part struct {
  82         name   string
  83         number int
  84         tests  []Test
  85 }
  86
  87 type Test struct {
  88         name   string
  89         partnr int
  90         number int
  91         r      rune                // used for character by character test
  92         cols   [cMaxColumns]string // Each has 5 entries, see below.
  93 }
  94
  95 func (t Test) Name() string {
  96         if t.number < 0 {
  97                 return part[t.partnr].name
  98         }
  99         return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
 100 }
 101
 102 var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`)
 103 var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`)
 104
 105 var counter int
 106
 107 // Load the data form NormalizationTest.txt
 108 func loadTestData(t *testing.T) {
 109         f := gen.OpenUCDFile("NormalizationTest.txt")
 110         defer f.Close()
 111         scanner := bufio.NewScanner(f)
 112         for scanner.Scan() {
 113                 line := scanner.Text()
 114                 if len(line) == 0 || line[0] == '#' {
 115                         continue
 116                 }
 117                 m := partRe.FindStringSubmatch(line)
 118                 if m != nil {
 119                         if len(m) < 3 {
 120                                 t.Fatal("Failed to parse Part: ", line)
 121                         }
 122                         i, err := strconv.Atoi(m[1])
 123                         if err != nil {
 124                                 t.Fatal(err)
 125                         }
 126                         name := m[2]
 127                         part = append(part, Part{name: name[:len(name)-1], number: i})
 128                         continue
 129                 }
 130                 m = testRe.FindStringSubmatch(line)
 131                 if m == nil || len(m) < 7 {
 132                         t.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
 133                 }
 134                 test := Test{name: m[6], partnr: len(part) - 1, number: counter}
 135                 counter++
 136                 for j := 1; j < len(m)-1; j++ {
 137                         for _, split := range strings.Split(m[j], " ") {
 138                                 r, err := strconv.ParseUint(split, 16, 64)
 139                                 if err != nil {
 140                                         t.Fatal(err)
 141                                 }
 142                                 if test.r == 0 {
 143                                         // save for CharacterByCharacterTests
 144                                         test.r = rune(r)
 145                                 }
 146                                 var buf [utf8.UTFMax]byte
 147                                 sz := utf8.EncodeRune(buf[:], rune(r))
 148                                 test.cols[j-1] += string(buf[:sz])
 149                         }
 150                 }
 151                 part := &part[len(part)-1]
 152                 part.tests = append(part.tests, test)
 153         }
 154         if scanner.Err() != nil {
 155                 t.Fatal(scanner.Err())
 156         }
 157 }
 158
 159 func cmpResult(t *testing.T, tc *Test, name string, f Form, gold, test, result string) {
 160         if gold != result {
 161                 t.Errorf("%s:%s: %s(%+q)=%+q; want %+q: %s",
 162                         tc.Name(), name, fstr[f], test, result, gold, tc.name)
 163         }
 164 }
 165
 166 func cmpIsNormal(t *testing.T, tc *Test, name string, f Form, test string, result, want bool) {
 167         if result != want {
 168                 t.Errorf("%s:%s: %s(%+q)=%v; want %v", tc.Name(), name, fstr[f], test, result, want)
 169         }
 170 }
 171
 172 func doTest(t *testing.T, tc *Test, f Form, gold, test string) {
 173         testb := []byte(test)
 174         result := f.Bytes(testb)
 175         cmpResult(t, tc, "Bytes", f, gold, test, string(result))
 176
 177         sresult := f.String(test)
 178         cmpResult(t, tc, "String", f, gold, test, sresult)
 179
 180         acc := []byte{}
 181         i := Iter{}
 182         i.InitString(f, test)
 183         for !i.Done() {
 184                 acc = append(acc, i.Next()...)
 185         }
 186         cmpResult(t, tc, "Iter.Next", f, gold, test, string(acc))
 187
 188         buf := make([]byte, 128)
 189         acc = nil
 190         for p := 0; p < len(testb); {
 191                 nDst, nSrc, _ := f.Transform(buf, testb[p:], true)
 192                 acc = append(acc, buf[:nDst]...)
 193                 p += nSrc
 194         }
 195         cmpResult(t, tc, "Transform", f, gold, test, string(acc))
 196
 197         for i := range test {
 198                 out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
 199                 cmpResult(t, tc, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
 200         }
 201         cmpIsNormal(t, tc, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
 202         cmpIsNormal(t, tc, "IsNormalString", f, test, f.IsNormalString(test), test == gold)
 203 }
 204
 205 func doConformanceTests(t *testing.T, tc *Test, partn int) {
 206         for i := 0; i <= 2; i++ {
 207                 doTest(t, tc, NFC, tc.cols[1], tc.cols[i])
 208                 doTest(t, tc, NFD, tc.cols[2], tc.cols[i])
 209                 doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
 210                 doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
 211         }
 212         for i := 3; i <= 4; i++ {
 213                 doTest(t, tc, NFC, tc.cols[3], tc.cols[i])
 214                 doTest(t, tc, NFD, tc.cols[4], tc.cols[i])
 215                 doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
 216                 doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
 217         }
 218 }
 219
 220 func TestCharacterByCharacter(t *testing.T) {
 221         skipShort(t)
 222         tests := part[1].tests
 223         var last rune = 0
 224         for i := 0; i <= len(tests); i++ { // last one is special case
 225                 var r rune
 226                 if i == len(tests) {
 227                         r = 0x2FA1E // Don't have to go to 0x10FFFF
 228                 } else {
 229                         r = tests[i].r
 230                 }
 231                 for last++; last < r; last++ {
 232                         // Check all characters that were not explicitly listed in the test.
 233                         tc := &Test{partnr: 1, number: -1}
 234                         char := string(last)
 235                         doTest(t, tc, NFC, char, char)
 236                         doTest(t, tc, NFD, char, char)
 237                         doTest(t, tc, NFKC, char, char)
 238                         doTest(t, tc, NFKD, char, char)
 239                 }
 240                 if i < len(tests) {
 241                         doConformanceTests(t, &tests[i], 1)
 242                 }
 243         }
 244 }
 245
 246 func TestStandardTests(t *testing.T) {
 247         skipShort(t)
 248         for _, j := range []int{0, 2, 3} {
 249                 for _, test := range part[j].tests {
 250                         doConformanceTests(t, &test, j)
 251                 }
 252         }
 253 }
 254
 255 // TestPerformance verifies that normalization is O(n). If any of the
 256 // code does not properly check for maxCombiningChars, normalization
 257 // may exhibit O(n**2) behavior.
 258 func TestPerformance(t *testing.T) {
 259         skipShort(t)
 260         runtime.GOMAXPROCS(2)
 261         success := make(chan bool, 1)
 262         go func() {
 263                 buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
 264                 buf = append(buf, "\u035B"...)
 265                 NFC.Append(nil, buf...)
 266                 success <- true
 267         }()
 268         timeout := time.After(1 * time.Second)
 269         select {
 270         case <-success:
 271                 // test completed before the timeout
 272         case <-timeout:
 273                 t.Errorf(`unexpectedly long time to complete PerformanceTest`)
 274         }
 275 }