vendor/golang.org/x/text/cases/map_test.go

   1 // Copyright 2014 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package cases
   6
   7 import (
   8         "bytes"
   9         "fmt"
  10         "path"
  11         "strings"
  12         "testing"
  13         "unicode/utf8"
  14
  15         "golang.org/x/text/internal/testtext"
  16         "golang.org/x/text/language"
  17         "golang.org/x/text/transform"
  18         "golang.org/x/text/unicode/norm"
  19 )
  20
  21 type testCase struct {
  22         lang  string
  23         src   interface{} // string, []string, or nil to skip test
  24         title interface{} // string, []string, or nil to skip test
  25         lower interface{} // string, []string, or nil to skip test
  26         upper interface{} // string, []string, or nil to skip test
  27         opts  options
  28 }
  29
// testCases is the table that drives TestCaseMappings. Entries carry explicit
// indices; TestCaseMappings includes the index in its failure messages.
var testCases = []testCase{
	0: {
		lang:  "und",
		src:   "abc aBc ABC abC İsıI ΕΣΆΣ",
		title: "Abc Abc Abc Abc İsıi Εσάσ",
		lower: "abc abc abc abc i\u0307sıi εσάσ",
		upper: "ABC ABC ABC ABC İSII ΕΣΆΣ",
		opts:  getOpts(HandleFinalSigma(false)),
	},

	1: {
		lang:  "und",
		src:   "abc aBc ABC abC İsıI ΕΣΆΣ Σ _Σ -Σ",
		title: "Abc Abc Abc Abc İsıi Εσάς Σ _Σ -Σ",
		lower: "abc abc abc abc i\u0307sıi εσάς σ _σ -σ",
		upper: "ABC ABC ABC ABC İSII ΕΣΆΣ Σ _Σ -Σ",
		opts:  getOpts(HandleFinalSigma(true)),
	},

	2: { // Title cased runes.
		lang:  supported,
		src:   "ǅA",
		title: "ǅa",
		lower: "ǆa",
		upper: "ǄA",
	},

	3: {
		// Title breaking.
		lang: supported,
		src: []string{
			"FOO CASE TEST",
			"DON'T DO THiS",
			"χωΡΊΣ χωΡΊΣ^a χωΡΊΣ:a χωΡΊΣ:^a χωΡΊΣ^ όμΩΣ Σ",
			"with-hyphens",
			"49ers 49ers",
			`"capitalize a^a -hyphen 0X _u a_u:a`,
			"MidNumLet a.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg",
			"MidNum a,b;c\u037ed\u0589e\u060cf\u2044g\ufe50h",
			"\u0345 x\u3031x x\u05d0x \u05d0x a'.a a.a a4,a",
		},
		title: []string{
			"Foo Case Test",
			"Don't Do This",
			"Χωρίς Χωρίσ^A Χωρίσ:a Χωρίσ:^A Χωρίς^ Όμως Σ",
			"With-Hyphens",
			// Note that 49Ers is correct according to the spec.
			// TODO: provide some option to the user to treat different
			// characters as cased.
			"49Ers 49Ers",
			`"Capitalize A^A -Hyphen 0X _U A_u:a`,
			"Midnumlet A.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg",
			"Midnum A,B;C\u037eD\u0589E\u060cF\u2044G\ufe50H",
			"\u0399 X\u3031X X\u05d0x \u05d0X A'.A A.a A4,A",
		},
	},

	// TODO: These are known deviations from the Unicode Word Breaking
	// Algorithm. The entry is kept commented out until they are resolved.
	// {
	//      "und",
	//      "x_\u3031_x a4,4a",
	//      "X_\u3031_x A4,4a", // Currently is "X_\U3031_X A4,4A".
	//      "x_\u3031_x a4,4a",
	//      "X_\u3031_X A4,4A",
	//      options{},
	// },

	4: {
		// Tests title options
		lang:  "und",
		src:   "abc aBc ABC abC İsıI o'Brien",
		title: "Abc ABc ABC AbC İsıI O'Brien",
		opts:  getOpts(NoLower),
	},

	5: {
		lang:  "el",
		src:   "aBc ΟΔΌΣ Οδός Σο ΣΟ Σ oΣ ΟΣ σ ἕξ \u03ac",
		title: "Abc Οδός Οδός Σο Σο Σ Oς Ος Σ Ἕξ \u0386",
		lower: "abc οδός οδός σο σο σ oς ος σ ἕξ \u03ac",
		upper: "ABC ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ Σ OΣ ΟΣ Σ ΕΞ \u0391", // Uppercase removes accents
	},

	6: {
		// The same expectations are checked for both Turkish and Azeri.
		lang:  "tr az",
		src:   "Isiİ İsıI I\u0307sIiİ İsıI\u0307 I\u0300\u0307",
		title: "Isii İsıı I\u0307sıii İsıi I\u0300\u0307",
		lower: "ısii isıı isıii isıi \u0131\u0300\u0307",
		upper: "ISİİ İSII I\u0307SIİİ İSII\u0307 I\u0300\u0307",
	},

	7: {
		lang:  "lt",
		src:   "I Ï J J̈ Į Į̈ Ì Í Ĩ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤",
		title: "I Ï J J̈ Į Į̈ Ì Í Ĩ Xi̇̈ Xj̇̈ Xį̇̈ Xi̇̀ Xi̇́ Xi̇̃ Xi Xi̇̈ Xj Xj̇̈ Xį Xį̇̈ Xi̟̤",
		lower: "i i̇̈ j j̇̈ į į̇̈ i̇̀ i̇́ i̇̃ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ xi xi̇̈ xj xj̇̈ xį xį̇̈ xi̟̤",
		upper: "I Ï J J̈ Į Į̈ Ì Í Ĩ XÏ XJ̈ XĮ̈ XÌ XÍ XĨ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤",
	},

	8: {
		// Lithuanian again, spelled with escapes so the combining marks are
		// visible in the source.
		lang:  "lt",
		src:   "\u012e\u0300 \u00cc i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307",
		title: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307",
		lower: "\u012f\u0307\u0300 i\u0307\u0300 i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307",
		upper: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307",
	},

	9: {
		// Only title casing is checked for Dutch; lower and upper are nil.
		lang:  "nl",
		src:   "ijs IJs Ij Ijs İJ İJs aa aA 'ns 'S",
		title: "IJs IJs IJ IJs İj İjs Aa Aa 'ns 's",
	},

	// Note: this specification is not currently part of CLDR. The same holds
	// for the leading apostrophe handling for Dutch.
	// See http://unicode.org/cldr/trac/ticket/7078.
	10: {
		lang:  "af",
		src:   "wag 'n bietjie",
		title: "Wag 'n Bietjie",
		lower: "wag 'n bietjie",
		upper: "WAG 'N BIETJIE",
	},
}
 155
 156 func TestCaseMappings(t *testing.T) {
 157         for i, tt := range testCases {
 158                 src, ok := tt.src.([]string)
 159                 if !ok {
 160                         src = strings.Split(tt.src.(string), " ")
 161                 }
 162
 163                 for _, lang := range strings.Split(tt.lang, " ") {
 164                         tag := language.MustParse(lang)
 165                         testEntry := func(name string, mk func(language.Tag, options) transform.SpanningTransformer, gold interface{}) {
 166                                 c := Caser{mk(tag, tt.opts)}
 167                                 if gold != nil {
 168                                         wants, ok := gold.([]string)
 169                                         if !ok {
 170                                                 wants = strings.Split(gold.(string), " ")
 171                                         }
 172                                         for j, want := range wants {
 173                                                 if got := c.String(src[j]); got != want {
 174                                                         t.Errorf("%d:%s:\n%s.String(%+q):\ngot  %+q;\nwant %+q", i, lang, name, src[j], got, want)
 175                                                 }
 176                                         }
 177                                 }
 178                                 dst := make([]byte, 256) // big enough to hold any result
 179                                 src := []byte(strings.Join(src, " "))
 180                                 v := testtext.AllocsPerRun(20, func() {
 181                                         c.Transform(dst, src, true)
 182                                 })
 183                                 if v > 1.1 {
 184                                         t.Errorf("%d:%s:\n%s: number of allocs was %f; want 0", i, lang, name, v)
 185                                 }
 186                         }
 187                         testEntry("Upper", makeUpper, tt.upper)
 188                         testEntry("Lower", makeLower, tt.lower)
 189                         testEntry("Title", makeTitle, tt.title)
 190                 }
 191         }
 192 }
 193
 194 // TestAlloc tests that some mapping methods should not cause any allocation.
 195 func TestAlloc(t *testing.T) {
 196         dst := make([]byte, 256) // big enough to hold any result
 197         src := []byte(txtNonASCII)
 198
 199         for i, f := range []func() Caser{
 200                 func() Caser { return Upper(language.Und) },
 201                 func() Caser { return Lower(language.Und) },
 202                 func() Caser { return Lower(language.Und, HandleFinalSigma(false)) },
 203                 // TODO: use a shared copy for these casers as well, in order of
 204                 // importance, starting with the most important:
 205                 // func() Caser { return Title(language.Und) },
 206                 // func() Caser { return Title(language.Und, HandleFinalSigma(false)) },
 207         } {
 208                 testtext.Run(t, "", func(t *testing.T) {
 209                         var c Caser
 210                         v := testtext.AllocsPerRun(10, func() {
 211                                 c = f()
 212                         })
 213                         if v > 0 {
 214                                 // TODO: Right now only Upper has 1 allocation. Special-case Lower
 215                                 // and Title as well to have less allocations for the root locale.
 216                                 t.Errorf("%d:init: number of allocs was %f; want 0", i, v)
 217                         }
 218                         v = testtext.AllocsPerRun(2, func() {
 219                                 c.Transform(dst, src, true)
 220                         })
 221                         if v > 0 {
 222                                 t.Errorf("%d:transform: number of allocs was %f; want 0", i, v)
 223                         }
 224                 })
 225         }
 226 }
 227
 228 func testHandover(t *testing.T, c Caser, src string) {
 229         want := c.String(src)
 230         // Find the common prefix.
 231         pSrc := 0
 232         for ; pSrc < len(src) && pSrc < len(want) && want[pSrc] == src[pSrc]; pSrc++ {
 233         }
 234
 235         // Test handover for each substring of the prefix.
 236         for i := 0; i < pSrc; i++ {
 237                 testtext.Run(t, fmt.Sprint("interleave/", i), func(t *testing.T) {
 238                         dst := make([]byte, 4*len(src))
 239                         c.Reset()
 240                         nSpan, _ := c.Span([]byte(src[:i]), false)
 241                         copy(dst, src[:nSpan])
 242                         nTransform, _, _ := c.Transform(dst[nSpan:], []byte(src[nSpan:]), true)
 243                         got := string(dst[:nSpan+nTransform])
 244                         if got != want {
 245                                 t.Errorf("full string: got %q; want %q", got, want)
 246                         }
 247                 })
 248         }
 249 }
 250
 251 func TestHandover(t *testing.T) {
 252         testCases := []struct {
 253                 desc          string
 254                 t             Caser
 255                 first, second string
 256         }{{
 257                 "title/nosigma/single midword",
 258                 Title(language.Und, HandleFinalSigma(false)),
 259                 "A.", "a",
 260         }, {
 261                 "title/nosigma/single midword",
 262                 Title(language.Und, HandleFinalSigma(false)),
 263                 "A", ".a",
 264         }, {
 265                 "title/nosigma/double midword",
 266                 Title(language.Und, HandleFinalSigma(false)),
 267                 "A..", "a",
 268         }, {
 269                 "title/nosigma/double midword",
 270                 Title(language.Und, HandleFinalSigma(false)),
 271                 "A.", ".a",
 272         }, {
 273                 "title/nosigma/double midword",
 274                 Title(language.Und, HandleFinalSigma(false)),
 275                 "A", "..a",
 276         }, {
 277                 "title/sigma/single midword",
 278                 Title(language.Und),
 279                 "ΟΣ.", "a",
 280         }, {
 281                 "title/sigma/single midword",
 282                 Title(language.Und),
 283                 "ΟΣ", ".a",
 284         }, {
 285                 "title/sigma/double midword",
 286                 Title(language.Und),
 287                 "ΟΣ..", "a",
 288         }, {
 289                 "title/sigma/double midword",
 290                 Title(language.Und),
 291                 "ΟΣ.", ".a",
 292         }, {
 293                 "title/sigma/double midword",
 294                 Title(language.Und),
 295                 "ΟΣ", "..a",
 296         }, {
 297                 "title/af/leading apostrophe",
 298                 Title(language.Afrikaans),
 299                 "'", "n bietje",
 300         }}
 301         for _, tc := range testCases {
 302                 testtext.Run(t, tc.desc, func(t *testing.T) {
 303                         src := tc.first + tc.second
 304                         want := tc.t.String(src)
 305                         tc.t.Reset()
 306                         n, _ := tc.t.Span([]byte(tc.first), false)
 307
 308                         dst := make([]byte, len(want))
 309                         copy(dst, tc.first[:n])
 310
 311                         nDst, _, _ := tc.t.Transform(dst[n:], []byte(src[n:]), true)
 312                         got := string(dst[:n+nDst])
 313                         if got != want {
 314                                 t.Errorf("got %q; want %q", got, want)
 315                         }
 316                 })
 317         }
 318 }
 319
// minBufSize is the buffer size with which the casing operations in this
// package are guaranteed to make progress.
const minBufSize = norm.MaxSegmentSize
 323
 324 type bufferTest struct {
 325         desc, src, want  string
 326         firstErr         error
 327         dstSize, srcSize int
 328         t                transform.SpanningTransformer
 329 }
 330
// bufferTests holds the cases run by TestShortBuffersAndOverflow. It is
// populated by init.
var bufferTests []bufferTest
 332
// init fills bufferTests. Each entry gives the input, the expected output,
// the destination and source chunk sizes to use, and the error expected from
// the first Transform call (nil when firstErr is omitted).
func init() {
	bufferTests = []bufferTest{{
		desc:     "und/upper/short dst",
		src:      "abcdefg",
		want:     "ABCDEFG",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Upper(language.Und),
	}, {
		desc:     "und/upper/short src",
		src:      "123é56",
		want:     "123É56",
		firstErr: transform.ErrShortSrc,
		dstSize:  4,
		srcSize:  4,
		t:        Upper(language.Und),
	}, {
		desc:     "und/upper/no error on short",
		src:      "12",
		want:     "12",
		firstErr: nil,
		dstSize:  1,
		srcSize:  1,
		t:        Upper(language.Und),
	}, {
		desc:     "und/lower/short dst",
		src:      "ABCDEFG",
		want:     "abcdefg",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Lower(language.Und),
	}, {
		desc:     "und/lower/short src",
		src:      "123É56",
		want:     "123é56",
		firstErr: transform.ErrShortSrc,
		dstSize:  4,
		srcSize:  4,
		t:        Lower(language.Und),
	}, {
		desc:     "und/lower/no error on short",
		src:      "12",
		want:     "12",
		firstErr: nil,
		dstSize:  1,
		srcSize:  1,
		t:        Lower(language.Und),
	}, {
		desc:    "und/lower/simple (no final sigma)",
		src:     "ΟΣ ΟΣΣ",
		want:    "οσ οσσ",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Lower(language.Und, HandleFinalSigma(false)),
	}, {
		desc:    "und/title/simple (no final sigma)",
		src:     "ΟΣ ΟΣΣ",
		want:    "Οσ Οσσ",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und, HandleFinalSigma(false)),
	}, {
		desc:    "und/title/final sigma: no error",
		src:     "ΟΣ",
		want:    "Ος",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und),
	}, {
		desc:     "und/title/final sigma: short source",
		src:      "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
		want:     "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
		firstErr: transform.ErrShortSrc,
		dstSize:  minBufSize,
		srcSize:  10,
		t:        Title(language.Und),
	}, {
		desc:     "und/title/final sigma: short destination 1",
		src:      "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
		want:     "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
		firstErr: transform.ErrShortDst,
		dstSize:  10,
		srcSize:  minBufSize,
		t:        Title(language.Und),
	}, {
		desc:     "und/title/final sigma: short destination 2",
		src:      "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
		want:     "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
		firstErr: transform.ErrShortDst,
		dstSize:  9,
		srcSize:  minBufSize,
		t:        Title(language.Und),
	}, {
		desc:     "und/title/final sigma: short destination 3",
		src:      "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
		want:     "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
		firstErr: transform.ErrShortDst,
		dstSize:  8,
		srcSize:  minBufSize,
		t:        Title(language.Und),
	}, {
		desc:     "und/title/clipped UTF-8 rune",
		src:      "σσσσσσσσσσσ",
		want:     "Σσσσσσσσσσσ",
		firstErr: transform.ErrShortSrc,
		dstSize:  minBufSize,
		srcSize:  5,
		t:        Title(language.Und),
	}, {
		// The source ends in a lone 0xCF byte, i.e. a truncated multi-byte
		// sequence; it is expected to be passed through unchanged.
		desc:    "und/title/clipped UTF-8 rune atEOF",
		src:     "σσσ" + string([]byte{0xCF}),
		want:    "Σσσ" + string([]byte{0xCF}),
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und),
	}, {
		// With exactly maxIgnorable dots between the sigma and the following
		// letter, the sigma is expected to stay non-final (σ). Compare the
		// "too many ignorables" case below, where one more dot yields ς.
		desc:    "und/title/final sigma: max ignorables",
		src:     "ΟΣ" + strings.Repeat(".", maxIgnorable) + "a",
		want:    "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und),
	}, {
		// No sigma is involved here. srcSize is chosen so that the first
		// Transform call is offered everything except the trailing "a"; the
		// "a" is still expected to come out as "A".
		desc:    "und/title/long string",
		src:     "AA" + strings.Repeat(".", maxIgnorable+1) + "a",
		want:    "Aa" + strings.Repeat(".", maxIgnorable+1) + "A",
		dstSize: minBufSize,
		srcSize: len("AA" + strings.Repeat(".", maxIgnorable+1)),
		t:       Title(language.Und),
	}, {
		// Note: the choice to change the final sigma at the end in case of
		// too many case ignorables is arbitrary. The main reason for this
		// choice is that it results in simpler code.
		desc:    "und/title/final sigma: too many ignorables",
		src:     "ΟΣ" + strings.Repeat(".", maxIgnorable+1) + "a",
		want:    "Ος" + strings.Repeat(".", maxIgnorable+1) + "A",
		dstSize: minBufSize,
		srcSize: len("ΟΣ" + strings.Repeat(".", maxIgnorable+1)),
		t:       Title(language.Und),
	}, {
		desc:    "und/title/final sigma: apostrophe",
		src:     "ΟΣ''a",
		want:    "Οσ''A",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und),
	}, {
		// The trailing U+0313 is expected to be dropped from the output.
		desc:    "el/upper/max ignorables",
		src:     "ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313",
		want:    "Ο" + strings.Repeat("\u0321", maxIgnorable-1),
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Upper(language.Greek),
	}, {
		// With one more ignorable in between, the trailing U+0313 is
		// expected to be kept.
		desc:    "el/upper/too many ignorables",
		src:     "ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
		want:    "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
		dstSize: minBufSize,
		srcSize: len("ο" + strings.Repeat("\u0321", maxIgnorable)),
		t:       Upper(language.Greek),
	}, {
		desc:     "el/upper/short dst",
		src:      "123ο",
		want:     "123Ο",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Upper(language.Greek),
	}, {
		// A U+0307 is expected to be inserted before the trailing U+0300.
		desc:    "lt/lower/max ignorables",
		src:     "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
		want:    "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Lower(language.Lithuanian),
	}, {
		// With one more ignorable in between, no U+0307 is expected.
		desc:    "lt/lower/too many ignorables",
		src:     "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
		want:    "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
		dstSize: minBufSize,
		srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)),
		t:       Lower(language.Lithuanian),
	}, {
		desc:     "lt/lower/decomposition with short dst buffer 1",
		src:      "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
		firstErr: transform.ErrShortDst,
		want:     "aaaaai\u0307\u0300",
		dstSize:  5,
		srcSize:  minBufSize,
		t:        Lower(language.Lithuanian),
	}, {
		desc:     "lt/lower/decomposition with short dst buffer 2",
		src:      "aaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
		firstErr: transform.ErrShortDst,
		want:     "aaaai\u0307\u0300",
		dstSize:  5,
		srcSize:  minBufSize,
		t:        Lower(language.Lithuanian),
	}, {
		// The U+0307 before the trailing U+0300 is expected to be removed.
		desc:    "lt/upper/max ignorables",
		src:     "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		want:    "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Upper(language.Lithuanian),
	}, {
		// With one more ignorable in between, the U+0307 is expected to stay.
		desc:    "lt/upper/too many ignorables",
		src:     "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
		want:    "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
		dstSize: minBufSize,
		srcSize: len("i" + strings.Repeat("\u0321", maxIgnorable)),
		t:       Upper(language.Lithuanian),
	}, {
		desc:     "lt/upper/short dst",
		src:      "12i\u0307\u0300",
		want:     "12\u00cc",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Upper(language.Lithuanian),
	}, {
		// "I" is expected to lower to "i" and the U+0307 to be removed.
		desc:    "aztr/lower/max ignorables",
		src:     "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		want:    "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Lower(language.Turkish),
	}, {
		// With one more ignorable in between, "I" is expected to lower to
		// dotless U+0131 and the U+0307 to stay.
		desc:    "aztr/lower/too many ignorables",
		src:     "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
		want:    "\u0131" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
		dstSize: minBufSize,
		srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)),
		t:       Lower(language.Turkish),
	}, {
		// The destination holds exactly the two leading spaces.
		desc:     "nl/title/pre-IJ cutoff",
		src:      "  ij",
		want:     "  IJ",
		firstErr: transform.ErrShortDst,
		dstSize:  2,
		srcSize:  minBufSize,
		t:        Title(language.Dutch),
	}, {
		// The destination has room for the two spaces plus one more byte,
		// which is not enough for the two-byte "IJ".
		desc:     "nl/title/mid-IJ cutoff",
		src:      "  ij",
		want:     "  IJ",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Title(language.Dutch),
	}, {
		desc:     "af/title/apostrophe",
		src:      "'n bietje",
		want:     "'n Bietje",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Title(language.Afrikaans),
	}}
}
 601
 602 func TestShortBuffersAndOverflow(t *testing.T) {
 603         for i, tt := range bufferTests {
 604                 testtext.Run(t, tt.desc, func(t *testing.T) {
 605                         buf := make([]byte, tt.dstSize)
 606                         got := []byte{}
 607                         var nSrc, nDst int
 608                         var err error
 609                         for p := 0; p < len(tt.src); p += nSrc {
 610                                 q := p + tt.srcSize
 611                                 if q > len(tt.src) {
 612                                         q = len(tt.src)
 613                                 }
 614                                 nDst, nSrc, err = tt.t.Transform(buf, []byte(tt.src[p:q]), q == len(tt.src))
 615                                 got = append(got, buf[:nDst]...)
 616
 617                                 if p == 0 && err != tt.firstErr {
 618                                         t.Errorf("%d:%s:\n error was %v; want %v", i, tt.desc, err, tt.firstErr)
 619                                         break
 620                                 }
 621                         }
 622                         if string(got) != tt.want {
 623                                 t.Errorf("%d:%s:\ngot  %+q;\nwant %+q", i, tt.desc, got, tt.want)
 624                         }
 625                         testHandover(t, Caser{tt.t}, tt.src)
 626                 })
 627         }
 628 }
 629
 630 func TestSpan(t *testing.T) {
 631         for _, tt := range []struct {
 632                 desc  string
 633                 src   string
 634                 want  string
 635                 atEOF bool
 636                 err   error
 637                 t     Caser
 638         }{{
 639                 desc:  "und/upper/basic",
 640                 src:   "abcdefg",
 641                 want:  "",
 642                 atEOF: true,
 643                 err:   transform.ErrEndOfSpan,
 644                 t:     Upper(language.Und),
 645         }, {
 646                 desc:  "und/upper/short src",
 647                 src:   "123É"[:4],
 648                 want:  "123",
 649                 atEOF: false,
 650                 err:   transform.ErrShortSrc,
 651                 t:     Upper(language.Und),
 652         }, {
 653                 desc:  "und/upper/no error on short",
 654                 src:   "12",
 655                 want:  "12",
 656                 atEOF: false,
 657                 t:     Upper(language.Und),
 658         }, {
 659                 desc:  "und/lower/basic",
 660                 src:   "ABCDEFG",
 661                 want:  "",
 662                 atEOF: true,
 663                 err:   transform.ErrEndOfSpan,
 664                 t:     Lower(language.Und),
 665         }, {
 666                 desc:  "und/lower/short src num",
 667                 src:   "123é"[:4],
 668                 want:  "123",
 669                 atEOF: false,
 670                 err:   transform.ErrShortSrc,
 671                 t:     Lower(language.Und),
 672         }, {
 673                 desc:  "und/lower/short src greek",
 674                 src:   "αβγé"[:7],
 675                 want:  "αβγ",
 676                 atEOF: false,
 677                 err:   transform.ErrShortSrc,
 678                 t:     Lower(language.Und),
 679         }, {
 680                 desc:  "und/lower/no error on short",
 681                 src:   "12",
 682                 want:  "12",
 683                 atEOF: false,
 684                 t:     Lower(language.Und),
 685         }, {
 686                 desc:  "und/lower/simple (no final sigma)",
 687                 src:   "ος οσσ",
 688                 want:  "οσ οσσ",
 689                 atEOF: true,
 690                 t:     Lower(language.Und, HandleFinalSigma(false)),
 691         }, {
 692                 desc:  "und/title/simple (no final sigma)",
 693                 src:   "Οσ Οσσ",
 694                 want:  "Οσ Οσσ",
 695                 atEOF: true,
 696                 t:     Title(language.Und, HandleFinalSigma(false)),
 697         }, {
 698                 desc: "und/lower/final sigma: no error",
 699                 src:  "οΣ", // Oς
 700                 want: "ο",  // Oς
 701                 err:  transform.ErrEndOfSpan,
 702                 t:    Lower(language.Und),
 703         }, {
 704                 desc: "und/title/final sigma: no error",
 705                 src:  "ΟΣ", // Oς
 706                 want: "Ο",  // Oς
 707                 err:  transform.ErrEndOfSpan,
 708                 t:    Title(language.Und),
 709         }, {
 710                 desc: "und/title/final sigma: no short source!",
 711                 src:  "ΟσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσΣ",
 712                 want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσ",
 713                 err:  transform.ErrEndOfSpan,
 714                 t:    Title(language.Und),
 715         }, {
 716                 desc:  "und/title/clipped UTF-8 rune",
 717                 src:   "Σσ" + string([]byte{0xCF}),
 718                 want:  "Σσ",
 719                 atEOF: false,
 720                 err:   transform.ErrShortSrc,
 721                 t:     Title(language.Und),
 722         }, {
 723                 desc:  "und/title/clipped UTF-8 rune atEOF",
 724                 src:   "Σσσ" + string([]byte{0xCF}),
 725                 want:  "Σσσ" + string([]byte{0xCF}),
 726                 atEOF: true,
 727                 t:     Title(language.Und),
 728         }, {
 729                 // Note: the choice to change the final sigma at the end in case of
 730                 // too many case ignorables is arbitrary. The main reason for this
 731                 // choice is that it results in simpler code.
 732                 desc: "und/title/long string",
 733                 src:  "A" + strings.Repeat("a", maxIgnorable+5),
 734                 want: "A" + strings.Repeat("a", maxIgnorable+5),
 735                 t:    Title(language.Und),
 736         }, {
 737                 // Note: the choice to change the final sigma at the end in case of
 738                 // too many case ignorables is arbitrary. The main reason for this
 739                 // choice is that it results in simpler code.
 740                 desc:  "und/title/cyrillic",
 741                 src:   "При",
 742                 want:  "При",
 743                 atEOF: true,
 744                 t:     Title(language.Und, HandleFinalSigma(false)),
 745         }, {
 746                 // Note: the choice to change the final sigma at the end in case of
 747                 // too many case ignorables is arbitrary. The main reason for this
 748                 // choice is that it results in simpler code.
 749                 desc: "und/title/final sigma: max ignorables",
 750                 src:  "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
 751                 want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
 752                 t:    Title(language.Und),
 753         }, {
 754                 desc: "el/upper/max ignorables - not implemented",
 755                 src:  "Ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313",
 756                 want: "",
 757                 err:  transform.ErrEndOfSpan,
 758                 t:    Upper(language.Greek),
 759         }, {
 760                 desc: "el/upper/too many ignorables - not implemented",
 761                 src:  "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
 762                 want: "",
 763                 err:  transform.ErrEndOfSpan,
 764                 t:    Upper(language.Greek),
 765         }, {
 766                 desc: "el/upper/short dst",
 767                 src:  "123ο",
 768                 want: "",
 769                 err:  transform.ErrEndOfSpan,
 770                 t:    Upper(language.Greek),
 771         }, {
 772                 desc: "lt/lower/max ignorables",
 773                 src:  "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
 774                 want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
 775                 t:    Lower(language.Lithuanian),
 776         }, {
 777                 desc: "lt/lower/isLower",
 778                 src:  "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
 779                 want: "",
 780                 err:  transform.ErrEndOfSpan,
 781                 t:    Lower(language.Lithuanian),
 782         }, {
 783                 desc: "lt/lower/not identical",
 784                 src:  "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
 785                 err:  transform.ErrEndOfSpan,
 786                 want: "aaaaa",
 787                 t:    Lower(language.Lithuanian),
 788         }, {
 789                 desc: "lt/lower/identical",
 790                 src:  "aaaai\u0307\u0300", // U+00CC LATIN CAPITAL LETTER I GRAVE
 791                 want: "aaaai\u0307\u0300",
 792                 t:    Lower(language.Lithuanian),
 793         }, {
 794                 desc: "lt/upper/not implemented",
 795                 src:  "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
 796                 want: "",
 797                 err:  transform.ErrEndOfSpan,
 798                 t:    Upper(language.Lithuanian),
 799         }, {
 800                 desc: "lt/upper/not implemented, ascii",
 801                 src:  "AB",
 802                 want: "",
 803                 err:  transform.ErrEndOfSpan,
 804                 t:    Upper(language.Lithuanian),
 805         }, {
 806                 desc: "nl/title/pre-IJ cutoff",
 807                 src:  "  IJ",
 808                 want: "  IJ",
 809                 t:    Title(language.Dutch),
 810         }, {
 811                 desc: "nl/title/mid-IJ cutoff",
 812                 src:  "  Ia",
 813                 want: "  Ia",
 814                 t:    Title(language.Dutch),
 815         }, {
 816                 desc: "af/title/apostrophe",
 817                 src:  "'n Bietje",
 818                 want: "'n Bietje",
 819                 t:    Title(language.Afrikaans),
 820         }, {
 821                 desc: "af/title/apostrophe-incorrect",
 822                 src:  "'N Bietje",
 823                 // The Single_Quote (a MidWord), needs to be retained as unspanned so
 824                 // that a successive call to Transform can detect that N should not be
 825                 // capitalized.
 826                 want: "",
 827                 err:  transform.ErrEndOfSpan,
 828                 t:    Title(language.Afrikaans),
 829         }} {
 830                 testtext.Run(t, tt.desc, func(t *testing.T) {
 831                         for p := 0; p < len(tt.want); p += utf8.RuneLen([]rune(tt.src[p:])[0]) {
 832                                 tt.t.Reset()
 833                                 n, err := tt.t.Span([]byte(tt.src[:p]), false)
 834                                 if err != nil && err != transform.ErrShortSrc {
 835                                         t.Errorf("early failure:Span(%+q): %v (%d < %d)", tt.src[:p], err, n, len(tt.want))
 836                                         break
 837                                 }
 838                         }
 839                         tt.t.Reset()
 840                         n, err := tt.t.Span([]byte(tt.src), tt.atEOF)
 841                         if n != len(tt.want) || err != tt.err {
 842                                 t.Errorf("Span(%+q, %v): got %d, %v; want %d, %v", tt.src, tt.atEOF, n, err, len(tt.want), tt.err)
 843                         }
 844                         testHandover(t, tt.t, tt.src)
 845                 })
 846         }
 847 }
 848
 849 var txtASCII = strings.Repeat("The quick brown fox jumps over the lazy dog. ", 50)
 850
 851 // Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/
 852 const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả.  Nếu bạn sử
 853 dụng, chuyển đổi, hoặc xây dựng dự án từ  nội dung được chia sẻ này, bạn phải áp
 854 dụng giấy phép này hoặc  một giấy phép khác có các điều khoản tương tự như giấy
 855 phép này cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào trên đây
 856 cũng có thể được miễn bỏ nếu bạn được sự cho phép của người sở hữu bản quyền.
 857 Phạm vi công chúng — Khi tác phẩm hoặc bất kỳ chương nào của tác phẩm đã trong
 858 vùng dành cho công chúng theo quy định của pháp luật thì tình trạng của nó không
 859 bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`
 860
 861 // http://creativecommons.org/licenses/by-sa/2.5/cn/
 862 const txt_cn = `您可以自由： 复制、发行、展览、表演、放映、
 863 广播或通过信息网络传播本作品 创作演绎作品
 864 对本作品进行商业性使用 惟须遵守下列条件：
 865 署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。
 866 相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作，
 867 您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。`
 868
 869 // Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
 870 const txt_ru = `При обязательном соблюдении следующих условий: Attribution — Вы
 871 должны атрибутировать произведение (указывать автора и источник) в порядке,
 872 предусмотренном автором или лицензиаром (но только так, чтобы никоим образом не
 873 подразумевалось, что они поддерживают вас или использование вами данного
 874 произведения). Υπό τις ακόλουθες προϋποθέσεις:`
 875
 876 // Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/
 877 const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με
 878 τον τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια (χωρίς
 879 όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή τη χρήση του έργου
 880 από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε, τροποποιήσετε ή δημιουργήσετε
 881 περαιτέρω βασισμένοι στο έργο θα μπορείτε να διανέμετε το έργο που θα προκύψει
 882 μόνο με την ίδια ή παρόμοια άδεια.`
 883
 884 const txtNonASCII = txt_vn + txt_cn + txt_ru + txt_gr
 885
 886 // TODO: Improve ASCII performance.
 887
 888 func BenchmarkCasers(b *testing.B) {
 889         for _, s := range []struct{ name, text string }{
 890                 {"ascii", txtASCII},
 891                 {"nonASCII", txtNonASCII},
 892                 {"short", "При"},
 893         } {
 894                 src := []byte(s.text)
 895                 // Measure case mappings in bytes package for comparison.
 896                 for _, f := range []struct {
 897                         name string
 898                         fn   func(b []byte) []byte
 899                 }{
 900                         {"lower", bytes.ToLower},
 901                         {"title", bytes.ToTitle},
 902                         {"upper", bytes.ToUpper},
 903                 } {
 904                         testtext.Bench(b, path.Join(s.name, "bytes", f.name), func(b *testing.B) {
 905                                 b.SetBytes(int64(len(src)))
 906                                 for i := 0; i < b.N; i++ {
 907                                         f.fn(src)
 908                                 }
 909                         })
 910                 }
 911                 for _, t := range []struct {
 912                         name  string
 913                         caser transform.SpanningTransformer
 914                 }{
 915                         {"fold/default", Fold()},
 916                         {"upper/default", Upper(language.Und)},
 917                         {"lower/sigma", Lower(language.Und)},
 918                         {"lower/simple", Lower(language.Und, HandleFinalSigma(false))},
 919                         {"title/sigma", Title(language.Und)},
 920                         {"title/simple", Title(language.Und, HandleFinalSigma(false))},
 921                 } {
 922                         c := Caser{t.caser}
 923                         dst := make([]byte, len(src))
 924                         testtext.Bench(b, path.Join(s.name, t.name, "transform"), func(b *testing.B) {
 925                                 b.SetBytes(int64(len(src)))
 926                                 for i := 0; i < b.N; i++ {
 927                                         c.Reset()
 928                                         c.Transform(dst, src, true)
 929                                 }
 930                         })
 931                         // No need to check span for simple cases, as they will be the same
 932                         // as sigma.
 933                         if strings.HasSuffix(t.name, "/simple") {
 934                                 continue
 935                         }
 936                         spanSrc := c.Bytes(src)
 937                         testtext.Bench(b, path.Join(s.name, t.name, "span"), func(b *testing.B) {
 938                                 c.Reset()
 939                                 if n, _ := c.Span(spanSrc, true); n < len(spanSrc) {
 940                                         b.Fatalf("spanner is not recognizing text %q as done (at %d)", spanSrc, n)
 941                                 }
 942                                 b.SetBytes(int64(len(spanSrc)))
 943                                 for i := 0; i < b.N; i++ {
 944                                         c.Reset()
 945                                         c.Span(spanSrc, true)
 946                                 }
 947                         })
 948                 }
 949         }
 950 }