1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
15 "golang.org/x/text/internal/testtext"
16 "golang.org/x/text/language"
17 "golang.org/x/text/transform"
18 "golang.org/x/text/unicode/norm"
21 type testCase struct {
23 src interface{} // string, []string, or nil to skip test
24 title interface{} // string, []string, or nil to skip test
25 lower interface{} // string, []string, or nil to skip test
26 upper interface{} // string, []string, or nil to skip test
30 var testCases = []testCase{
33 src: "abc aBc ABC abC İsıI ΕΣΆΣ",
34 title: "Abc Abc Abc Abc İsıi Εσάσ",
35 lower: "abc abc abc abc i\u0307sıi εσάσ",
36 upper: "ABC ABC ABC ABC İSII ΕΣΆΣ",
37 opts: getOpts(HandleFinalSigma(false)),
42 src: "abc aBc ABC abC İsıI ΕΣΆΣ Σ _Σ -Σ",
43 title: "Abc Abc Abc Abc İsıi Εσάς Σ _Σ -Σ",
44 lower: "abc abc abc abc i\u0307sıi εσάς σ _σ -σ",
45 upper: "ABC ABC ABC ABC İSII ΕΣΆΣ Σ _Σ -Σ",
46 opts: getOpts(HandleFinalSigma(true)),
49 2: { // Title cased runes.
63 "χωΡΊΣ χωΡΊΣ^a χωΡΊΣ:a χωΡΊΣ:^a χωΡΊΣ^ όμΩΣ Σ",
66 `"capitalize a^a -hyphen 0X _u a_u:a`,
67 "MidNumLet a.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg",
68 "MidNum a,b;c\u037ed\u0589e\u060cf\u2044g\ufe50h",
69 "\u0345 x\u3031x x\u05d0x \u05d0x a'.a a.a a4,a",
74 "Χωρίς Χωρίσ^A Χωρίσ:a Χωρίσ:^A Χωρίς^ Όμως Σ",
76 // Note that 49Ers is correct according to the spec.
77 // TODO: provide some option to the user to treat different
78 // characters as cased.
80 `"Capitalize A^A -Hyphen 0X _U A_u:a`,
81 "Midnumlet A.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg",
82 "Midnum A,B;C\u037eD\u0589E\u060cF\u2044G\ufe50H",
83 "\u0399 X\u3031X X\u05d0x \u05d0X A'.A A.a A4,A",
87 // TODO: These are known deviations from the Unicode Word Breaking
91 // "x_\u3031_x a4,4a",
92 // "X_\u3031_x A4,4a", // Currently is "X_\U3031_X A4,4A".
93 // "x_\u3031_x a4,4a",
94 // "X_\u3031_X A4,4A",
99 // Tests title options
101 src: "abc aBc ABC abC İsıI o'Brien",
102 title: "Abc ABc ABC AbC İsıI O'Brien",
103 opts: getOpts(NoLower),
108 src: "aBc ΟΔΌΣ Οδός Σο ΣΟ Σ oΣ ΟΣ σ ἕξ \u03ac",
109 title: "Abc Οδός Οδός Σο Σο Σ Oς Ος Σ Ἕξ \u0386",
110 lower: "abc οδός οδός σο σο σ oς ος σ ἕξ \u03ac",
111 upper: "ABC ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ Σ OΣ ΟΣ Σ ΕΞ \u0391", // Uppercase removes accents
116 src: "Isiİ İsıI I\u0307sIiİ İsıI\u0307 I\u0300\u0307",
117 title: "Isii İsıı I\u0307sıii İsıi I\u0300\u0307",
118 lower: "ısii isıı isıii isıi \u0131\u0300\u0307",
119 upper: "ISİİ İSII I\u0307SIİİ İSII\u0307 I\u0300\u0307",
124 src: "I Ï J J̈ Į Į̈ Ì Í Ĩ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤",
125 title: "I Ï J J̈ Į Į̈ Ì Í Ĩ Xi̇̈ Xj̇̈ Xį̇̈ Xi̇̀ Xi̇́ Xi̇̃ Xi Xi̇̈ Xj Xj̇̈ Xį Xį̇̈ Xi̟̤",
126 lower: "i i̇̈ j j̇̈ į į̇̈ i̇̀ i̇́ i̇̃ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ xi xi̇̈ xj xj̇̈ xį xį̇̈ xi̟̤",
127 upper: "I Ï J J̈ Į Į̈ Ì Í Ĩ XÏ XJ̈ XĮ̈ XÌ XÍ XĨ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤",
132 src: "\u012e\u0300 \u00cc i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307",
133 title: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307",
134 lower: "\u012f\u0307\u0300 i\u0307\u0300 i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307",
135 upper: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307",
140 src: "ijs IJs Ij Ijs İJ İJs aa aA 'ns 'S",
141 title: "IJs IJs IJ IJs İj İjs Aa Aa 'ns 's",
144 // Note: this specification is not currently part of CLDR. The same holds
145 // for the leading apostrophe handling for Dutch.
146 // See http://unicode.org/cldr/trac/ticket/7078.
149 src: "wag 'n bietjie",
150 title: "Wag 'n Bietjie",
151 lower: "wag 'n bietjie",
152 upper: "WAG 'N BIETJIE",
156 func TestCaseMappings(t *testing.T) {
157 for i, tt := range testCases {
158 src, ok := tt.src.([]string)
160 src = strings.Split(tt.src.(string), " ")
163 for _, lang := range strings.Split(tt.lang, " ") {
164 tag := language.MustParse(lang)
165 testEntry := func(name string, mk func(language.Tag, options) transform.SpanningTransformer, gold interface{}) {
166 c := Caser{mk(tag, tt.opts)}
168 wants, ok := gold.([]string)
170 wants = strings.Split(gold.(string), " ")
172 for j, want := range wants {
173 if got := c.String(src[j]); got != want {
174 t.Errorf("%d:%s:\n%s.String(%+q):\ngot %+q;\nwant %+q", i, lang, name, src[j], got, want)
178 dst := make([]byte, 256) // big enough to hold any result
179 src := []byte(strings.Join(src, " "))
180 v := testtext.AllocsPerRun(20, func() {
181 c.Transform(dst, src, true)
184 t.Errorf("%d:%s:\n%s: number of allocs was %f; want 0", i, lang, name, v)
187 testEntry("Upper", makeUpper, tt.upper)
188 testEntry("Lower", makeLower, tt.lower)
189 testEntry("Title", makeTitle, tt.title)
194 // TestAlloc tests that some mapping methods should not cause any allocation.
195 func TestAlloc(t *testing.T) {
196 dst := make([]byte, 256) // big enough to hold any result
197 src := []byte(txtNonASCII)
199 for i, f := range []func() Caser{
200 func() Caser { return Upper(language.Und) },
201 func() Caser { return Lower(language.Und) },
202 func() Caser { return Lower(language.Und, HandleFinalSigma(false)) },
203 // TODO: use a shared copy for these casers as well, in order of
204 // importance, starting with the most important:
205 // func() Caser { return Title(language.Und) },
206 // func() Caser { return Title(language.Und, HandleFinalSigma(false)) },
208 testtext.Run(t, "", func(t *testing.T) {
210 v := testtext.AllocsPerRun(10, func() {
214 // TODO: Right now only Upper has 1 allocation. Special-case Lower
215 // and Title as well to have fewer allocations for the root locale.
216 t.Errorf("%d:init: number of allocs was %f; want 0", i, v)
218 v = testtext.AllocsPerRun(2, func() {
219 c.Transform(dst, src, true)
222 t.Errorf("%d:transform: number of allocs was %f; want 0", i, v)
228 func testHandover(t *testing.T, c Caser, src string) {
229 want := c.String(src)
230 // Find the common prefix.
232 for ; pSrc < len(src) && pSrc < len(want) && want[pSrc] == src[pSrc]; pSrc++ {
235 // Test handover for each substring of the prefix.
236 for i := 0; i < pSrc; i++ {
237 testtext.Run(t, fmt.Sprint("interleave/", i), func(t *testing.T) {
238 dst := make([]byte, 4*len(src))
240 nSpan, _ := c.Span([]byte(src[:i]), false)
241 copy(dst, src[:nSpan])
242 nTransform, _, _ := c.Transform(dst[nSpan:], []byte(src[nSpan:]), true)
243 got := string(dst[:nSpan+nTransform])
245 t.Errorf("full string: got %q; want %q", got, want)
251 func TestHandover(t *testing.T) {
252 testCases := []struct {
257 "title/nosigma/single midword",
258 Title(language.Und, HandleFinalSigma(false)),
261 "title/nosigma/single midword",
262 Title(language.Und, HandleFinalSigma(false)),
265 "title/nosigma/double midword",
266 Title(language.Und, HandleFinalSigma(false)),
269 "title/nosigma/double midword",
270 Title(language.Und, HandleFinalSigma(false)),
273 "title/nosigma/double midword",
274 Title(language.Und, HandleFinalSigma(false)),
277 "title/sigma/single midword",
281 "title/sigma/single midword",
285 "title/sigma/double midword",
289 "title/sigma/double midword",
293 "title/sigma/double midword",
297 "title/af/leading apostrophe",
298 Title(language.Afrikaans),
301 for _, tc := range testCases {
302 testtext.Run(t, tc.desc, func(t *testing.T) {
303 src := tc.first + tc.second
304 want := tc.t.String(src)
306 n, _ := tc.t.Span([]byte(tc.first), false)
308 dst := make([]byte, len(want))
309 copy(dst, tc.first[:n])
311 nDst, _, _ := tc.t.Transform(dst[n:], []byte(src[n:]), true)
312 got := string(dst[:n+nDst])
314 t.Errorf("got %q; want %q", got, want)
320 // minBufSize is the size of the buffer with which the casing operations in
321 // this package are guaranteed to make progress.
322 const minBufSize = norm.MaxSegmentSize
324 type bufferTest struct {
325 desc, src, want string
328 t transform.SpanningTransformer
331 var bufferTests []bufferTest
334 bufferTests = []bufferTest{{
335 desc: "und/upper/short dst",
338 firstErr: transform.ErrShortDst,
341 t: Upper(language.Und),
343 desc: "und/upper/short src",
346 firstErr: transform.ErrShortSrc,
349 t: Upper(language.Und),
351 desc: "und/upper/no error on short",
357 t: Upper(language.Und),
359 desc: "und/lower/short dst",
362 firstErr: transform.ErrShortDst,
365 t: Lower(language.Und),
367 desc: "und/lower/short src",
370 firstErr: transform.ErrShortSrc,
373 t: Lower(language.Und),
375 desc: "und/lower/no error on short",
381 t: Lower(language.Und),
383 desc: "und/lower/simple (no final sigma)",
388 t: Lower(language.Und, HandleFinalSigma(false)),
390 desc: "und/title/simple (no final sigma)",
395 t: Title(language.Und, HandleFinalSigma(false)),
397 desc: "und/title/final sigma: no error",
402 t: Title(language.Und),
404 desc: "und/title/final sigma: short source",
405 src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
406 want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
407 firstErr: transform.ErrShortSrc,
410 t: Title(language.Und),
412 desc: "und/title/final sigma: short destination 1",
413 src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
414 want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
415 firstErr: transform.ErrShortDst,
418 t: Title(language.Und),
420 desc: "und/title/final sigma: short destination 2",
421 src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
422 want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
423 firstErr: transform.ErrShortDst,
426 t: Title(language.Und),
428 desc: "und/title/final sigma: short destination 3",
429 src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
430 want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
431 firstErr: transform.ErrShortDst,
434 t: Title(language.Und),
436 desc: "und/title/clipped UTF-8 rune",
439 firstErr: transform.ErrShortSrc,
442 t: Title(language.Und),
444 desc: "und/title/clipped UTF-8 rune atEOF",
445 src: "σσσ" + string([]byte{0xCF}),
446 want: "Σσσ" + string([]byte{0xCF}),
449 t: Title(language.Und),
451 // Note: the choice to change the final sigma at the end in case of
452 // too many case ignorables is arbitrary. The main reason for this
453 // choice is that it results in simpler code.
454 desc: "und/title/final sigma: max ignorables",
455 src: "ΟΣ" + strings.Repeat(".", maxIgnorable) + "a",
456 want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
459 t: Title(language.Und),
461 // Note: the choice to change the final sigma at the end in case of
462 // too many case ignorables is arbitrary. The main reason for this
463 // choice is that it results in simpler code.
464 desc: "und/title/long string",
465 src: "AA" + strings.Repeat(".", maxIgnorable+1) + "a",
466 want: "Aa" + strings.Repeat(".", maxIgnorable+1) + "A",
468 srcSize: len("AA" + strings.Repeat(".", maxIgnorable+1)),
469 t: Title(language.Und),
471 // Note: the choice to change the final sigma at the end in case of
472 // too many case ignorables is arbitrary. The main reason for this
473 // choice is that it results in simpler code.
474 desc: "und/title/final sigma: too many ignorables",
475 src: "ΟΣ" + strings.Repeat(".", maxIgnorable+1) + "a",
476 want: "Ος" + strings.Repeat(".", maxIgnorable+1) + "A",
478 srcSize: len("ΟΣ" + strings.Repeat(".", maxIgnorable+1)),
479 t: Title(language.Und),
481 desc: "und/title/final sigma: apostrophe",
486 t: Title(language.Und),
488 desc: "el/upper/max ignorables",
489 src: "ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313",
490 want: "Ο" + strings.Repeat("\u0321", maxIgnorable-1),
493 t: Upper(language.Greek),
495 desc: "el/upper/too many ignorables",
496 src: "ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
497 want: "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
499 srcSize: len("ο" + strings.Repeat("\u0321", maxIgnorable)),
500 t: Upper(language.Greek),
502 desc: "el/upper/short dst",
505 firstErr: transform.ErrShortDst,
508 t: Upper(language.Greek),
510 desc: "lt/lower/max ignorables",
511 src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
512 want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
515 t: Lower(language.Lithuanian),
517 desc: "lt/lower/too many ignorables",
518 src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
519 want: "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
521 srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)),
522 t: Lower(language.Lithuanian),
524 desc: "lt/lower/decomposition with short dst buffer 1",
525 src: "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
526 firstErr: transform.ErrShortDst,
527 want: "aaaaai\u0307\u0300",
530 t: Lower(language.Lithuanian),
532 desc: "lt/lower/decomposition with short dst buffer 2",
533 src: "aaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
534 firstErr: transform.ErrShortDst,
535 want: "aaaai\u0307\u0300",
538 t: Lower(language.Lithuanian),
540 desc: "lt/upper/max ignorables",
541 src: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
542 want: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
545 t: Upper(language.Lithuanian),
547 desc: "lt/upper/too many ignorables",
548 src: "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
549 want: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
551 srcSize: len("i" + strings.Repeat("\u0321", maxIgnorable)),
552 t: Upper(language.Lithuanian),
554 desc: "lt/upper/short dst",
555 src: "12i\u0307\u0300",
557 firstErr: transform.ErrShortDst,
560 t: Upper(language.Lithuanian),
562 desc: "aztr/lower/max ignorables",
563 src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
564 want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
567 t: Lower(language.Turkish),
569 desc: "aztr/lower/too many ignorables",
570 src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
571 want: "\u0131" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
573 srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)),
574 t: Lower(language.Turkish),
576 desc: "nl/title/pre-IJ cutoff",
579 firstErr: transform.ErrShortDst,
582 t: Title(language.Dutch),
584 desc: "nl/title/mid-IJ cutoff",
587 firstErr: transform.ErrShortDst,
590 t: Title(language.Dutch),
592 desc: "af/title/apostrophe",
595 firstErr: transform.ErrShortDst,
598 t: Title(language.Afrikaans),
602 func TestShortBuffersAndOverflow(t *testing.T) {
603 for i, tt := range bufferTests {
604 testtext.Run(t, tt.desc, func(t *testing.T) {
605 buf := make([]byte, tt.dstSize)
609 for p := 0; p < len(tt.src); p += nSrc {
614 nDst, nSrc, err = tt.t.Transform(buf, []byte(tt.src[p:q]), q == len(tt.src))
615 got = append(got, buf[:nDst]...)
617 if p == 0 && err != tt.firstErr {
618 t.Errorf("%d:%s:\n error was %v; want %v", i, tt.desc, err, tt.firstErr)
622 if string(got) != tt.want {
623 t.Errorf("%d:%s:\ngot %+q;\nwant %+q", i, tt.desc, got, tt.want)
625 testHandover(t, Caser{tt.t}, tt.src)
630 func TestSpan(t *testing.T) {
631 for _, tt := range []struct {
639 desc: "und/upper/basic",
643 err: transform.ErrEndOfSpan,
644 t: Upper(language.Und),
646 desc: "und/upper/short src",
650 err: transform.ErrShortSrc,
651 t: Upper(language.Und),
653 desc: "und/upper/no error on short",
657 t: Upper(language.Und),
659 desc: "und/lower/basic",
663 err: transform.ErrEndOfSpan,
664 t: Lower(language.Und),
666 desc: "und/lower/short src num",
670 err: transform.ErrShortSrc,
671 t: Lower(language.Und),
673 desc: "und/lower/short src greek",
677 err: transform.ErrShortSrc,
678 t: Lower(language.Und),
680 desc: "und/lower/no error on short",
684 t: Lower(language.Und),
686 desc: "und/lower/simple (no final sigma)",
690 t: Lower(language.Und, HandleFinalSigma(false)),
692 desc: "und/title/simple (no final sigma)",
696 t: Title(language.Und, HandleFinalSigma(false)),
698 desc: "und/lower/final sigma: no error",
701 err: transform.ErrEndOfSpan,
702 t: Lower(language.Und),
704 desc: "und/title/final sigma: no error",
707 err: transform.ErrEndOfSpan,
708 t: Title(language.Und),
710 desc: "und/title/final sigma: no short source!",
711 src: "ΟσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσΣ",
712 want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσ",
713 err: transform.ErrEndOfSpan,
714 t: Title(language.Und),
716 desc: "und/title/clipped UTF-8 rune",
717 src: "Σσ" + string([]byte{0xCF}),
720 err: transform.ErrShortSrc,
721 t: Title(language.Und),
723 desc: "und/title/clipped UTF-8 rune atEOF",
724 src: "Σσσ" + string([]byte{0xCF}),
725 want: "Σσσ" + string([]byte{0xCF}),
727 t: Title(language.Und),
729 // Note: the choice to change the final sigma at the end in case of
730 // too many case ignorables is arbitrary. The main reason for this
731 // choice is that it results in simpler code.
732 desc: "und/title/long string",
733 src: "A" + strings.Repeat("a", maxIgnorable+5),
734 want: "A" + strings.Repeat("a", maxIgnorable+5),
735 t: Title(language.Und),
737 // Note: the choice to change the final sigma at the end in case of
738 // too many case ignorables is arbitrary. The main reason for this
739 // choice is that it results in simpler code.
740 desc: "und/title/cyrillic",
744 t: Title(language.Und, HandleFinalSigma(false)),
746 // Note: the choice to change the final sigma at the end in case of
747 // too many case ignorables is arbitrary. The main reason for this
748 // choice is that it results in simpler code.
749 desc: "und/title/final sigma: max ignorables",
750 src: "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
751 want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
752 t: Title(language.Und),
754 desc: "el/upper/max ignorables - not implemented",
755 src: "Ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313",
757 err: transform.ErrEndOfSpan,
758 t: Upper(language.Greek),
760 desc: "el/upper/too many ignorables - not implemented",
761 src: "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
763 err: transform.ErrEndOfSpan,
764 t: Upper(language.Greek),
766 desc: "el/upper/short dst",
769 err: transform.ErrEndOfSpan,
770 t: Upper(language.Greek),
772 desc: "lt/lower/max ignorables",
773 src: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
774 want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
775 t: Lower(language.Lithuanian),
777 desc: "lt/lower/isLower",
778 src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
780 err: transform.ErrEndOfSpan,
781 t: Lower(language.Lithuanian),
783 desc: "lt/lower/not identical",
784 src: "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
785 err: transform.ErrEndOfSpan,
787 t: Lower(language.Lithuanian),
789 desc: "lt/lower/identical",
790 src: "aaaai\u0307\u0300", // i + U+0307 COMBINING DOT ABOVE + U+0300 COMBINING GRAVE ACCENT
791 want: "aaaai\u0307\u0300",
792 t: Lower(language.Lithuanian),
794 desc: "lt/upper/not implemented",
795 src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
797 err: transform.ErrEndOfSpan,
798 t: Upper(language.Lithuanian),
800 desc: "lt/upper/not implemented, ascii",
803 err: transform.ErrEndOfSpan,
804 t: Upper(language.Lithuanian),
806 desc: "nl/title/pre-IJ cutoff",
809 t: Title(language.Dutch),
811 desc: "nl/title/mid-IJ cutoff",
814 t: Title(language.Dutch),
816 desc: "af/title/apostrophe",
819 t: Title(language.Afrikaans),
821 desc: "af/title/apostrophe-incorrect",
823 // The Single_Quote (a MidWord) needs to be retained as unspanned so
824 // that a successive call to Transform can detect that N should not be
827 err: transform.ErrEndOfSpan,
828 t: Title(language.Afrikaans),
830 testtext.Run(t, tt.desc, func(t *testing.T) {
831 for p := 0; p < len(tt.want); p += utf8.RuneLen([]rune(tt.src[p:])[0]) {
833 n, err := tt.t.Span([]byte(tt.src[:p]), false)
834 if err != nil && err != transform.ErrShortSrc {
835 t.Errorf("early failure:Span(%+q): %v (%d < %d)", tt.src[:p], err, n, len(tt.want))
840 n, err := tt.t.Span([]byte(tt.src), tt.atEOF)
841 if n != len(tt.want) || err != tt.err {
842 t.Errorf("Span(%+q, %v): got %d, %v; want %d, %v", tt.src, tt.atEOF, n, err, len(tt.want), tt.err)
844 testHandover(t, tt.t, tt.src)
// txtASCII is an ASCII-only sample text: one pangram sentence repeated 50 times.
849 var txtASCII = strings.Repeat("The quick brown fox jumps over the lazy dog. ", 50)
851 // Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/
852 const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả. Nếu bạn sử
853 dụng, chuyển đổi, hoặc xây dựng dự án từ nội dung được chia sẻ này, bạn phải áp
854 dụng giấy phép này hoặc một giấy phép khác có các điều khoản tương tự như giấy
855 phép này cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào trên đây
856 cũng có thể được miễn bỏ nếu bạn được sự cho phép của người sở hữu bản quyền.
857 Phạm vi công chúng — Khi tác phẩm hoặc bất kỳ chương nào của tác phẩm đã trong
858 vùng dành cho công chúng theo quy định của pháp luật thì tình trạng của nó không
859 bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`
861 // Taken from http://creativecommons.org/licenses/by-sa/2.5/cn/
862 const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、
863 广播或通过信息网络传播本作品 创作演绎作品
864 对本作品进行商业性使用 惟须遵守下列条件:
865 署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。
866 相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作,
867 您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。`
869 // Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
870 const txt_ru = `При обязательном соблюдении следующих условий: Attribution — Вы
871 должны атрибутировать произведение (указывать автора и источник) в порядке,
872 предусмотренном автором или лицензиаром (но только так, чтобы никоим образом не
873 подразумевалось, что они поддерживают вас или использование вами данного
874 произведения). Υπό τις ακόλουθες προϋποθέσεις:`
876 // Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/
877 const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με
878 τον τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια (χωρίς
879 όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή τη χρήση του έργου
880 από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε, τροποποιήσετε ή δημιουργήσετε
881 περαιτέρω βασισμένοι στο έργο θα μπορείτε να διανέμετε το έργο που θα προκύψει
882 μόνο με την ίδια ή παρόμοια άδεια.`
// txtNonASCII is the concatenation of the four non-ASCII sample texts
// txt_vn, txt_cn, txt_ru and txt_gr, in that order.
884 const txtNonASCII = txt_vn + txt_cn + txt_ru + txt_gr
886 // TODO: Improve ASCII performance.
888 func BenchmarkCasers(b *testing.B) {
889 for _, s := range []struct{ name, text string }{
891 {"nonASCII", txtNonASCII},
894 src := []byte(s.text)
895 // Measure case mappings in bytes package for comparison.
896 for _, f := range []struct {
898 fn func(b []byte) []byte
900 {"lower", bytes.ToLower},
901 {"title", bytes.ToTitle},
902 {"upper", bytes.ToUpper},
904 testtext.Bench(b, path.Join(s.name, "bytes", f.name), func(b *testing.B) {
905 b.SetBytes(int64(len(src)))
906 for i := 0; i < b.N; i++ {
911 for _, t := range []struct {
913 caser transform.SpanningTransformer
915 {"fold/default", Fold()},
916 {"upper/default", Upper(language.Und)},
917 {"lower/sigma", Lower(language.Und)},
918 {"lower/simple", Lower(language.Und, HandleFinalSigma(false))},
919 {"title/sigma", Title(language.Und)},
920 {"title/simple", Title(language.Und, HandleFinalSigma(false))},
923 dst := make([]byte, len(src))
924 testtext.Bench(b, path.Join(s.name, t.name, "transform"), func(b *testing.B) {
925 b.SetBytes(int64(len(src)))
926 for i := 0; i < b.N; i++ {
928 c.Transform(dst, src, true)
931 // No need to check span for simple cases, as they will be the same
933 if strings.HasSuffix(t.name, "/simple") {
936 spanSrc := c.Bytes(src)
937 testtext.Bench(b, path.Join(s.name, t.name, "span"), func(b *testing.B) {
939 if n, _ := c.Span(spanSrc, true); n < len(spanSrc) {
940 b.Fatalf("spanner is not recognizing text %q as done (at %d)", spanSrc, n)
942 b.SetBytes(int64(len(spanSrc)))
943 for i := 0; i < b.N; i++ {
945 c.Span(spanSrc, true)