1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
11 "golang.org/x/text/internal/testtext"
14 func TestTagSize(t *testing.T) {
16 typ := reflect.TypeOf(id)
18 t.Errorf("size of Tag was %d; want 24", typ.Size())
22 func TestIsRoot(t *testing.T) {
25 t.Errorf("unspecified should be root.")
27 for i, tt := range parseTests() {
28 loc, _ := Parse(tt.in)
29 undef := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
30 if loc.IsRoot() != undef {
31 t.Errorf("%d: was %v; want %v", i, loc.IsRoot(), undef)
36 func TestEquality(t *testing.T) {
37 for i, tt := range parseTests()[48:49] {
40 t1 := Make(tag.String())
42 t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, s, t1, tag)
46 t.Errorf("%d:%s: equality test 2 failed\n got: %#v\nwant: %#v", i, s, t2, tag)
51 func TestMakeString(t *testing.T) {
52 tests := []struct{ in, out string }{
56 {"de-1901", "nl-1901"},
57 {"de-1901", "de-Arab-1901"},
58 {"x-a-b", "de-Arab-x-a-b"},
61 for i, tt := range tests {
63 mod, _ := Parse(tt.out)
65 for j := 0; j < 2; j++ {
67 if str := id.String(); str != tt.out {
68 t.Errorf("%d:%d: found %s; want %s", i, j, id.String(), tt.out)
71 // The bytes to string conversion as used in remakeString
72 // occasionally measures as more than one alloc, breaking this test.
73 // To alleviate this we set the number of runs to more than 1.
74 if n := testtext.AllocsPerRun(8, id.remakeString); n > 1 {
75 t.Errorf("%d: # allocs got %.1f; want <= 1", i, n)
80 func TestCompactIndex(t *testing.T) {
86 // TODO: these values will change with each CLDR update. This issue
87 // will be solved if we decide to fix the indexes.
89 {"ca-ES-valencia", 1, true},
90 {"ca-ES-valencia-u-va-posix", 0, false},
91 {"ca-ES-valencia-u-co-phonebk", 1, true},
92 {"ca-ES-valencia-u-co-phonebk-va-posix", 0, false},
93 {"x-klingon", 0, false},
95 {"en-US-u-va-posix", 2, true},
97 {"en-u-co-phonebk", 133, true},
98 {"en-001", 134, true},
99 {"sh", 0, false}, // We don't normalize.
101 for _, tt := range tests {
102 x, ok := CompactIndex(Raw.MustParse(tt.tag))
103 if x != tt.index || ok != tt.ok {
104 t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
109 func TestBase(t *testing.T) {
115 {"x-abc", "und", No},
117 {"und-Cyrl", "ru", High},
118 // If a region is not included, the official language should be English.
119 {"und-US", "en", High},
120 // TODO: not-explicitly listed scripts should probably be und, No
121 // Modify addTags to return info on how the match was derived.
122 // {"und-Aghb", "und", No},
124 for i, tt := range tests {
125 loc, _ := Parse(tt.loc)
126 lang, conf := loc.Base()
127 if lang.String() != tt.lang {
128 t.Errorf("%d: language was %s; want %s", i, lang, tt.lang)
131 t.Errorf("%d: confidence was %d; want %d", i, conf, tt.conf)
136 func TestParseBase(t *testing.T) {
145 {"dut", "dut", true}, // bibliographic
146 {"aaj", "und", false}, // unknown
147 {"qaa", "qaa", true},
150 {"aaaa", "und", false},
152 for i, tt := range tests {
153 x, err := ParseBase(tt.in)
154 if x.String() != tt.out || err == nil != tt.ok {
155 t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
157 if y, _, _ := Raw.Make(tt.out).Raw(); x != y {
158 t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
163 func TestScript(t *testing.T) {
168 {"und", "Latn", Low},
169 {"en-Latn", "Latn", Exact},
170 {"en", "Latn", High},
172 {"kk", "Cyrl", High},
173 {"kk-CN", "Arab", Low},
174 {"cmn", "Hans", Low},
175 {"ru", "Cyrl", High},
176 {"ru-RU", "Cyrl", High},
177 {"yue", "Hant", Low},
178 {"x-abc", "Zzzz", Low},
179 {"und-zyyy", "Zyyy", Exact},
181 for i, tt := range tests {
182 loc, _ := Parse(tt.loc)
183 sc, conf := loc.Script()
184 if sc.String() != tt.scr {
185 t.Errorf("%d:%s: script was %s; want %s", i, tt.loc, sc, tt.scr)
188 t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
193 func TestParseScript(t *testing.T) {
199 {"Latn", "Latn", true},
200 {"zzzz", "Zzzz", true},
201 {"zyyy", "Zyyy", true},
202 {"Latm", "Zzzz", false},
203 {"Zzz", "Zzzz", false},
205 {"Zzzxx", "Zzzz", false},
207 for i, tt := range tests {
208 x, err := ParseScript(tt.in)
209 if x.String() != tt.out || err == nil != tt.ok {
210 t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
213 if _, y, _ := Raw.Make("und-" + tt.out).Raw(); x != y {
214 t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
220 func TestRegion(t *testing.T) {
227 {"zh-Hant", "TW", Low},
228 {"en-US", "US", Exact},
232 {"x-abc", "ZZ", Low},
234 for i, tt := range tests {
235 loc, _ := Raw.Parse(tt.loc)
236 reg, conf := loc.Region()
237 if reg.String() != tt.reg {
238 t.Errorf("%d:%s: region was %s; want %s", i, tt.loc, reg, tt.reg)
241 t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
246 func TestEncodeM49(t *testing.T) {
256 for i, tt := range tests {
257 if r, err := EncodeM49(tt.m49); r.String() != tt.code || err == nil != tt.ok {
258 t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tt.m49, r, err == nil, tt.code, tt.ok)
261 for i := 1; i <= 1000; i++ {
262 if r, err := EncodeM49(i); err == nil && r.M49() == 0 {
263 t.Errorf("%d has no error, but maps to undefined region", i)
268 func TestParseRegion(t *testing.T) {
274 {"001", "001", true},
276 {"899", "ZZ", false},
281 {"CCCC", "ZZ", false},
284 for i, tt := range tests {
285 r, err := ParseRegion(tt.in)
286 if r.String() != tt.out || err == nil != tt.ok {
287 t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
290 if _, _, y := Raw.Make("und-" + tt.out).Raw(); r != y {
291 t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r, y)
297 func TestIsCountry(t *testing.T) {
315 for i, tt := range tests {
316 reg, _ := getRegionID([]byte(tt.reg))
318 if r.IsCountry() != tt.country {
319 t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, r.IsCountry(), tt.country)
324 func TestIsGroup(t *testing.T) {
342 for i, tt := range tests {
343 reg, _ := getRegionID([]byte(tt.reg))
345 if r.IsGroup() != tt.group {
346 t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, r.IsGroup(), tt.group)
351 func TestContains(t *testing.T) {
353 enclosing, contained string
356 // A region contains itself.
358 {"001", "001", true},
360 // Direct containment.
361 {"001", "002", true},
367 // Indirect containment.
369 {"001", "419", true},
370 {"001", "013", true},
373 {"US", "001", false},
374 {"155", "EU", false},
376 for i, tt := range tests {
377 enc, _ := getRegionID([]byte(tt.enclosing))
378 con, _ := getRegionID([]byte(tt.contained))
380 if got := r.Contains(Region{con}); got != tt.contains {
381 t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
386 func TestRegionCanonicalize(t *testing.T) {
387 for i, tt := range []struct{ in, out string }{
395 r := MustParseRegion(tt.in)
396 want := MustParseRegion(tt.out)
397 if got := r.Canonicalize(); got != want {
398 t.Errorf("%d: got %v; want %v", i, got, want)
403 func TestRegionTLD(t *testing.T) {
404 for _, tt := range []struct {
412 // In ccTLD before in ISO.
415 // Non-standard assignment of ccTLD to ISO code.
418 // Exceptionally reserved in ISO and valid ccTLD.
424 // Exceptionally reserved in ISO and invalid ccTLD.
432 // Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
433 // it is still being phased out.
437 // Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
438 // Defined in package language as it has a mapping in CLDR.
444 // Not defined in package: SF.
446 // Indeterminately reserved in ISO.
447 // Defined in package language as it has a legacy mapping in CLDR.
451 // Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
452 // RN, RP, WG, WL, WV, and YV.
454 // Not assigned in ISO, but legacy definitions in CLDR.
458 // Normal mappings but somewhat special status in ccTLD.
464 // Have values when normalized, but not as is.
472 {"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
478 r := MustParseRegion(tt.in)
481 want = MustParseRegion(tt.out)
484 if got := err == nil; got != tt.ok {
485 t.Errorf("error(%v): got %v; want %v", r, got, tt.ok)
488 t.Errorf("TLD(%v): got %v; want %v", r, tld, want)
493 func TestCanonicalize(t *testing.T) {
494 // TODO: do a full test using CLDR data in a separate regression test.
499 {"en-Latn", "en", SuppressScript},
500 {"sr-Cyrl", "sr-Cyrl", SuppressScript},
501 {"sh", "sr-Latn", Legacy},
502 {"sh-HR", "sr-Latn-HR", Legacy},
503 {"sh-Cyrl-HR", "sr-Cyrl-HR", Legacy},
504 {"tl", "fil", Legacy},
505 {"no", "no", Legacy},
506 {"no", "nb", Legacy | CLDR},
507 {"cmn", "cmn", Legacy},
508 {"cmn", "zh", Macro},
509 {"cmn-u-co-stroke", "zh-u-co-stroke", Macro},
510 {"yue", "yue", Macro},
512 {"nb", "nb", Macro | CLDR},
514 {"no", "no", Macro | CLDR},
515 {"iw", "he", DeprecatedBase},
516 {"iw", "he", Deprecated | CLDR},
517 {"mo", "ro-MD", Deprecated}, // Adopted by CLDR as of version 25.
518 {"alb", "sq", Legacy}, // bibliographic
519 {"dut", "nl", Legacy}, // bibliographic
520 // As of CLDR 25, mo is no longer considered a legacy mapping.
521 {"mo", "mo", Legacy | CLDR},
522 {"und-AN", "und-AN", Deprecated},
523 {"und-YD", "und-YE", DeprecatedRegion},
524 {"und-YD", "und-YD", DeprecatedBase},
525 {"und-Qaai", "und-Zinh", DeprecatedScript},
526 {"und-Qaai", "und-Qaai", DeprecatedBase},
527 {"drh", "mn", All}, // drh -> khk -> mn
529 for i, tt := range tests {
530 in, _ := Raw.Parse(tt.in)
531 in, _ = tt.option.Canonicalize(in)
532 if in.String() != tt.out {
533 t.Errorf("%d:%s: was %s; want %s", i, tt.in, in.String(), tt.out)
535 if int(in.pVariant) > int(in.pExt) || int(in.pExt) > len(in.str) {
536 t.Errorf("%d:%s:offsets %d <= %d <= %d must be true", i, tt.in, in.pVariant, in.pExt, len(in.str))
540 for _, base := range Supported.BaseLanguages() {
541 tag, _ := Raw.Compose(base)
542 got, _ := All.Canonicalize(tag)
543 want, _ := All.Canonicalize(got)
545 t.Errorf("idem(%s): got %s; want %s", tag, got, want)
550 func TestTypeForKey(t *testing.T) {
551 tests := []struct{ key, in, out string }{
553 {"co", "en-u-abc", ""},
554 {"co", "en-u-co-phonebk", "phonebk"},
555 {"co", "en-u-co-phonebk-cu-aud", "phonebk"},
556 {"co", "x-foo-u-co-phonebk", ""},
557 {"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
558 {"kc", "cmn-u-co-stroke", ""},
560 for _, tt := range tests {
561 if v := Make(tt.in).TypeForKey(tt.key); v != tt.out {
562 t.Errorf("%q[%q]: was %q; want %q", tt.in, tt.key, v, tt.out)
567 func TestSetTypeForKey(t *testing.T) {
569 key, value, in, out string
572 // replace existing value
573 {"co", "pinyin", "en-u-co-phonebk", "en-u-co-pinyin", false},
574 {"co", "pinyin", "en-u-co-phonebk-cu-xau", "en-u-co-pinyin-cu-xau", false},
575 {"co", "pinyin", "en-u-co-phonebk-v-xx", "en-u-co-pinyin-v-xx", false},
576 {"co", "pinyin", "en-u-co-phonebk-x-x", "en-u-co-pinyin-x-x", false},
577 {"nu", "arabic", "en-u-co-phonebk-nu-vaai", "en-u-co-phonebk-nu-arabic", false},
578 // add to existing -u extension
579 {"co", "pinyin", "en-u-ca-gregory", "en-u-ca-gregory-co-pinyin", false},
580 {"co", "pinyin", "en-u-ca-gregory-nu-vaai", "en-u-ca-gregory-co-pinyin-nu-vaai", false},
581 {"co", "pinyin", "en-u-ca-gregory-v-va", "en-u-ca-gregory-co-pinyin-v-va", false},
582 {"co", "pinyin", "en-u-ca-gregory-x-a", "en-u-ca-gregory-co-pinyin-x-a", false},
583 {"ca", "gregory", "en-u-co-pinyin", "en-u-ca-gregory-co-pinyin", false},
585 {"co", "", "en-u-co-phonebk", "en", false},
586 {"co", "", "en-u-ca-gregory-co-phonebk", "en-u-ca-gregory", false},
587 {"co", "", "en-u-co-phonebk-nu-arabic", "en-u-nu-arabic", false},
588 {"co", "", "en", "en", false},
590 {"co", "pinyin", "en", "en-u-co-pinyin", false},
591 {"co", "pinyin", "und", "und-u-co-pinyin", false},
592 {"co", "pinyin", "en-a-aaa", "en-a-aaa-u-co-pinyin", false},
593 {"co", "pinyin", "en-x-aaa", "en-u-co-pinyin-x-aaa", false},
594 {"co", "pinyin", "en-v-aa", "en-u-co-pinyin-v-aa", false},
595 {"co", "pinyin", "en-a-aaa-x-x", "en-a-aaa-u-co-pinyin-x-x", false},
596 {"co", "pinyin", "en-a-aaa-v-va", "en-a-aaa-u-co-pinyin-v-va", false},
597 // error on invalid values
598 {"co", "pinyinxxx", "en", "en", true},
599 {"co", "piny.n", "en", "en", true},
600 {"co", "pinyinxxx", "en-a-aaa", "en-a-aaa", true},
601 {"co", "pinyinxxx", "en-u-aaa", "en-u-aaa", true},
602 {"co", "pinyinxxx", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
603 {"co", "pinyi.", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
604 {"col", "pinyin", "en", "en", true},
605 {"co", "cu", "en", "en", true},
606 // error when setting on a private use tag
607 {"co", "phonebook", "x-foo", "x-foo", true},
609 for i, tt := range tests {
611 if v, err := tag.SetTypeForKey(tt.key, tt.value); v.String() != tt.out {
612 t.Errorf("%d:%q[%q]=%q: was %q; want %q", i, tt.in, tt.key, tt.value, v, tt.out)
613 } else if (err != nil) != tt.err {
614 t.Errorf("%d:%q[%q]=%q: error was %v; want %v", i, tt.in, tt.key, tt.value, err != nil, tt.err)
615 } else if val := v.TypeForKey(tt.key); err == nil && val != tt.value {
616 t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
618 if len(tag.String()) <= 3 {
619 // Simulate a tag for which the string has not been set.
620 tag.str, tag.pExt, tag.pVariant = "", 0, 0
621 if tag, err := tag.SetTypeForKey(tt.key, tt.value); err == nil {
622 if val := tag.TypeForKey(tt.key); err == nil && val != tt.value {
623 t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
630 func TestFindKeyAndType(t *testing.T) {
631 // out is either the matched type in case of a match or the original
632 // string up till the insertion point.
638 // Don't search past a private use extension.
639 {"co", false, "en-x-foo-u-co-pinyin", "en"},
640 {"co", false, "x-foo-u-co-pinyin", ""},
641 {"co", false, "en-s-fff-x-foo", "en-s-fff"},
642 // Insertion points in absence of -u extension.
643 {"cu", false, "en", ""}, // t.str is ""
644 {"cu", false, "en-v-va", "en"},
645 {"cu", false, "en-a-va", "en-a-va"},
646 {"cu", false, "en-a-va-v-va", "en-a-va"},
647 {"cu", false, "en-x-a", "en"},
648 // Tags with the -u extension.
649 {"co", true, "en-u-co-standard", "standard"},
650 {"co", true, "yue-u-co-pinyin", "pinyin"},
651 {"co", true, "en-u-co-abc", "abc"},
652 {"co", true, "en-u-co-abc-def", "abc-def"},
653 {"co", true, "en-u-co-abc-def-x-foo", "abc-def"},
654 {"co", true, "en-u-co-standard-nu-arab", "standard"},
655 {"co", true, "yue-u-co-pinyin-nu-arab", "pinyin"},
657 {"cu", true, "en-u-co-standard", "en-u-co-standard"},
658 {"cu", true, "yue-u-co-pinyin-x-foo", "yue-u-co-pinyin"},
659 {"cu", true, "en-u-co-abc", "en-u-co-abc"},
660 {"cu", true, "en-u-nu-arabic", "en-u"},
661 {"cu", true, "en-u-co-abc-def-nu-arabic", "en-u-co-abc-def"},
663 for i, tt := range tests {
664 start, end, hasExt := Make(tt.in).findTypeForKey(tt.key)
666 res := tt.in[start:end]
668 t.Errorf("%d:%s: was %q; want %q", i, tt.in, res, tt.out)
671 if hasExt != tt.hasExt {
672 t.Errorf("%d:%s: hasExt was %v; want %v", i, tt.in, hasExt, tt.hasExt)
675 if tt.in[:start] != tt.out {
676 t.Errorf("%d:%s: insertion point was %q; want %q", i, tt.in, tt.in[:start], tt.out)
682 func TestParent(t *testing.T) {
683 tests := []struct{ in, out string }{
684 // Strip variants and extensions first
685 {"de-u-co-phonebk", "de"},
687 {"de-Latn-1994", "de"}, // remove superfluous script.
689 // Ensure the canonical Tag for an entry is in the chain for base-script
693 // Skip the script if it is the maximized version. CLDR files for the
694 // skipped tag are always empty.
695 {"zh-Hans-TW", "zh"},
696 {"zh-Hans-CN", "zh"},
698 // Insert the script if the maximized script is not the same as the
699 // maximized script of the base language.
700 {"zh-TW", "zh-Hant"},
701 {"zh-HK", "zh-Hant"},
702 {"zh-Hant-TW", "zh-Hant"},
703 {"zh-Hant-HK", "zh-Hant"},
705 // Non-default script skips to und.
722 // World English inherits from en-001.
723 {"en-150", "en-001"},
738 // Spanish in Latin-American countries has es-419 as parent.
759 // exceptions (according to CLDR)
762 // Inherit from pt-PT, instead of pt for these countries.
771 for _, tt := range tests {
772 tag := Raw.MustParse(tt.in)
773 if p := Raw.MustParse(tt.out); p != tag.Parent() {
774 t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
780 // Tags without error that don't need to be changed.
781 benchBasic = []string{
805 // Tags with extensions, no changes required.
808 "x-aa-bbbb-cccccccc-d",
809 "en-x_cc-b-bbb-a-aaa",
810 "en-c_cc-b-bbb-a-aaa-x-x",
812 "en-Cyrl-u-co-phonebk",
813 "en-US-u-co-phonebk-cu-xau",
814 "en-nedix-u-co-phonebk",
819 // Change, but no memory allocation required.
820 benchSimpleChange = []string{
827 // Change and memory allocation required.
828 benchChangeAlloc = []string{
829 "en-c_cc-b-bbb-a-aaa",
830 "en-u-cu-xua-co-phonebk",
831 "en-u-cu-xua-co-phonebk-a-cd",
832 "en-u-def-abc-cu-xua-co-phonebk",
833 "en-t-en-Cyrl-NL-1994",
834 "en-t-en-Cyrl-NL-1994-t0-abc-def",
836 // Tags that result in errors.
840 "en-u-cu-co-phonebk",
848 benchChange = append(benchSimpleChange, benchChangeAlloc...)
849 benchAll = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
852 func doParse(b *testing.B, tag []string) {
853 for i := 0; i < b.N; i++ {
854 // Use the modulo instead of looping over all tags so that we get a somewhat
856 Parse(tag[i%len(tag)])
860 func BenchmarkParse(b *testing.B) {
864 func BenchmarkParseBasic(b *testing.B) {
865 doParse(b, benchBasic)
868 func BenchmarkParseError(b *testing.B) {
872 func BenchmarkParseSimpleChange(b *testing.B) {
873 doParse(b, benchSimpleChange)
876 func BenchmarkParseChangeAlloc(b *testing.B) {
877 doParse(b, benchChangeAlloc)