1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
13 "golang.org/x/text/internal/testtext"
14 "golang.org/x/text/language"
17 // TODO: test that tables are properly dropped by the linker for various use cases.
// Fixed language values shared by the name tests in this file.
// NOTE(review): judging by the identifiers these are meant to be the first
// and last entries of the 2-letter language, 3-letter language and tag
// tables; confirm against the generated tables after an update.
21 firstLang2aa = language.MustParseBase("aa")
22 lastLang2zu = language.MustParseBase("zu")
23 firstLang3ace = language.MustParseBase("ace")
24 lastLang3zza = language.MustParseBase("zza")
25 firstTagAr001 = language.MustParse("ar-001")
26 lastTagZhHant = language.MustParse("zh-Hant")
29 // TestValues tests that for all languages, regions, and scripts in Values, at
30 // least one language has a name defined for it by checking it exists in
31 // English, which is assumed to be the most comprehensive. It is also tested
32 // that a Namer returns "" for unsupported values.
33 func TestValues(t *testing.T) {
34 type testcase struct {
38 // checkDefined checks that a value exists in a Namer.
// Every namer/value pair runs as its own subtest through testtext.Run and a
// missing name is reported with t.Errorf, so later pairs are still checked.
39 checkDefined := func(x interface{}, namers []testcase) {
40 for _, n := range namers {
41 testtext.Run(t, fmt.Sprintf("%s.Name(%s)", n.kind, x), func(t *testing.T) {
42 if n.n.Name(x) == "" {
43 // As of version 28 there is no data for az-Arab in English,
44 // although there is useful data in other languages.
45 if x.(fmt.Stringer).String() == "az-Arab" {
48 t.Errorf("supported but no result")
53 // checkUnsupported checks that a value does not exist in a Namer.
// Unlike checkDefined, a violation is reported with t.Fatalf on the outer t,
// so the first unsupported value that yields a name ends TestValues.
54 checkUnsupported := func(x interface{}, namers []testcase) {
55 for _, n := range namers {
56 if got := n.n.Name(x); got != "" {
57 t.Fatalf("%s.Name(%s): unsupported tag gave non-empty result: %q", n.kind, x, got)
// Tags: every tag in Values.Tags() must be named by each of the four English
// tag/language Namers listed below.
62 tags := map[language.Tag]bool{}
64 {"Languages(en)", Languages(language.English)},
65 {"Tags(en)", Tags(language.English)},
66 {"English.Languages()", English.Languages()},
67 {"English.Tags()", English.Tags()},
69 for _, tag := range Values.Tags() {
70 checkDefined(tag, namers)
// Tags composed from the supported base languages are handed to
// checkUnsupported. NOTE(review): presumably only those not recorded in the
// tags set above - confirm.
73 for _, base := range language.Supported.BaseLanguages() {
74 tag, _ := language.All.Compose(base)
76 checkUnsupported(tag, namers)
// Regions: same scheme as for tags, using the English region Namers.
80 regions := map[language.Region]bool{}
82 {"Regions(en)", Regions(language.English)},
83 {"English.Regions()", English.Regions()},
85 for _, r := range Values.Regions() {
86 checkDefined(r, namers)
// Supported regions are canonicalized before the lookup in the regions set;
// only regions absent from that set are expected to have no name.
89 for _, r := range language.Supported.Regions() {
90 if r = r.Canonicalize(); !regions[r] {
91 checkUnsupported(r, namers)
// Scripts: same scheme again, using the English script Namers.
95 scripts := map[language.Script]bool{}
97 {"Scripts(en)", Scripts(language.English)},
98 {"English.Scripts()", English.Scripts()},
100 for _, s := range Values.Scripts() {
101 checkDefined(s, namers)
104 for _, s := range language.Supported.Scripts() {
105 // Canonicalize the script.
// The script is composed into a tag with language.DeprecatedScript and read
// back via tag.Raw(); only scripts absent from the scripts set are expected
// to have no name.
106 tag, _ := language.DeprecatedScript.Compose(s)
107 if _, s, _ = tag.Raw(); !scripts[s] {
108 checkUnsupported(s, namers)
113 // TestSupported tests that we have at least some Namers for languages that we
114 // claim to support. To test the claims in the documentation, it also verifies
115 // that if a Namer is returned, it will have at least some data.
116 func TestSupported(t *testing.T) {
// The number of tags returned by Supported.Tags must equal numSupported.
117 supportedTags := Supported.Tags()
118 if len(supportedTags) != numSupported {
119 t.Errorf("number of supported was %d; want %d", len(supportedTags), numSupported)
// namerFuncs lists the Namer constructors to probe; the kind string is
// passed on to defined together with the Namer the constructor returns.
122 namerFuncs := []struct {
124 fn func(language.Tag) Namer
127 {"Languages", Languages},
128 {"Regions", Regions},
129 {"Scripts", Scripts},
132 // Verify that we have at least one Namer for all tags we claim to support.
// NOTE(review): tags presumably records the tags seen so far so that
// duplicates in Supported.Tags can be reported below - confirm.
133 tags := make(map[language.Tag]bool)
134 for _, tag := range supportedTags {
135 // Test we have at least one Namer for this supported Tag.
137 for _, kind := range namerFuncs {
138 if defined(t, kind.kind, kind.fn(tag), tag) {
143 t.Errorf("%s: supported, but no data available", tag)
146 t.Errorf("%s: included in Supported.Tags more than once", tag)
151 // Verify that we have no Namers for tags we don't claim to support.
152 for _, base := range language.Supported.BaseLanguages() {
153 tag, _ := language.All.Compose(base)
154 // Skip tags that are supported after matching.
// A confidence other than language.No means the matcher found a supported
// tag for this one.
155 if _, _, conf := matcher.Match(tag); conf != language.No {
158 // Test there are no Namers for this tag.
159 for _, kind := range namerFuncs {
160 if defined(t, kind.kind, kind.fn(tag), tag) {
161 t.Errorf("%[1]s(%[2]s) returns a Namer, but %[2]s is not in the set of supported Tags.", kind.kind, tag)
167 // defined reports whether n is a proper Namer, which means it is non-nil and
168 // must have at least one non-empty value.
//
// kind and tag are used in the failure message that is reported through t
// when n turns out to have no content.
169 func defined(t *testing.T, kind string, n Namer, tag language.Tag) bool {
// NOTE: the range variable t in each loop below shadows the *testing.T
// parameter for the duration of that loop.
175 for _, t := range Values.Tags() {
181 for _, t := range Values.BaseLanguages() {
187 for _, t := range Values.Regions() {
193 for _, t := range Values.Scripts() {
199 t.Errorf("%s(%s) returns non-nil Namer without content", kind, tag)
// TestCoverage walks, for the English Languages, Scripts and Regions Namers,
// the matching list from Values and reports values with "missing content" as
// well as two different values that produce the same name.
203 func TestCoverage(t *testing.T) {
204 en := language.English
209 {Languages(en), Values.Tags()},
210 {Scripts(en), Values.Scripts()},
211 {Regions(en), Values.Regions()},
213 for i, tt := range tests {
// uniq maps a name to a value that produced it; a non-nil entry for a name
// is reported below as an identical return value.
214 uniq := make(map[string]interface{})
// tt.x is walked through reflection, so the same loop serves the tag, script
// and region lists alike.
216 v := reflect.ValueOf(tt.x)
217 for j := 0; j < v.Len(); j++ {
218 x := v.Index(j).Interface()
219 // As of version 28 there is no data for az-Arab in English,
220 // although there is useful data in other languages.
221 if x.(fmt.Stringer).String() == "az-Arab" {
226 t.Errorf("%d:%d:%s: missing content", i, j, x)
227 } else if uniq[s] != nil {
228 t.Errorf("%d:%d:%s: identical return value %q for %v and %v", i, j, x, s, x, uniq[s])
235 // TestUpdate tests whether dictionary entries for certain languages need to be
236 // updated. For some languages, some of the headers may be empty or they may be
237 // identical to the parent. This code detects if such entries need to be updated
238 // after a table update.
239 func TestUpdate(t *testing.T) {
244 {ModernStandardArabic, "ar-001"},
245 {AmericanEnglish, "en-US"},
246 {EuropeanSpanish, "es-ES"},
247 {BrazilianPortuguese, "pt-BR"},
248 {SimplifiedChinese, "zh-Hans"},
251 for _, tt := range tests {
// The second result of matcher.Match is used as the index into langHeaders,
// scriptHeaders and regionHeaders; each header must be deeply equal to the
// corresponding field of the dictionary under test.
252 _, i, _ := matcher.Match(language.MustParse(tt.tag))
253 if !reflect.DeepEqual(tt.d.lang, langHeaders[i]) {
254 t.Errorf("%s: lang table update needed", tt.tag)
256 if !reflect.DeepEqual(tt.d.script, scriptHeaders[i]) {
257 t.Errorf("%s: script table update needed", tt.tag)
259 if !reflect.DeepEqual(tt.d.region, regionHeaders[i]) {
260 t.Errorf("%s: region table update needed", tt.tag)
// TestIndex checks the index and len methods of every entry in tests: each
// key taken from the entry must map to the expected position cnt, len must
// equal the final cnt, and every key in notIn must map to -1.
265 func TestIndex(t *testing.T) {
// notIn holds keys of length 2, 3 and 4 that are expected to be absent.
266 notIn := []string{"aa", "xx", "zz", "aaa", "xxx", "zzz", "Aaaa", "Xxxx", "Zzzz"}
299 for i, tt := range tests {
300 // Create the test set from the tagIndex.
// Keys are visited by size (2, 3 and 4), stepping through a in strides of
// sz. NOTE(review): cnt is presumably advanced once per key - confirm.
302 for sz := 2; sz <= 4; sz++ {
304 for j := 0; j < len(a); j += sz {
306 if idx := tt.index(s); idx != cnt {
307 t.Errorf("%d:%s: index was %d; want %d", i, s, idx, cnt)
312 if n := tt.len(); n != cnt {
313 t.Errorf("%d: len was %d; want %d", i, n, cnt)
315 for _, x := range notIn {
316 if idx := tt.index(x); idx != -1 {
317 t.Errorf("%d:%s: index was %d; want -1", i, x, idx)
// TestTag checks Tags(dict).Name(tag) against the expected display name for
// a table of dictionary/tag pairs. dict is parsed with language.MustParse,
// tag with language.Raw.MustParse. An empty expected name is used where the
// dictionary has no name for the tag.
323 func TestTag(t *testing.T) {
329 {"agq", "sr", ""}, // sr is in Values.BaseLanguages(), but is not supported by agq.
330 {"nl", "nl", "Nederlands"},
331 // CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
332 // Flemish in English, though. TODO: check if this is a CLDR bug.
333 // {"nl", "nl-BE", "Vlaams"},
334 {"nl", "nl-BE", "Nederlands (België)"},
335 {"nl", "vls", "West-Vlaams"},
336 {"en", "nl-BE", "Flemish"},
337 {"en", "en", "English"},
338 {"en", "en-GB", "British English"},
339 {"en", "en-US", "American English"}, // American English in CLDR 24+
340 {"ru", "ru", "русский"},
341 {"ru", "ru-RU", "русский (Россия)"},
342 {"ru", "ru-Cyrl", "русский (кириллица)"},
343 {"en", lastLang2zu.String(), "Zulu"},
344 {"en", firstLang2aa.String(), "Afar"},
345 {"en", lastLang3zza.String(), "Zaza"},
346 {"en", firstLang3ace.String(), "Achinese"},
347 {"en", firstTagAr001.String(), "Modern Standard Arabic"},
348 {"en", lastTagZhHant.String(), "Traditional Chinese"},
351 // If full tag doesn't match, try without script or region.
// The script or region name is then appended in parentheses.
352 {"en", "aa-Hans", "Afar (Simplified Han)"},
353 {"en", "af-Arab", "Afrikaans (Arabic)"},
354 {"en", "zu-Cyrl", "Zulu (Cyrillic)"},
355 {"en", "aa-GB", "Afar (United Kingdom)"},
356 {"en", "af-NA", "Afrikaans (Namibia)"},
357 {"en", "zu-BR", "Zulu (Brazil)"},
358 // Correct inheritance and language selection.
359 {"zh", "zh-TW", "中文 (台湾)"},
360 {"zh", "zh-Hant-TW", "繁体中文 (台湾)"},
361 {"zh-Hant", "zh-TW", "中文 (台灣)"},
362 {"zh-Hant", "zh-Hant-TW", "繁體中文 (台灣)"},
363 // Some rather arbitrary interpretations for Serbian. This is arguably
364 // correct and consistent with the way zh-[Hant-]TW is handled. It will
365 // also give results more in line with the expectations if users
366 // explicitly use "sh".
367 {"sr-Latn", "sr-ME", "srpski (Crna Gora)"},
368 {"sr-Latn", "sr-Latn-ME", "srpskohrvatski (Crna Gora)"},
369 // Double script and region
370 {"nl", "en-Cyrl-BE", "Engels (Cyrillisch, België)"},
371 // Canonical equivalents.
372 {"ro", "ro-MD", "moldovenească"},
373 {"ro", "mo", "moldovenească"},
375 for i, tt := range tests {
376 d := Tags(language.MustParse(tt.dict))
377 if n := d.Name(language.Raw.MustParse(tt.tag)); n != tt.name {
378 // There are inconsistencies w.r.t. capitalization in the tests
379 // due to CLDR's update procedure which treats modern and other
380 // languages differently.
381 // See http://unicode.org/cldr/trac/ticket/8051.
382 // TODO: use language capitalization to sanitize the strings.
383 t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.tag, n, tt.name)
// TestLanguage checks Languages(dict).Name for every table entry, each in
// its own subtest named "dict/tag". The name is looked up for the tag parsed
// with language.Raw.MustParse and, for tags of at most three bytes, again
// for the value parsed with language.MustParseBase. The expected names for
// tags with a script or region part carry no parenthesized suffix.
388 func TestLanguage(t *testing.T) {
394 {"agq", "sr", ""}, // sr is in Values.BaseLanguages(), but is not supported by agq.
395 // CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
396 // Flemish in English, though. TODO: this is probably incorrect.
397 // West-Vlaams (vls) is not Vlaams. West-Vlaams could be considered its
398 // own language, whereas Vlaams is generally Dutch. So expect to have
399 // to change these tests back.
400 {"nl", "nl", "Nederlands"},
401 {"nl", "vls", "West-Vlaams"},
402 {"nl", "nl-BE", "Nederlands"},
403 {"en", "pt", "Portuguese"},
404 {"en", "pt-PT", "European Portuguese"},
405 {"en", "pt-BR", "Brazilian Portuguese"},
406 {"en", "en", "English"},
407 {"en", "en-GB", "British English"},
408 {"en", "en-US", "American English"}, // American English in CLDR 24+
409 {"en", lastLang2zu.String(), "Zulu"},
410 {"en", firstLang2aa.String(), "Afar"},
411 {"en", lastLang3zza.String(), "Zaza"},
412 {"en", firstLang3ace.String(), "Achinese"},
413 {"en", firstTagAr001.String(), "Modern Standard Arabic"},
414 {"en", lastTagZhHant.String(), "Traditional Chinese"},
417 // If full tag doesn't match, try without script or region.
418 {"en", "aa-Hans", "Afar"},
419 {"en", "af-Arab", "Afrikaans"},
420 {"en", "zu-Cyrl", "Zulu"},
421 {"en", "aa-GB", "Afar"},
422 {"en", "af-NA", "Afrikaans"},
423 {"en", "zu-BR", "Zulu"},
424 {"agq", "zh-Hant", ""},
425 // Canonical equivalents.
426 {"ro", "ro-MD", "moldovenească"},
427 {"ro", "mo", "moldovenească"},
428 {"en", "sh", "Serbo-Croatian"},
429 {"en", "sr-Latn", "Serbo-Croatian"},
430 {"en", "sr", "Serbian"},
431 {"en", "sr-ME", "Serbian"},
432 {"en", "sr-Latn-ME", "Serbo-Croatian"}, // See comments in TestTag.
434 for i, tt := range tests {
435 testtext.Run(t, tt.dict+"/"+tt.tag, func(t *testing.T) {
436 d := Languages(language.Raw.MustParse(tt.dict))
437 if n := d.Name(language.Raw.MustParse(tt.tag)); n != tt.name {
438 t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.tag, n, tt.name)
// Tags of at most three bytes are also looked up as a base language value,
// which must give the same name.
440 if len(tt.tag) <= 3 {
441 if n := d.Name(language.MustParseBase(tt.tag)); n != tt.name {
442 t.Errorf("%d:%s:base(%s): was %q; want %q", i, tt.dict, tt.tag, n, tt.name)
// TestScript checks Scripts(dict).Name against the expected script name.
// Entries whose scr starts with an upper-case letter are parsed with
// language.MustParseScript and looked up as a tag composed from that script;
// other entries are parsed as a tag with language.Raw.MustParse. The
// resulting value x is then looked up as well.
449 func TestScript(t *testing.T) {
455 {"nl", "Arab", "Arabisch"},
456 {"en", "Arab", "Arabic"},
457 {"en", "Zzzz", "Unknown Script"},
458 {"zh-Hant", "Hang", "韓文字"},
459 {"zh-Hant-HK", "Hang", "韓文字"},
460 {"zh", "Arab", "阿拉伯文"},
461 {"zh-Hans-HK", "Arab", "阿拉伯文"}, // same as zh
462 {"zh-Hant", "Arab", "阿拉伯文"},
463 {"zh-Hant-HK", "Arab", "阿拉伯文"}, // same as zh
464 // Canonicalized form
465 {"en", "Qaai", "Inherited"}, // deprecated script, now is Zinh
466 {"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
467 {"en", "en", "Unknown Script"},
468 // Don't introduce scripts with canonicalization.
// NOTE(review): the entry below repeats the {"en", "sh"} entry listed under
// "Canonicalized form" above.
469 {"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
471 for i, tt := range tests {
472 d := Scripts(language.MustParse(tt.dict))
// An upper-case first byte marks scr as a script code rather than a tag.
474 if unicode.IsUpper(rune(tt.scr[0])) {
475 x = language.MustParseScript(tt.scr)
476 tag, _ := language.Raw.Compose(x)
477 if n := d.Name(tag); n != tt.name {
478 t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.scr, n, tt.name)
481 x = language.Raw.MustParse(tt.scr)
483 if n := d.Name(x); n != tt.name {
484 t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.scr, n, tt.name)
// TestRegion checks Regions(dict).Name against the expected region name.
// Entries whose reg starts with an upper-case letter are parsed with
// language.MustParseRegion and looked up as a tag composed from that region;
// other entries are parsed as a tag with language.Raw.MustParse. The
// resulting value x is then looked up as well.
489 func TestRegion(t *testing.T) {
495 {"nl", "NL", "Nederland"},
496 {"en", "US", "United States"},
497 {"en", "ZZ", "Unknown Region"},
498 {"en", "UM", "U.S. Outlying Islands"},
499 {"en-GB", "UM", "U.S. Outlying Islands"},
500 {"en-GB", "NL", "Netherlands"},
501 // Canonical equivalents
502 {"en", "UK", "United Kingdom"},
// The lower-case entries below are parsed as tags; they are expected to
// give "Unknown Region".
504 {"en", "pt", "Unknown Region"},
505 {"en", "und", "Unknown Region"},
506 // Don't introduce regions with canonicalization.
507 {"en", "mo", "Unknown Region"},
509 for i, tt := range tests {
510 d := Regions(language.MustParse(tt.dict))
// An upper-case first byte marks reg as a region code rather than a tag.
512 if unicode.IsUpper(rune(tt.reg[0])) {
514 x = language.MustParseRegion(tt.reg)
515 tag, _ := language.Raw.Compose(x)
516 if n := d.Name(tag); n != tt.name {
517 t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.reg, n, tt.name)
521 x = language.Raw.MustParse(tt.reg)
523 if n := d.Name(x); n != tt.name {
524 t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.reg, n, tt.name)
// TestSelf checks the name that the Namer d returns for each tag, where the
// expected names are written in the tag's own language (for example
// "Nederlands" for nl). An empty expected name marks a tag for which no name
// is defined.
// NOTE(review): d is presumably the package's self-name Namer - confirm.
529 func TestSelf(t *testing.T) {
534 {"nl", "Nederlands"},
535 // CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
536 // Flemish in English, though. TODO: check if this is a CLDR bug.
537 // {"nl-BE", "Vlaams"},
538 {"nl-BE", "Nederlands"},
539 {"en-GB", "British English"},
540 {lastLang2zu.String(), "isiZulu"},
541 {firstLang2aa.String(), ""}, // not defined
542 {lastLang3zza.String(), ""}, // not defined
543 {firstLang3ace.String(), ""}, // not defined
544 {firstTagAr001.String(), "العربية الرسمية الحديثة"},
546 {lastTagZhHant.String(), "繁體中文"},
549 // Drop entries that are not in the requested script, even if there is
550 // an entry for the language.
554 // Append the country name in the language of the matching language.
555 {"af-NA", "Afrikaans"},
557 // zh-TW should match zh-Hant instead of zh!
561 {"zh-Hant-TW", "繁體中文"},
562 {"zh-Hans-TW", "简体中文"},
563 // Take the entry for sr which has the matching script.
564 // TODO: Capitalization changed as of CLDR 26, but change seems
565 // arbitrary. Revisit capitalization with revision 27. See
566 // http://unicode.org/cldr/trac/ticket/8051.
568 // TODO: sr-ME should show up as Serbian or Montenegrin, not Serbo-
569 // Croatian. This is an artifact of the current algorithm, which is the
570 // way it is to have the preferred behavior for other languages such as
571 // Chinese. We can hardwire this case in the table generator or package
572 // code, but we first check if CLDR can be updated.
573 // {"sr-ME", "Srpski"}, // Is Srpskohrvatski
574 {"sr-Latn-ME", "srpskohrvatski"},
575 {"sr-Cyrl-ME", "српски"},
577 // Canonical equivalents.
578 {"ro-MD", "moldovenească"},
579 {"mo", "moldovenească"},
580 // NOTE: kk is defined, but in Cyrillic script. For China, Arab is the
581 // dominant script. We do not have data for kk-Arab and we chose to not
582 // fall back in such cases.
585 for i, tt := range tests {
// Tags are parsed with language.Raw.MustParse before the lookup.
587 if n := d.Name(language.Raw.MustParse(tt.tag)); n != tt.name {
588 t.Errorf("%d:%s: was %q; want %q", i, tt.tag, n, tt.name)
// TestDictionaryLang checks, for each dictionary value d in the table, that
// d.Tags().Name(tag) returns the expected name and, when the tag consists of
// nothing but its base language, that d.Languages().Name(base) returns the
// same name.
593 func TestDictionaryLang(t *testing.T) {
599 {English, "en", "English"},
600 {Portuguese, "af", "africâner"},
601 {EuropeanPortuguese, "af", "africanês"},
602 {English, "nl-BE", "Flemish"},
604 for i, test := range tests {
605 tag := language.MustParse(test.tag)
606 if got := test.d.Tags().Name(tag); got != test.name {
607 t.Errorf("%d:%v: got %s; want %s", i, tag, got, test.name)
// Composing a tag from tag.Base() alone gives back the original tag only if
// that tag has no further parts. NOTE(review): this check and the one above
// report with the same message, so the output does not tell them apart.
609 if base, _ := language.Compose(tag.Base()); base == tag {
610 if got := test.d.Languages().Name(base); got != test.name {
611 t.Errorf("%d:%v: got %s; want %s", i, tag, got, test.name)
// TestDictionaryRegion checks, for each dictionary value d in the table,
// that d.Regions().Name returns the expected name for the region parsed from
// test.region.
617 func TestDictionaryRegion(t *testing.T) {
623 {English, "FR", "France"},
624 {Portuguese, "009", "Oceania"},
625 {EuropeanPortuguese, "009", "Oceânia"},
627 for i, test := range tests {
// Despite its name, tag holds the result of language.MustParseRegion.
628 tag := language.MustParseRegion(test.region)
629 if got := test.d.Regions().Name(tag); got != test.name {
630 t.Errorf("%d:%v: got %s; want %s", i, tag, got, test.name)
// TestDictionaryScript checks, for each dictionary value d in the table,
// that d.Scripts().Name returns the expected name for the script parsed from
// test.script.
635 func TestDictionaryScript(t *testing.T) {
641 {English, "Cyrl", "Cyrillic"},
642 {Portuguese, "Gujr", "gujerati"},
643 {EuropeanPortuguese, "Gujr", "guzerate"},
645 for i, test := range tests {
// Despite its name, tag holds the result of language.MustParseScript.
646 tag := language.MustParseScript(test.script)
647 if got := test.d.Scripts().Name(tag); got != test.name {
648 t.Errorf("%d:%v: got %s; want %s", i, tag, got, test.name)