1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // Language tag table generator.
8 // Data read from the web.
26 "golang.org/x/text/internal/gen"
27 "golang.org/x/text/internal/tag"
28 "golang.org/x/text/unicode/cldr"
32 test = flag.Bool("test",
34 "test existing tables; can be used to compare web data with package data.")
35 outputFile = flag.String("output",
37 "output file for generated tables")
40 var comment = []string{
42 lang holds an alphabetically sorted list of ISO-639 language identifiers.
43 All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag.
44 For 2-byte language identifiers, the two successive bytes have the following meaning:
45 - if the first letter of the 2- and 3-letter ISO codes are the same:
46 the second and third letter of the 3-letter ISO code.
47 - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3.
48 For 3-byte language identifiers the 4th byte is 0.`,
50 langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
51 in lookup tables. The language ids for these language codes are derived directly
52 from the letters and are not consecutive.`,
54 altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
55 to 2-letter language codes that cannot be derived using the method described above.
56 Each 3-letter code is followed by its 1-byte langID.`,
58 altLangIndex is used to convert indexes in altLangISO3 to langIDs.`,
60 langAliasMap maps langIDs to their suggested replacements.`,
62 script is an alphabetically sorted list of ISO 15924 codes. The index
63 of the script in the string, divided by 4, is the internal scriptID.`,
65 isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
66 for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
67 the UN.M49 codes used for groups.)`,
69 regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
70 Each 2-letter codes is followed by two bytes with the following meaning:
71 - [A-Z}{2}: the first letter of the 2-letter code plus these two
72 letters form the 3-letter ISO code.
73 - 0, n: index into altRegionISO3.`,
75 regionTypes defines the status of a region for various standards.`,
77 m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
78 codes indicating collections of regions.`,
80 m49Index gives indexes into fromM49 based on the three most significant bits
81 of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in
82 fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]]
83 for an entry where the first 7 bits match the 7 lsb of the UN.M49 code.
84 The region code is stored in the 9 lsb of the indexed value.`,
86 fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details.`,
88 altRegionISO3 holds a list of 3-letter region codes that cannot be
89 mapped to 2-letter codes using the default algorithm. This is a short list.`,
91 altRegionIDs holds a list of regionIDs the positions of which match those
92 of the 3-letter ISO codes in altRegionISO3.`,
94 variantNumSpecialized is the number of specialized variants in variants.`,
96 suppressScript is an index from langID to the dominant script for that language,
97 if it exists. If a script is given, it should be suppressed from the language tag.`,
99 likelyLang is a lookup table, indexed by langID, for the most likely
100 scripts and regions given incomplete information. If more entries exist for a
101 given language, region and script are the index and size respectively
102 of the list in likelyLangList.`,
104 likelyLangList holds lists info associated with likelyLang.`,
106 likelyRegion is a lookup table, indexed by regionID, for the most likely
107 languages and scripts given incomplete information. If more entries exist
108 for a given regionID, lang and script are the index and size respectively
109 of the list in likelyRegionList.
110 TODO: exclude containers and user-definable regions from the list.`,
112 likelyRegionList holds lists info associated with likelyRegion.`,
114 likelyScript is a lookup table, indexed by scriptID, for the most likely
115 languages and regions given a script.`,
117 matchLang holds pairs of langIDs of base languages that are typically
118 mutually intelligible. Each pair is associated with a confidence and
119 whether the intelligibility goes one or both ways.`,
121 matchScript holds pairs of scriptIDs where readers of one script
122 can typically also read the other. Each is associated with a confidence.`,
124 nRegionGroups is the number of region groups.`,
126 regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
127 where each set holds all groupings that are directly connected in a region
130 regionInclusionBits is an array of bit vectors where every vector represents
131 a set of region groupings. These sets are used to compute the distance
132 between two regions for the purpose of language matching.`,
134 regionInclusionNext marks, for each entry in regionInclusionBits, the set of
135 all groups that are reachable from the groups set in the respective entry.`,
138 // TODO: consider changing some of these structures to tries. This can reduce
139 // memory, but may increase the need for memory allocations. This could be
140 // mitigated if we can piggyback on language tags for common cases.
142 func failOnError(e error) {
151 Indexed setType = 1 + iota // all elements must be of same size
155 type stringSet struct {
159 // We often need to update values after the creation of an index is completed.
160 // We include a convenience map for keeping track of this.
161 update map[string]string
162 typ setType // used for checking.
165 func (ss *stringSet) clone() stringSet {
167 c.s = append([]string(nil), c.s...)
171 func (ss *stringSet) setType(t setType) {
172 if ss.typ != t && ss.typ != 0 {
173 log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ)
177 // parse parses a whitespace-separated string and initializes ss with its
179 func (ss *stringSet) parse(s string) {
180 scan := bufio.NewScanner(strings.NewReader(s))
181 scan.Split(bufio.ScanWords)
187 func (ss *stringSet) assertChangeable() {
189 log.Panic("attempt to modify a frozen stringSet")
193 func (ss *stringSet) add(s string) {
194 ss.assertChangeable()
195 ss.s = append(ss.s, s)
196 ss.sorted = ss.frozen
199 func (ss *stringSet) freeze() {
204 func (ss *stringSet) compact() {
211 for i := 1; i < len(a); i++ {
218 ss.sorted = ss.frozen
221 type funcSorter struct {
222 fn func(a, b string) bool
226 func (s funcSorter) Less(i, j int) bool {
227 return s.fn(s.StringSlice[i], s.StringSlice[j])
230 func (ss *stringSet) sortFunc(f func(a, b string) bool) {
232 sort.Sort(funcSorter{f, sort.StringSlice(ss.s)})
235 func (ss *stringSet) remove(s string) {
236 ss.assertChangeable()
237 if i, ok := ss.find(s); ok {
238 copy(ss.s[i:], ss.s[i+1:])
239 ss.s = ss.s[:len(ss.s)-1]
243 func (ss *stringSet) replace(ol, nu string) {
244 ss.s[ss.index(ol)] = nu
245 ss.sorted = ss.frozen
248 func (ss *stringSet) index(s string) int {
253 log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i])
255 log.Panicf("find: item %q is not in list", s)
261 func (ss *stringSet) find(s string) (int, bool) {
263 i := sort.SearchStrings(ss.s, s)
264 return i, i != len(ss.s) && ss.s[i] == s
267 func (ss *stringSet) slice() []string {
272 func (ss *stringSet) updateLater(v, key string) {
273 if ss.update == nil {
274 ss.update = map[string]string{}
279 // join joins the strings and ensures that all entries are of the same length.
280 func (ss *stringSet) join() string {
283 for _, s := range ss.s {
285 log.Panicf("join: not all entries are of the same length: %q", s)
288 ss.s = append(ss.s, strings.Repeat("\xff", n))
289 return strings.Join(ss.s, "")
292 // ianaEntry holds information for an entry in the IANA Language Subtag Repository.
293 // All types use the same entry.
294 // See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various
296 type ianaEntry struct {
303 suppressScript string
308 type builder struct {
310 hw io.Writer // MultiWriter for w and w.Hash
312 supp *cldr.SupplementalData
315 locale stringSet // common locales
316 lang stringSet // canonical language ids (2 or 3 letter ISO codes) with data
317 langNoIndex stringSet // 3-letter ISO codes with no associated data
318 script stringSet // 4-letter ISO codes
319 region stringSet // 2-letter ISO or 3-digit UN M49 codes
320 variant stringSet // 4-8-alphanumeric variant code.
322 // Region codes that are groups with their corresponding group IDs.
326 registry map[string]*ianaEntry
331 func newBuilder(w *gen.CodeWriter) *builder {
332 r := gen.OpenCLDRCoreZip()
335 data, err := d.DecodeZip(r)
339 hw: io.MultiWriter(w, w.Hash),
341 supp: data.Supplemental(),
347 func (b *builder) parseRegistry() {
348 r := gen.OpenIANAFile("assignments/language-subtag-registry")
350 b.registry = make(map[string]*ianaEntry)
352 scan := bufio.NewScanner(r)
353 scan.Split(bufio.ScanWords)
354 var record *ianaEntry
355 for more := scan.Scan(); more; {
361 record = &ianaEntry{typ: value}
362 case "Subtag:", "Tag:":
363 if s := strings.SplitN(value, "..", 2); len(s) > 1 {
364 for a := s[0]; a <= s[1]; a = inc(a) {
365 b.addToRegistry(a, record)
368 b.addToRegistry(value, record)
370 case "Suppress-Script:":
371 record.suppressScript = value
375 record.deprecated = value
376 case "Macrolanguage:":
378 case "Preferred-Value:":
379 record.preferred = value
381 record.prefix = append(record.prefix, value)
386 for more = scan.Scan(); more; more = scan.Scan() {
388 if b[0] == '%' || b[len(b)-1] == ':' {
391 buf = append(buf, ' ')
392 buf = append(buf, b...)
394 record.description = append(record.description, string(buf))
401 if scan.Err() != nil {
402 log.Panic(scan.Err())
406 func (b *builder) addToRegistry(key string, entry *ianaEntry) {
407 if info, ok := b.registry[key]; ok {
408 if info.typ != "language" || entry.typ != "extlang" {
409 log.Fatalf("parseRegistry: tag %q already exists", key)
412 b.registry[key] = entry
416 var commentIndex = make(map[string]string)
419 for _, s := range comment {
420 key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0])
421 commentIndex[key] = s
425 func (b *builder) comment(name string) {
426 if s := commentIndex[name]; len(s) > 0 {
433 func (b *builder) pf(f string, x ...interface{}) {
434 fmt.Fprintf(b.hw, f, x...)
435 fmt.Fprint(b.hw, "\n")
438 func (b *builder) p(x ...interface{}) {
439 fmt.Fprintln(b.hw, x...)
442 func (b *builder) addSize(s int) {
444 b.pf("// Size: %d bytes", s)
447 func (b *builder) writeConst(name string, x interface{}) {
449 b.w.WriteConst(name, x)
452 // writeConsts computes f(v) for all v in values and writes the results
453 // as constants named _v to a single constant block.
454 func (b *builder) writeConsts(f func(string) int, values ...string) {
456 for _, v := range values {
457 b.pf("\t_%s = %v", v, f(v))
462 // writeType writes the type of the given value, which must be a struct.
463 func (b *builder) writeType(value interface{}) {
464 b.comment(reflect.TypeOf(value).Name())
468 func (b *builder) writeSlice(name string, ss interface{}) {
469 b.writeSliceAddSize(name, 0, ss)
472 func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) {
474 b.w.Size += extraSize
475 v := reflect.ValueOf(ss)
477 b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len())
479 fmt.Fprintf(b.w, "var %s = ", name)
488 func (b *builder) writeSortedMap(name string, ss *stringSet, index func(s string) uint16) {
489 ss.sortFunc(func(a, b string) bool {
490 return index(a) < index(b)
493 for _, s := range ss.s {
494 m = append(m, fromTo{index(s), index(ss.update[s])})
496 b.writeSlice(name, m)
499 const base = 'z' - 'a' + 1
501 func strToInt(s string) uint {
503 for i := 0; i < len(s); i++ {
505 v += uint(s[i] - 'a')
510 // intToStr converts the given integer to the original ASCII string passed to strToInt.
511 // len(s) must match the number of characters obtained.
512 func intToStr(v uint, s []byte) {
513 for i := len(s) - 1; i >= 0; i-- {
514 s[i] = byte(v%base) + 'a'
519 func (b *builder) writeBitVector(name string, ss []string) {
520 vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8)))
521 for _, s := range ss {
523 vec[v/8] |= 1 << (v % 8)
525 b.writeSlice(name, vec)
528 // TODO: convert this type into a list or two-stage trie.
529 func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) {
531 v := reflect.ValueOf(m)
532 sz := v.Len() * (2 + int(v.Type().Key().Size()))
533 for _, k := range m {
538 b.pf(`var %s = map[string]uint16{`, name)
540 keys = append(keys, k)
543 for _, k := range keys {
544 b.pf("\t%q: %v,", k, f(m[k]))
549 func (b *builder) writeMap(name string, m interface{}) {
551 v := reflect.ValueOf(m)
552 sz := v.Len() * (2 + int(v.Type().Key().Size()) + int(v.Type().Elem().Size()))
554 f := strings.FieldsFunc(fmt.Sprintf("%#v", m), func(r rune) bool {
555 return strings.IndexRune("{}, ", r) != -1
558 b.pf(`var %s = %s{`, name, f[0])
559 for _, kv := range f[1:] {
565 func (b *builder) langIndex(s string) uint16 {
569 if i, ok := b.lang.find(s); ok {
572 return uint16(strToInt(s)) + uint16(len(b.lang.s))
575 // inc advances the string to its lexicographical successor.
576 func inc(s string) string {
577 const maxTagLength = 4
578 var buf [maxTagLength]byte
579 intToStr(strToInt(strings.ToLower(s))+1, buf[:len(s)])
580 for i := 0; i < len(s); i++ {
585 return string(buf[:len(s)])
588 func (b *builder) parseIndices() {
589 meta := b.supp.Metadata
591 for k, v := range b.registry {
595 if len(k) == 2 || v.suppressScript != "" || v.scope == "special" {
612 // Include any language for which there is data.
613 for _, lang := range b.data.Locales() {
614 if x := b.data.RawLDML(lang); false ||
615 x.LocaleDisplayNames != nil ||
616 x.Characters != nil ||
617 x.Delimiters != nil ||
618 x.Measurement != nil ||
622 x.ListPatterns != nil ||
623 x.Collations != nil ||
624 x.Segmentations != nil ||
626 x.Annotations != nil ||
629 from := strings.Split(lang, "_")
630 if lang := from[0]; lang != "root" {
635 // Include locales for plural rules, which uses a different structure.
636 for _, plurals := range b.data.Supplemental().Plurals {
637 for _, rules := range plurals.PluralRules {
638 for _, lang := range strings.Split(rules.Locales, " ") {
639 if lang = strings.Split(lang, "_")[0]; lang != "root" {
645 // Include languages in likely subtags.
646 for _, m := range b.supp.LikelySubtags.LikelySubtag {
647 from := strings.Split(m.From, "_")
650 // Include ISO-639 alpha-3 bibliographic entries.
651 for _, a := range meta.Alias.LanguageAlias {
652 if a.Reason == "bibliographic" {
653 b.langNoIndex.add(a.Type)
656 // Include regions in territoryAlias (not all are in the IANA registry!)
657 for _, reg := range b.supp.Metadata.Alias.TerritoryAlias {
658 if len(reg.Type) == 2 {
659 b.region.add(reg.Type)
663 for _, s := range b.lang.s {
665 b.langNoIndex.remove(s)
668 b.writeConst("numLanguages", len(b.lang.slice())+len(b.langNoIndex.slice()))
669 b.writeConst("numScripts", len(b.script.slice()))
670 b.writeConst("numRegions", len(b.region.slice()))
672 // Add dummy codes at the start of each list to represent "unspecified".
678 b.locale.parse(meta.DefaultContent.Locales)
681 // TODO: region inclusion data will probably not be used in future matchers.
683 func (b *builder) computeRegionGroups() {
684 b.groups = make(map[int]index)
686 // Create group indices.
687 for i := 1; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID.
688 b.groups[i] = index(len(b.groups))
690 for _, g := range b.supp.TerritoryContainment.Group {
691 // Skip UN and EURO zone as they are flattening the containment
693 if g.Type == "EZ" || g.Type == "UN" {
696 group := b.region.index(g.Type)
697 if _, ok := b.groups[group]; !ok {
698 b.groups[group] = index(len(b.groups))
701 if len(b.groups) > 64 {
702 log.Fatalf("only 64 groups supported, found %d", len(b.groups))
704 b.writeConst("nRegionGroups", len(b.groups))
707 var langConsts = []string{
708 "af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es",
709 "et", "fa", "fi", "fil", "fr", "gu", "he", "hi", "hr", "hu", "hy", "id", "is",
710 "it", "ja", "ka", "kk", "km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml",
711 "mn", "mo", "mr", "ms", "mul", "my", "nb", "ne", "nl", "no", "pa", "pl", "pt",
712 "ro", "ru", "sh", "si", "sk", "sl", "sq", "sr", "sv", "sw", "ta", "te", "th",
713 "tl", "tn", "tr", "uk", "ur", "uz", "vi", "zh", "zu",
715 // constants for grandfathered tags (if not already defined)
716 "jbo", "ami", "bnn", "hak", "tlh", "lb", "nv", "pwn", "tao", "tay", "tsu",
717 "nn", "sfb", "vgt", "sgg", "cmn", "nan", "hsn",
720 // writeLanguage generates all tables needed for language canonicalization.
721 func (b *builder) writeLanguage() {
722 meta := b.supp.Metadata
724 b.writeConst("nonCanonicalUnd", b.lang.index("und"))
725 b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
726 b.writeConst("langPrivateStart", b.langIndex("qaa"))
727 b.writeConst("langPrivateEnd", b.langIndex("qtz"))
729 // Get language codes that need to be mapped (overlong 3-letter codes,
730 // deprecated 2-letter codes, legacy and grandfathered tags.)
731 langAliasMap := stringSet{}
732 aliasTypeMap := map[string]langAliasType{}
734 // altLangISO3 holds the alternative ISO3 names that need to be mapped.
735 altLangISO3 := stringSet{}
736 // Add dummy start to avoid the use of index 0.
737 altLangISO3.add("---")
738 altLangISO3.updateLater("---", "aa")
740 lang := b.lang.clone()
741 for _, a := range meta.Alias.LanguageAlias {
742 if a.Replacement == "" {
743 a.Replacement = "und"
745 // TODO: support mapping to tags
746 repl := strings.SplitN(a.Replacement, "_", 2)[0]
747 if a.Reason == "overlong" {
748 if len(a.Replacement) == 2 && len(a.Type) == 3 {
749 lang.updateLater(a.Replacement, a.Type)
751 } else if len(a.Type) <= 3 {
753 case "macrolanguage":
754 aliasTypeMap[a.Type] = langMacro
758 case "bibliographic", "legacy":
762 aliasTypeMap[a.Type] = langLegacy
764 log.Fatalf("new %s alias: %s", a.Reason, a.Type)
766 langAliasMap.add(a.Type)
767 langAliasMap.updateLater(a.Type, repl)
770 // Manually add the mapping of "nb" (Norwegian) to its macro language.
771 // This can be removed if CLDR adopts this change.
772 langAliasMap.add("nb")
773 langAliasMap.updateLater("nb", "no")
774 aliasTypeMap["nb"] = langMacro
776 for k, v := range b.registry {
777 // Also add deprecated values for 3-letter ISO codes, which CLDR omits.
778 if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
780 langAliasMap.updateLater(k, v.preferred)
781 aliasTypeMap[k] = langDeprecated
784 // Fix CLDR mappings.
785 lang.updateLater("tl", "tgl")
786 lang.updateLater("sh", "hbs")
787 lang.updateLater("mo", "mol")
788 lang.updateLater("no", "nor")
789 lang.updateLater("tw", "twi")
790 lang.updateLater("nb", "nob")
791 lang.updateLater("ak", "aka")
792 lang.updateLater("bh", "bih")
794 // Ensure that each 2-letter code is matched with a 3-letter code.
795 for _, v := range lang.s[1:] {
796 s, ok := lang.update[v]
798 if s, ok = lang.update[langAliasMap.update[v]]; !ok {
805 altLangISO3.updateLater(s, v)
809 // Complete canonicalized language tags.
811 for i, v := range lang.s {
812 // We can avoid these manual entries by using the IANA registry directly.
813 // Seems easier to update the list manually, as changes are rare.
814 // The panic in this loop will trigger if we miss an entry.
816 if s, ok := lang.update[v]; ok {
820 add = string([]byte{0, byte(altLangISO3.index(s))})
822 } else if len(v) == 3 {
825 log.Panicf("no data for long form of %q", v)
829 b.writeConst("lang", tag.Index(lang.join()))
831 b.writeConst("langNoIndexOffset", len(b.lang.s))
833 // space of all valid 3-letter language identifiers.
834 b.writeBitVector("langNoIndex", b.langNoIndex.slice())
836 altLangIndex := []uint16{}
837 for i, s := range altLangISO3.slice() {
838 altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))})
840 idx := b.lang.index(altLangISO3.update[s])
841 altLangIndex = append(altLangIndex, uint16(idx))
844 b.writeConst("altLangISO3", tag.Index(altLangISO3.join()))
845 b.writeSlice("altLangIndex", altLangIndex)
847 b.writeSortedMap("langAliasMap", &langAliasMap, b.langIndex)
848 types := make([]langAliasType, len(langAliasMap.s))
849 for i, s := range langAliasMap.s {
850 types[i] = aliasTypeMap[s]
852 b.writeSlice("langAliasTypes", types)
855 var scriptConsts = []string{
856 "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
860 func (b *builder) writeScript() {
861 b.writeConsts(b.script.index, scriptConsts...)
862 b.writeConst("script", tag.Index(b.script.join()))
864 supp := make([]uint8, len(b.lang.slice()))
865 for i, v := range b.lang.slice()[1:] {
866 if sc := b.registry[v].suppressScript; sc != "" {
867 supp[i+1] = uint8(b.script.index(sc))
870 b.writeSlice("suppressScript", supp)
872 // There is only one deprecated script in CLDR. This value is hard-coded.
873 // We check here if the code must be updated.
874 for _, a := range b.supp.Metadata.Alias.ScriptAlias {
875 if a.Type != "Qaai" {
876 log.Panicf("unexpected deprecated stript %q", a.Type)
881 func parseM49(s string) int16 {
885 v, err := strconv.ParseUint(s, 10, 10)
890 var regionConsts = []string{
891 "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
892 "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
895 func (b *builder) writeRegion() {
896 b.writeConsts(b.region.index, regionConsts...)
898 isoOffset := b.region.index("AA")
899 m49map := make([]int16, len(b.region.slice()))
900 fromM49map := make(map[int16]int)
902 altRegionIDs := []uint16{}
904 b.writeConst("isoRegionOffset", isoOffset)
906 // 2-letter region lookup and mapping to numeric codes.
907 regionISO := b.region.clone()
908 regionISO.s = regionISO.s[isoOffset:]
909 regionISO.sorted = false
911 regionTypes := make([]byte, len(b.region.s))
913 // Is the region valid BCP 47?
914 for s, e := range b.registry {
915 if len(s) == 2 && s == strings.ToUpper(s) {
916 i := b.region.index(s)
917 for _, d := range e.description {
918 if strings.Contains(d, "Private use") {
919 regionTypes[i] = iso3166UserAssigned
922 regionTypes[i] |= bcp47Region
926 // Is the region a valid ccTLD?
927 r := gen.OpenIANAFile("domains/root/db")
930 buf, err := ioutil.ReadAll(r)
932 re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`)
933 for _, m := range re.FindAllSubmatch(buf, -1) {
934 i := b.region.index(strings.ToUpper(string(m[1])))
935 regionTypes[i] |= ccTLD
938 b.writeSlice("regionTypes", regionTypes)
940 iso3Set := make(map[string]int)
941 update := func(iso2, iso3 string) {
942 i := regionISO.index(iso2)
943 if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] {
944 regionISO.s[i] += iso3[1:]
948 regionISO.s[i] += string([]byte{0, byte(j)})
950 iso3Set[iso3] = len(altRegionISO3)
951 regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))})
952 altRegionISO3 += iso3
953 altRegionIDs = append(altRegionIDs, uint16(isoOffset+i))
957 for _, tc := range b.supp.CodeMappings.TerritoryCodes {
958 i := regionISO.index(tc.Type) + isoOffset
959 if d := m49map[i]; d != 0 {
960 log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d)
962 m49 := parseM49(tc.Numeric)
964 if r := fromM49map[m49]; r == 0 {
967 dep := b.registry[regionISO.s[r-isoOffset]].deprecated
968 if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) {
973 for _, ta := range b.supp.Metadata.Alias.TerritoryAlias {
974 if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 {
975 from := parseM49(ta.Type)
976 if r := fromM49map[from]; r == 0 {
977 fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset
981 for _, tc := range b.supp.CodeMappings.TerritoryCodes {
982 if len(tc.Alpha3) == 3 {
983 update(tc.Type, tc.Alpha3)
986 // These entries are not included in territoryCodes. Mostly 3-letter variants
987 // of deleted codes and an entry for QU.
988 for _, m := range []struct{ iso2, iso3 string }{
1002 // These three-letter codes are used for others as well.
1005 update(m.iso2, m.iso3)
1007 for i, s := range regionISO.s {
1009 regionISO.s[i] = s + " "
1012 b.writeConst("regionISO", tag.Index(regionISO.join()))
1013 b.writeConst("altRegionISO3", altRegionISO3)
1014 b.writeSlice("altRegionIDs", altRegionIDs)
1016 // Create list of deprecated regions.
1017 // TODO: consider inserting SF -> FI. Not included by CLDR, but is the only
1018 // Transitionally-reserved mapping not included.
1019 regionOldMap := stringSet{}
1020 // Include regions in territoryAlias (not all are in the IANA registry!)
1021 for _, reg := range b.supp.Metadata.Alias.TerritoryAlias {
1022 if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 {
1023 regionOldMap.add(reg.Type)
1024 regionOldMap.updateLater(reg.Type, reg.Replacement)
1025 i, _ := regionISO.find(reg.Type)
1026 j, _ := regionISO.find(reg.Replacement)
1027 if k := m49map[i+isoOffset]; k == 0 {
1028 m49map[i+isoOffset] = m49map[j+isoOffset]
1032 b.writeSortedMap("regionOldMap", ®ionOldMap, func(s string) uint16 {
1033 return uint16(b.region.index(s))
1035 // 3-digit region lookup, groupings.
1036 for i := 1; i < isoOffset; i++ {
1037 m := parseM49(b.region.s[i])
1041 b.writeSlice("m49", m49map)
1047 if len(m49map) >= 1<<regionBits {
1048 log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits)
1050 m49Index := [9]int16{}
1051 fromM49 := []uint16{}
1053 for k, _ := range fromM49map {
1054 m49 = append(m49, int(k))
1057 for _, k := range m49[1:] {
1058 val := (k & (1<<searchBits - 1)) << regionBits
1059 fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)]))
1060 m49Index[1:][k>>searchBits] = int16(len(fromM49))
1062 b.writeSlice("m49Index", m49Index)
1063 b.writeSlice("fromM49", fromM49)
1067 // TODO: put these lists in regionTypes as user data? Could be used for
1068 // various optimizations and refinements and could be exposed in the API.
1069 iso3166Except = "AC CP DG EA EU FX IC SU TA UK"
1070 iso3166Trans = "AN BU CS NT TP YU ZR" // SF is not in our set of Regions.
1071 // DY and RH are actually not deleted, but indeterminately reserved.
1072 iso3166DelCLDR = "CT DD DY FQ HV JT MI NH NQ PC PU PZ RH VD WK YD"
1076 iso3166UserAssigned = 1 << iota
1081 func find(list []string, s string) int {
1082 for i, t := range list {
1090 // writeVariants generates per-variant information and creates a map from variant
1091 // name to index value. We assign index values such that sorting multiple
1092 // variants by index value will result in the correct order.
1093 // There are two types of variants: specialized and generalized. Specialized variants
1094 // are only applicable to certain language or language-script pairs. Generalized
1095 // variants apply to any language. Generalized variants always sort after
1096 // specialized variants. We will therefore always assign a higher index value
1097 // to a generalized variant than any other variant. Generalized variants are
1098 // sorted alphabetically among themselves.
1099 // Specialized variants may also sort after other specialized variants. Such
1100 // variants will be ordered after any of the variants they may follow.
1101 // We assume that if a variant x is followed by a variant y, then for any prefix
1102 // p of x, p-x is a prefix of y. This allows us to order tags based on the
1103 // maximum of the length of any of its prefixes.
1104 // TODO: it is possible to define a set of Prefix values on variants such that
1105 // a total order cannot be defined to the point that this algorithm breaks.
1106 // In other words, we cannot guarantee the same order of variants for the
1107 // future using the same algorithm or for non-compliant combinations of
1108 // variants. For this reason, consider using simple alphabetic sorting
1109 // of variants and ignore Prefix restrictions altogether.
1110 func (b *builder) writeVariant() {
1111 generalized := stringSet{}
1112 specialized := stringSet{}
1113 specializedExtend := stringSet{}
1114 // Collate the variants by type and check assumptions.
1115 for _, v := range b.variant.slice() {
1117 if len(e.prefix) == 0 {
1121 c := strings.Split(e.prefix[0], "-")
1122 hasScriptOrRegion := false
1124 _, hasScriptOrRegion = b.script.find(c[1])
1125 if !hasScriptOrRegion {
1126 _, hasScriptOrRegion = b.region.find(c[1])
1130 if len(c) == 1 || len(c) == 2 && hasScriptOrRegion {
1131 // Variant is preceded by a language.
1135 // Variant is preceded by another variant.
1136 specializedExtend.add(v)
1137 prefix := c[0] + "-"
1138 if hasScriptOrRegion {
1141 for _, p := range e.prefix {
1142 // Verify that the prefix minus the last element is a prefix of the
1143 // predecessor element.
1144 i := strings.LastIndex(p, "-")
1145 pred := b.registry[p[i+1:]]
1146 if find(pred.prefix, p[:i]) < 0 {
1147 log.Fatalf("prefix %q for variant %q not consistent with predecessor spec", p, v)
1149 // The sorting used below does not work in the general case. It works
1150 // if we assume that variants that may be followed by others only have
1151 // prefixes of the same length. Verify this.
1152 count := strings.Count(p[:i], "-")
1153 for _, q := range pred.prefix {
1154 if c := strings.Count(q, "-"); c != count {
1155 log.Fatalf("variant %q preceding %q has a prefix %q of size %d; want %d", p[i+1:], v, q, c, count)
1158 if !strings.HasPrefix(p, prefix) {
1159 log.Fatalf("prefix %q of variant %q should start with %q", p, v, prefix)
1164 // Sort extended variants.
1165 a := specializedExtend.s
1166 less := func(v, w string) bool {
1167 // Sort by the maximum number of elements.
1168 maxCount := func(s string) (max int) {
1169 for _, p := range b.registry[s].prefix {
1170 if c := strings.Count(p, "-"); c > max {
1176 if cv, cw := maxCount(v), maxCount(w); cv != cw {
1179 // Sort by name as tie breaker.
1182 sort.Sort(funcSorter{less, sort.StringSlice(a)})
1183 specializedExtend.frozen = true
1185 // Create index from variant name to index.
1186 variantIndex := make(map[string]uint8)
1187 add := func(s []string) {
1188 for _, v := range s {
1189 variantIndex[v] = uint8(len(variantIndex))
1192 add(specialized.slice())
1193 add(specializedExtend.s)
1194 numSpecialized := len(variantIndex)
1195 add(generalized.slice())
1196 if n := len(variantIndex); n > 255 {
1197 log.Fatalf("maximum number of variants exceeded: was %d; want <= 255", n)
1199 b.writeMap("variantIndex", variantIndex)
1200 b.writeConst("variantNumSpecialized", numSpecialized)
1203 func (b *builder) writeLanguageInfo() {
1206 // writeLikelyData writes tables that are used both for finding parent relations and for
1207 // language matching. Each entry contains additional bits to indicate the status of the
1208 // data to know when it cannot be used for parent relations.
//
// The data comes from the LikelySubtag entries of b.supp.LikelySubtags. The
// tables written are likelyScript, likelyLang, likelyLangList, likelyRegion,
// likelyRegionList and likelyRegionGroup, each preceded by the type of its
// elements. Entries that fail a consistency check abort the generator
// through log.Fatalf.
1209 func (b *builder) writeLikelyData() {
1215 type ( // generated types
1216 likelyScriptRegion struct {
1221 likelyLangScript struct {
1226 likelyLangRegion struct {
1230 // likelyTag is used for getting likely tags for group regions, where
1231 // the likely region might be a region contained in the group.
1238 var ( // generated variables
// One slot per group, language, region and script respectively.
1239 likelyRegionGroup = make([]likelyTag, len(b.groups))
1240 likelyLang = make([]likelyScriptRegion, len(b.lang.s))
1241 likelyRegion = make([]likelyLangScript, len(b.region.s))
1242 likelyScript = make([]likelyLangRegion, len(b.script.s))
// Shared lists for languages and regions that have more than one entry;
// the slot in the main table then stores an offset into the list and a
// count (see the isList handling below).
1243 likelyLangList = []likelyScriptRegion{}
1244 likelyRegionList = []likelyLangScript{}
// fromTo pairs the subtags of a From tag with the subtags of its To tag.
1246 type fromTo struct {
// Entries are first grouped by language index and by region index; the
// tables for those two are assembled after the loop.
1249 langToOther := map[int][]fromTo{}
1250 regionToOther := map[int][]fromTo{}
1251 for _, m := range b.supp.LikelySubtags.LikelySubtag {
// Both tags are split into subtags at "_".
1252 from := strings.Split(m.From, "_")
1253 to := strings.Split(m.To, "_")
// Consistency checks on the entry. Per the messages, To is expected to
// have three subtags and From between one and three.
1255 log.Fatalf("invalid number of subtags in %q: found %d, want 3", m.To, len(to))
1258 log.Fatalf("invalid number of subtags: found %d, want 1-3", len(from))
// Unless From starts with "und", the expansion must keep the language.
1260 if from[0] != to[0] && from[0] != "und" {
1261 log.Fatalf("unexpected language change in expansion: %s -> %s", from, to)
1264 if from[2] != to[2] {
1265 log.Fatalf("unexpected region change in expansion: %s -> %s", from, to)
1267 if from[0] != "und" {
1268 log.Fatalf("unexpected fully specified from tag: %s -> %s", from, to)
// A From tag with a single subtag, or with a language other than "und",
// is filed under langToOther.
1271 if len(from) == 1 || from[0] != "und" {
1273 if from[0] != "und" {
1274 id = b.lang.index(from[0])
1276 langToOther[id] = append(langToOther[id], fromTo{from, to})
1277 } else if len(from) == 2 && len(from[1]) == 4 {
// Here From is "und" plus one 4-letter subtag, which is looked up as a
// script; the entry fills the script table directly.
1278 sid := b.script.index(from[1])
1279 likelyScript[sid].lang = uint16(b.langIndex(to[0]))
1280 likelyScript[sid].region = uint16(b.region.index(to[2]))
// The last From subtag is looked up as a region.
1282 r := b.region.index(from[len(from)-1])
1283 if id, ok := b.groups[r]; ok {
// r has an entry in b.groups: fill the likelyRegionGroup slot of that
// group. Only "und" From tags are accepted here.
1284 if from[0] != "und" {
1285 log.Fatalf("region changed unexpectedly: %s -> %s", from, to)
1287 likelyRegionGroup[id].lang = uint16(b.langIndex(to[0]))
1288 likelyRegionGroup[id].script = uint8(b.script.index(to[1]))
1289 likelyRegionGroup[id].region = uint16(b.region.index(to[2]))
// File the entry under its region index.
1291 regionToOther[r] = append(regionToOther[r], fromTo{from, to})
1295 b.writeType(likelyLangRegion{})
1296 b.writeSlice("likelyScript", likelyScript)
// Build the per-language table from the grouped entries.
1298 for id := range b.lang.s {
1299 list := langToOther[id]
// The slot is filled directly from the first entry of the list.
1301 likelyLang[id].region = uint16(b.region.index(list[0].to[2]))
1302 likelyLang[id].script = uint8(b.script.index(list[0].to[1]))
1303 } else if len(list) > 1 {
// Several entries: flag the slot as a list and reuse region for the
// offset into likelyLangList and script for the number of entries.
1304 likelyLang[id].flags = isList
1305 likelyLang[id].region = uint16(len(likelyLangList))
1306 likelyLang[id].script = uint8(len(list))
1307 for _, x := range list {
// A second From subtag that equals the To region is flagged
// regionInFrom; scriptInFrom is presumably the alternative.
1309 if len(x.from) > 1 {
1310 if x.from[1] == x.to[2] {
1311 flags = regionInFrom
1313 flags = scriptInFrom
1316 likelyLangList = append(likelyLangList, likelyScriptRegion{
1317 region: uint16(b.region.index(x.to[2])),
1318 script: uint8(b.script.index(x.to[1])),
1324 // TODO: merge suppressScript data with this table.
1325 b.writeType(likelyScriptRegion{})
1326 b.writeSlice("likelyLang", likelyLang)
1327 b.writeSlice("likelyLangList", likelyLangList)
// Build the per-region table from the grouped entries.
1329 for id := range b.region.s {
1330 list := regionToOther[id]
// The slot is filled directly from the first entry of the list.
1332 likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0]))
1333 likelyRegion[id].script = uint8(b.script.index(list[0].to[1]))
// A From tag of more than two subtags is recorded as scriptInFrom.
1334 if len(list[0].from) > 2 {
1335 likelyRegion[id].flags = scriptInFrom
1337 } else if len(list) > 1 {
// Several entries: flag the slot as a list and reuse lang for the
// offset into likelyRegionList and script for the number of entries.
1338 likelyRegion[id].flags = isList
1339 likelyRegion[id].lang = uint16(len(likelyRegionList))
1340 likelyRegion[id].script = uint8(len(list))
1341 for i, x := range list {
// Every entry after the first must have a three-subtag From tag.
1342 if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 {
1343 log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i)
// From here on x is the table entry; it shadows the loop variable.
1345 x := likelyLangScript{
1346 lang: uint16(b.langIndex(x.to[0])),
1347 script: uint8(b.script.index(x.to[1])),
// NOTE(review): this tests list[0], not the entry being converted, so
// every entry of a list gets the same flag; confirm that is intended.
1349 if len(list[0].from) > 2 {
1350 x.flags = scriptInFrom
1352 likelyRegionList = append(likelyRegionList, x)
1356 b.writeType(likelyLangScript{})
1357 b.writeSlice("likelyRegion", likelyRegion)
1358 b.writeSlice("likelyRegionList", likelyRegionList)
1360 b.writeType(likelyTag{})
1361 b.writeSlice("likelyRegionGroup", likelyRegionGroup)
// mutualIntelligibility is the element type of the matchLang table;
// writeMatchData sets its want, have, distance and oneway fields.
1364 type mutualIntelligibility struct {
// scriptIntelligibility is the element type of the matchScript table: a
// wanted and an available language/script pair. writeMatchData also sets a
// distance on each entry.
1370 type scriptIntelligibility struct {
1371 wantLang, haveLang uint16
1372 wantScript, haveScript uint8
// regionIntelligibility is the element type of the matchRegion table.
// writeMatchData also sets a distance on each entry.
1377 type regionIntelligibility struct {
1378 lang uint16 // compact language id
1379 script uint8 // 0 means any
1380 group uint8 // 0 means any; if bit 7 is set it means inverse
1385 // writeMatchData writes tables with languages and scripts for which there is
1386 // mutual intelligibility. The data is based on CLDR's languageMatching data.
1387 // Note that we use a different algorithm than the one defined by CLDR and that
1388 // we slightly modify the data. For example, we convert scores to confidence levels.
1389 // We also drop all region-related data as we use a different algorithm to
1390 // determine region equivalence.
//
// The tables written are regionToGroups, paradigmLocales, matchLang,
// matchScript and matchRegion; the three match tables are ordered by
// ascending distance.
//
// NOTE(review): regionToGroups and matchRegion are region data, so the
// sentence above about dropping all region-related data looks out of date;
// confirm and reword.
1391 func (b *builder) writeMatchData() {
1392 lm := b.supp.LanguageMatching.LanguageMatches
// Narrow lm by its type attribute to "written_new" (presumably keeping only
// the matching elements; see cldr.Slice.SelectAnyOf). Only lm[0] is used
// below.
1393 cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
// regionHierarchy maps a territory group to the members listed in its
// Contains attribute.
1395 regionHierarchy := map[string][]string{}
1396 for _, g := range b.supp.TerritoryContainment.Group {
1397 regions := strings.Split(g.Contains, " ")
1398 regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
// regionToGroups holds one bit set per region index; bit i is set for the
// regions covered by the i-th match variable.
1400 regionToGroups := make([]uint8, len(b.region.s))
// idToIndex maps the id of a match variable to its position plus one.
1402 idToIndex := map[string]uint8{}
1403 for i, mv := range lm[0].MatchVariable {
1405 log.Fatalf("Too many groups: %d", i)
1407 idToIndex[mv.Id] = uint8(i + 1)
1408 // TODO: also handle '-'
1409 for _, r := range strings.Split(mv.Value, "+") {
// todo is a worklist (presumably seeded with r): it grows with the regions
// contained in each visited region, so those get the group bit as well.
1411 for k := 0; k < len(todo); k++ {
1413 regionToGroups[b.region.index(r)] |= 1 << uint8(i)
1414 todo = append(todo, regionHierarchy[r]...)
1418 b.writeSlice("regionToGroups", regionToGroups)
1420 // maps language id to in- and out-of-group region.
1421 paradigmLocales := [][3]uint16{}
// The locale list is space separated and consumed two locales at a time:
// x[0] receives the language index, x[1] and x[2] the region indices.
1422 locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
1423 for i := 0; i < len(locales); i += 2 {
1425 for j := 0; j < 2; j++ {
// Split at the first hyphen only: language, then the rest.
1426 pc := strings.SplitN(locales[i+j], "-", 2)
1427 x[0] = b.langIndex(pc[0])
1429 x[1+j] = uint16(b.region.index(pc[1]))
1432 paradigmLocales = append(paradigmLocales, x)
1434 b.writeSlice("paradigmLocales", paradigmLocales)
1436 b.writeType(mutualIntelligibility{})
1437 b.writeType(scriptIntelligibility{})
1438 b.writeType(regionIntelligibility{})
1440 matchLang := []mutualIntelligibility{}
1441 matchScript := []scriptIntelligibility{}
1442 matchRegion := []regionIntelligibility{}
1443 // Distribute the languageMatch entries over the three lists above.
1444 for _, m := range lm[0].LanguageMatch {
1445 // Different versions of CLDR use different separators.
1446 desired := strings.Replace(m.Desired, "-", "_", -1)
1447 supported := strings.Replace(m.Supported, "-", "_", -1)
1448 d := strings.Split(desired, "_")
1449 s := strings.Split(supported, "_")
// Both sides must have the same number of subtags.
1450 if len(d) != len(s) {
1451 log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
// NOTE(review): the ParseInt error is discarded, so a malformed or
// out-of-range Distance is not reported here.
1454 distance, _ := strconv.ParseInt(m.Distance, 10, 8)
// The all-wildcard pair gets separate treatment on lines not shown here.
1457 if desired == supported && desired == "*_*" {
1460 // language-script pair.
1461 matchScript = append(matchScript, scriptIntelligibility{
1462 wantLang: uint16(b.langIndex(d[0])),
1463 haveLang: uint16(b.langIndex(s[0])),
1464 wantScript: uint8(b.script.index(d[1])),
1465 haveScript: uint8(b.script.index(s[1])),
1466 distance: uint8(distance),
// A match that is not one-way is also stored with want and have swapped.
1468 if m.Oneway != "true" {
1469 matchScript = append(matchScript, scriptIntelligibility{
1470 wantLang: uint16(b.langIndex(s[0])),
1471 haveLang: uint16(b.langIndex(d[0])),
1472 wantScript: uint8(b.script.index(s[1])),
1473 haveScript: uint8(b.script.index(d[1])),
1474 distance: uint8(distance),
1478 if desired == supported && desired == "*" {
1482 // nb == no is already handled by macro mapping. Check there
1483 // really is only this case.
1484 if d[0] != "no" || s[0] != "nb" {
1485 log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
1489 // TODO: consider dropping oneway field and just doubling the entry.
1490 matchLang = append(matchLang, mutualIntelligibility{
1491 want: uint16(b.langIndex(d[0])),
1492 have: uint16(b.langIndex(s[0])),
1493 distance: uint8(distance),
1494 oneway: m.Oneway == "true",
1497 if desired == supported && desired == "*_*_*" {
1500 if desired != supported { // (Weird but correct.)
1501 log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
// Region entries are built from the desired side alone, which the check
// above requires to equal the supported side.
1504 ri := regionIntelligibility{
1505 lang: b.langIndex(d[0]),
1506 distance: uint8(distance),
1509 ri.script = uint8(b.script.index(d[1]))
1513 ri.group = 0x80 // not contained in anything
1514 case strings.HasPrefix(d[2], "$!"):
// Drop the "!" so that d[2] has the "$name" form, presumably so that it
// matches the ids stored in idToIndex.
1516 d[2] = "$" + d[2][len("$!"):]
1518 case strings.HasPrefix(d[2], "$"):
1519 ri.group |= idToIndex[d[2]]
1521 matchRegion = append(matchRegion, ri)
1523 log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
// Each list is stably sorted by ascending distance before it is written, so
// entries of equal distance keep their input order.
1526 sort.SliceStable(matchLang, func(i, j int) bool {
1527 return matchLang[i].distance < matchLang[j].distance
1529 b.writeSlice("matchLang", matchLang)
1531 sort.SliceStable(matchScript, func(i, j int) bool {
1532 return matchScript[i].distance < matchScript[j].distance
1534 b.writeSlice("matchScript", matchScript)
1536 sort.SliceStable(matchRegion, func(i, j int) bool {
1537 return matchRegion[i].distance < matchRegion[j].distance
1539 b.writeSlice("matchRegion", matchRegion)
// writeRegionInclusionData writes the tables regionContainment,
// regionInclusion, regionInclusionBits and regionInclusionNext. They are
// derived from b.groups and from the territory containment groups in b.supp.
1542 func (b *builder) writeRegionInclusionData() {
1544 // mm holds for each group the set of groups with a distance of 1.
1545 mm = make(map[int][]index)
1547 // containment holds for each group the groups it directly contains;
1548 // the transitive closure is computed further down.
1549 containment = make(map[index][]index)
1551 for _, g := range b.supp.TerritoryContainment.Group {
1552 // Skip UN and EURO zone as they are flattening the containment
1554 if g.Type == "EZ" || g.Type == "UN" {
// group is the region index of the containing territory and groupIdx the
// group id registered for it in b.groups.
1557 group := b.region.index(g.Type)
1558 groupIdx := b.groups[group]
1559 for _, mem := range strings.Split(g.Contains, " ") {
1560 r := b.region.index(mem)
// Every member records the group that contains it.
1561 mm[r] = append(mm[r], groupIdx)
// A member that is itself a group is also linked from its container.
// Note that g shadows the loop variable inside this if statement.
1562 if g, ok := b.groups[r]; ok {
1563 mm[group] = append(mm[group], g)
1564 containment[groupIdx] = append(containment[groupIdx], g)
// regionContainment holds one bit mask per group: the group's own bit plus
// the bit of every group collected in l.
1569 regionContainment := make([]uint64, len(b.groups))
1570 for _, g := range b.groups {
1573 // Compute the transitive closure of containment.
// l is extended while it is being walked, so newly reached groups are
// expanded in turn.
1574 for i := 0; i < len(l); i++ {
1575 l = append(l, containment[l[i]]...)
1578 // Compute the bitmask.
1579 regionContainment[g] = 1 << g
1580 for _, v := range l {
1581 regionContainment[g] |= 1 << v
1584 b.writeSlice("regionContainment", regionContainment)
// regionInclusion stores, per region index, the index that bvs assigns to
// the region's bit vector.
1586 regionInclusion := make([]uint8, len(b.region.s))
1587 bvs := make(map[uint64]index)
1588 // Make the first bitvector positions correspond with the groups.
1589 for r, i := range b.groups {
1590 bv := uint64(1 << i)
1591 for _, g := range mm[r] {
1595 regionInclusion[r] = uint8(bvs[bv])
// Regions without an entry in b.groups are handled in a second pass.
1597 for r := 1; r < len(b.region.s); r++ {
1598 if _, ok := b.groups[r]; !ok {
1600 for _, g := range mm[r] {
1604 // Pick the world for unspecified regions.
1605 bv = 1 << b.groups[b.region.index("001")]
// Register a bit vector the first time it is seen.
1607 if _, ok := bvs[bv]; !ok {
1608 bvs[bv] = index(len(bvs))
1610 regionInclusion[r] = uint8(bvs[bv])
1613 b.writeSlice("regionInclusion", regionInclusion)
// Invert bvs: position v holds the bit vector that was assigned index v.
1614 regionInclusionBits := make([]uint64, len(bvs))
1615 for k, v := range bvs {
1616 regionInclusionBits[v] = uint64(k)
1618 // Add bit vectors for increasingly large distances until a fixed point is reached.
1619 regionInclusionNext := []uint8{}
// The slice may grow inside the loop; appended vectors are processed too.
1620 for i := 0; i < len(regionInclusionBits); i++ {
1621 bits := regionInclusionBits[i]
// OR together the vectors stored at the positions of the set bits. The
// inner i shadows the outer loop index.
1623 for i := uint(0); i < uint(len(b.groups)); i++ {
1624 if bits&(1<<i) != 0 {
1625 next |= regionInclusionBits[i]
// A vector not seen before gets the next free index.
1628 if _, ok := bvs[next]; !ok {
1629 bvs[next] = index(len(bvs))
1630 regionInclusionBits = append(regionInclusionBits, next)
1632 regionInclusionNext = append(regionInclusionNext, uint8(bvs[next]))
1634 b.writeSlice("regionInclusionBits", regionInclusionBits)
1635 b.writeSlice("regionInclusionNext", regionInclusionNext)
// parentRel is the element type of the parents table; writeParents sets its
// lang, script, maxScript, toRegion and fromRegion fields.
1638 type parentRel struct {
// writeParents writes the parentRel type followed by the parents table, which
// is built from the ParentLocale entries of the supplemental data.
1646 func (b *builder) writeParents() {
1647 b.writeType(parentRel{})
1649 parents := []parentRel{}
1651 // Construct parent overrides.
1653 for _, p := range b.data.Supplemental().ParentLocales.ParentLocale {
1654 // Skipping non-standard scripts to root is implemented using addTags.
1655 if p.Parent == "root" {
// The parent is split into subtags at "_"; the first one is the language.
1659 sub := strings.Split(p.Parent, "_")
1660 parent := parentRel{lang: b.langIndex(sub[0])}
1662 // TODO: check that all undefined scripts are indeed Latn in these
// Parent without a script subtag: sub[1] is looked up as the region and
// Latn is used as maxScript.
1664 parent.maxScript = uint8(b.script.index("Latn"))
1665 parent.toRegion = uint16(b.region.index(sub[1]))
// Parent with a script subtag: sub[1] is the script, which also serves as
// maxScript, and sub[2] is the region.
1667 parent.script = uint8(b.script.index(sub[1]))
1668 parent.maxScript = parent.script
1669 parent.toRegion = uint16(b.region.index(sub[2]))
// Each locale in the space-separated list contributes the text after its
// last "_", looked up as a region.
1671 for _, c := range strings.Split(p.Locales, " ") {
1672 region := b.region.index(c[strings.LastIndex(c, "_")+1:])
1673 parent.fromRegion = append(parent.fromRegion, uint16(region))
1675 parents = append(parents, parent)
// n counts the fromRegion entries over all parents.
1676 n += len(parent.fromRegion)
// n*2 is passed as the extra size, presumably two bytes for each uint16
// fromRegion entry.
1678 b.writeSliceAddSize("parents", n*2, parents)
1684 gen.Repackage("gen_common.go", "common.go", "language")
1686 w := gen.NewCodeWriter()
1687 defer w.WriteGoFile("tables.go", "language")
1689 fmt.Fprintln(w, `import "golang.org/x/text/internal/tag"`)
1692 gen.WriteCLDRVersion(w)
1695 b.writeType(fromTo{})
1700 // TODO: b.writeLocale()
1701 b.computeRegionGroups()
1704 b.writeRegionInclusionData()