1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // Generator for display name tables.
20 "golang.org/x/text/internal/gen"
21 "golang.org/x/text/language"
22 "golang.org/x/text/unicode/cldr"
// Command-line flags of the generator. Each flag.Bool and flag.String call
// registers the flag under the name given as its first argument.
26 test = flag.Bool("test", false,
27 "test existing tables; can be used to compare web data with package data.")
28 outputFile = flag.String("output", "tables.go", "output file")
30 stats = flag.Bool("stats", false, "prints statistics to stderr")
32 short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
// draft is parsed with cldr.ParseDraft in filter.
33 draft = flag.String("draft",
35 `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
36 pkg = flag.String("package",
38 "the name of the package in which the generated file is to be included")
// tags and dict are tagSet values; newTagSet registers them with flag.Var.
40 tags = newTagSet("tags",
42 "space-separated list of tags to include or empty for all")
// NOTE(review): the usage text below says list-or-tags where list-of-tags is
// presumably intended; fixing it changes a runtime string.
43 dict = newTagSet("dict",
45 "space-separated list or tags for which to include a Dictionary. "+
46 `"" means the common list from go.text/language.`)
49 func dictTags() (tag []language.Tag) {
50 // TODO: replace with language.Common.Tags() once supported.
51 const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
52 "es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
53 "ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
54 "pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
55 "zh zh-Hans zh-Hant zu"
57 for _, s := range strings.Split(str, " ") {
58 tag = append(tag, language.MustParse(s))
66 // Read the CLDR zip file.
67 r := gen.OpenCLDRCoreZip()
// Restrict decoding to the main and supplemental directories and to the
// localeDisplayNames section.
71 d.SetDirFilter("main", "supplemental")
72 d.SetSectionFilter("localeDisplayNames")
73 data, err := d.DecodeZip(r)
// A decoding failure aborts the program.
75 log.Fatalf("DecodeZip: %v", err)
// Writing the Go file named by the output flag is deferred until the
// surrounding function returns, after all tables were written to w.
78 w := gen.NewCodeWriter()
79 defer w.WriteGoFile(*outputFile, "display")
81 gen.WriteCLDRVersion(w)
// The builder starts out with an empty map of groups.
86 group: make(map[string]*group),
91 const tagForm = language.All
93 // tagSet is used to parse command line flags of tags. It implements the
94 // flag.Value interface.
95 type tagSet map[language.Tag]bool
// newTagSet creates a tagSet, iterates over the given default tags and
// registers the set with flag.Var as the flag called name with the given
// usage text.
97 func newTagSet(name string, tags []language.Tag, usage string) tagSet {
98 f := tagSet(make(map[language.Tag]bool))
99 for _, t := range tags {
102 flag.Var(f, name, usage)
106 // String implements the String method of the flag.Value interface.
// The string forms of the tags are joined with single spaces.
107 func (f tagSet) String() string {
110 tags = append(tags, t.String())
113 return strings.Join(tags, " ")
116 // Set implements Set from the flag.Value interface.
// s is split on single spaces and the elements are parsed with tagForm.
117 func (f tagSet) Set(s string) error {
119 for _, s := range strings.Split(s, " ") {
121 tag, err := tagForm.Parse(s)
// contains presumably reports whether t is selected by the set; callers use
// it to decide which tags to include. TODO confirm against the body.
132 func (f tagSet) contains(t language.Tag) bool {
139 // builder is used to create all tables with display name information.
140 type builder struct {
147 // destination tags for the current locale.
// setData replaces this map with a fresh one on every call.
149 toTagIndex map[string]int
151 // list of supported tags
// Filled by makeSupported; generate resets it to the single tag self before
// computing the self names.
152 supported []language.Tag
154 // key-value pairs per group
// Presumably keyed by the group name passed to setData and writeGroup
// (lang, script, region, self) - TODO confirm.
155 group map[string]*group
158 sizeIndex int // total size of all indexes of headers
159 sizeData int // total size of all data of headers
164 // Maps from a given language to the Namer data for this language.
165 lang map[language.Tag]keyValues
173 // set sets the typ to the name for locale t.
// A note at one of its call sites in generate states that set does not
// overwrite a value.
174 func (g *group) set(t language.Tag, typ, name string) {
// keyValues is the per-locale value type of group.lang. writeGroup indexes it
// with the string keys collected in toTags and appends the stored strings to
// the data of a header; presumably it maps a type code to its display name.
185 type keyValues map[string]string
193 var versionInfo = `// Version is deprecated. Use CLDRVersion.
// self is the tag under which generate stores the names of locales in their
// own language: it is the locale passed to set for the self group and the
// only supported tag while that group is computed.
198 var self = language.MustParse("mul")
200 // generate builds and writes all tables.
// It writes the version information, collects the lang, script and region
// names per locale with setData, writes the groups, the supported tags and
// the dictionaries, and finally computes the self names.
201 func (b *builder) generate() {
202 fmt.Fprintf(b.w, versionInfo, cldr.Version)
// Language names: only languages accepted by the tags flag are recorded,
// keyed by the string form of the parsed tag.
205 b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
206 if ldn.Languages != nil {
207 for _, v := range ldn.Languages.Language {
208 tag := tagForm.MustParse(v.Type)
209 if tags.contains(tag) {
210 g.set(loc, tag.String(), v.Data())
// Script names, keyed by the string form of the parsed script code.
215 b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
216 if ldn.Scripts != nil {
217 for _, v := range ldn.Scripts.Script {
218 code := language.MustParseScript(v.Type)
219 if code.IsPrivateUse() { // Qaaa..Qabx
220 // TODO: data currently appears to be very meager.
221 // Reconsider if we have data for English.
// Aborts generation as soon as English has a name for a private use script.
222 if loc == language.English {
223 log.Fatal("Consider including data for private use scripts.")
227 g.set(loc, code.String(), v.Data())
// Region names, keyed by the string form of the parsed region code.
231 b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
232 if ldn.Territories != nil {
233 for _, v := range ldn.Territories.Territory {
234 g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
244 b.writeGroup("script")
245 b.writeGroup("region")
// The supported tags are emitted as a count plus one string in which every
// tag is followed by a | separator.
247 b.w.WriteConst("numSupported", len(b.supported))
248 buf := bytes.Buffer{}
249 for _, tag := range b.supported {
250 fmt.Fprint(&buf, tag.String(), "|")
252 b.w.WriteConst("supported", buf.String())
254 b.writeDictionaries()
// From here on only the pseudo locale self is supported.
256 b.supported = []language.Tag{self}
258 // Compute the names of locales in their own language. Some of these names
259 // may be specified in their parent locales. We iterate the maximum depth
260 // of the parent three times to match successive parents of tags until a
261 // possible match is found.
// The loop makes four passes (i = 0..3); the k loop below runs i times per
// key, presumably one parent step per iteration - TODO confirm.
262 for i := 0; i < 4; i++ {
263 b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
// After the first pass, a tag with a script but no region is handled through
// a parent composed from its base language only.
265 if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
266 parent, _ = language.Raw.Compose(b)
268 if ldn.Languages != nil {
269 for _, v := range ldn.Languages.Language {
270 key := tagForm.MustParse(v.Type)
273 g.set(self, tag.String(), v.Data())
275 for k := 0; k < i; k++ {
279 g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
// setData calls f for every CLDR locale that has display name data and whose
// tag is accepted by the tags flag. f receives the group being filled, the
// parsed locale tag and the locale's display names. toTagIndex is reset on
// every call.
289 func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
294 b.toTagIndex = make(map[string]int)
298 g = &group{lang: make(map[language.Tag]keyValues)}
301 for _, loc := range b.data.Locales() {
302 // We use RawLDML instead of LDML as we are managing our own inheritance
303 // in this implementation.
304 ldml := b.data.RawLDML(loc)
306 // We do not support the POSIX variant (it is not a supported BCP 47
307 // variant). This locale also doesn't happen to contain any data, so
308 // we'll skip it by checking for this.
309 tag, err := tagForm.Parse(loc)
// A locale name that fails to parse is fatal only when the locale carries
// display name data.
311 if ldml.LocaleDisplayNames != nil {
312 log.Fatalf("setData: %v", err)
316 if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
317 f(g, tag, ldml.LocaleDisplayNames)
// filter reduces the language, script and territory lists of every locale
// that has display name data. The local filter function selects one element
// per group of alt values and parses the draft flag; an invalid draft value
// is fatal.
322 func (b *builder) filter() {
323 filter := func(s *cldr.Slice) {
// Two alternative selections: short forms first, or stand-alone forms first,
// each falling back to the element without an alt attribute. The condition
// choosing between them is presumably the short flag - TODO confirm.
325 s.SelectOnePerGroup("alt", []string{"short", ""})
327 s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
329 d, err := cldr.ParseDraft(*draft)
331 log.Fatalf("filter: %v", err)
335 for _, loc := range b.data.Locales() {
336 if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
// Each list is filtered in place through a cldr.Slice wrapping it; the
// length check afterwards detects lists that became empty.
337 if ldn.Languages != nil {
338 s := cldr.MakeSlice(&ldn.Languages.Language)
339 if filter(&s); len(ldn.Languages.Language) == 0 {
343 if ldn.Scripts != nil {
344 s := cldr.MakeSlice(&ldn.Scripts.Script)
345 if filter(&s); len(ldn.Scripts.Script) == 0 {
349 if ldn.Territories != nil {
350 s := cldr.MakeSlice(&ldn.Territories.Territory)
351 if filter(&s); len(ldn.Territories.Territory) == 0 {
// An emptied territory list is dropped entirely.
352 ldn.Territories = nil
359 // makeSupported creates a list of all supported locales.
360 func (b *builder) makeSupported() {
361 // tags across groups
362 for _, g := range b.group {
363 for t, _ := range g.lang {
364 b.supported = append(b.supported, t)
367 b.supported = b.supported[:unique(tagsSorter(b.supported))]
371 type tagsSorter []language.Tag
373 func (a tagsSorter) Len() int { return len(a) }
374 func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
375 func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }
// writeGroup builds the headers of the group called name and writes its
// table. The keys of all locales are collected into g.toTags, which is sorted
// and de-duplicated with tagsBySize. For every supported tag the values of
// all keys are concatenated into one data string, with an index of offsets
// into it.
377 func (b *builder) writeGroup(name string) {
380 for _, kv := range g.lang {
381 for t, _ := range kv {
382 g.toTags = append(g.toTags, t)
385 g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
387 // Allocate header per supported value.
388 g.headers = make([]header, len(b.supported))
389 for i, sup := range b.supported {
390 kv, ok := g.lang[sup]
392 g.headers[i].tag = sup
// index[j] is the offset in data at which the value of key j starts; the
// extra capacity is for the final entry holding the total length.
// NOTE(review): offsets are stored as uint16 and no overflow check is visible
// here, so data longer than 65535 bytes would wrap - confirm.
396 index := make([]uint16, len(g.toTags), len(g.toTags)+1)
397 for j, t := range g.toTags {
398 index[j] = uint16(len(data))
399 data = append(data, kv[t]...)
401 index = append(index, uint16(len(data)))
403 // Trim the tail of the index.
404 // TODO: indexes can be reduced in size quite a bit more.
// Equal trailing offsets belong to keys without a value.
406 for ; n >= 2 && index[n-2] == index[n-1]; n-- {
410 // Workaround for a bug in CLDR 26.
411 // See http://unicode.org/cldr/trac/ticket/8042.
// Removes the first double quote from the data of hsb.
412 if cldr.Version == "26" && sup.String() == "hsb" {
413 data = bytes.Replace(data, []byte{'"'}, nil, 1)
415 g.headers[i] = header{sup, string(data), index}
417 g.writeTable(b.w, name)
// tagsBySize implements sort.Interface for a slice of tag strings. Strings of
// different length are ordered by length whenever at least one of them is at
// most four bytes long; all other pairs are ordered alphabetically.
type tagsBySize []string

// Len returns the number of strings.
func (l tagsBySize) Len() int { return len(l) }

// Swap exchanges the strings at positions i and j.
func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] }

// Less reports whether the string at position i sorts before the one at
// position j.
func (l tagsBySize) Less(i, j int) bool {
	a, b := l[i], l[j]
	// Sort single-tag entries based on size first. Otherwise alphabetic.
	if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) {
		return len(a) < len(b)
	}
	return a < b
}
433 // parentIndices returns slice a of len(tags) where tags[a[i]] is the parent
// of tags[i]. Parents are looked up by following Parent() from each tag until
// a tag present in tags is found or language.Und is reached.
// writeDictionaries treats an entry of -1 as having no parent.
435 func parentIndices(tags []language.Tag) []int16 {
// index is a lookup table keyed by tag; presumably it holds the position of
// each tag in tags - TODO confirm.
436 index := make(map[language.Tag]int16)
437 for i, t := range tags {
441 // Construct default parents.
442 parents := make([]int16, len(tags))
443 for i, t := range tags {
445 for t = t.Parent(); t != language.Und; t = t.Parent() {
446 if j, ok := index[t]; ok {
455 func (b *builder) writeParents() {
456 parents := parentIndices(b.supported)
457 fmt.Fprintf(b.w, "var parents = ")
458 b.w.WriteArray(parents)
461 // writeKeys writes keys to a special index used by the display package.
462 // keys are assumed to be sorted by length.
// The output is a var block declaring <name>Index, a tagIndex literal, and
// <name>TagsLong. w.Size is increased by the size of three strings and of one
// string slice to account for the generated data.
463 func writeKeys(w *gen.CodeWriter, name string, keys []string) {
464 w.Size += int(3 * reflect.TypeOf("").Size())
465 w.WriteComment("Number of keys: %d", len(keys))
466 fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
// One pass per value of i from 2 to 4, presumably the key length. The keys
// consumed by a pass (sub) are joined into a single string and removed from
// the front of keys.
467 for i := 2; i <= 4; i++ {
469 for _, t := range keys {
475 s := strings.Join(sub, "")
477 fmt.Fprintf(w, ",\n")
478 keys = keys[len(sub):]
480 fmt.Fprintln(w, "\t}")
// Whatever is left in keys presumably ends up in <name>TagsLong.
482 w.Size += int(reflect.TypeOf([]string{}).Size())
483 fmt.Fprintf(w, "\t%sTagsLong = ", name)
// NOTE(review): Fprintln with an argument ending in a newline is reported by
// go vet; the extra blank line looks intentional.
486 fmt.Fprintln(w, ")\n")
489 // identifier creates an identifier from the given tag.
490 func identifier(t language.Tag) string {
491 return strings.Replace(t.String(), "-", "", -1)
// writeEntry writes the element for h in the headers array of a group. name
// is the capitalized group name used in the generated identifiers. Three
// forms are produced: a reference to the separately written
// <tag><name>Str and <tag><name>Idx values when the dict flag is non-empty
// and contains the tag, an empty literal when the header has no data, and
// the data string followed by the index slice otherwise.
494 func (h *header) writeEntry(w *gen.CodeWriter, name string) {
495 if len(dict) > 0 && dict.contains(h.tag) {
496 fmt.Fprintf(w, "\t{ // %s\n", h.tag)
497 fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
498 fmt.Fprintln(w, "\t},")
499 } else if len(h.data) == 0 {
500 fmt.Fprintln(w, "\t\t{}, //", h.tag)
502 fmt.Fprintf(w, "\t{ // %s\n", h.tag)
503 w.WriteString(h.data)
505 w.WriteSlice(h.index)
506 fmt.Fprintln(w, ",\n\t},")
510 // write the data for the given header as single entries. The size for this data
511 // was already accounted for in writeEntry.
512 func (h *header) writeSingle(w *gen.CodeWriter, name string) {
513 if len(dict) > 0 && dict.contains(h.tag) {
514 tag := identifier(h.tag)
515 w.WriteConst(tag+name+"Str", h.data)
517 // Note that we create a slice instead of an array. If we use an array
518 // we need to refer to it as a[:] in other tables, which will cause the
519 // array to always be included by the linker. See Issue 7651.
520 w.WriteVar(tag+name+"Idx", h.index)
524 // writeTable writes an entry for a single Namer.
// It writes the key index of the group, the <name>Headers array with one
// entry per header, the separately stored entries of writeSingle, and a
// trailing comment with the total size.
525 func (g *group) writeTable(w *gen.CodeWriter, name string) {
527 writeKeys(w, name, g.toTags)
// Account for the size of the header values themselves.
// NOTE(review): g.headers[0] panics when the group has no headers - confirm
// that callers always provide at least one.
528 w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())
530 fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
// The capitalized group name is used in the generated identifiers.
// NOTE(review): strings.Title is deprecated as of Go 1.18.
532 title := strings.Title(name)
533 for _, h := range g.headers {
534 h.writeEntry(w, title)
536 fmt.Fprintln(w, "}\n")
538 for _, h := range g.headers {
539 h.writeSingle(w, title)
// The KB figure divides the byte count by 1000.
542 fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
// writeDictionaries writes a var block with one Dictionary value for every
// supported tag contained in the dict flag, followed by a comment estimating
// the total size of these values.
545 func (b *builder) writeDictionaries() {
546 fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
547 fmt.Fprintln(b.w, "var (")
548 parents := parentIndices(b.supported)
550 for i, t := range b.supported {
551 if dict.contains(t) {
552 ident := identifier(t)
553 fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
// The first field is the parent dictionary: nil when the tag has no parent
// among the supported tags, a pointer to the parent's variable otherwise.
// NOTE(review): the parent's variable is only declared when the parent is
// itself contained in dict - confirm that this always holds.
554 if p := parents[i]; p == -1 {
555 fmt.Fprintln(b.w, "\t\tnil,")
557 fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
// The remaining fields refer to the Str and Idx values of the lang, script
// and region groups for this tag.
559 fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
560 fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
561 fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
562 fmt.Fprintln(b.w, "\t}")
565 fmt.Fprintln(b.w, ")")
// Size estimate per entry, computed with reflect from the sizes of s, a and
// a pointer to a, then multiplied by the number of tags in dict.
569 sz := reflect.TypeOf(s).Size()
570 sz += reflect.TypeOf(a).Size()
572 sz += reflect.TypeOf(&a).Size()
573 n := int(sz) * len(dict)
574 fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
579 // unique sorts the given lists and removes duplicate entries by swapping them
580 // past position k, where k is the number of unique values. It returns k.
581 func unique(a sort.Interface) int {
587 for i := 1; i < a.Len(); i++ {