1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
17 "golang.org/x/text/internal"
18 "golang.org/x/text/internal/gen"
19 "golang.org/x/text/internal/number"
20 "golang.org/x/text/internal/stringset"
21 "golang.org/x/text/language"
22 "golang.org/x/text/unicode/cldr"
// Command-line flags of the generator. The enclosing declaration block is only
// partially visible in this excerpt.
//
// test is a boolean flag that defaults to false.
26 test = flag.Bool("test", false,
27 "test existing tables; can be used to compare web data with package data.")
// outputFile and outputTestFile hold output file names; they default to
// tables.go and data_test.go respectively.
28 outputFile = flag.String("output", "tables.go", "output file")
29 outputTestFile = flag.String("testoutput", "data_test.go", "output file")
// draft is handed to cldr.ParseDraft by genSymbols and genFormats.
31 draft = flag.String("draft",
33 `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
// Fragment of the generator's entry point. The function header and several
// statements (including the creation of the decoder d) are not visible in
// this excerpt.
41 gen.Repackage("gen_common.go", "common.go", pkg)
42 // Read the CLDR zip file.
43 r := gen.OpenCLDRCoreZip()
// Restrict decoding to the "supplemental" and "main" directories and to the
// "numbers" and "numberingSystem" sections.
47 d.SetDirFilter("supplemental", "main")
48 d.SetSectionFilter("numbers", "numberingSystem")
49 data, err := d.DecodeZip(r)
// A decoding failure is reported through log.Fatalf.
51 log.Fatalf("DecodeZip: %v", err)
// WriteGoFile is deferred, so it runs when the enclosing function returns; it
// receives the file name given by the -output flag.
54 w := gen.NewCodeWriter()
55 defer w.WriteGoFile(*outputFile, pkg)
// Emit the stringset import line into the generated output.
57 fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
59 gen.WriteCLDRVersion(w)
// systemMap maps a numbering system identifier to its system index. It starts
// out with only "latn" at index 0; genNumSystem stores an index for each
// numbering system it processes.
66 var systemMap = map[string]system{"latn": 0}
// getNumberSystem looks up the system index that systemMap holds for the
// numbering system identifier str.
68 func getNumberSystem(str string) system {
69 ns, ok := systemMap[str]
// NOTE(review): the guard around this call and the return statement are not
// visible in this excerpt; presumably the fatal error fires only when ok is
// false and ns is returned otherwise - confirm against the full file.
71 log.Fatalf("No index for numbering system %q", str)
// genNumSystem walks the numbering systems of the CLDR supplemental data,
// records a system index for each of them in systemMap, and writes to w the
// numSysData table, a const block of num<Id> names ending in
// numNumberSystems, and the source of a systemMap literal.
//
// Several lines of this function are not visible in this excerpt.
76 func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
// The first entry describes single-byte digits whose zero is '0'; its index, 0,
// is the value systemMap is seeded with for "latn".
77 numSysData := []systemData{
78 {digitSize: 1, zero: [4]byte{'0'}},
81 for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
82 if len(ns.Digits) == 0 {
90 // non-consecutive digits: treat as "algorithmic"
// The first rune of the digit string is taken as the zero digit; sz is its
// encoded length in bytes.
94 zero, sz := utf8.DecodeRuneInString(ns.Digits)
// Adding 9 to the last byte of zero must stay within the continuation byte
// range, otherwise generation is aborted.
95 if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
96 log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
100 for _, r := range ns.Digits {
101 // Verify that we can do simple math on the UTF-8 byte sequence
102 // of zero to get the digit.
104 // Runes not consecutive.
105 log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
// Encode zero into a fixed-size buffer and register the system under the
// index of the numSysData entry that is about to be appended.
109 var x [utf8.UTFMax]byte
110 utf8.EncodeRune(x[:], zero)
111 id := system(len(numSysData))
112 systemMap[ns.Id] = id
113 numSysData = append(numSysData, systemData{
119 w.WriteVar("numSysData", numSysData)
// algoID starts at the first index past the numSysData entries.
// NOTE(review): the lines that consume algoID are not visible here;
// presumably it numbers the systems that received no numSysData entry -
// confirm.
121 algoID := system(len(numSysData))
// Emit one num<Id> constant per numbering system, in hexadecimal.
122 fmt.Fprintln(w, "const (")
123 for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
124 id, ok := systemMap[ns.Id]
127 systemMap[ns.Id] = id
130 fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
// numNumberSystems is emitted after all num<Id> names.
132 fmt.Fprintln(w, "numNumberSystems")
// Emit the source of a systemMap literal that maps every identifier to its
// num<Id> constant, and add a rough size estimate per entry to w.Size.
135 fmt.Fprintln(w, "var systemMap = map[string]system{")
136 for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
137 fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
138 w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
// genSymbols gathers the number symbols of every locale per numbering system,
// fills unset symbols from parent locales, and writes the symIndex, symData,
// langToDefaults and langToAlt tables to w.
//
// Many lines of this function are not visible in this excerpt, so the comments
// below describe only what the visible statements show.
143 func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
// The -draft flag value must parse as a CLDR draft level.
144 d, err := cldr.ParseDraft(*draft)
146 log.Fatalf("invalid draft level: %v", err)
// Number of numbering systems currently registered in systemMap.
149 nNumberSystems := system(len(systemMap))
// symbols holds one string per symbol type.
151 type symbols [NumSymbolTypes]string
154 tag int // from language.CompactIndex
// symbolMap holds the symbol set per key (compact language index plus
// numbering system); defaults holds the default numbering system per compact
// language index.
157 symbolMap := map[key]*symbols{}
159 defaults := map[int]system{}
161 for _, lang := range data.Locales() {
162 ldml := data.RawLDML(lang)
163 if ldml.Numbers == nil {
166 langIndex, ok := language.CompactIndex(language.MustParse(lang))
168 log.Fatalf("No compact index for language %s", lang)
// Record the locale's default numbering system when one is specified.
170 if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
171 defaults[langIndex] = getNumberSystem(d[0].Data())
174 syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
// getFirst applies the "alt" selection to the slice x points to and returns
// the result of calling Data on its first element; more than one remaining
// element is fatal.
// NOTE(review): the body of the empty-slice branch is not visible here.
177 getFirst := func(name string, x interface{}) string {
178 v := reflect.ValueOf(x)
179 slice := cldr.MakeSlice(x)
180 slice.SelectAnyOf("alt", "", "alt")
181 if reflect.Indirect(v).Len() == 0 {
183 } else if reflect.Indirect(v).Len() > 1 {
184 log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
186 return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
// Store one symbol set per (language, numbering system) pair.
189 for _, sym := range ldml.Numbers.Symbols {
190 if sym.NumberSystem == "" {
191 // This is just linking the default of root to "latn".
194 symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
195 SymDecimal: getFirst("decimal", &sym.Decimal),
196 SymGroup: getFirst("group", &sym.Group),
197 SymList: getFirst("list", &sym.List),
198 SymPercentSign: getFirst("percentSign", &sym.PercentSign),
199 SymPlusSign: getFirst("plusSign", &sym.PlusSign),
200 SymMinusSign: getFirst("minusSign", &sym.MinusSign),
201 SymExponential: getFirst("exponential", &sym.Exponential),
202 SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
203 SymPerMille: getFirst("perMille", &sym.PerMille),
204 SymInfinity: getFirst("infinity", &sym.Infinity),
205 SymNan: getFirst("nan", &sym.Nan),
206 SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator),
211 // Expand all values.
212 for k, syms := range symbolMap {
213 for t := SymDecimal; t < NumSymbolTypes; t++ {
// Step to the parent locale and copy its symbol when the parent has a
// non-empty value for the same numbering system.
216 p = int(internal.Parent[p])
217 if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
218 syms[t] = (*pSyms)[t]
221 if p == 0 /* und */ {
222 // Default to root, latn.
223 syms[t] = (*symbolMap[key{}])[t]
229 // Unique the symbol sets and write the string data.
// m maps a symbol set to its position in symIndex; sb collects the symbol
// strings.
230 m := map[symbols]int{}
231 sb := stringset.NewBuilder()
233 symIndex := [][NumSymbolTypes]byte{}
235 for ns := system(0); ns < nNumberSystems; ns++ {
236 for _, l := range data.Locales() {
237 langIndex, _ := language.CompactIndex(language.MustParse(l))
238 s := symbolMap[key{langIndex, ns}]
// Only a symbol set that has not been seen before gets a new symIndex row.
242 if _, ok := m[*s]; !ok {
243 m[*s] = len(symIndex)
// Each row stores, per symbol type, the byte-sized index that sb reports for
// the symbol string.
245 var x [NumSymbolTypes]byte
246 for i := SymDecimal; i < NumSymbolTypes; i++ {
247 x[i] = byte(sb.Index((*s)[i]))
249 symIndex = append(symIndex, x)
253 w.WriteVar("symIndex", symIndex)
254 w.WriteVar("symData", sb.Set())
256 // resolveSymbolIndex gets the index from the closest matching locale,
257 // including the locale itself.
258 resolveSymbolIndex := func(langIndex int, ns system) byte {
260 if sym := symbolMap[key{langIndex, ns}]; sym != nil {
264 return 0 // und, latn
266 langIndex = int(internal.Parent[langIndex])
270 // Create an index with the symbols for each locale for the latn numbering
271 // system. If this is not the default, or the only one, for a locale, we
272 // will overwrite the value later.
273 var langToDefaults [language.NumCompactTags]byte
274 for _, l := range data.Locales() {
275 langIndex, _ := language.CompactIndex(language.MustParse(l))
276 langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
279 // Delete redundant entries.
280 for _, l := range data.Locales() {
281 langIndex, _ := language.CompactIndex(language.MustParse(l))
282 def := defaults[langIndex]
283 syms := symbolMap[key{langIndex, def}]
287 for ns := system(0); ns < nNumberSystems; ns++ {
// An alternative whose symbols equal those of the default system is dropped.
291 if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
292 delete(symbolMap, key{langIndex, ns})
297 // Create a sorted list of alternatives per language. This will only need to
298 // be referenced if a user specified an alternative numbering system.
299 var langToAlt []altSymData
300 for _, l := range data.Locales() {
301 langIndex, _ := language.CompactIndex(language.MustParse(l))
// start is the position in langToAlt at which this language's entries begin.
302 start := len(langToAlt)
304 log.Fatal("Number of alternative assignments > 0x7F")
306 // Create the entry for the default value.
307 def := defaults[langIndex]
308 langToAlt = append(langToAlt, altSymData{
309 compactTag: uint16(langIndex),
311 symIndex: resolveSymbolIndex(langIndex, def),
// Append one entry for every numbering system that still has symbols for
// this language.
314 for ns := system(0); ns < nNumberSystems; ns++ {
318 if sym := symbolMap[key{langIndex, ns}]; sym != nil {
319 langToAlt = append(langToAlt, altSymData{
320 compactTag: uint16(langIndex),
322 symIndex: resolveSymbolIndex(langIndex, ns),
326 if def == 0 && len(langToAlt) == start+1 {
327 // No additional data: erase the entry.
328 langToAlt = langToAlt[:start]
330 // Overwrite the entry in langToDefaults.
// The stored byte is start with bit 0x80 set.
331 langToDefaults[langIndex] = 0x80 | byte(start)
335 langToDefaults maps a compact language index to the default numbering system
336 and default symbol set`)
337 w.WriteVar("langToDefaults", langToDefaults)
340 langToAlt is a list of numbering system and symbol set pairs, sorted and
341 marked by compact language index.`)
342 w.WriteVar("langToAlt", langToAlt)
345 // genFormats generates the lookup table for decimal, scientific and percent
348 // CLDR allows for patterns to be different per language for different numbering
349 // systems. In practice the patterns are set to be consistent for a language
350 // independent of the numbering system. genFormats verifies that no language
351 // deviates from this.
//
// The tables are written to w as tagToDecimal, tagToScientific, tagToPercent
// and formats. Several lines of this function are not visible in this
// excerpt.
352 func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
// The -draft flag value must parse as a CLDR draft level.
353 d, err := cldr.ParseDraft(*draft)
355 log.Fatalf("invalid draft level: %v", err)
358 // Fill the first slot with a dummy so we can identify unspecified tags.
359 formats := []number.Pattern{{}}
// patterns maps a pattern string to an index.
360 patterns := map[string]int{}
362 // TODO: It would be possible to eliminate two of these slices by having
363 // another indirection and store a reference to the combination of patterns.
// One byte per compact language tag for each of the three pattern kinds.
364 decimal := make([]byte, language.NumCompactTags)
365 scientific := make([]byte, language.NumCompactTags)
366 percent := make([]byte, language.NumCompactTags)
368 for _, lang := range data.Locales() {
369 ldml := data.RawLDML(lang)
370 if ldml.Numbers == nil {
373 langIndex, ok := language.CompactIndex(language.MustParse(lang))
375 log.Fatalf("No compact index for language %s", lang)
377 type patternSlice []*struct {
379 Numbers string `xml:"numbers,attr"`
380 Count string `xml:"count,attr"`
// add records in tags[langIndex] the index of the pattern found in ps. name
// identifies the pattern kind ("decimal", "scientific" or "percent") in
// error messages.
383 add := func(name string, tags []byte, ps patternSlice) {
384 sl := cldr.MakeSlice(&ps)
// More than two patterns, or two differing ones, are rejected. name is a
// string, so it is formatted with %s.
389 if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
390 log.Fatalf("Inconsistent %s patterns for language %s", name, lang)
394 index, ok := patterns[s]
396 nf, err := number.ParsePattern(s)
402 formats = append(formats, *nf)
// The index is stored as a single byte per language tag.
404 tags[langIndex] = byte(index)
407 for _, df := range ldml.Numbers.DecimalFormats {
408 for _, l := range df.DecimalFormatLength {
412 for _, f := range l.DecimalFormat {
413 add("decimal", decimal, f.Pattern)
417 for _, df := range ldml.Numbers.ScientificFormats {
418 for _, l := range df.ScientificFormatLength {
422 for _, f := range l.ScientificFormat {
423 add("scientific", scientific, f.Pattern)
427 for _, df := range ldml.Numbers.PercentFormats {
428 for _, l := range df.PercentFormatLength {
432 for _, f := range l.PercentFormat {
433 add("percent", percent, f.Pattern)
439 // Complete the parent tag array to reflect inheritance. An index of 0
440 // indicates an unspecified value.
441 for _, data := range [][]byte{decimal, scientific, percent} {
442 for i := range data {
// Follow internal.Parent for as long as the entry at p is unspecified.
444 for ; data[p] == 0; p = internal.Parent[p] {
449 w.WriteVar("tagToDecimal", decimal)
450 w.WriteVar("tagToScientific", scientific)
451 w.WriteVar("tagToPercent", percent)
// Print formats in Go syntax and drop the "number." package qualifier.
453 value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
454 // Break up the lines. This won't give ideal formatting, but it is
455 // better than one huge line.
456 value = strings.Replace(value, ", ", ",\n", -1)
457 fmt.Fprintf(w, "var formats = %s\n", value)