OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / internal / number / gen.go
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // +build ignore
6
7 package main
8
9 import (
10         "flag"
11         "fmt"
12         "log"
13         "reflect"
14         "strings"
15         "unicode/utf8"
16
17         "golang.org/x/text/internal"
18         "golang.org/x/text/internal/gen"
19         "golang.org/x/text/internal/number"
20         "golang.org/x/text/internal/stringset"
21         "golang.org/x/text/language"
22         "golang.org/x/text/unicode/cldr"
23 )
24
// Command-line flags of the generator.
var (
	// test is declared for comparing existing tables with fresh data; it is
	// not read anywhere in the visible part of this file.
	test = flag.Bool(
		"test",
		false,
		"test existing tables; can be used to compare web data with package data.",
	)

	// outputFile names the generated Go file written by main.
	outputFile = flag.String("output", "tables.go", "output file")
	// outputTestFile names the generated test data file.
	outputTestFile = flag.String("testoutput", "data_test.go", "output file")

	// draft is the minimum CLDR draft level; it is parsed with
	// cldr.ParseDraft by the generator functions below.
	draft = flag.String(
		"draft",
		"contributed",
		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`,
	)
)
35
36 func main() {
37         gen.Init()
38
39         const pkg = "number"
40
41         gen.Repackage("gen_common.go", "common.go", pkg)
42         // Read the CLDR zip file.
43         r := gen.OpenCLDRCoreZip()
44         defer r.Close()
45
46         d := &cldr.Decoder{}
47         d.SetDirFilter("supplemental", "main")
48         d.SetSectionFilter("numbers", "numberingSystem")
49         data, err := d.DecodeZip(r)
50         if err != nil {
51                 log.Fatalf("DecodeZip: %v", err)
52         }
53
54         w := gen.NewCodeWriter()
55         defer w.WriteGoFile(*outputFile, pkg)
56
57         fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
58
59         gen.WriteCLDRVersion(w)
60
61         genNumSystem(w, data)
62         genSymbols(w, data)
63         genFormats(w, data)
64 }
65
66 var systemMap = map[string]system{"latn": 0}
67
68 func getNumberSystem(str string) system {
69         ns, ok := systemMap[str]
70         if !ok {
71                 log.Fatalf("No index for numbering system %q", str)
72         }
73         return ns
74 }
75
76 func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
77         numSysData := []systemData{
78                 {digitSize: 1, zero: [4]byte{'0'}},
79         }
80
81         for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
82                 if len(ns.Digits) == 0 {
83                         continue
84                 }
85                 switch ns.Id {
86                 case "latn":
87                         // hard-wired
88                         continue
89                 case "hanidec":
90                         // non-consecutive digits: treat as "algorithmic"
91                         continue
92                 }
93
94                 zero, sz := utf8.DecodeRuneInString(ns.Digits)
95                 if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
96                         log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
97                 }
98
99                 i := rune(0)
100                 for _, r := range ns.Digits {
101                         // Verify that we can do simple math on the UTF-8 byte sequence
102                         // of zero to get the digit.
103                         if zero+i != r {
104                                 // Runes not consecutive.
105                                 log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
106                         }
107                         i++
108                 }
109                 var x [utf8.UTFMax]byte
110                 utf8.EncodeRune(x[:], zero)
111                 id := system(len(numSysData))
112                 systemMap[ns.Id] = id
113                 numSysData = append(numSysData, systemData{
114                         id:        id,
115                         digitSize: byte(sz),
116                         zero:      x,
117                 })
118         }
119         w.WriteVar("numSysData", numSysData)
120
121         algoID := system(len(numSysData))
122         fmt.Fprintln(w, "const (")
123         for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
124                 id, ok := systemMap[ns.Id]
125                 if !ok {
126                         id = algoID
127                         systemMap[ns.Id] = id
128                         algoID++
129                 }
130                 fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
131         }
132         fmt.Fprintln(w, "numNumberSystems")
133         fmt.Fprintln(w, ")")
134
135         fmt.Fprintln(w, "var systemMap = map[string]system{")
136         for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
137                 fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
138                 w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
139         }
140         fmt.Fprintln(w, "}")
141 }
142
143 func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
144         d, err := cldr.ParseDraft(*draft)
145         if err != nil {
146                 log.Fatalf("invalid draft level: %v", err)
147         }
148
149         nNumberSystems := system(len(systemMap))
150
151         type symbols [NumSymbolTypes]string
152
153         type key struct {
154                 tag    int // from language.CompactIndex
155                 system system
156         }
157         symbolMap := map[key]*symbols{}
158
159         defaults := map[int]system{}
160
161         for _, lang := range data.Locales() {
162                 ldml := data.RawLDML(lang)
163                 if ldml.Numbers == nil {
164                         continue
165                 }
166                 langIndex, ok := language.CompactIndex(language.MustParse(lang))
167                 if !ok {
168                         log.Fatalf("No compact index for language %s", lang)
169                 }
170                 if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
171                         defaults[langIndex] = getNumberSystem(d[0].Data())
172                 }
173
174                 syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
175                 syms.SelectDraft(d)
176
177                 getFirst := func(name string, x interface{}) string {
178                         v := reflect.ValueOf(x)
179                         slice := cldr.MakeSlice(x)
180                         slice.SelectAnyOf("alt", "", "alt")
181                         if reflect.Indirect(v).Len() == 0 {
182                                 return ""
183                         } else if reflect.Indirect(v).Len() > 1 {
184                                 log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
185                         }
186                         return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
187                 }
188
189                 for _, sym := range ldml.Numbers.Symbols {
190                         if sym.NumberSystem == "" {
191                                 // This is just linking the default of root to "latn".
192                                 continue
193                         }
194                         symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
195                                 SymDecimal:                getFirst("decimal", &sym.Decimal),
196                                 SymGroup:                  getFirst("group", &sym.Group),
197                                 SymList:                   getFirst("list", &sym.List),
198                                 SymPercentSign:            getFirst("percentSign", &sym.PercentSign),
199                                 SymPlusSign:               getFirst("plusSign", &sym.PlusSign),
200                                 SymMinusSign:              getFirst("minusSign", &sym.MinusSign),
201                                 SymExponential:            getFirst("exponential", &sym.Exponential),
202                                 SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
203                                 SymPerMille:               getFirst("perMille", &sym.PerMille),
204                                 SymInfinity:               getFirst("infinity", &sym.Infinity),
205                                 SymNan:                    getFirst("nan", &sym.Nan),
206                                 SymTimeSeparator:          getFirst("timeSeparator", &sym.TimeSeparator),
207                         }
208                 }
209         }
210
211         // Expand all values.
212         for k, syms := range symbolMap {
213                 for t := SymDecimal; t < NumSymbolTypes; t++ {
214                         p := k.tag
215                         for syms[t] == "" {
216                                 p = int(internal.Parent[p])
217                                 if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
218                                         syms[t] = (*pSyms)[t]
219                                         break
220                                 }
221                                 if p == 0 /* und */ {
222                                         // Default to root, latn.
223                                         syms[t] = (*symbolMap[key{}])[t]
224                                 }
225                         }
226                 }
227         }
228
229         // Unique the symbol sets and write the string data.
230         m := map[symbols]int{}
231         sb := stringset.NewBuilder()
232
233         symIndex := [][NumSymbolTypes]byte{}
234
235         for ns := system(0); ns < nNumberSystems; ns++ {
236                 for _, l := range data.Locales() {
237                         langIndex, _ := language.CompactIndex(language.MustParse(l))
238                         s := symbolMap[key{langIndex, ns}]
239                         if s == nil {
240                                 continue
241                         }
242                         if _, ok := m[*s]; !ok {
243                                 m[*s] = len(symIndex)
244                                 sb.Add(s[:]...)
245                                 var x [NumSymbolTypes]byte
246                                 for i := SymDecimal; i < NumSymbolTypes; i++ {
247                                         x[i] = byte(sb.Index((*s)[i]))
248                                 }
249                                 symIndex = append(symIndex, x)
250                         }
251                 }
252         }
253         w.WriteVar("symIndex", symIndex)
254         w.WriteVar("symData", sb.Set())
255
256         // resolveSymbolIndex gets the index from the closest matching locale,
257         // including the locale itself.
258         resolveSymbolIndex := func(langIndex int, ns system) byte {
259                 for {
260                         if sym := symbolMap[key{langIndex, ns}]; sym != nil {
261                                 return byte(m[*sym])
262                         }
263                         if langIndex == 0 {
264                                 return 0 // und, latn
265                         }
266                         langIndex = int(internal.Parent[langIndex])
267                 }
268         }
269
270         // Create an index with the symbols for each locale for the latn numbering
271         // system. If this is not the default, or the only one, for a locale, we
272         // will overwrite the value later.
273         var langToDefaults [language.NumCompactTags]byte
274         for _, l := range data.Locales() {
275                 langIndex, _ := language.CompactIndex(language.MustParse(l))
276                 langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
277         }
278
279         // Delete redundant entries.
280         for _, l := range data.Locales() {
281                 langIndex, _ := language.CompactIndex(language.MustParse(l))
282                 def := defaults[langIndex]
283                 syms := symbolMap[key{langIndex, def}]
284                 if syms == nil {
285                         continue
286                 }
287                 for ns := system(0); ns < nNumberSystems; ns++ {
288                         if ns == def {
289                                 continue
290                         }
291                         if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
292                                 delete(symbolMap, key{langIndex, ns})
293                         }
294                 }
295         }
296
297         // Create a sorted list of alternatives per language. This will only need to
298         // be referenced if a user specified an alternative numbering system.
299         var langToAlt []altSymData
300         for _, l := range data.Locales() {
301                 langIndex, _ := language.CompactIndex(language.MustParse(l))
302                 start := len(langToAlt)
303                 if start > 0x7F {
304                         log.Fatal("Number of alternative assignments > 0x7F")
305                 }
306                 // Create the entry for the default value.
307                 def := defaults[langIndex]
308                 langToAlt = append(langToAlt, altSymData{
309                         compactTag: uint16(langIndex),
310                         system:     def,
311                         symIndex:   resolveSymbolIndex(langIndex, def),
312                 })
313
314                 for ns := system(0); ns < nNumberSystems; ns++ {
315                         if def == ns {
316                                 continue
317                         }
318                         if sym := symbolMap[key{langIndex, ns}]; sym != nil {
319                                 langToAlt = append(langToAlt, altSymData{
320                                         compactTag: uint16(langIndex),
321                                         system:     ns,
322                                         symIndex:   resolveSymbolIndex(langIndex, ns),
323                                 })
324                         }
325                 }
326                 if def == 0 && len(langToAlt) == start+1 {
327                         // No additional data: erase the entry.
328                         langToAlt = langToAlt[:start]
329                 } else {
330                         // Overwrite the entry in langToDefaults.
331                         langToDefaults[langIndex] = 0x80 | byte(start)
332                 }
333         }
334         w.WriteComment(`
335 langToDefaults maps a compact language index to the default numbering system
336 and default symbol set`)
337         w.WriteVar("langToDefaults", langToDefaults)
338
339         w.WriteComment(`
340 langToAlt is a list of numbering system and symbol set pairs, sorted and
341 marked by compact language index.`)
342         w.WriteVar("langToAlt", langToAlt)
343 }
344
345 // genFormats generates the lookup table for decimal, scientific and percent
346 // patterns.
347 //
348 // CLDR allows for patterns to be different per language for different numbering
349 // systems. In practice the patterns are set to be consistent for a language
350 // independent of the numbering system. genFormats verifies that no language
351 // deviates from this.
352 func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
353         d, err := cldr.ParseDraft(*draft)
354         if err != nil {
355                 log.Fatalf("invalid draft level: %v", err)
356         }
357
358         // Fill the first slot with a dummy so we can identify unspecified tags.
359         formats := []number.Pattern{{}}
360         patterns := map[string]int{}
361
362         // TODO: It would be possible to eliminate two of these slices by having
363         // another indirection and store a reference to the combination of patterns.
364         decimal := make([]byte, language.NumCompactTags)
365         scientific := make([]byte, language.NumCompactTags)
366         percent := make([]byte, language.NumCompactTags)
367
368         for _, lang := range data.Locales() {
369                 ldml := data.RawLDML(lang)
370                 if ldml.Numbers == nil {
371                         continue
372                 }
373                 langIndex, ok := language.CompactIndex(language.MustParse(lang))
374                 if !ok {
375                         log.Fatalf("No compact index for language %s", lang)
376                 }
377                 type patternSlice []*struct {
378                         cldr.Common
379                         Numbers string `xml:"numbers,attr"`
380                         Count   string `xml:"count,attr"`
381                 }
382
383                 add := func(name string, tags []byte, ps patternSlice) {
384                         sl := cldr.MakeSlice(&ps)
385                         sl.SelectDraft(d)
386                         if len(ps) == 0 {
387                                 return
388                         }
389                         if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
390                                 log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
391                         }
392                         s := ps[0].Data()
393
394                         index, ok := patterns[s]
395                         if !ok {
396                                 nf, err := number.ParsePattern(s)
397                                 if err != nil {
398                                         log.Fatal(err)
399                                 }
400                                 index = len(formats)
401                                 patterns[s] = index
402                                 formats = append(formats, *nf)
403                         }
404                         tags[langIndex] = byte(index)
405                 }
406
407                 for _, df := range ldml.Numbers.DecimalFormats {
408                         for _, l := range df.DecimalFormatLength {
409                                 if l.Type != "" {
410                                         continue
411                                 }
412                                 for _, f := range l.DecimalFormat {
413                                         add("decimal", decimal, f.Pattern)
414                                 }
415                         }
416                 }
417                 for _, df := range ldml.Numbers.ScientificFormats {
418                         for _, l := range df.ScientificFormatLength {
419                                 if l.Type != "" {
420                                         continue
421                                 }
422                                 for _, f := range l.ScientificFormat {
423                                         add("scientific", scientific, f.Pattern)
424                                 }
425                         }
426                 }
427                 for _, df := range ldml.Numbers.PercentFormats {
428                         for _, l := range df.PercentFormatLength {
429                                 if l.Type != "" {
430                                         continue
431                                 }
432                                 for _, f := range l.PercentFormat {
433                                         add("percent", percent, f.Pattern)
434                                 }
435                         }
436                 }
437         }
438
439         // Complete the parent tag array to reflect inheritance. An index of 0
440         // indicates an unspecified value.
441         for _, data := range [][]byte{decimal, scientific, percent} {
442                 for i := range data {
443                         p := uint16(i)
444                         for ; data[p] == 0; p = internal.Parent[p] {
445                         }
446                         data[i] = data[p]
447                 }
448         }
449         w.WriteVar("tagToDecimal", decimal)
450         w.WriteVar("tagToScientific", scientific)
451         w.WriteVar("tagToPercent", percent)
452
453         value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
454         // Break up the lines. This won't give ideal perfect formatting, but it is
455         // better than one huge line.
456         value = strings.Replace(value, ", ", ",\n", -1)
457         fmt.Fprintf(w, "var formats = %s\n", value)
458 }