OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / unicode / cldr / makexml.go
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // +build ignore
6
7 // This tool generates types for the various XML formats of CLDR.
8 package main
9
10 import (
11         "archive/zip"
12         "bytes"
13         "encoding/xml"
14         "flag"
15         "fmt"
16         "io"
17         "io/ioutil"
18         "log"
19         "os"
20         "regexp"
21         "strings"
22
23         "golang.org/x/text/internal/gen"
24 )
25
26 var outputFile = flag.String("output", "xml.go", "output file name")
27
28 func main() {
29         flag.Parse()
30
31         r := gen.OpenCLDRCoreZip()
32         buffer, err := ioutil.ReadAll(r)
33         if err != nil {
34                 log.Fatal("Could not read zip file")
35         }
36         r.Close()
37         z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
38         if err != nil {
39                 log.Fatalf("Could not read zip archive: %v", err)
40         }
41
42         var buf bytes.Buffer
43
44         version := gen.CLDRVersion()
45
46         for _, dtd := range files {
47                 for _, f := range z.File {
48                         if strings.HasSuffix(f.Name, dtd.file+".dtd") {
49                                 r, err := f.Open()
50                                 failOnError(err)
51
52                                 b := makeBuilder(&buf, dtd)
53                                 b.parseDTD(r)
54                                 b.resolve(b.index[dtd.top[0]])
55                                 b.write()
56                                 if b.version != "" && version != b.version {
57                                         println(f.Name)
58                                         log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
59                                 }
60                                 break
61                         }
62                 }
63         }
64         fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
65         fmt.Fprintf(&buf, "const Version = %q\n", version)
66
67         gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
68 }
69
// failOnError is a no-op for a nil error. Otherwise it logs the error text
// to standard error, tagged with the file and line of failOnError's caller,
// and terminates the program with exit status 1.
func failOnError(err error) {
	if err == nil {
		return
	}
	// Call depth 2 attributes the log line to whoever called failOnError.
	stderr := log.New(os.Stderr, "", log.Lshortfile)
	stderr.Output(2, err.Error())
	os.Exit(1)
}
76
// dtd is the per-DTD configuration that steers parsing and code generation.
type dtd struct {
	// file is the base file name; main processes the archive entry whose
	// name ends in file+".dtd".
	file string

	// root is the Go type name given to the first entry of top.
	root string

	// top lists the elements that are emitted as separate named types.
	// The first entry is the root element that resolution starts from.
	top []string

	// skipElem lists hard-coded or deprecated elements to leave out.
	skipElem []string

	// skipAttr lists attributes to exclude from the generated structs.
	skipAttr []string

	// predefined lists elements for which a hand-written <name>Elem type
	// exists; it is embedded instead of generating a field.
	predefined []string

	// forceRepeat lists elements that become slices regardless of the DTD.
	forceRepeat []string
}
88
89 var files = []dtd{
90         {
91                 file: "ldmlBCP47",
92                 root: "LDMLBCP47",
93                 top:  []string{"ldmlBCP47"},
94                 skipElem: []string{
95                         "cldrVersion", // deprecated, not used
96                 },
97         },
98         {
99                 file: "ldmlSupplemental",
100                 root: "SupplementalData",
101                 top:  []string{"supplementalData"},
102                 skipElem: []string{
103                         "cldrVersion", // deprecated, not used
104                 },
105                 forceRepeat: []string{
106                         "plurals", // data defined in plurals.xml and ordinals.xml
107                 },
108         },
109         {
110                 file: "ldml",
111                 root: "LDML",
112                 top: []string{
113                         "ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
114                 },
115                 skipElem: []string{
116                         "cp",       // not used anywhere
117                         "special",  // not used anywhere
118                         "fallback", // deprecated, not used
119                         "alias",    // in Common
120                         "default",  // in Common
121                 },
122                 skipAttr: []string{
123                         "hiraganaQuarternary", // typo in DTD, correct version included as well
124                 },
125                 predefined: []string{"rules"},
126         },
127 }
128
// comments maps an XML element name to the doc comment that write emits
// verbatim in front of the generated Go type for that element. Each value
// starts and ends with a newline so it can be written as-is.
var comments = map[string]string{
	"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
	"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
	"ldml": `
// LDML is the top-level type for locale-specific data.
`,
	"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order.
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
	"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
	"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
	"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,
	"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}
163
164 type element struct {
165         name      string // XML element name
166         category  string // elements contained by this element
167         signature string // category + attrKey*
168
169         attr []*attribute // attributes supported by this element.
170         sub  []struct {   // parsed and evaluated sub elements of this element.
171                 e      *element
172                 repeat bool // true if the element needs to be a slice
173         }
174
175         resolved bool // prevent multiple resolutions of this element.
176 }
177
// attribute describes one attribute taken from an ATTLIST declaration.
type attribute struct {
	// name is the XML attribute name.
	name string

	// key is the ATTLIST text that follows the directive keyword and the
	// element name.
	key string

	// list holds the tokens extracted from an enumerated value list.
	list []string

	// tag is the Go struct tag.
	tag string
}
185
// Regular expressions used to pick DTD directives apart.
var (
	// reHead captures the directive keyword (group 1) and the name that
	// follows it (group 2).
	reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)

	// reAttr matches the remainder of an ATTLIST directive: attribute name
	// (1), type keyword (2) or enumerated value list (3), the keyword after
	// '#' (4), a quoted value (5) and a trailing quoted default (6).
	//
	// NOTE(review): group 5 is `[\.\d+]`, a single character, so values
	// longer than one character are only captured by group 6. This looks
	// unintended, but it is left as is because parseDTD derives the version
	// from group 5 and main compares it; confirm before changing.
	reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)

	// reElem matches the content specification of an ELEMENT directive.
	reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)

	// reToken matches one token (a run of word characters and hyphens) of
	// an enumerated attribute value list.
	reToken = regexp.MustCompile(`[\w\-]+`)
)
192
193 // builder is used to read in the DTD files from CLDR and generate Go code
194 // to be used with the encoding/xml package.
195 type builder struct {
196         w       io.Writer
197         index   map[string]*element
198         elem    []*element
199         info    dtd
200         version string
201 }
202
203 func makeBuilder(w io.Writer, d dtd) builder {
204         return builder{
205                 w:     w,
206                 index: make(map[string]*element),
207                 elem:  []*element{},
208                 info:  d,
209         }
210 }
211
212 // parseDTD parses a DTD file.
213 func (b *builder) parseDTD(r io.Reader) {
214         for d := xml.NewDecoder(r); ; {
215                 t, err := d.Token()
216                 if t == nil {
217                         break
218                 }
219                 failOnError(err)
220                 dir, ok := t.(xml.Directive)
221                 if !ok {
222                         continue
223                 }
224                 m := reHead.FindSubmatch(dir)
225                 dir = dir[len(m[0]):]
226                 ename := string(m[2])
227                 el, elementFound := b.index[ename]
228                 switch string(m[1]) {
229                 case "ELEMENT":
230                         if elementFound {
231                                 log.Fatal("parseDTD: duplicate entry for element %q", ename)
232                         }
233                         m := reElem.FindSubmatch(dir)
234                         if m == nil {
235                                 log.Fatalf("parseDTD: invalid element %q", string(dir))
236                         }
237                         if len(m[0]) != len(dir) {
238                                 log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
239                         }
240                         s := string(m[1])
241                         el = &element{
242                                 name:     ename,
243                                 category: s,
244                         }
245                         b.index[ename] = el
246                 case "ATTLIST":
247                         if !elementFound {
248                                 log.Fatalf("parseDTD: unknown element %q", ename)
249                         }
250                         s := string(dir)
251                         m := reAttr.FindStringSubmatch(s)
252                         if m == nil {
253                                 log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
254                         }
255                         if m[4] == "FIXED" {
256                                 b.version = m[5]
257                         } else {
258                                 switch m[1] {
259                                 case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
260                                 case "type", "choice":
261                                 default:
262                                         el.attr = append(el.attr, &attribute{
263                                                 name: m[1],
264                                                 key:  s,
265                                                 list: reToken.FindAllString(m[3], -1),
266                                         })
267                                         el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
268                                 }
269                         }
270                 }
271         }
272 }
273
274 var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
275
276 // resolve takes a parsed element and converts it into structured data
277 // that can be used to generate the XML code.
278 func (b *builder) resolve(e *element) {
279         if e.resolved {
280                 return
281         }
282         b.elem = append(b.elem, e)
283         e.resolved = true
284         s := e.category
285         found := make(map[string]bool)
286         sequenceStart := []int{}
287         for len(s) > 0 {
288                 m := reCat.FindStringSubmatch(s)
289                 if m == nil {
290                         log.Fatalf("%s: invalid category string %q", e.name, s)
291                 }
292                 repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
293                 switch m[1] {
294                 case "":
295                 case "(":
296                         sequenceStart = append(sequenceStart, len(e.sub))
297                 case ")":
298                         if len(sequenceStart) == 0 {
299                                 log.Fatalf("%s: unmatched closing parenthesis", e.name)
300                         }
301                         for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
302                                 e.sub[i].repeat = e.sub[i].repeat || repeat
303                         }
304                         sequenceStart = sequenceStart[:len(sequenceStart)-1]
305                 default:
306                         if in(b.info.skipElem, m[1]) {
307                         } else if sub, ok := b.index[m[1]]; ok {
308                                 if !found[sub.name] {
309                                         e.sub = append(e.sub, struct {
310                                                 e      *element
311                                                 repeat bool
312                                         }{sub, repeat})
313                                         found[sub.name] = true
314                                         b.resolve(sub)
315                                 }
316                         } else if m[1] == "#PCDATA" || m[1] == "ANY" {
317                         } else if m[1] != "EMPTY" {
318                                 log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
319                         }
320                 }
321                 s = s[len(m[0]):]
322         }
323 }
324
// in reports whether s is an element of set.
func in(set []string, s string) bool {
	for i := 0; i < len(set); i++ {
		if set[i] == s {
			return true
		}
	}
	return false
}
334
// repl turns the separators '-' and '_' into spaces.
var repl = strings.NewReplacer("-", " ", "_", " ")

// title converts an XML name into a Go identifier: '-' and '_' are treated
// as word separators, the first letter of each word is put in title case,
// and the separators are dropped.
func title(s string) string {
	spaced := repl.Replace(s)
	words := strings.Split(strings.Title(spaced), " ")
	return strings.Join(words, "")
}
342
343 // writeElem generates Go code for a single element, recursively.
344 func (b *builder) writeElem(tab int, e *element) {
345         p := func(f string, x ...interface{}) {
346                 f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
347                 fmt.Fprintf(b.w, f, x...)
348         }
349         if len(e.sub) == 0 && len(e.attr) == 0 {
350                 p("Common")
351                 return
352         }
353         p("struct {")
354         tab++
355         p("\nCommon")
356         for _, attr := range e.attr {
357                 if !in(b.info.skipAttr, attr.name) {
358                         p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
359                 }
360         }
361         for _, sub := range e.sub {
362                 if in(b.info.predefined, sub.e.name) {
363                         p("\n%sElem", sub.e.name)
364                         continue
365                 }
366                 if in(b.info.skipElem, sub.e.name) {
367                         continue
368                 }
369                 p("\n%s ", title(sub.e.name))
370                 if sub.repeat {
371                         p("[]")
372                 }
373                 p("*")
374                 if in(b.info.top, sub.e.name) {
375                         p(title(sub.e.name))
376                 } else {
377                         b.writeElem(tab, sub.e)
378                 }
379                 p(" `xml:\"%s\"`", sub.e.name)
380         }
381         tab--
382         p("\n}")
383 }
384
385 // write generates the Go XML code.
386 func (b *builder) write() {
387         for i, name := range b.info.top {
388                 e := b.index[name]
389                 if e != nil {
390                         fmt.Fprintf(b.w, comments[name])
391                         name := title(e.name)
392                         if i == 0 {
393                                 name = b.info.root
394                         }
395                         fmt.Fprintf(b.w, "type %s ", name)
396                         b.writeElem(0, e)
397                         fmt.Fprint(b.w, "\n")
398                 }
399         }
400 }