OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / encoding / htmlindex / gen.go
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // +build ignore
6
7 package main
8
9 import (
10         "bytes"
11         "encoding/json"
12         "fmt"
13         "log"
14         "strings"
15
16         "golang.org/x/text/internal/gen"
17 )
18
// group is the element type of the slice into which main decodes
// encodings.json. Field names are matched against the JSON keys by
// encoding/json.
type group struct {
	// Encodings lists the encodings that make up the group.
	Encodings []struct {
		// Labels are written, one entry per label, as the keys of the
		// generated nameMap.
		Labels []string

		// Name is lower-cased by main and then used both as the entry in
		// the generated canonical table and as the key into consts.
		Name string
	}
}
25
26 func main() {
27         gen.Init()
28
29         r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
30         var groups []group
31         if err := json.NewDecoder(r).Decode(&groups); err != nil {
32                 log.Fatalf("Error reading encodings.json: %v", err)
33         }
34
35         w := &bytes.Buffer{}
36         fmt.Fprintln(w, "type htmlEncoding byte")
37         fmt.Fprintln(w, "const (")
38         for i, g := range groups {
39                 for _, e := range g.Encodings {
40                         key := strings.ToLower(e.Name)
41                         name := consts[key]
42                         if name == "" {
43                                 log.Fatalf("No const defined for %s.", key)
44                         }
45                         if i == 0 {
46                                 fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
47                         } else {
48                                 fmt.Fprintf(w, "%s\n", name)
49                         }
50                 }
51         }
52         fmt.Fprintln(w, "numEncodings")
53         fmt.Fprint(w, ")\n\n")
54
55         fmt.Fprintln(w, "var canonical = [numEncodings]string{")
56         for _, g := range groups {
57                 for _, e := range g.Encodings {
58                         fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
59                 }
60         }
61         fmt.Fprint(w, "}\n\n")
62
63         fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
64         for _, g := range groups {
65                 for _, e := range g.Encodings {
66                         for _, l := range e.Labels {
67                                 key := strings.ToLower(e.Name)
68                                 name := consts[key]
69                                 fmt.Fprintf(w, "%q: %s,\n", l, name)
70                         }
71                 }
72         }
73         fmt.Fprint(w, "}\n\n")
74
75         var tags []string
76         fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
77         for _, loc := range locales {
78                 tags = append(tags, loc.tag)
79                 fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
80         }
81         fmt.Fprint(w, "}\n\n")
82
83         fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
84
85         gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
86 }
87
// consts maps the lower-cased canonical name of an encoding to the identifier
// of the constant generated for it. main aborts when it meets a name that has
// no entry here.
var consts = map[string]string{
	"utf-8":          "utf8",
	"ibm866":         "ibm866",
	"iso-8859-2":     "iso8859_2",
	"iso-8859-3":     "iso8859_3",
	"iso-8859-4":     "iso8859_4",
	"iso-8859-5":     "iso8859_5",
	"iso-8859-6":     "iso8859_6",
	"iso-8859-7":     "iso8859_7",
	"iso-8859-8":     "iso8859_8",
	"iso-8859-8-i":   "iso8859_8I",
	"iso-8859-10":    "iso8859_10",
	"iso-8859-13":    "iso8859_13",
	"iso-8859-14":    "iso8859_14",
	"iso-8859-15":    "iso8859_15",
	"iso-8859-16":    "iso8859_16",
	"koi8-r":         "koi8r",
	"koi8-u":         "koi8u",
	"macintosh":      "macintosh",
	"windows-874":    "windows874",
	"windows-1250":   "windows1250",
	"windows-1251":   "windows1251",
	"windows-1252":   "windows1252",
	"windows-1253":   "windows1253",
	"windows-1254":   "windows1254",
	"windows-1255":   "windows1255",
	"windows-1256":   "windows1256",
	"windows-1257":   "windows1257",
	"windows-1258":   "windows1258",
	"x-mac-cyrillic": "macintoshCyrillic",
	"gbk":            "gbk",
	"gb18030":        "gb18030",
	// "hz-gb-2312":     "hzgb2312", // Was removed from WhatWG
	"big5":           "big5",
	"euc-jp":         "eucjp",
	"iso-2022-jp":    "iso2022jp",
	"shift_jis":      "shiftJIS",
	"euc-kr":         "euckr",
	"replacement":    "replacement",
	"utf-16be":       "utf16be",
	"utf-16le":       "utf16le",
	"x-user-defined": "xUserDefined",
}
132
// locales pairs a language tag with the canonical name of the encoding that
// main records for it in the generated localeMap. The order of the entries is
// the order of both localeMap and the generated locales string. The table is
// taken from
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
var locales = []struct{ tag, name string }{
	// The default value. Explicitly state latin to benefit from the exact
	// script option, while still making 1252 the default encoding for languages
	// written in Latin script.
	{"und_Latn", "windows-1252"},
	{"ar", "windows-1256"},
	{"ba", "windows-1251"},
	{"be", "windows-1251"},
	{"bg", "windows-1251"},
	{"cs", "windows-1250"},
	{"el", "iso-8859-7"},
	{"et", "windows-1257"},
	{"fa", "windows-1256"},
	{"he", "windows-1255"},
	{"hr", "windows-1250"},
	{"hu", "iso-8859-2"},
	{"ja", "shift_jis"},
	{"kk", "windows-1251"},
	{"ko", "euc-kr"},
	{"ku", "windows-1254"},
	{"ky", "windows-1251"},
	{"lt", "windows-1257"},
	{"lv", "windows-1257"},
	{"mk", "windows-1251"},
	{"pl", "iso-8859-2"},
	{"ru", "windows-1251"},
	{"sah", "windows-1251"},
	{"sk", "windows-1250"},
	{"sl", "iso-8859-2"},
	{"sr", "windows-1251"},
	{"tg", "windows-1251"},
	{"th", "windows-874"},
	{"tr", "windows-1254"},
	{"tt", "windows-1251"},
	{"uk", "windows-1251"},
	{"vi", "windows-1258"},
	{"zh-hans", "gb18030"},
	{"zh-hant", "big5"},
}