OSDN Git Service

Hulk did something
[bytom/vapor.git] / vendor / golang.org / x / text / encoding / htmlindex / gen.go
diff --git a/vendor/golang.org/x/text/encoding/htmlindex/gen.go b/vendor/golang.org/x/text/encoding/htmlindex/gen.go
new file mode 100644 (file)
index 0000000..ac6b4a7
--- /dev/null
@@ -0,0 +1,173 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+       "bytes"
+       "encoding/json"
+       "fmt"
+       "log"
+       "strings"
+
+       "golang.org/x/text/internal/gen"
+)
+
+type group struct { // group is the element type of the slice that main decodes encodings.json into.
+       Encodings []struct { // the encodings listed under this group
+               Labels []string // each label becomes a key of the generated nameMap
+               Name   string // lowercased by main before being used as a key into consts
+       }
+}
+
+func main() { // main decodes encodings.json into groups, builds Go source in a buffer, and passes it to gen.WriteGoFile.
+       gen.Init()
+
+       r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
+       var groups []group
+       if err := json.NewDecoder(r).Decode(&groups); err != nil {
+               log.Fatalf("Error reading encodings.json: %v", err)
+       }
+
+       w := &bytes.Buffer{} // all generated source is accumulated here before being written out
+       fmt.Fprintln(w, "type htmlEncoding byte")
+       fmt.Fprintln(w, "const (")
+       for i, g := range groups { // one constant per encoding, in the order they appear in groups
+               for _, e := range g.Encodings {
+                       key := strings.ToLower(e.Name)
+                       name := consts[key]
+                       if name == "" { // a key missing from consts yields the empty string, so generation aborts here
+                               log.Fatalf("No const defined for %s.", key)
+                       }
+                       if i == 0 { // first group: spell out the type and iota; later const specs repeat them implicitly
+                               fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
+                       } else {
+                               fmt.Fprintf(w, "%s\n", name)
+                       }
+               }
+       }
+       fmt.Fprintln(w, "numEncodings") // last constant in the block, so it equals the number of encodings emitted above
+       fmt.Fprint(w, ")\n\n")
+
+       fmt.Fprintln(w, "var canonical = [numEncodings]string{") // entries follow the same iteration order as the const block
+       for _, g := range groups {
+               for _, e := range g.Encodings {
+                       fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
+               }
+       }
+       fmt.Fprint(w, "}\n\n")
+
+       fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{") // one entry per label, pointing at its encoding's constant
+       for _, g := range groups {
+               for _, e := range g.Encodings {
+                       for _, l := range e.Labels {
+                               key := strings.ToLower(e.Name)
+                               name := consts[key] // not re-checked: the const loop above already aborts on a missing entry
+                               fmt.Fprintf(w, "%q: %s,\n", l, name)
+                       }
+               }
+       }
+       fmt.Fprint(w, "}\n\n")
+
+       var tags []string // locale tags, collected in the same order as the localeMap entries
+       fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
+       for _, loc := range locales { // NOTE(review): consts[loc.name] is not checked for a missing entry, unlike the const loop
+               tags = append(tags, loc.tag)
+               fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
+       }
+       fmt.Fprint(w, "}\n\n")
+
+       fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " ")) // all tags as one space-separated string
+
+       gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
+}
+
+// consts maps each lowercased canonical encoding name to the identifier of its generated htmlEncoding constant.
+var consts = map[string]string{
+       "utf-8":          "utf8",
+       "ibm866":         "ibm866",
+       "iso-8859-2":     "iso8859_2",
+       "iso-8859-3":     "iso8859_3",
+       "iso-8859-4":     "iso8859_4",
+       "iso-8859-5":     "iso8859_5",
+       "iso-8859-6":     "iso8859_6",
+       "iso-8859-7":     "iso8859_7",
+       "iso-8859-8":     "iso8859_8",
+       "iso-8859-8-i":   "iso8859_8I",
+       "iso-8859-10":    "iso8859_10",
+       "iso-8859-13":    "iso8859_13",
+       "iso-8859-14":    "iso8859_14",
+       "iso-8859-15":    "iso8859_15",
+       "iso-8859-16":    "iso8859_16",
+       "koi8-r":         "koi8r",
+       "koi8-u":         "koi8u",
+       "macintosh":      "macintosh",
+       "windows-874":    "windows874",
+       "windows-1250":   "windows1250",
+       "windows-1251":   "windows1251",
+       "windows-1252":   "windows1252",
+       "windows-1253":   "windows1253",
+       "windows-1254":   "windows1254",
+       "windows-1255":   "windows1255",
+       "windows-1256":   "windows1256",
+       "windows-1257":   "windows1257",
+       "windows-1258":   "windows1258",
+       "x-mac-cyrillic": "macintoshCyrillic",
+       "gbk":            "gbk",
+       "gb18030":        "gb18030",
+       // "hz-gb-2312":     "hzgb2312", // Was removed from WHATWG
+       "big5":           "big5",
+       "euc-jp":         "eucjp",
+       "iso-2022-jp":    "iso2022jp",
+       "shift_jis":      "shiftJIS",
+       "euc-kr":         "euckr",
+       "replacement":    "replacement",
+       "utf-16be":       "utf16be",
+       "utf-16le":       "utf16le",
+       "x-user-defined": "xUserDefined",
+}
+
+// locales pairs each language tag with an encoding name (a key of consts); it is taken from
+// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
+var locales = []struct{ tag, name string }{
+       // The default value. Explicitly state Latin to benefit from the exact
+       // script option, while still making 1252 the default encoding for languages
+       // written in Latin script.
+       {"und_Latn", "windows-1252"},
+       {"ar", "windows-1256"},
+       {"ba", "windows-1251"},
+       {"be", "windows-1251"},
+       {"bg", "windows-1251"},
+       {"cs", "windows-1250"},
+       {"el", "iso-8859-7"},
+       {"et", "windows-1257"},
+       {"fa", "windows-1256"},
+       {"he", "windows-1255"},
+       {"hr", "windows-1250"},
+       {"hu", "iso-8859-2"},
+       {"ja", "shift_jis"},
+       {"kk", "windows-1251"},
+       {"ko", "euc-kr"},
+       {"ku", "windows-1254"},
+       {"ky", "windows-1251"},
+       {"lt", "windows-1257"},
+       {"lv", "windows-1257"},
+       {"mk", "windows-1251"},
+       {"pl", "iso-8859-2"},
+       {"ru", "windows-1251"},
+       {"sah", "windows-1251"},
+       {"sk", "windows-1250"},
+       {"sl", "iso-8859-2"},
+       {"sr", "windows-1251"},
+       {"tg", "windows-1251"},
+       {"th", "windows-874"},
+       {"tr", "windows-1254"},
+       {"tt", "windows-1251"},
+       {"uk", "windows-1251"},
+       {"vi", "windows-1258"},
+       {"zh-hans", "gb18030"},
+       {"zh-hant", "big5"},
+}