OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / net / html / atom / gen.go
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // +build ignore
6
7 package main
8
9 // This program generates table.go and table_test.go.
10 // Invoke as
11 //
12 //      go run gen.go |gofmt >table.go
13 //      go run gen.go -test |gofmt >table_test.go
14
15 import (
16         "flag"
17         "fmt"
18         "math/rand"
19         "os"
20         "sort"
21         "strings"
22 )
23
24 // identifier converts s to a Go exported identifier.
25 // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
26 func identifier(s string) string {
27         b := make([]byte, 0, len(s))
28         cap := true
29         for _, c := range s {
30                 if c == '-' {
31                         cap = true
32                         continue
33                 }
34                 if cap && 'a' <= c && c <= 'z' {
35                         c -= 'a' - 'A'
36                 }
37                 cap = false
38                 b = append(b, byte(c))
39         }
40         return string(b)
41 }
42
43 var test = flag.Bool("test", false, "generate table_test.go")
44
45 func main() {
46         flag.Parse()
47
48         var all []string
49         all = append(all, elements...)
50         all = append(all, attributes...)
51         all = append(all, eventHandlers...)
52         all = append(all, extra...)
53         sort.Strings(all)
54
55         if *test {
56                 fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
57                 fmt.Printf("package atom\n\n")
58                 fmt.Printf("var testAtomList = []string{\n")
59                 for _, s := range all {
60                         fmt.Printf("\t%q,\n", s)
61                 }
62                 fmt.Printf("}\n")
63                 return
64         }
65
66         // uniq - lists have dups
67         // compute max len too
68         maxLen := 0
69         w := 0
70         for _, s := range all {
71                 if w == 0 || all[w-1] != s {
72                         if maxLen < len(s) {
73                                 maxLen = len(s)
74                         }
75                         all[w] = s
76                         w++
77                 }
78         }
79         all = all[:w]
80
81         // Find hash that minimizes table size.
82         var best *table
83         for i := 0; i < 1000000; i++ {
84                 if best != nil && 1<<(best.k-1) < len(all) {
85                         break
86                 }
87                 h := rand.Uint32()
88                 for k := uint(0); k <= 16; k++ {
89                         if best != nil && k >= best.k {
90                                 break
91                         }
92                         var t table
93                         if t.init(h, k, all) {
94                                 best = &t
95                                 break
96                         }
97                 }
98         }
99         if best == nil {
100                 fmt.Fprintf(os.Stderr, "failed to construct string table\n")
101                 os.Exit(1)
102         }
103
104         // Lay out strings, using overlaps when possible.
105         layout := append([]string{}, all...)
106
107         // Remove strings that are substrings of other strings
108         for changed := true; changed; {
109                 changed = false
110                 for i, s := range layout {
111                         if s == "" {
112                                 continue
113                         }
114                         for j, t := range layout {
115                                 if i != j && t != "" && strings.Contains(s, t) {
116                                         changed = true
117                                         layout[j] = ""
118                                 }
119                         }
120                 }
121         }
122
123         // Join strings where one suffix matches another prefix.
124         for {
125                 // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
126                 // maximizing overlap length k.
127                 besti := -1
128                 bestj := -1
129                 bestk := 0
130                 for i, s := range layout {
131                         if s == "" {
132                                 continue
133                         }
134                         for j, t := range layout {
135                                 if i == j {
136                                         continue
137                                 }
138                                 for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
139                                         if s[len(s)-k:] == t[:k] {
140                                                 besti = i
141                                                 bestj = j
142                                                 bestk = k
143                                         }
144                                 }
145                         }
146                 }
147                 if bestk > 0 {
148                         layout[besti] += layout[bestj][bestk:]
149                         layout[bestj] = ""
150                         continue
151                 }
152                 break
153         }
154
155         text := strings.Join(layout, "")
156
157         atom := map[string]uint32{}
158         for _, s := range all {
159                 off := strings.Index(text, s)
160                 if off < 0 {
161                         panic("lost string " + s)
162                 }
163                 atom[s] = uint32(off<<8 | len(s))
164         }
165
166         // Generate the Go code.
167         fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
168         fmt.Printf("package atom\n\nconst (\n")
169         for _, s := range all {
170                 fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
171         }
172         fmt.Printf(")\n\n")
173
174         fmt.Printf("const hash0 = %#x\n\n", best.h0)
175         fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
176
177         fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
178         for i, s := range best.tab {
179                 if s == "" {
180                         continue
181                 }
182                 fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
183         }
184         fmt.Printf("}\n")
185         datasize := (1 << best.k) * 4
186
187         fmt.Printf("const atomText =\n")
188         textsize := len(text)
189         for len(text) > 60 {
190                 fmt.Printf("\t%q +\n", text[:60])
191                 text = text[60:]
192         }
193         fmt.Printf("\t%q\n\n", text)
194
195         fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
196 }
197
198 type byLen []string
199
200 func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
201 func (x byLen) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
202 func (x byLen) Len() int           { return len(x) }
203
204 // fnv computes the FNV hash with an arbitrary starting value h.
205 func fnv(h uint32, s string) uint32 {
206         for i := 0; i < len(s); i++ {
207                 h ^= uint32(s[i])
208                 h *= 16777619
209         }
210         return h
211 }
212
213 // A table represents an attempt at constructing the lookup table.
214 // The lookup table uses cuckoo hashing, meaning that each string
215 // can be found in one of two positions.
216 type table struct {
217         h0   uint32
218         k    uint
219         mask uint32
220         tab  []string
221 }
222
223 // hash returns the two hashes for s.
224 func (t *table) hash(s string) (h1, h2 uint32) {
225         h := fnv(t.h0, s)
226         h1 = h & t.mask
227         h2 = (h >> 16) & t.mask
228         return
229 }
230
231 // init initializes the table with the given parameters.
232 // h0 is the initial hash value,
233 // k is the number of bits of hash value to use, and
234 // x is the list of strings to store in the table.
235 // init returns false if the table cannot be constructed.
236 func (t *table) init(h0 uint32, k uint, x []string) bool {
237         t.h0 = h0
238         t.k = k
239         t.tab = make([]string, 1<<k)
240         t.mask = 1<<k - 1
241         for _, s := range x {
242                 if !t.insert(s) {
243                         return false
244                 }
245         }
246         return true
247 }
248
249 // insert inserts s in the table.
250 func (t *table) insert(s string) bool {
251         h1, h2 := t.hash(s)
252         if t.tab[h1] == "" {
253                 t.tab[h1] = s
254                 return true
255         }
256         if t.tab[h2] == "" {
257                 t.tab[h2] = s
258                 return true
259         }
260         if t.push(h1, 0) {
261                 t.tab[h1] = s
262                 return true
263         }
264         if t.push(h2, 0) {
265                 t.tab[h2] = s
266                 return true
267         }
268         return false
269 }
270
271 // push attempts to push aside the entry in slot i.
272 func (t *table) push(i uint32, depth int) bool {
273         if depth > len(t.tab) {
274                 return false
275         }
276         s := t.tab[i]
277         h1, h2 := t.hash(s)
278         j := h1 + h2 - i
279         if t.tab[j] != "" && !t.push(j, depth+1) {
280                 return false
281         }
282         t.tab[j] = s
283         return true
284 }
285
286 // The lists of element names and attribute keys were taken from
287 // https://html.spec.whatwg.org/multipage/indices.html#index
288 // as of the "HTML Living Standard - Last Updated 21 February 2015" version.
289
290 var elements = []string{
291         "a",
292         "abbr",
293         "address",
294         "area",
295         "article",
296         "aside",
297         "audio",
298         "b",
299         "base",
300         "bdi",
301         "bdo",
302         "blockquote",
303         "body",
304         "br",
305         "button",
306         "canvas",
307         "caption",
308         "cite",
309         "code",
310         "col",
311         "colgroup",
312         "command",
313         "data",
314         "datalist",
315         "dd",
316         "del",
317         "details",
318         "dfn",
319         "dialog",
320         "div",
321         "dl",
322         "dt",
323         "em",
324         "embed",
325         "fieldset",
326         "figcaption",
327         "figure",
328         "footer",
329         "form",
330         "h1",
331         "h2",
332         "h3",
333         "h4",
334         "h5",
335         "h6",
336         "head",
337         "header",
338         "hgroup",
339         "hr",
340         "html",
341         "i",
342         "iframe",
343         "img",
344         "input",
345         "ins",
346         "kbd",
347         "keygen",
348         "label",
349         "legend",
350         "li",
351         "link",
352         "map",
353         "mark",
354         "menu",
355         "menuitem",
356         "meta",
357         "meter",
358         "nav",
359         "noscript",
360         "object",
361         "ol",
362         "optgroup",
363         "option",
364         "output",
365         "p",
366         "param",
367         "pre",
368         "progress",
369         "q",
370         "rp",
371         "rt",
372         "ruby",
373         "s",
374         "samp",
375         "script",
376         "section",
377         "select",
378         "small",
379         "source",
380         "span",
381         "strong",
382         "style",
383         "sub",
384         "summary",
385         "sup",
386         "table",
387         "tbody",
388         "td",
389         "template",
390         "textarea",
391         "tfoot",
392         "th",
393         "thead",
394         "time",
395         "title",
396         "tr",
397         "track",
398         "u",
399         "ul",
400         "var",
401         "video",
402         "wbr",
403 }
404
405 // https://html.spec.whatwg.org/multipage/indices.html#attributes-3
406
407 var attributes = []string{
408         "abbr",
409         "accept",
410         "accept-charset",
411         "accesskey",
412         "action",
413         "alt",
414         "async",
415         "autocomplete",
416         "autofocus",
417         "autoplay",
418         "challenge",
419         "charset",
420         "checked",
421         "cite",
422         "class",
423         "cols",
424         "colspan",
425         "command",
426         "content",
427         "contenteditable",
428         "contextmenu",
429         "controls",
430         "coords",
431         "crossorigin",
432         "data",
433         "datetime",
434         "default",
435         "defer",
436         "dir",
437         "dirname",
438         "disabled",
439         "download",
440         "draggable",
441         "dropzone",
442         "enctype",
443         "for",
444         "form",
445         "formaction",
446         "formenctype",
447         "formmethod",
448         "formnovalidate",
449         "formtarget",
450         "headers",
451         "height",
452         "hidden",
453         "high",
454         "href",
455         "hreflang",
456         "http-equiv",
457         "icon",
458         "id",
459         "inputmode",
460         "ismap",
461         "itemid",
462         "itemprop",
463         "itemref",
464         "itemscope",
465         "itemtype",
466         "keytype",
467         "kind",
468         "label",
469         "lang",
470         "list",
471         "loop",
472         "low",
473         "manifest",
474         "max",
475         "maxlength",
476         "media",
477         "mediagroup",
478         "method",
479         "min",
480         "minlength",
481         "multiple",
482         "muted",
483         "name",
484         "novalidate",
485         "open",
486         "optimum",
487         "pattern",
488         "ping",
489         "placeholder",
490         "poster",
491         "preload",
492         "radiogroup",
493         "readonly",
494         "rel",
495         "required",
496         "reversed",
497         "rows",
498         "rowspan",
499         "sandbox",
500         "spellcheck",
501         "scope",
502         "scoped",
503         "seamless",
504         "selected",
505         "shape",
506         "size",
507         "sizes",
508         "sortable",
509         "sorted",
510         "span",
511         "src",
512         "srcdoc",
513         "srclang",
514         "start",
515         "step",
516         "style",
517         "tabindex",
518         "target",
519         "title",
520         "translate",
521         "type",
522         "typemustmatch",
523         "usemap",
524         "value",
525         "width",
526         "wrap",
527 }
528
529 var eventHandlers = []string{
530         "onabort",
531         "onautocomplete",
532         "onautocompleteerror",
533         "onafterprint",
534         "onbeforeprint",
535         "onbeforeunload",
536         "onblur",
537         "oncancel",
538         "oncanplay",
539         "oncanplaythrough",
540         "onchange",
541         "onclick",
542         "onclose",
543         "oncontextmenu",
544         "oncuechange",
545         "ondblclick",
546         "ondrag",
547         "ondragend",
548         "ondragenter",
549         "ondragleave",
550         "ondragover",
551         "ondragstart",
552         "ondrop",
553         "ondurationchange",
554         "onemptied",
555         "onended",
556         "onerror",
557         "onfocus",
558         "onhashchange",
559         "oninput",
560         "oninvalid",
561         "onkeydown",
562         "onkeypress",
563         "onkeyup",
564         "onlanguagechange",
565         "onload",
566         "onloadeddata",
567         "onloadedmetadata",
568         "onloadstart",
569         "onmessage",
570         "onmousedown",
571         "onmousemove",
572         "onmouseout",
573         "onmouseover",
574         "onmouseup",
575         "onmousewheel",
576         "onoffline",
577         "ononline",
578         "onpagehide",
579         "onpageshow",
580         "onpause",
581         "onplay",
582         "onplaying",
583         "onpopstate",
584         "onprogress",
585         "onratechange",
586         "onreset",
587         "onresize",
588         "onscroll",
589         "onseeked",
590         "onseeking",
591         "onselect",
592         "onshow",
593         "onsort",
594         "onstalled",
595         "onstorage",
596         "onsubmit",
597         "onsuspend",
598         "ontimeupdate",
599         "ontoggle",
600         "onunload",
601         "onvolumechange",
602         "onwaiting",
603 }
604
605 // extra are ad-hoc values not covered by any of the lists above.
606 var extra = []string{
607         "align",
608         "annotation",
609         "annotation-xml",
610         "applet",
611         "basefont",
612         "bgsound",
613         "big",
614         "blink",
615         "center",
616         "color",
617         "desc",
618         "face",
619         "font",
620         "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
621         "foreignobject",
622         "frame",
623         "frameset",
624         "image",
625         "isindex",
626         "listing",
627         "malignmark",
628         "marquee",
629         "math",
630         "mglyph",
631         "mi",
632         "mn",
633         "mo",
634         "ms",
635         "mtext",
636         "nobr",
637         "noembed",
638         "noframes",
639         "plaintext",
640         "prompt",
641         "public",
642         "spacer",
643         "strike",
644         "svg",
645         "system",
646         "tt",
647         "xmp",
648 }