OSDN Git Service

fix commands
[bytom/shuttle.git] / vendor / github.com / bytom / vendor / golang.org / x / text / cases / gen.go
diff --git a/vendor/github.com/bytom/vendor/golang.org/x/text/cases/gen.go b/vendor/github.com/bytom/vendor/golang.org/x/text/cases/gen.go
new file mode 100644 (file)
index 0000000..24b7230
--- /dev/null
@@ -0,0 +1,839 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// This program generates the trie for casing operations. The Unicode casing
+// algorithm requires the lookup of various properties and mappings for each
+// rune. The table generated by this generator combines several of the most
+// frequently used of these into a single trie so that they can be accessed
+// with a single lookup.
+package main
+
+import (
+       "bytes"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "log"
+       "reflect"
+       "strconv"
+       "strings"
+       "unicode"
+
+       "golang.org/x/text/internal/gen"
+       "golang.org/x/text/internal/triegen"
+       "golang.org/x/text/internal/ucd"
+       "golang.org/x/text/unicode/norm"
+)
+
+func main() {
+       gen.Init()
+       genTables()     // builds the casing trie and writes tables.go
+       genTablesTest() // writes the test fixtures to tables_test.go
+       gen.Repackage("gen_trieval.go", "trieval.go", "cases") // presumably derives trieval.go for package cases from gen_trieval.go; see gen.Repackage
+}
+
+// runeInfo contains all information for a rune that we care about for casing
+// operations.
+type runeInfo struct {
+       Rune rune
+
+       entry info // trie value for this rune.
+
+       CaseMode info // cTitle for General_Category Lt; cLower/cUpper from DerivedCoreProperties.txt.
+
+       // Simple case mappings.
+       Simple [1 + maxCaseMode][]rune
+
+       // Special casing
+       HasSpecial  bool // an unconditional SpecialCasing.txt entry exists.
+       Conditional bool // a conditional SpecialCasing.txt entry exists; its mappings are not stored.
+       Special     [1 + maxCaseMode][]rune
+
+       // Folding
+       FoldSimple  rune   // CaseFolding.txt status C or S.
+       FoldSpecial rune   // CaseFolding.txt status T.
+       FoldFull    []rune // CaseFolding.txt status C or F.
+
+       // TODO: FC_NFKC, or equivalent data.
+
+       // Properties
+       SoftDotted     bool          // PropList.txt Soft_Dotted.
+       CaseIgnorable  bool          // DerivedCoreProperties.txt Case_Ignorable.
+       Cased          bool          // DerivedCoreProperties.txt Cased.
+       DecomposeGreek bool          // NOTE(review): not assigned anywhere in the code visible here.
+       BreakType      string        // raw value from auxiliary/WordBreakProperty.txt.
+       BreakCat       breakCategory // BreakType collapsed; the zero value is breakBreak.
+
+       // We care mostly about 0, Above, and IotaSubscript.
+       CCC byte
+}
+
+type breakCategory int // coarse word-break class derived from WordBreakProperty.txt
+
+const (
+       breakBreak breakCategory = iota // zero value; kept when parseUCD assigns no other category
+       breakLetter                     // ALetter, Hebrew_Letter, Numeric, Extend, ExtendNumLet, Format, ZWJ
+       breakMid                        // MidLetter, MidNumLet, Single_Quote
+)
+
+// mapping returns the case mapping for the given case type.
+func (r *runeInfo) mapping(c info) string {
+       if r.HasSpecial {
+               return string(r.Special[c])
+       }
+       if len(r.Simple[c]) != 0 {
+               return string(r.Simple[c])
+       }
+       return string(r.Rune)
+}
+
+func parse(file string, f func(p *ucd.Parser)) {
+       ucd.Parse(gen.OpenUCDFile(file), f) // opens the named UCD file and hands it, with callback f, to ucd.Parse
+}
+
+func parseUCD() []runeInfo {
+       chars := make([]runeInfo, unicode.MaxRune)
+
+       get := func(r rune) *runeInfo {
+               c := &chars[r]
+               c.Rune = r
+               return c
+       }
+
+       parse("UnicodeData.txt", func(p *ucd.Parser) {
+               ri := get(p.Rune(0))
+               ri.CCC = byte(p.Int(ucd.CanonicalCombiningClass))
+               ri.Simple[cLower] = p.Runes(ucd.SimpleLowercaseMapping)
+               ri.Simple[cUpper] = p.Runes(ucd.SimpleUppercaseMapping)
+               ri.Simple[cTitle] = p.Runes(ucd.SimpleTitlecaseMapping)
+               if p.String(ucd.GeneralCategory) == "Lt" {
+                       ri.CaseMode = cTitle
+               }
+       })
+
+       // <code>; <property>
+       parse("PropList.txt", func(p *ucd.Parser) {
+               if p.String(1) == "Soft_Dotted" {
+                       chars[p.Rune(0)].SoftDotted = true
+               }
+       })
+
+       // <code>; <word break type>
+       parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
+               ri := get(p.Rune(0))
+               switch p.String(1) {
+               case "Case_Ignorable":
+                       ri.CaseIgnorable = true
+               case "Cased":
+                       ri.Cased = true
+               case "Lowercase":
+                       ri.CaseMode = cLower
+               case "Uppercase":
+                       ri.CaseMode = cUpper
+               }
+       })
+
+       // <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
+       parse("SpecialCasing.txt", func(p *ucd.Parser) {
+               // We drop all conditional special casing and deal with them manually in
+               // the language-specific case mappers. Rune 0x03A3 is the only one with
+               // a conditional formatting that is not language-specific. However,
+               // dealing with this letter is tricky, especially in a streaming
+               // context, so we deal with it in the Caser for Greek specifically.
+               ri := get(p.Rune(0))
+               if p.String(4) == "" {
+                       ri.HasSpecial = true
+                       ri.Special[cLower] = p.Runes(1)
+                       ri.Special[cTitle] = p.Runes(2)
+                       ri.Special[cUpper] = p.Runes(3)
+               } else {
+                       ri.Conditional = true
+               }
+       })
+
+       // TODO: Use text breaking according to UAX #29.
+       // <code>; <word break type>
+       parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
+               ri := get(p.Rune(0))
+               ri.BreakType = p.String(1)
+
+               // We collapse the word breaking properties onto the categories we need.
+               switch p.String(1) { // TODO: officially we need to canonicalize.
+               case "MidLetter", "MidNumLet", "Single_Quote":
+                       ri.BreakCat = breakMid
+                       if !ri.CaseIgnorable {
+                               // finalSigma relies on the fact that all breakMid runes are
+                               // also a Case_Ignorable. Revisit this code when this changes.
+                               log.Fatalf("Rune %U, which has a break category mid, is not a case ignorable", ri)
+                       }
+               case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet", "Format", "ZWJ":
+                       ri.BreakCat = breakLetter
+               }
+       })
+
+       // <code>; <type>; <mapping>
+       parse("CaseFolding.txt", func(p *ucd.Parser) {
+               ri := get(p.Rune(0))
+               switch p.String(1) {
+               case "C":
+                       ri.FoldSimple = p.Rune(2)
+                       ri.FoldFull = p.Runes(2)
+               case "S":
+                       ri.FoldSimple = p.Rune(2)
+               case "T":
+                       ri.FoldSpecial = p.Rune(2)
+               case "F":
+                       ri.FoldFull = p.Runes(2)
+               default:
+                       log.Fatalf("%U: unknown type: %s", p.Rune(0), p.String(1))
+               }
+       })
+
+       return chars
+}
+
+func genTables() {
+       chars := parseUCD()
+       verifyProperties(chars) // runs before makeEntry below, so every entry field is still zero here
+
+       t := triegen.NewTrie("case")
+       for i := range chars {
+               c := &chars[i]
+               makeEntry(c) // also grows the package-level xorData and exceptionData
+               t.Insert(rune(i), uint64(c.entry))
+       }
+
+       w := gen.NewCodeWriter()
+       defer w.WriteGoFile("tables.go", "cases") // deferred: runs on return, after everything below has been written to w
+
+       gen.WriteUnicodeVersion(w)
+
+       // TODO: write CLDR version after adding a mechanism to detect that the
+       // tables on which the manually created locale-sensitive casing code is
+       // based hasn't changed.
+
+       w.WriteVar("xorData", string(xorData))
+       w.WriteVar("exceptions", string(exceptionData))
+
+       sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
+       if err != nil {
+               log.Fatal(err)
+       }
+       w.Size += sz // add the size reported by the trie generator to the writer's running total
+}
+
+func makeEntry(ri *runeInfo) { // makeEntry computes ri.entry, the trie value for ri.
+       if ri.CaseIgnorable {
+               if ri.Cased {
+                       ri.entry = cIgnorableCased
+               } else {
+                       ri.entry = cIgnorableUncased
+               }
+       } else {
+               ri.entry = ri.CaseMode
+       }
+
+       // TODO: handle soft-dotted.
+
+       ccc := cccOther
+       switch ri.CCC {
+       case 0: // Not_Reordered
+               ccc = cccZero
+       case above: // Above
+               ccc = cccAbove
+       }
+       switch ri.BreakCat {
+       case breakBreak:
+               ccc = cccBreak // a word break overrides the class derived from CCC above
+       case breakMid:
+               ri.entry |= isMidBit
+       }
+
+       ri.entry |= ccc
+
+       if ri.CaseMode == cUncased {
+               return // nothing to map; the entry holds only case type and ccc bits
+       }
+
+       // Need to do something special.
+       if ri.CaseMode == cTitle || ri.HasSpecial || ri.mapping(cTitle) != ri.mapping(cUpper) {
+               makeException(ri)
+               return
+       }
+       if f := string(ri.FoldFull); len(f) > 0 && f != ri.mapping(cUpper) && f != ri.mapping(cLower) {
+               makeException(ri)
+               return
+       }
+
+       // Rune is either lowercase or uppercase.
+
+       orig := string(ri.Rune)
+       mapped := ""
+       if ri.CaseMode == cUpper {
+               mapped = ri.mapping(cLower)
+       } else {
+               mapped = ri.mapping(cUpper)
+       }
+
+       if len(orig) != len(mapped) { // UTF-8 lengths differ, so a byte-wise XOR cannot express the mapping
+               makeException(ri)
+               return
+       }
+
+       if string(ri.FoldFull) == ri.mapping(cUpper) {
+               ri.entry |= inverseFoldBit
+       }
+
+       n := len(orig)
+
+       // Create per-byte XOR mask.
+       var b []byte
+       for i := 0; i < n; i++ {
+               b = append(b, orig[i]^mapped[i])
+       }
+
+       // Remove leading 0 bytes, but keep at least one byte.
+       for ; len(b) > 1 && b[0] == 0; b = b[1:] {
+       }
+
+       if len(b) == 1 && b[0]&0xc0 == 0 { // a single byte with the top two bits clear is stored inline
+               ri.entry |= info(b[0]) << xorShift
+               return
+       }
+
+       key := string(b)
+       x, ok := xorCache[key]
+       if !ok {
+               xorData = append(xorData, 0) // for detecting start of sequence
+               xorData = append(xorData, b...)
+
+               x = len(xorData) - 1 // index of the last byte of the sequence just added
+               xorCache[key] = x
+       }
+       ri.entry |= info(x<<xorShift) | xorIndexBit
+}
+
+var xorCache = map[string]int{} // XOR byte sequence -> index of its last byte in xorData
+
+// xorData contains byte-wise XOR data for the least significant bytes of a
+// UTF-8 encoded rune. An index points to the last byte. The sequence starts
+// with a zero terminator.
+var xorData = []byte{}
+
+// See the comments in gen_trieval.go re "the exceptions slice".
+var exceptionData = []byte{0} // starts with one zero byte, so every offset recorded by makeException is >= 1
+
+// makeException encodes case mappings that cannot be expressed in a simple
+// XOR diff.
+func makeException(ri *runeInfo) {
+       ccc := ri.entry & cccMask
+       // Set exception bit and retain case type.
+       ri.entry &= 0x0007
+       ri.entry |= exceptionBit
+
+       if len(exceptionData) >= 1<<numExceptionBits {
+               log.Fatalf("%U:exceptionData too large %x > %d bits", ri.Rune, len(exceptionData), numExceptionBits)
+       }
+
+       // Set the offset in the exceptionData array.
+       ri.entry |= info(len(exceptionData) << exceptionShift)
+
+       orig := string(ri.Rune)
+       tc := ri.mapping(cTitle)
+       uc := ri.mapping(cUpper)
+       lc := ri.mapping(cLower)
+       ff := string(ri.FoldFull)
+
+       // addString shifts *b by 3 bits and, unless s == orig, stores len(s) there and appends s to exceptionData.
+       addString := func(s string, b *byte) {
+               if len(s) == 0 {
+                       // Zero-length mappings exist, but only for conditional casing,
+                       // which we are representing outside of this table.
+                       log.Fatalf("%U: has zero-length mapping.", ri.Rune)
+               }
+               *b <<= 3
+               if s != orig {
+                       n := len(s)
+                       if n > 7 {
+                               log.Fatalf("%U: mapping larger than 7 (%d)", ri.Rune, n)
+                       }
+                       *b |= byte(n)
+                       exceptionData = append(exceptionData, s...)
+               }
+       }
+
+       // byte 0: ccc bits or-ed with the byte length of the full fold.
+       exceptionData = append(exceptionData, byte(ccc)|byte(len(ff)))
+
+       // byte 1: packed 3-bit lengths of the mappings added below.
+       p := len(exceptionData)
+       exceptionData = append(exceptionData, 0)
+
+       if len(ff) > 7 { // May be zero-length.
+               log.Fatalf("%U: fold string larger than 7 (%d)", ri.Rune, len(ff))
+       }
+       exceptionData = append(exceptionData, ff...)
+       ct := ri.CaseMode
+       if ct != cLower {
+               addString(lc, &exceptionData[p]) // pointer is re-taken on every call: the append inside addString may reallocate
+       }
+       if ct != cUpper {
+               addString(uc, &exceptionData[p])
+       }
+       if ct != cTitle {
+               // If title is the same as upper, we set it to the original string so
+               // that it will be marked as not present. This implies title case is
+               // the same as upper case.
+               if tc == uc {
+                       tc = orig
+               }
+               addString(tc, &exceptionData[p])
+       }
+}
+
+// sparseCompacter is a trie value block Compacter. There are many cases where
+// successive runes alternate between lower- and upper-case. This Compacter
+// exploits this by adding a special case type where the case value is obtained
+// from or-ing it with the least-significant bit of the rune, creating large
+// ranges of equal case values that compress well.
+type sparseCompacter struct {
+       sparseBlocks  [][]uint16 // one makeSparse result per stored block
+       sparseOffsets []uint16   // value of sparseCount at the moment each block was stored
+       sparseCount   int        // total number of ranges over all stored blocks
+}
+
+// makeSparse returns the values of a block narrowed to uint16 and rewritten
+// for compaction, together with the number of ranges the block would need.
+func makeSparse(vals []uint64) ([]uint16, int) {
+       // Copy the values.
+       values := make([]uint16, len(vals))
+       for i, v := range vals {
+               values[i] = uint16(v)
+       }
+
+       alt := func(i int, v uint16) uint16 { // alt gives the position-dependent alternative encoding of v at index i
+               if cm := info(v & fullCasedMask); cm == cUpper || cm == cLower {
+                       // Convert cLower or cUpper to cXORCase value, which has the form 11x.
+                       xor := v
+                       xor &^= 1
+                       xor |= uint16(i&1) ^ (v & 1)
+                       xor |= 0x4
+                       return xor
+               }
+               return v
+       }
+
+       var count int
+       var previous uint16
+       for i, v := range values {
+               if v != 0 { // zero values never start a range and are not counted
+                       // Try if the unmodified value is equal to the previous.
+                       if v == previous {
+                               continue
+                       }
+
+                       // Try if the xor-ed value is equal to the previous value.
+                       a := alt(i, v)
+                       if a == previous {
+                               values[i] = a
+                               continue
+                       }
+
+                       // This is a new value.
+                       count++
+
+                       // Use the xor-ed value if it will be identical to the next value.
+                       if p := i + 1; p < len(values) && alt(p, values[p]) == a {
+                               values[i] = a
+                               v = a
+                       }
+               }
+               previous = v
+       }
+       return values, count
+}
+
+func (s *sparseCompacter) Size(v []uint64) (int, bool) {
+       const maxRanges = 16 // blocks needing more ranges than this are rejected
+
+       _, ranges := makeSparse(v)
+       if ranges > maxRanges {
+               return 0, false
+       }
+       perRange := int(reflect.TypeOf(valueRange{}).Size())
+       return 2 + perRange*ranges, true
+}
+
+func (s *sparseCompacter) Store(v []uint64) uint32 {
+       h := uint32(len(s.sparseOffsets))
+       values, sz := makeSparse(v)
+       s.sparseBlocks = append(s.sparseBlocks, values)
+       s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
+       s.sparseCount += sz
+       return h
+}
+
+func (s *sparseCompacter) Handler() string {
+       // The sparse global variable and its lookup method are defined in gen_trieval.go.
+       return "sparse.lookup"
+}
+
+func (s *sparseCompacter) Print(w io.Writer) (retErr error) { // Print writes the sparseOffsets and sparseValues declarations to w.
+       p := func(format string, args ...interface{}) { // formatted write that remembers only the first error in retErr
+               _, err := fmt.Fprintf(w, format, args...)
+               if retErr == nil && err != nil {
+                       retErr = err
+               }
+       }
+
+       ls := len(s.sparseBlocks)
+       if ls == len(s.sparseOffsets) { // append the closing offset only if it has not been added yet
+               s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
+       }
+       p("// sparseOffsets: %d entries, %d bytes\n", ls+1, (ls+1)*2)
+       p("var sparseOffsets = %#v\n\n", s.sparseOffsets)
+
+       ns := s.sparseCount
+       p("// sparseValues: %d entries, %d bytes\n", ns, ns*4) // assumes a valueRange occupies 4 bytes; TODO confirm against gen_trieval.go
+       p("var sparseValues = [%d]valueRange {", ns)
+       for i, values := range s.sparseBlocks {
+               p("\n// Block %#x, offset %#x", i, s.sparseOffsets[i])
+               var v uint16
+               for i, nv := range values { // this i shadows the block index: it is the position inside the block
+                       if nv != v {
+                               if v != 0 {
+                                       p(",hi:%#02x},", 0x80+i-1) // close the range that ended at the previous position
+                               }
+                               if nv != 0 {
+                                       p("\n{value:%#04x,lo:%#02x", nv, 0x80+i) // positions are emitted offset by 0x80
+                               }
+                       }
+                       v = nv
+               }
+               if v != 0 {
+                       p(",hi:%#02x},", 0x80+len(values)-1) // close a range still open at the end of the block
+               }
+       }
+       p("\n}\n\n")
+       return
+}
+
+// verifyProperties checks the properties of the runes that are relied upon in
+// the implementation. Each property is marked with an identifier that is
+// referred to in the places where it is used.
+func verifyProperties(chars []runeInfo) {
+       for i, c := range chars {
+               r := rune(i)
+
+               // Rune properties.
+
+               // A.1: modifier never changes on lowercase. [ltLower]
+               if c.CCC > 0 && unicode.ToLower(r) != r {
+                       log.Fatalf("%U: non-starter changes when lowercased", r)
+               }
+
+               // A.2: properties of decompositions starting with I or J. [ltLower]
+               d := norm.NFD.PropertiesString(string(r)).Decomposition()
+               if len(d) > 0 {
+                       if d[0] == 'I' || d[0] == 'J' {
+                               // A.2.1: we expect at least an ASCII character and a modifier.
+                               if len(d) < 3 {
+                                       log.Fatalf("%U: length of decomposition was %d; want >= 3", r, len(d))
+                               }
+
+                               // All subsequent runes are modifiers and all have the same CCC.
+                               runes := []rune(string(d[1:]))
+                               ccc := chars[runes[0]].CCC
+
+                               for _, mr := range runes[1:] {
+                                       mc := chars[mr]
+
+                                       // A.2.2: all modifiers have a CCC of Above or less.
+                                       if ccc == 0 || ccc > above {
+                                               log.Fatalf("%U: CCC of successive rune (%U) was %d; want (0,230]", r, mr, ccc)
+                                       }
+
+                                       // A.2.3: a sequence of modifiers all have the same CCC.
+                                       if mc.CCC != ccc {
+                                               log.Fatalf("%U: CCC of follow-up modifier (%U) was %d; want %d", r, mr, mc.CCC, ccc)
+                                       }
+
+                                       // A.2.4: for each trailing r, r in [0x300, 0x311] <=> CCC == Above.
+                                       if (ccc == above) != (0x300 <= mr && mr <= 0x311) {
+                                               log.Fatalf("%U: modifier %U in [U+0300, U+0311] != ccc(%U) == 230", r, mr, mr)
+                                       }
+
+                                       if i += len(string(mr)); i >= len(d) { // NOTE(review): i is the outer range index (the rune value), not an offset into d; this looks like it ends the loop after the first follow-up modifier. Confirm intent.
+                                               break
+                                       }
+                               }
+                       }
+               }
+
+               // A.3: no U+0307 in decomposition of Soft-Dotted rune. [ltUpper]
+               if unicode.Is(unicode.Soft_Dotted, r) && strings.Contains(string(d), "\u0307") {
+                       log.Fatalf("%U: decomposition of soft-dotted rune may not contain U+0307", r)
+               }
+
+               // A.4: only rune U+0345 may be of CCC Iota_Subscript. [elUpper]
+               if c.CCC == iotaSubscript && r != 0x0345 {
+                       log.Fatalf("%U: only rune U+0345 may have CCC Iota_Subscript", r)
+               }
+
+               // A.5: soft-dotted runes do not have exceptions.
+               if c.SoftDotted && c.entry&exceptionBit != 0 { // NOTE(review): genTables calls this before makeEntry, so c.entry appears to be zero here. Confirm.
+                       log.Fatalf("%U: soft-dotted has exception", r)
+               }
+
+               // A.6: Greek decomposition. [elUpper]
+               if unicode.Is(unicode.Greek, r) {
+                       if b := norm.NFD.PropertiesString(string(r)).Decomposition(); b != nil {
+                               runes := []rune(string(b))
+                               // A.6.1: If a Greek rune decomposes and the first rune of the
+                               // decomposition is greater than U+00FF, the rune is always
+                               // Greek and not a modifier.
+                               if f := runes[0]; unicode.IsMark(f) || f > 0xFF && !unicode.Is(unicode.Greek, f) {
+                                       log.Fatalf("%U: expected first rune of Greek decomposition to be letter, found %U", r, f)
+                               }
+                               // A.6.2: Any follow-up rune in a Greek decomposition is a
+                               // modifier of which the first should be gobbled in
+                               // decomposition.
+                               for _, m := range runes[1:] {
+                                       switch m {
+                                       case 0x0313, 0x0314, 0x0301, 0x0300, 0x0306, 0x0342, 0x0308, 0x0304, 0x345:
+                                       default:
+                                               log.Fatalf("%U: modifier %U is outside of expected Greek modifier set", r, m)
+                                       }
+                               }
+                       }
+               }
+
+               // Breaking properties.
+
+               // B.1: all runes with CCC > 0 are of break type Extend.
+               if c.CCC > 0 && c.BreakType != "Extend" {
+                       log.Fatalf("%U: CCC == %d, but got break type %s; want Extend", r, c.CCC, c.BreakType)
+               }
+
+               // B.2: all cased runes with c.CCC == 0 are of break type ALetter.
+               if c.CCC == 0 && c.Cased && c.BreakType != "ALetter" {
+                       log.Fatalf("%U: cased, but got break type %s; want ALetter", r, c.BreakType)
+               }
+
+               // B.3: letter category.
+               if c.CCC == 0 && c.BreakCat != breakBreak && !c.CaseIgnorable {
+                       if c.BreakCat != breakLetter {
+                               log.Fatalf("%U: check for letter break type gave %d; want %d", r, c.BreakCat, breakLetter)
+                       }
+               }
+       }
+}
+
+func genTablesTest() { // genTablesTest generates tables_test.go, the fixtures for the package tests.
+       w := &bytes.Buffer{}
+
+       fmt.Fprintln(w, "var (")
+       printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore)
+
+       // We discard the output as we know we have perfect functions. We run them
+       // just to verify the properties are correct.
+       n := printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased)
+       n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower)
+       n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper)
+       if n > 0 {
+               log.Fatalf("One of the discarded properties does not have a perfect filter.")
+       }
+
+       // <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
+       fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
+       parse("SpecialCasing.txt", func(p *ucd.Parser) {
+               // Skip conditional entries.
+               if p.String(4) != "" {
+                       return
+               }
+               r := p.Rune(0)
+               fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n",
+                       r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3)))
+       })
+       fmt.Fprint(w, "\t}\n\n")
+
+       // <code>; <type>; <runes>
+       table := map[rune]struct{ simple, full, special string }{}
+       parse("CaseFolding.txt", func(p *ucd.Parser) {
+               r := p.Rune(0)
+               t := p.String(1)
+               v := string(p.Runes(2))
+               if t != "T" && v == string(unicode.ToLower(r)) { // skip foldings that merely equal the simple lowercase
+                       return
+               }
+               x := table[r]
+               switch t {
+               case "C":
+                       x.full = v
+                       x.simple = v
+               case "S":
+                       x.simple = v
+               case "F":
+                       x.full = v
+               case "T":
+                       x.special = v
+               }
+               table[r] = x
+       })
+       fmt.Fprintln(w, "\tfoldMap = map[rune]struct{ simple, full, special string }{")
+       for r := rune(0); r < 0x10FFFF; r++ { // walk runes in order so the map literal is emitted deterministically
+               x, ok := table[r]
+               if !ok {
+                       continue
+               }
+               fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n", r, x.simple, x.full, x.special)
+       }
+       fmt.Fprint(w, "\t}\n\n")
+
+       // Break property
+       notBreak := map[rune]bool{}
+       parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
+               switch p.String(1) {
+               case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
+                       "ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet", "ZWJ":
+                       notBreak[p.Rune(0)] = true
+               }
+       })
+
+       fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{")
+       inBreak := false
+       for r := rune(0); r <= lastRuneForTesting; r++ {
+               if isBreak := !notBreak[r]; isBreak != inBreak {
+                       if isBreak {
+                               fmt.Fprintf(w, "\t\t{0x%x, ", r)
+                       } else {
+                               fmt.Fprintf(w, "0x%x},\n", r-1)
+                       }
+                       inBreak = isBreak
+               }
+       }
+       if inBreak {
+               fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting)
+       }
+       fmt.Fprint(w, "\t}\n\n")
+
+       // Word break test
+       // Keep only the samples that contain more than one cased character.
+       cased := map[rune]bool{}
+       parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
+               if p.String(1) == "Cased" {
+                       cased[p.Rune(0)] = true
+               }
+       })
+
+       fmt.Fprintln(w, "\tbreakTest = []string{")
+       parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
+               c := strings.Split(p.String(0), " ")
+
+               const sep = '|'
+               numCased := 0
+               test := ""
+               for ; len(c) >= 2; c = c[2:] { // fields come in (marker, code point) pairs
+                       if c[0] == "÷" && test != "" { // U+00F7 marks a break opportunity in WordBreakTest.txt
+                               test += string(sep)
+                       }
+                       i, err := strconv.ParseUint(c[1], 16, 32)
+                       r := rune(i)
+                       if err != nil {
+                               log.Fatalf("Invalid rune %q.", c[1])
+                       }
+                       if r == sep {
+                               log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep)
+                       }
+                       if cased[r] {
+                               numCased++
+                       }
+                       test += string(r)
+               }
+               if numCased > 1 {
+                       fmt.Fprintf(w, "\t\t%q,\n", test)
+               }
+       })
+       fmt.Fprintln(w, "\t}")
+
+       fmt.Fprintln(w, ")")
+
+       gen.WriteGoFile("tables_test.go", "cases", w.Bytes())
+}
+
+// These functions are just used for verification that their definitions have
+// not changed in the Unicode Standard.
+
+func verifyCased(r rune) bool { // lower, upper or title case
+       return verifyLower(r) || verifyUpper(r) || unicode.IsTitle(r)
+}
+
+func verifyLower(r rune) bool { // unicode.IsLower or Other_Lowercase
+       return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r)
+}
+
+func verifyUpper(r rune) bool { // unicode.IsUpper or Other_Uppercase
+       return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r)
+}
+
+// verifyIgnore is an approximation of the Case_Ignorable property using the
+// core unicode package. It is used to reduce the size of the test data.
+func verifyIgnore(r rune) bool {
+       props := []*unicode.RangeTable{
+               unicode.Mn, // Mark, nonspacing
+               unicode.Me, // Mark, enclosing
+               unicode.Cf, // Other, format
+               unicode.Lm, // Letter, modifier
+               unicode.Sk, // Symbol, modifier
+       }
+       for _, p := range props {
+               if unicode.Is(p, r) {
+                       return true
+               }
+       }
+       return false
+}
+
+// printProperties prints a table of the runes in the given UCD file that have
+// property and for which f(r) is false, and returns how many it printed. A rune
+// has the indicated property if it is in the generated table or if f(r).
+func printProperties(w io.Writer, file, property string, f func(r rune) bool) int {
+       verify := map[rune]bool{} // every rune the file lists with the property
+       n := 0
+       varNameParts := strings.Split(property, "_")
+       varNameParts[0] = strings.ToLower(varNameParts[0]) // e.g. "Case_Ignorable" becomes the variable name "caseIgnorable"
+       fmt.Fprintf(w, "\t%s = map[rune]bool{\n", strings.Join(varNameParts, ""))
+       parse(file, func(p *ucd.Parser) {
+               if p.String(1) == property {
+                       r := p.Rune(0)
+                       verify[r] = true
+                       if !f(r) { // only runes the filter misses need to be spelled out in the table
+                               n++
+                               fmt.Fprintf(w, "\t\t0x%.4x: true,\n", r)
+                       }
+               }
+       })
+       fmt.Fprint(w, "\t}\n\n")
+
+       // Verify that f is correct, that is, it represents a subset of the property.
+       for r := rune(0); r <= lastRuneForTesting; r++ {
+               if !verify[r] && f(r) {
+                       log.Fatalf("Incorrect filter func for property %q.", property)
+               }
+       }
+       return n
+}
+
+// The newCaseTrie, sparseValues and sparseOffsets definitions below are
+// placeholders referred to by gen_trieval.go. The real definitions are
+// generated by this program and written to tables.go.
+
+func newCaseTrie(int) int { return 0 } // stub: ignores its argument and returns 0
+
+var (
+       sparseValues  [0]valueRange // empty stand-in for the generated table
+       sparseOffsets [0]uint16     // empty stand-in for the generated table
+)