1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
15 "golang.org/x/text/language"
16 "golang.org/x/text/unicode/norm"
19 // TODO: replace with functionality in language package.
20 // parent computes the parent language for the given language.
21 // It returns false if the parent is already root.
22 func parent(locale string) (parent string, ok bool) {
26 if i := strings.LastIndex(locale, "-"); i != -1 {
27 return locale[:i], true
32 // rewriter is used both to deduplicate strings and to create variants of strings
33 // to add to the test set.
34 type rewriter struct {
39 func newRewriter() *rewriter {
41 seen: make(map[string]bool),
45 func (r *rewriter) insert(a []string, s string) []string {
53 // rewrite takes a sequence of strings in, adds variants of these strings
54 // based on options and removes duplicates.
55 func (r *rewriter) rewrite(ss []string) []string {
57 for _, s := range ss {
62 for c := unicode.SimpleFold(rn); c != rn; c = unicode.SimpleFold(c) {
64 ns = r.insert(ns, string(rs))
71 // exemplarySet holds a parsed set of characters from the exemplarCharacters table.
72 type exemplarySet struct {
75 charIndex int // cumulative total of phrases, including this set
78 type phraseGenerator struct {
79 sets [exN]exemplarySet
83 func (g *phraseGenerator) init(id string) {
84 ec := exemplarCharacters
85 loc := language.Make(id).String()
86 // get sets for locale or parent locale if the set is not defined.
87 for i := range g.sets {
88 for p, ok := loc, true; ok; p, ok = parent(p) {
89 if set, ok := ec[p]; ok && set[i] != "" {
90 g.sets[i].set = strings.Split(set[i], " ")
97 for i := range g.sets {
98 g.sets[i].set = r.rewrite(g.sets[i].set)
101 for i, set := range g.sets {
103 g.sets[i].charIndex = g.n
107 // phrase returns the ith phrase, where i < g.n.
108 func (g *phraseGenerator) phrase(i int) string {
109 for _, set := range g.sets {
110 if i < set.charIndex {
111 return set.set[i-(set.charIndex-len(set.set))]
114 panic("index out of range")
117 // generate generates inputs by combining all pairs of exemplar strings.
118 // If doNorm is true, all input strings are normalized to NFC.
119 // TODO: allow other variations, statistical models, and random
120 // trailing sequences.
121 func (g *phraseGenerator) generate(doNorm bool) []Input {
127 // TODO: use a better way to limit the input size.
128 if sq := int(math.Sqrt(float64(*limit))); g.n > sq {
132 a := make([]Input, 0, size)
133 buf8 := make([]byte, 0, buf8Size)
134 buf16 := make([]uint16, 0, buf16Size)
136 addInput := func(str string) {
137 buf8 = buf8[len(buf8):]
138 buf16 = buf16[len(buf16):]
139 if len(str) > cap(buf8) {
140 buf8 = make([]byte, 0, buf8Size)
142 if len(str) > cap(buf16) {
143 buf16 = make([]uint16, 0, buf16Size)
146 buf8 = norm.NFD.AppendString(buf8, str)
148 buf8 = append(buf8, str...)
150 buf16 = appendUTF16(buf16, buf8)
151 a = append(a, makeInput(buf8, buf16))
153 for i := 0; i < g.n; i++ {
156 for j := 0; j < g.n; j++ {
162 rnd := rand.New(rand.NewSource(int64(rand.Int())))
164 j := i + rnd.Intn(len(a)-i)
165 a[i], a[j] = a[j], a[i]
166 a[i].index = i // allow restoring this order if input is used multiple times.
171 func appendUTF16(buf []uint16, s []byte) []uint16 {
173 r, sz := utf8.DecodeRune(s)
175 r1, r2 := utf16.EncodeRune(r)
177 buf = append(buf, uint16(r1), uint16(r2))
179 buf = append(buf, uint16(r))