1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // This file contains common lookup code that is shared between the various
8 // implementations of Namer and Dictionaries.
15 "golang.org/x/text/language"
// namer abstracts the index-to-string lookup used by nameLanguage,
// nameScript, nameRegion and nameTag: each of them resolves an index and
// passes it to name.
18 type namer interface {
19 // name gets the string for the given index. It should walk the
20 // inheritance chain if a value is not present in the base index.
// nameLanguage looks up the language name for x using n. x is turned into a
// tag with language.All.Compose (the error is discarded), the tag's raw base,
// script and region are resolved to an index via langTagSet, and n is asked
// for the string at that index. An empty string from n is treated as "no
// name"; the lookup then moves on to t.Parent() and checks whether
// language.Und has been reached.
// NOTE(review): the statements between the visible lines (loop header and
// return statements) are not part of this excerpt — confirm the exact return
// values against the full file.
24 func nameLanguage(n namer, x interface{}) string {
25 t, _ := language.All.Compose(x)
27 i, _, _ := langTagSet.index(t.Raw())
28 if s := n.name(i); s != "" {
31 if t = t.Parent(); t == language.Und {
// nameScript looks up the script name for x using n. x is composed into a tag
// with language.DeprecatedScript.Compose (the error is discarded); the string
// form of the script s is mapped to an index through scriptIndex and that
// index is handed to n.name.
// NOTE(review): s is assigned on a line outside this excerpt — presumably
// from t.Raw(); confirm.
37 func nameScript(n namer, x interface{}) string {
38 t, _ := language.DeprecatedScript.Compose(x)
40 return n.name(scriptIndex.index(s.String()))
// nameRegion looks up the region name for x using n. x is composed into a tag
// with language.DeprecatedRegion.Compose (the error is discarded); the string
// form of the region r is mapped to an index through regionIndex and that
// index is handed to n.name.
// NOTE(review): r is assigned on a line outside this excerpt — presumably
// from t.Raw(); confirm.
43 func nameRegion(n namer, x interface{}) string {
44 t, _ := language.DeprecatedRegion.Compose(x)
46 return n.name(regionIndex.index(r.String()))
// nameTag builds a display string for the tag x. langN, scrN and regN supply
// the language, script and region names respectively. When the language entry
// found for the tag leaves a script and/or region unaccounted for, their names
// are appended in parentheses: "lang (script, region)" if both have a name,
// or "lang (script)" / "lang (region)" if only one does.
// NOTE(review): several lines of this function (the !ok branch, the loop
// header and the plain returns) are outside this excerpt — confirm them
// before relying on this summary.
49 func nameTag(langN, scrN, regN namer, x interface{}) string {
50 t, ok := x.(language.Tag)
// Canonicalize with every rule in language.All except SuppressScript; the
// canonical form c is only considered when Canonicalize reports no error.
54 const form = language.All &^ language.SuppressScript
55 if c, err := form.Canonicalize(t); err == nil {
// Besides the index, langTagSet.index returns the script and region that the
// matched entry does not cover; those are the parts named separately below.
58 _, sRaw, rRaw := t.Raw()
59 i, scr, reg := langTagSet.index(t.Raw())
61 if str := langN.name(i); str != "" {
62 if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
65 ss = scrN.name(scriptIndex.index(scr.String()))
68 sr = regN.name(regionIndex.index(reg.String()))
70 // TODO: use patterns in CLDR or at least confirm they are the
71 // same for all languages.
72 if ss != "" && sr != "" {
73 return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
// The both-non-empty case has already returned, so at most one of ss and sr
// is non-empty here and concatenating them yields that single name.
75 if ss != "" || sr != "" {
76 return fmt.Sprintf("%s (%s%s)", str, ss, sr)
// Move on to the parent tag; language.Und is checked as the end of the chain.
82 if t = t.Parent(); t == language.Und {
85 i, _, _ = langTagSet.index(t.Raw())
90 // header contains the data and indexes for a single namer.
91 // data contains a series of strings concatenated into one. index contains the
92 // offsets for a string in data. For example, consider a header that defines
93 // strings for the languages de, el, en, fi, and nl:
96 // data: "GermanGreekEnglishDutch",
97 // index: []uint16{ 0, 6, 11, 18, 18, 23 },
100 // For a language with index i, the string is defined by
101 // data[index[i]:index[i+1]]. So the number of elements in index is always one
102 // greater than the number of languages for which header defines a value.
103 // A string for a language may be empty, which means the name is undefined. In
104 // the above example, the name for fi (Finnish) is undefined.
110 // name looks up the name for a tag in the dictionary, given its index.
// For 0 <= i < len(h.index)-1 the name is the slice of h.data between the
// offsets h.index[i] and h.index[i+1]; the upper bound is len(h.index)-1
// because entry i+1 is read as the end offset.
// NOTE(review): the result for an out-of-range i is returned on a line
// outside this excerpt — confirm.
111 func (h *header) name(i int) string {
112 if 0 <= i && i < len(h.index)-1 {
113 return h.data[h.index[i]:h.index[i+1]]
118 // tagSet is used to find the index of a language in a set of tags.
130 // selfTagSet is used for indexing the language strings in their own
// zzzz and zz hold the parsed script "Zzzz" and region "ZZ" so they can be
// compared against directly. MustParseScript and MustParseRegion panic on
// invalid input; both literals here are fixed.
// NOTE(review): presumably these are the "unknown" script/region
// placeholders that tagSet.index resets to the zero value — confirm.
137 zzzz = language.MustParseScript("Zzzz")
138 zz = language.MustParseRegion("ZZ")
141 // index returns the index of the tag for the given base, script and region or
142 // its parent if the tag is not available. If the match is for a parent entry,
143 // the excess script and region are returned.
144 func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
145 lang := base.String()
// The search through ts.long is only needed when a script or region is set.
147 if (scr != language.Script{} || reg != language.Region{}) {
// NOTE(review): the conditions guarding the two resets below are outside this
// excerpt — presumably comparisons against zzzz and zz; confirm.
149 scr = language.Script{}
152 reg = language.Region{}
// sort.SearchStrings requires ts.long to be sorted in ascending order; it
// yields the first position at which an entry >= lang can appear. The loop
// then scans forward over the entries that start with lang.
155 i := sort.SearchStrings(ts.long, lang)
156 // All entries have either a script or a region and not both.
157 scrStr, regStr := scr.String(), reg.String()
158 for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
// s is what follows the language prefix and the one byte after it
// (len(lang)+1). A match clears the matched part so it is not reported as
// excess, and the index is offset by ts.single.len() because long entries
// are numbered after all entries of ts.single.
159 if s := ts.long[i][len(lang)+1:]; s == scrStr {
160 scr = language.Script{}
161 index = i + ts.single.len()
163 } else if s == regStr {
164 reg = language.Region{}
165 index = i + ts.single.len()
// NOTE(review): the condition guarding this assignment is outside this
// excerpt — presumably the fallback to the bare language when no long entry
// matched; confirm.
171 index = ts.single.index(lang)
173 return index, scr, reg
// Tags collects the tags of the set into a slice: first one tag per key of
// ts.single, then one per entry of ts.long. Capacity for all of them is
// reserved up front. Each string is parsed with language.Raw.MustParse, which
// panics if the string is not a valid tag.
176 func (ts *tagSet) Tags() []language.Tag {
177 tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
178 ts.single.keys(func(s string) {
179 tags = append(tags, language.Raw.MustParse(s))
181 for _, s := range ts.long {
182 tags = append(tags, language.Raw.MustParse(s))
// supportedScripts collects one language.Script per key of scriptIndex, in the
// order scriptIndex.keys visits them. Capacity is reserved from
// scriptIndex.len(); language.MustParseScript panics on a key it cannot parse.
187 func supportedScripts() []language.Script {
188 scr := make([]language.Script, 0, scriptIndex.len())
189 scriptIndex.keys(func(s string) {
190 scr = append(scr, language.MustParseScript(s))
// supportedRegions collects one language.Region per key of regionIndex, in the
// order regionIndex.keys visits them. Capacity is reserved from
// regionIndex.len(); language.MustParseRegion panics on a key it cannot parse.
195 func supportedRegions() []language.Region {
196 reg := make([]language.Region, 0, regionIndex.len())
197 regionIndex.keys(func(s string) {
198 reg = append(reg, language.MustParseRegion(s))
203 // tagIndex holds concatenated lists of subtags of length 2 to 4, one string
204 // for each length, which can be used in combination with binary search to get
205 // the index associated with a tag.
206 // For example, a tagIndex{
207 // "arenesfrruzh", // 6 2-byte tags.
208 // "barwae", // 2 3-byte tags.
211 // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
212 // "wae" had an index of 7.
// Element i of the array holds the tags of length i+2, and indices continue
// across elements: the 3-byte tags are numbered after all 2-byte tags.
213 type tagIndex [3]string
// index returns the position of the tag s within t. Tags of all shorter
// lengths are counted first, so indices are unique across the three strings.
// Only tags of 2 to 4 bytes are supported; other lengths are caught by the
// early check.
// NOTE(review): sz, a, p and the return statements are set on lines outside
// this excerpt — confirm what is returned for an unsupported length or a tag
// that is not present.
215 func (t *tagIndex) index(s string) int {
217 if sz < 2 || 4 < sz {
// Binary search over the len(a)/sz fixed-width slots of a for the first slot
// whose contents are >= s.
221 index := sort.Search(len(a)/sz, func(i int) bool {
223 return a[p:p+sz] >= s
// sort.Search only yields an insertion point, so the slot must lie within a
// and hold exactly s to count as a match.
226 if end := p + sz; end > len(a) || a[p:end] != s {
229 // Add the number of tags for smaller sizes.
230 for i := 0; i < sz-2; i++ {
231 index += len(t[i]) / (i + 2)
236 // len returns the number of tags that are contained in the tagIndex.
// Element i stores tags of i+2 bytes each, so its tag count is its byte
// length divided by i+2; the counts of all elements are summed into n.
237 func (t *tagIndex) len() (n int) {
238 for i, s := range t {
239 n += len(s) / (i + 2)
244 // keys calls f for each tag.
245 func (t *tagIndex) keys(f func(key string)) {
246 for i, s := range *t {
247 for ; s != ""; s = s[i+2:] {