vendor/golang.org/x/text/language/display/lookup.go

   1 // Copyright 2014 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package display
   6
   7 // This file contains common lookup code that is shared between the various
   8 // implementations of Namer and Dictionaries.
   9
  10 import (
  11         "fmt"
  12         "sort"
  13         "strings"
  14
  15         "golang.org/x/text/language"
  16 )
  17
// namer is the minimal lookup abstraction used by the name* helpers in this
// file: it maps an integer index to a display string. *header satisfies it.
type namer interface {
	// name gets the string for the given index. It should walk the
	// inheritance chain if a value is not present in the base index.
	name(idx int) string
}
  23
  24 func nameLanguage(n namer, x interface{}) string {
  25         t, _ := language.All.Compose(x)
  26         for {
  27                 i, _, _ := langTagSet.index(t.Raw())
  28                 if s := n.name(i); s != "" {
  29                         return s
  30                 }
  31                 if t = t.Parent(); t == language.Und {
  32                         return ""
  33                 }
  34         }
  35 }
  36
  37 func nameScript(n namer, x interface{}) string {
  38         t, _ := language.DeprecatedScript.Compose(x)
  39         _, s, _ := t.Raw()
  40         return n.name(scriptIndex.index(s.String()))
  41 }
  42
  43 func nameRegion(n namer, x interface{}) string {
  44         t, _ := language.DeprecatedRegion.Compose(x)
  45         _, _, r := t.Raw()
  46         return n.name(regionIndex.index(r.String()))
  47 }
  48
  49 func nameTag(langN, scrN, regN namer, x interface{}) string {
  50         t, ok := x.(language.Tag)
  51         if !ok {
  52                 return ""
  53         }
  54         const form = language.All &^ language.SuppressScript
  55         if c, err := form.Canonicalize(t); err == nil {
  56                 t = c
  57         }
  58         _, sRaw, rRaw := t.Raw()
  59         i, scr, reg := langTagSet.index(t.Raw())
  60         for i != -1 {
  61                 if str := langN.name(i); str != "" {
  62                         if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
  63                                 ss, sr := "", ""
  64                                 if hasS {
  65                                         ss = scrN.name(scriptIndex.index(scr.String()))
  66                                 }
  67                                 if hasR {
  68                                         sr = regN.name(regionIndex.index(reg.String()))
  69                                 }
  70                                 // TODO: use patterns in CLDR or at least confirm they are the
  71                                 // same for all languages.
  72                                 if ss != "" && sr != "" {
  73                                         return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
  74                                 }
  75                                 if ss != "" || sr != "" {
  76                                         return fmt.Sprintf("%s (%s%s)", str, ss, sr)
  77                                 }
  78                         }
  79                         return str
  80                 }
  81                 scr, reg = sRaw, rRaw
  82                 if t = t.Parent(); t == language.Und {
  83                         return ""
  84                 }
  85                 i, _, _ = langTagSet.index(t.Raw())
  86         }
  87         return ""
  88 }
  89
  90 // header contains the data and indexes for a single namer.
  91 // data contains a series of strings concatenated into one. index contains the
  92 // offsets for a string in data. For example, consider a header that defines
  93 // strings for the languages de, el, en, fi, and nl:
  94 //
  95 //              header{
  96 //                      data: "GermanGreekEnglishDutch",
  97 //              index: []uint16{ 0, 6, 11, 18, 18, 23 },
  98 //              }
  99 //
 100 // For a language with index i, the string is defined by
 101 // data[index[i]:index[i+1]]. So the number of elements in index is always one
 102 // greater than the number of languages for which header defines a value.
 103 // A string for a language may be empty, which means the name is undefined. In
 104 // the above example, the name for fi (Finnish) is undefined.
 105 type header struct {
 106         data  string
 107         index []uint16
 108 }
 109
 110 // name looks up the name for a tag in the dictionary, given its index.
 111 func (h *header) name(i int) string {
 112         if 0 <= i && i < len(h.index)-1 {
 113                 return h.data[h.index[i]:h.index[i+1]]
 114         }
 115         return ""
 116 }
 117
// tagSet is used to find the index of a language in a set of tags.
type tagSet struct {
	// single indexes the entries that are looked up by the base language
	// string alone.
	single tagIndex
	// long lists the entries that combine a language with a script or a
	// region. It is searched with sort.SearchStrings, so it must be sorted.
	// An entry at position i in long has overall index i + single.len().
	long []string
}
 123
var (
	// langTagSet is the tagSet used by nameLanguage and nameTag to turn a
	// tag into an index. langIndex and langTagsLong are defined elsewhere
	// in the package.
	langTagSet = tagSet{
		single: langIndex,
		long:   langTagsLong,
	}

	// selfTagSet is used for indexing the language strings in their own
	// language.
	selfTagSet = tagSet{
		single: selfIndex,
		long:   selfTagsLong,
	}

	// zzzz and zz are the script "Zzzz" and the region "ZZ". tagSet.index
	// treats them the same as an unspecified script or region.
	zzzz = language.MustParseScript("Zzzz")
	zz   = language.MustParseRegion("ZZ")
)
 140
 141 // index returns the index of the tag for the given base, script and region or
 142 // its parent if the tag is not available. If the match is for a parent entry,
 143 // the excess script and region are returned.
 144 func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
 145         lang := base.String()
 146         index := -1
 147         if (scr != language.Script{} || reg != language.Region{}) {
 148                 if scr == zzzz {
 149                         scr = language.Script{}
 150                 }
 151                 if reg == zz {
 152                         reg = language.Region{}
 153                 }
 154
 155                 i := sort.SearchStrings(ts.long, lang)
 156                 // All entries have either a script or a region and not both.
 157                 scrStr, regStr := scr.String(), reg.String()
 158                 for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
 159                         if s := ts.long[i][len(lang)+1:]; s == scrStr {
 160                                 scr = language.Script{}
 161                                 index = i + ts.single.len()
 162                                 break
 163                         } else if s == regStr {
 164                                 reg = language.Region{}
 165                                 index = i + ts.single.len()
 166                                 break
 167                         }
 168                 }
 169         }
 170         if index == -1 {
 171                 index = ts.single.index(lang)
 172         }
 173         return index, scr, reg
 174 }
 175
 176 func (ts *tagSet) Tags() []language.Tag {
 177         tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
 178         ts.single.keys(func(s string) {
 179                 tags = append(tags, language.Raw.MustParse(s))
 180         })
 181         for _, s := range ts.long {
 182                 tags = append(tags, language.Raw.MustParse(s))
 183         }
 184         return tags
 185 }
 186
 187 func supportedScripts() []language.Script {
 188         scr := make([]language.Script, 0, scriptIndex.len())
 189         scriptIndex.keys(func(s string) {
 190                 scr = append(scr, language.MustParseScript(s))
 191         })
 192         return scr
 193 }
 194
 195 func supportedRegions() []language.Region {
 196         reg := make([]language.Region, 0, regionIndex.len())
 197         regionIndex.keys(func(s string) {
 198                 reg = append(reg, language.MustParseRegion(s))
 199         })
 200         return reg
 201 }
 202
 203 // tagIndex holds a concatenated lists of subtags of length 2 to 4, one string
 204 // for each length, which can be used in combination with binary search to get
 205 // the index associated with a tag.
 206 // For example, a tagIndex{
 207 //   "arenesfrruzh",  // 6 2-byte tags.
 208 //   "barwae",        // 2 3-byte tags.
 209 //   "",
 210 // }
 211 // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
 212 // "wae" had an index of 7.
 213 type tagIndex [3]string
 214
 215 func (t *tagIndex) index(s string) int {
 216         sz := len(s)
 217         if sz < 2 || 4 < sz {
 218                 return -1
 219         }
 220         a := t[sz-2]
 221         index := sort.Search(len(a)/sz, func(i int) bool {
 222                 p := i * sz
 223                 return a[p:p+sz] >= s
 224         })
 225         p := index * sz
 226         if end := p + sz; end > len(a) || a[p:end] != s {
 227                 return -1
 228         }
 229         // Add the number of tags for smaller sizes.
 230         for i := 0; i < sz-2; i++ {
 231                 index += len(t[i]) / (i + 2)
 232         }
 233         return index
 234 }
 235
 236 // len returns the number of tags that are contained in the tagIndex.
 237 func (t *tagIndex) len() (n int) {
 238         for i, s := range t {
 239                 n += len(s) / (i + 2)
 240         }
 241         return n
 242 }
 243
 244 // keys calls f for each tag.
 245 func (t *tagIndex) keys(f func(key string)) {
 246         for i, s := range *t {
 247                 for ; s != ""; s = s[i+2:] {
 248                         f(s[:i+2])
 249                 }
 250         }
 251 }