vendor/golang.org/x/text/language/parse.go

   1 // Copyright 2013 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package language
   6
   7 import (
   8         "bytes"
   9         "errors"
  10         "fmt"
  11         "sort"
  12         "strconv"
  13         "strings"
  14
  15         "golang.org/x/text/internal/tag"
  16 )
  17
  18 // isAlpha returns true if the byte is not a digit.
  19 // b must be an ASCII letter or digit.
  20 func isAlpha(b byte) bool {
  21         return b > '9'
  22 }
  23
  24 // isAlphaNum returns true if the string contains only ASCII letters or digits.
  25 func isAlphaNum(s []byte) bool {
  26         for _, c := range s {
  27                 if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
  28                         return false
  29                 }
  30         }
  31         return true
  32 }
  33
// errSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// It is used as a sentinel: scanner.setError compares against it by identity
// so that a syntax error replaces any other error recorded earlier.
// TODO: return the position at which the syntax error occurred?
var errSyntax = errors.New("language: tag is not well-formed")
  38
  39 // ValueError is returned by any of the parsing functions when the
  40 // input is well-formed but the respective subtag is not recognized
  41 // as a valid value.
  42 type ValueError struct {
  43         v [8]byte
  44 }
  45
  46 func mkErrInvalid(s []byte) error {
  47         var e ValueError
  48         copy(e.v[:], s)
  49         return e
  50 }
  51
  52 func (e ValueError) tag() []byte {
  53         n := bytes.IndexByte(e.v[:], 0)
  54         if n == -1 {
  55                 n = 8
  56         }
  57         return e.v[:n]
  58 }
  59
  60 // Error implements the error interface.
  61 func (e ValueError) Error() string {
  62         return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
  63 }
  64
  65 // Subtag returns the subtag for which the error occurred.
  66 func (e ValueError) Subtag() string {
  67         return string(e.tag())
  68 }
  69
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
// The scanner normalizes its buffer in place while scanning: init rewrites
// '_' to '-', and malformed tokens are removed from b as they are found.
type scanner struct {
	b     []byte                      // input buffer; rewritten in place during scanning
	bytes [max99thPercentileSize]byte // inline storage makeScannerString uses for b when the input fits
	token []byte                      // current token (a sub-slice of b), or nil when there is none
	start int // start position of the current token
	end   int // end position of the current token
	next  int // next point for scan
	err   error // recorded error; see setError for precedence rules
	done  bool  // set by scan once the input is exhausted
}
  81
  82 func makeScannerString(s string) scanner {
  83         scan := scanner{}
  84         if len(s) <= len(scan.bytes) {
  85                 scan.b = scan.bytes[:copy(scan.bytes[:], s)]
  86         } else {
  87                 scan.b = []byte(s)
  88         }
  89         scan.init()
  90         return scan
  91 }
  92
  93 // makeScanner returns a scanner using b as the input buffer.
  94 // b is not copied and may be modified by the scanner routines.
  95 func makeScanner(b []byte) scanner {
  96         scan := scanner{b: b}
  97         scan.init()
  98         return scan
  99 }
 100
 101 func (s *scanner) init() {
 102         for i, c := range s.b {
 103                 if c == '_' {
 104                         s.b[i] = '-'
 105                 }
 106         }
 107         s.scan()
 108 }
 109
 110 // restToLower converts the string between start and end to lower case.
 111 func (s *scanner) toLower(start, end int) {
 112         for i := start; i < end; i++ {
 113                 c := s.b[i]
 114                 if 'A' <= c && c <= 'Z' {
 115                         s.b[i] += 'a' - 'A'
 116                 }
 117         }
 118 }
 119
 120 func (s *scanner) setError(e error) {
 121         if s.err == nil || (e == errSyntax && s.err != errSyntax) {
 122                 s.err = e
 123         }
 124 }
 125
 126 // resizeRange shrinks or grows the array at position oldStart such that
 127 // a new string of size newSize can fit between oldStart and oldEnd.
 128 // Sets the scan point to after the resized range.
 129 func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
 130         s.start = oldStart
 131         if end := oldStart + newSize; end != oldEnd {
 132                 diff := end - oldEnd
 133                 if end < cap(s.b) {
 134                         b := make([]byte, len(s.b)+diff)
 135                         copy(b, s.b[:oldStart])
 136                         copy(b[end:], s.b[oldEnd:])
 137                         s.b = b
 138                 } else {
 139                         s.b = append(s.b[end:], s.b[oldEnd:]...)
 140                 }
 141                 s.next = end + (s.next - s.end)
 142                 s.end = end
 143         }
 144 }
 145
 146 // replace replaces the current token with repl.
 147 func (s *scanner) replace(repl string) {
 148         s.resizeRange(s.start, s.end, len(repl))
 149         copy(s.b[s.start:], repl)
 150 }
 151
 152 // gobble removes the current token from the input.
 153 // Caller must call scan after calling gobble.
 154 func (s *scanner) gobble(e error) {
 155         s.setError(e)
 156         if s.start == 0 {
 157                 s.b = s.b[:+copy(s.b, s.b[s.next:])]
 158                 s.end = 0
 159         } else {
 160                 s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
 161                 s.end = s.start - 1
 162         }
 163         s.next = s.start
 164 }
 165
 166 // deleteRange removes the given range from s.b before the current token.
 167 func (s *scanner) deleteRange(start, end int) {
 168         s.setError(errSyntax)
 169         s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
 170         diff := end - start
 171         s.next -= diff
 172         s.start -= diff
 173         s.end -= diff
 174 }
 175
// scan parses the next token of a BCP 47 string.  Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed, that is, the value
// s.end had when scan was called. On return s.token is the new current token,
// or nil when no further valid token exists, in which case s.done is set.
func (s *scanner) scan() (end int) {
	// Remember where the previous token ended; this is what the caller gets.
	end = s.end
	s.token = nil
	for s.start = s.next; s.next < len(s.b); {
		i := bytes.IndexByte(s.b[s.next:], '-')
		if i == -1 {
			// Last token: it runs to the end of the buffer.
			s.end = len(s.b)
			s.next = len(s.b)
			i = s.end - s.start
		} else {
			s.end = s.next + i
			s.next = s.end + 1 // skip over the separator
		}
		token := s.b[s.start:s.end]
		if i < 1 || i > 8 || !isAlphaNum(token) {
			// Empty, overlong or non-alphanumeric token: remove it from the
			// buffer and retry from the same position (gobble resets s.next).
			s.gobble(errSyntax)
			continue
		}
		s.token = token
		return end
	}
	// Input exhausted. A dangling separator at the end is a syntax error and
	// is trimmed from the buffer.
	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
		s.setError(errSyntax)
		s.b = s.b[:len(s.b)-1]
	}
	s.done = true
	return end
}
 208
 209 // acceptMinSize parses multiple tokens of the given size or greater.
 210 // It returns the end position of the last token consumed.
 211 func (s *scanner) acceptMinSize(min int) (end int) {
 212         end = s.end
 213         s.scan()
 214         for ; len(s.token) >= min; s.scan() {
 215                 end = s.end
 216         }
 217         return end
 218 }
 219
 220 // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
 221 // failed it returns an error and any part of the tag that could be parsed.
 222 // If parsing succeeded but an unknown value was found, it returns
 223 // ValueError. The Tag returned in this case is just stripped of the unknown
 224 // value. All other values are preserved. It accepts tags in the BCP 47 format
 225 // and extensions to this standard defined in
 226 // http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
 227 // The resulting tag is canonicalized using the default canonicalization type.
 228 func Parse(s string) (t Tag, err error) {
 229         return Default.Parse(s)
 230 }
 231
 232 // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
 233 // failed it returns an error and any part of the tag that could be parsed.
 234 // If parsing succeeded but an unknown value was found, it returns
 235 // ValueError. The Tag returned in this case is just stripped of the unknown
 236 // value. All other values are preserved. It accepts tags in the BCP 47 format
 237 // and extensions to this standard defined in
 238 // http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
 239 // The resulting tag is canonicalized using the the canonicalization type c.
 240 func (c CanonType) Parse(s string) (t Tag, err error) {
 241         // TODO: consider supporting old-style locale key-value pairs.
 242         if s == "" {
 243                 return und, errSyntax
 244         }
 245         if len(s) <= maxAltTaglen {
 246                 b := [maxAltTaglen]byte{}
 247                 for i, c := range s {
 248                         // Generating invalid UTF-8 is okay as it won't match.
 249                         if 'A' <= c && c <= 'Z' {
 250                                 c += 'a' - 'A'
 251                         } else if c == '_' {
 252                                 c = '-'
 253                         }
 254                         b[i] = byte(c)
 255                 }
 256                 if t, ok := grandfathered(b); ok {
 257                         return t, nil
 258                 }
 259         }
 260         scan := makeScannerString(s)
 261         t, err = parse(&scan, s)
 262         t, changed := t.canonicalize(c)
 263         if changed {
 264                 t.remakeString()
 265         }
 266         return t, err
 267 }
 268
// parse builds a Tag from the tokens produced by scan, which must already be
// positioned on its first token. s is the original input string; it is reused
// as the tag's string form when it matches the normalized buffer.
// It returns the tag parsed so far together with the error recorded by the
// scanner, or errSyntax directly when the first token cannot start a tag.
func parse(scan *scanner, s string) (t Tag, err error) {
	t = und
	var end int
	if n := len(scan.token); n <= 1 {
		// No token or a single-character token: the only valid form is a tag
		// that consists solely of a private-use ('x') extension.
		scan.toLower(0, len(scan.b))
		if n == 0 || scan.token[0] != 'x' {
			return t, errSyntax
		}
		end = parseExtensions(scan)
	} else if n >= 4 {
		// A first token of four or more characters is rejected outright.
		return und, errSyntax
	} else { // the usual case
		t, end = parseTag(scan)
		if n := len(scan.token); n == 1 {
			// A single-character token after the core tag starts the
			// extensions.
			t.pExt = uint16(end)
			end = parseExtensions(scan)
		} else if end < len(scan.b) {
			// Something follows that is neither variant nor extension: flag
			// it and cut it off.
			scan.setError(errSyntax)
			scan.b = scan.b[:end]
		}
	}
	if int(t.pVariant) < len(scan.b) {
		// The buffer extends past the position recorded in pVariant, so the
		// tag needs its string form. Reuse the caller's string when
		// tag.Compare reports it equal to the buffer, otherwise copy the
		// buffer.
		if end < len(s) {
			s = s[:end]
		}
		if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
			t.str = s
		} else {
			t.str = string(scan.b)
		}
	} else {
		// Nothing beyond the core tag: no string form is stored and the
		// offsets are reset.
		t.pVariant, t.pExt = 0, 0
	}
	return t, scan.err
}
 304
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
// The scanner must be positioned on the language token. Recognized subtags are
// rewritten in the buffer to the String form of their IDs; unrecognized
// script and region tokens are removed from the buffer and the error returned
// by the lookup is recorded on the scanner.
func parseTag(scan *scanner) (t Tag, end int) {
	var e error
	// TODO: set an error if an unknown lang, script or region is encountered.
	t.lang, e = getLangID(scan.token)
	scan.setError(e)
	scan.replace(t.lang.String())
	langStart := scan.start
	end = scan.scan()
	// Extended language subtags: three-character tokens starting with a letter.
	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
		// to a tag of the form <extlang>.
		lang, e := getLangID(scan.token)
		if lang != 0 {
			// Known extlang: it becomes the language, overwriting the
			// language already in the buffer.
			// NOTE(review): the separator is written at langStart+3, which
			// presumably assumes lang.String() is three bytes long — confirm.
			t.lang = lang
			copy(scan.b[langStart:], lang.String())
			scan.b[langStart+3] = '-'
			scan.start = langStart + 4
		}
		// The extlang token itself is always removed from the buffer.
		scan.gobble(e)
		end = scan.scan()
	}
	// Script: a four-character token starting with a letter.
	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
		t.script, e = getScriptID(script, scan.token)
		if t.script == 0 {
			scan.gobble(e)
		}
		end = scan.scan()
	}
	// Region: a token of two or three characters.
	if n := len(scan.token); n >= 2 && n <= 3 {
		t.region, e = getRegionID(scan.token)
		if t.region == 0 {
			scan.gobble(e)
		} else {
			scan.replace(t.region.String())
		}
		end = scan.scan()
	}
	// Everything from the current token onwards is lower-cased before the
	// variants are parsed.
	scan.toLower(scan.start, len(scan.b))
	t.pVariant = byte(end)
	end = parseVariants(scan, end, t)
	t.pExt = uint16(end)
	return t, end
}
 350
// separator is the subtag delimiter passed to bytes.Join when tokens are
// re-assembled after sorting.
var separator = []byte{'-'}
 352
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
// end is the end position of the material parsed so far; the returned value is
// the end position after the variants. Tokens of four or more characters that
// are not found in variantIndex are removed from the buffer and recorded as a
// ValueError. Variants that are not in ascending variantIndex order are
// sorted and written back to the buffer. The parameter t is not used.
func parseVariants(scan *scanner, end int, t Tag) int {
	start := scan.start
	// Fixed-size arrays back the first four variants; append grows the slices
	// beyond that.
	varIDBuf := [4]uint8{}
	variantBuf := [4][]byte{}
	varID := varIDBuf[:0]
	variant := variantBuf[:0]
	last := -1
	needSort := false
	for ; len(scan.token) >= 4; scan.scan() {
		// TODO: measure the impact of needing this conversion and redesign
		// the data structure if there is an issue.
		v, ok := variantIndex[string(scan.token)]
		if !ok {
			// unknown variant
			// TODO: allow user-defined variants?
			scan.gobble(mkErrInvalid(scan.token))
			continue
		}
		varID = append(varID, v)
		variant = append(variant, scan.token)
		if !needSort {
			if last < int(v) {
				last = int(v)
			} else {
				// Index is not strictly increasing: out of order or a
				// duplicate, so the list must be sorted afterwards.
				needSort = true
				// There are no legal combinations of more than 7 variants
				// (and this is by no means a useful sequence).
				// NOTE(review): this length check only runs on the iteration
				// that first detects out-of-order input — confirm intent.
				const maxVariants = 8
				if len(varID) > maxVariants {
					break
				}
			}
		}
		end = scan.end
	}
	if needSort {
		sort.Sort(variantsSort{varID, variant})
		// Compact the sorted lists in place, keeping one entry per index.
		k, l := 0, -1
		for i, v := range varID {
			w := int(v)
			if l == w {
				// Remove duplicates.
				continue
			}
			varID[k] = varID[i]
			variant[k] = variant[i]
			k++
			l = w
		}
		// bytes.Join copies the variants, so the buffer can be resized and
		// overwritten safely afterwards.
		if str := bytes.Join(variant[:k], separator); len(str) == 0 {
			end = start - 1
		} else {
			scan.resizeRange(start, end, len(str))
			copy(scan.b[scan.start:], str)
			end = scan.end
		}
	}
	return end
}
 414
 415 type variantsSort struct {
 416         i []uint8
 417         v [][]byte
 418 }
 419
 420 func (s variantsSort) Len() int {
 421         return len(s.i)
 422 }
 423
 424 func (s variantsSort) Swap(i, j int) {
 425         s.i[i], s.i[j] = s.i[j], s.i[i]
 426         s.v[i], s.v[j] = s.v[j], s.v[i]
 427 }
 428
 429 func (s variantsSort) Less(i, j int) bool {
 430         return s.i[i] < s.i[j]
 431 }
 432
 433 type bytesSort [][]byte
 434
 435 func (b bytesSort) Len() int {
 436         return len(b)
 437 }
 438
 439 func (b bytesSort) Swap(i, j int) {
 440         b[i], b[j] = b[j], b[i]
 441 }
 442
 443 func (b bytesSort) Less(i, j int) bool {
 444         return bytes.Compare(b[i], b[j]) == -1
 445 }
 446
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
// The scanner must be positioned on the singleton of the first extension.
// Extensions are sorted with bytesSort; a private-use ('x') extension ends
// the list and is always placed last.
func parseExtensions(scan *scanner) int {
	start := scan.start
	exts := [][]byte{}
	private := []byte{}
	end := scan.end
	for len(scan.token) == 1 {
		extStart := scan.start
		ext := scan.token[0]
		end = parseExtension(scan)
		extension := scan.b[extStart:end]
		if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
			// The extension is too short to carry a value: flag it and leave
			// it out of the result.
			scan.setError(errSyntax)
			end = extStart
			continue
		} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
			// Fast path: this is the first extension and it is either
			// private-use or the scan position has reached the end of the
			// buffer, so no reordering is needed.
			scan.b = scan.b[:end]
			return end
		} else if ext == 'x' {
			// Private use consumes the remainder; stop collecting.
			private = extension
			break
		}
		exts = append(exts, extension)
	}
	sort.Sort(bytesSort(exts))
	if len(private) > 0 {
		exts = append(exts, private)
	}
	// Rebuild the extension part of the buffer from the sorted pieces.
	// bytes.Join copies them before append overwrites the buffer.
	scan.b = scan.b[:start]
	if len(exts) > 0 {
		scan.b = append(scan.b, bytes.Join(exts, separator)...)
	} else if start > 0 {
		// Strip trailing '-'.
		scan.b = scan.b[:start-1]
	}
	return end
}
 486
// parseExtension parses a single extension and returns the position of
// the extension end.
// The scanner must be positioned on the extension's singleton. The singleton
// selects the grammar: 'u' takes attributes followed by key/value groups,
// 't' takes an optional language tag followed by fields, 'x' takes any
// tokens, and every other singleton takes tokens of at least two characters.
func parseExtension(scan *scanner) int {
	start, end := scan.start, scan.end
	switch scan.token[0] {
	case 'u':
		attrStart := end
		scan.scan()
		// Attributes: tokens longer than two characters.
		// NOTE(review): last starts out empty, so the comparison below holds
		// for the first attribute and the re-sort path appears to be taken
		// whenever any attribute is present — confirm intent.
		for last := []byte{}; len(scan.token) > 2; scan.scan() {
			if bytes.Compare(scan.token, last) != -1 {
				// Attributes are unsorted. Start over from scratch.
				p := attrStart + 1
				scan.next = p
				attrs := [][]byte{}
				for scan.scan(); len(scan.token) > 2; scan.scan() {
					attrs = append(attrs, scan.token)
					end = scan.end
				}
				sort.Sort(bytesSort(attrs))
				// bytes.Join copies the attributes before they are written
				// back over their original location.
				copy(scan.b[p:], bytes.Join(attrs, separator))
				break
			}
			last = scan.token
			end = scan.end
		}
		// Keys: two-character tokens, each followed by values of three or
		// more characters.
		var last, key []byte
		for attrEnd := end; len(scan.token) == 2; last = key {
			key = scan.token
			keyEnd := scan.end
			end = scan.acceptMinSize(3)
			// TODO: check key value validity
			if keyEnd == end || bytes.Compare(key, last) != 1 {
				// We have an invalid key or the keys are not sorted.
				// Start scanning keys from scratch and reorder.
				p := attrEnd + 1
				scan.next = p
				keys := [][]byte{}
				for scan.scan(); len(scan.token) == 2; {
					keyStart, keyEnd := scan.start, scan.end
					end = scan.acceptMinSize(3)
					if keyEnd != end {
						keys = append(keys, scan.b[keyStart:end])
					} else {
						// A key without any value is dropped.
						scan.setError(errSyntax)
						end = keyStart
					}
				}
				sort.Sort(bytesSort(keys))
				reordered := bytes.Join(keys, separator)
				if e := p + len(reordered); e < end {
					// Dropped keys made the section shorter: close the gap.
					scan.deleteRange(e, end)
					end = e
				}
				copy(scan.b[p:], bytes.Join(keys, separator))
				break
			}
		}
	case 't':
		scan.scan()
		// A token of two or three characters whose second character is a
		// letter starts a language tag inside the extension.
		if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
			_, end = parseTag(scan)
			scan.toLower(start, end)
		}
		// Fields: two-character tokens whose second character is a digit,
		// each followed by values of three or more characters.
		for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
			end = scan.acceptMinSize(3)
		}
	case 'x':
		// Private use: every remaining token is accepted.
		end = scan.acceptMinSize(1)
	default:
		end = scan.acceptMinSize(2)
	}
	return end
}
 560
 561 // Compose creates a Tag from individual parts, which may be of type Tag, Base,
 562 // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
 563 // Base, Script or Region or slice of type Variant or Extension is passed more
 564 // than once, the latter will overwrite the former. Variants and Extensions are
 565 // accumulated, but if two extensions of the same type are passed, the latter
 566 // will replace the former. A Tag overwrites all former values and typically
 567 // only makes sense as the first argument. The resulting tag is returned after
 568 // canonicalizing using the Default CanonType. If one or more errors are
 569 // encountered, one of the errors is returned.
 570 func Compose(part ...interface{}) (t Tag, err error) {
 571         return Default.Compose(part...)
 572 }
 573
 574 // Compose creates a Tag from individual parts, which may be of type Tag, Base,
 575 // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
 576 // Base, Script or Region or slice of type Variant or Extension is passed more
 577 // than once, the latter will overwrite the former. Variants and Extensions are
 578 // accumulated, but if two extensions of the same type are passed, the latter
 579 // will replace the former. A Tag overwrites all former values and typically
 580 // only makes sense as the first argument. The resulting tag is returned after
 581 // canonicalizing using CanonType c. If one or more errors are encountered,
 582 // one of the errors is returned.
 583 func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
 584         var b builder
 585         if err = b.update(part...); err != nil {
 586                 return und, err
 587         }
 588         t, _ = b.tag.canonicalize(c)
 589
 590         if len(b.ext) > 0 || len(b.variant) > 0 {
 591                 sort.Sort(sortVariant(b.variant))
 592                 sort.Strings(b.ext)
 593                 if b.private != "" {
 594                         b.ext = append(b.ext, b.private)
 595                 }
 596                 n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...)
 597                 buf := make([]byte, n)
 598                 p := t.genCoreBytes(buf)
 599                 t.pVariant = byte(p)
 600                 p += appendTokens(buf[p:], b.variant...)
 601                 t.pExt = uint16(p)
 602                 p += appendTokens(buf[p:], b.ext...)
 603                 t.str = string(buf[:p])
 604         } else if b.private != "" {
 605                 t.str = b.private
 606                 t.remakeString()
 607         }
 608         return
 609 }
 610
// builder accumulates the parts passed to Compose before the final Tag is
// assembled.
type builder struct {
	tag Tag // language, script and region collected so far

	private string // the x extension
	ext     []string // all other extensions, each including its singleton
	variant []string // variant subtags

	// err is set to errInvalidArgument by update when an empty Variant or
	// Extension is supplied. Nothing in the visible code reads it back.
	err error
}
 620
 621 func (b *builder) addExt(e string) {
 622         if e == "" {
 623         } else if e[0] == 'x' {
 624                 b.private = e
 625         } else {
 626                 b.ext = append(b.ext, e)
 627         }
 628 }
 629
// errInvalidArgument is recorded by builder.update when it is handed a
// Variant or Extension whose string form is empty.
var errInvalidArgument = errors.New("invalid Extension or Variant")
 631
// update folds the given parts into the builder, in order. Supported part
// types are Tag, Base, Script, Region, Variant, Extension, []Variant,
// []Extension and error; values of any other type are ignored.
// The returned error is the last part of type error that was passed, or nil.
// An empty Variant or Extension is skipped and only recorded in b.err.
func (b *builder) update(part ...interface{}) (err error) {
	// replace overwrites the first element of *l that eq considers equal to s
	// and reports whether the caller is done with s. It also reports true,
	// after recording errInvalidArgument, when s is empty, so that empty
	// values are never appended.
	replace := func(l *[]string, s string, eq func(a, b string) bool) bool {
		if s == "" {
			b.err = errInvalidArgument
			return true
		}
		for i, v := range *l {
			if eq(v, s) {
				(*l)[i] = s
				return true
			}
		}
		return false
	}
	for _, x := range part {
		switch v := x.(type) {
		case Tag:
			b.tag.lang = v.lang
			b.tag.region = v.region
			b.tag.script = v.script
			if v.str != "" {
				// The tag carries variants and/or extensions in its string
				// form; they replace whatever was collected before.
				b.variant = nil
				for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; {
					x, s = nextToken(s)
					b.variant = append(b.variant, x)
				}
				b.ext, b.private = nil, ""
				for i, e := int(v.pExt), ""; i < len(v.str); {
					i, e = getExtension(v.str, i)
					b.addExt(e)
				}
			}
		case Base:
			b.tag.lang = v.langID
		case Script:
			b.tag.script = v.scriptID
		case Region:
			b.tag.region = v.regionID
		case Variant:
			// Identical variants are stored once.
			if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) {
				b.variant = append(b.variant, v.variant)
			}
		case Extension:
			// Extensions are matched on their first byte (the singleton): a
			// later extension of the same type replaces the earlier one.
			if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) {
				b.addExt(v.s)
			}
		case []Variant:
			// A slice replaces all variants collected so far.
			b.variant = nil
			for _, x := range v {
				b.update(x)
			}
		case []Extension:
			// A slice replaces all extensions collected so far.
			b.ext, b.private = nil, ""
			for _, e := range v {
				b.update(e)
			}
		// TODO: support parsing of raw strings based on morphology or just extensions?
		case error:
			err = v
		}
	}
	return
}
 695
 696 func tokenLen(token ...string) (n int) {
 697         for _, t := range token {
 698                 n += len(t) + 1
 699         }
 700         return
 701 }
 702
 703 func appendTokens(b []byte, token ...string) int {
 704         p := 0
 705         for _, t := range token {
 706                 b[p] = '-'
 707                 copy(b[p+1:], t)
 708                 p += 1 + len(t)
 709         }
 710         return p
 711 }
 712
 713 type sortVariant []string
 714
 715 func (s sortVariant) Len() int {
 716         return len(s)
 717 }
 718
 719 func (s sortVariant) Swap(i, j int) {
 720         s[j], s[i] = s[i], s[j]
 721 }
 722
 723 func (s sortVariant) Less(i, j int) bool {
 724         return variantIndex[s[i]] < variantIndex[s[j]]
 725 }
 726
 727 func findExt(list []string, x byte) int {
 728         for i, e := range list {
 729                 if e[0] == x {
 730                         return i
 731                 }
 732         }
 733         return -1
 734 }
 735
 736 // getExtension returns the name, body and end position of the extension.
 737 func getExtension(s string, p int) (end int, ext string) {
 738         if s[p] == '-' {
 739                 p++
 740         }
 741         if s[p] == 'x' {
 742                 return len(s), s[p:]
 743         }
 744         end = nextExtension(s, p)
 745         return end, s[p:end]
 746 }
 747
 748 // nextExtension finds the next extension within the string, searching
 749 // for the -<char>- pattern from position p.
 750 // In the fast majority of cases, language tags will have at most
 751 // one extension and extensions tend to be small.
 752 func nextExtension(s string, p int) int {
 753         for n := len(s) - 3; p < n; {
 754                 if s[p] == '-' {
 755                         if s[p+2] == '-' {
 756                                 return p
 757                         }
 758                         p += 3
 759                 } else {
 760                         p++
 761                 }
 762         }
 763         return len(s)
 764 }
 765
// errInvalidWeight is returned by ParseAcceptLanguage when the quality weight
// of an entry cannot be parsed as a number.
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
 767
 768 // ParseAcceptLanguage parses the contents of a Accept-Language header as
 769 // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
 770 // a list of corresponding quality weights. It is more permissive than RFC 2616
 771 // and may return non-nil slices even if the input is not valid.
 772 // The Tags will be sorted by highest weight first and then by first occurrence.
 773 // Tags with a weight of zero will be dropped. An error will be returned if the
 774 // input could not be parsed.
 775 func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
 776         var entry string
 777         for s != "" {
 778                 if entry, s = split(s, ','); entry == "" {
 779                         continue
 780                 }
 781
 782                 entry, weight := split(entry, ';')
 783
 784                 // Scan the language.
 785                 t, err := Parse(entry)
 786                 if err != nil {
 787                         id, ok := acceptFallback[entry]
 788                         if !ok {
 789                                 return nil, nil, err
 790                         }
 791                         t = Tag{lang: id}
 792                 }
 793
 794                 // Scan the optional weight.
 795                 w := 1.0
 796                 if weight != "" {
 797                         weight = consume(weight, 'q')
 798                         weight = consume(weight, '=')
 799                         // consume returns the empty string when a token could not be
 800                         // consumed, resulting in an error for ParseFloat.
 801                         if w, err = strconv.ParseFloat(weight, 32); err != nil {
 802                                 return nil, nil, errInvalidWeight
 803                         }
 804                         // Drop tags with a quality weight of 0.
 805                         if w <= 0 {
 806                                 continue
 807                         }
 808                 }
 809
 810                 tag = append(tag, t)
 811                 q = append(q, float32(w))
 812         }
 813         sortStable(&tagSort{tag, q})
 814         return tag, q, nil
 815 }
 816
 817 // consume removes a leading token c from s and returns the result or the empty
 818 // string if there is no such token.
 819 func consume(s string, c byte) string {
 820         if s == "" || s[0] != c {
 821                 return ""
 822         }
 823         return strings.TrimSpace(s[1:])
 824 }
 825
 826 func split(s string, c byte) (head, tail string) {
 827         if i := strings.IndexByte(s, c); i >= 0 {
 828                 return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
 829         }
 830         return strings.TrimSpace(s), ""
 831 }
 832
// acceptFallback is a hack mapping that deals with a small number of
// non-standard values that occur in Accept-Language headers with reasonable
// frequency. ParseAcceptLanguage consults it, using the entry exactly as
// written, when an entry fails to parse as a language tag.
var acceptFallback = map[string]langID{
	"english": _en,
	"deutsch": _de,
	"italian": _it,
	"french":  _fr,
	"*":       _mul, // defined in the spec to match all languages.
}
 842
 843 type tagSort struct {
 844         tag []Tag
 845         q   []float32
 846 }
 847
 848 func (s *tagSort) Len() int {
 849         return len(s.q)
 850 }
 851
 852 func (s *tagSort) Less(i, j int) bool {
 853         return s.q[i] > s.q[j]
 854 }
 855
 856 func (s *tagSort) Swap(i, j int) {
 857         s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
 858         s.q[i], s.q[j] = s.q[j], s.q[i]
 859 }