1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
12 // This file contains a parser for the CLDR number patterns as described in
13 // http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
15 // The following BNF is derived from this standard.
17 // pattern := subpattern (';' subpattern)?
18 // subpattern := affix? number exponent? affix?
19 // number := decimal | sigDigits
20 // decimal := '#'* '0'* ('.' fraction)? | '#' | '0'
21 // fraction := '0'* '#'*
22 // sigDigits := '#'* '@' '@'* '#'*
23 // exponent := 'E' '+'? '0'* '0'
27 // - An affix pattern may contain any runes, but runes with special meaning
29 // - Sequences of digits, '#', and '@' in decimal and sigDigits may have
30 // interstitial commas.
32 // TODO: replace special characters in affixes (-, +, ¤) with control codes.
34 // Pattern holds information for formatting numbers. It is designed to hold
35 // information from CLDR number patterns.
37 // Patterns are precompiled for all languages. Even though
38 // the number of patterns is not very large, we want to keep this small.
40 // This type is only intended for internal use.
44 Affix string // includes prefix and suffix. First byte is prefix length.
45 Offset uint16 // Offset into Affix for prefix and suffix
46 NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.
54 // A RoundingContext indicates how a number should be converted to digits.
55 // It contains all information needed to determine the "visible digits" as
56 // required by the pluralization rules.
57 type RoundingContext struct {
58 // TODO: unify these two fields so that precision handling has a single,
59 // unambiguous meaning.
// MaxSignificantDigits is what precision() returns; scale() tests it
// against zero to decide whether a precision is set.
60 MaxSignificantDigits int16 // -1 is unlimited
// MaxFractionDigits is what scale() returns on its visible return path.
61 MaxFractionDigits int16 // -1 is unlimited
64 IncrementScale uint8 // May differ from printed scale.
68 DigitShift uint8 // Number of decimals to shift. Used for % and ‰.
71 MinIntegerDigits uint8
73 MaxIntegerDigits uint8
// MinFractionDigits is assigned together with MaxFractionDigits by SetScale.
74 MinFractionDigits uint8
75 MinSignificantDigits uint8
// isScientific reports true when MinExponentDigits is greater than zero.
77 MinExponentDigits uint8
// scale reports the rounding scale of r as an int. The visible return path
// yields MaxFractionDigits; when MaxSignificantDigits is non-zero a separate
// branch is taken which, per the comment below, yields 0 (that return
// statement is on a line missing from this excerpt).
// NOTE(review): -1 ("unlimited") is also non-zero and therefore counts as
// "precision is set" here — confirm this is intended.
80 func (r *RoundingContext) scale() int {
81 // scale is 0 when precision is set.
82 if r.MaxSignificantDigits != 0 {
85 return int(r.MaxFractionDigits)
// precision returns MaxSignificantDigits widened to int. The value is passed
// through unchanged, so the field's -1 "unlimited" marker is returned as -1.
88 func (r *RoundingContext) precision() int { return int(r.MaxSignificantDigits) }
90 // SetScale fixes the RoundingContext to a fixed number of fraction digits.
// Both MinFractionDigits and MaxFractionDigits are assigned from scale. No
// range check is performed: the uint8 conversion keeps only the low 8 bits,
// so a scale outside 0..255 leaves the two fields disagreeing.
91 func (r *RoundingContext) SetScale(scale int) {
92 r.MinFractionDigits = uint8(scale)
93 r.MaxFractionDigits = int16(scale)
// SetPrecision sets MaxSignificantDigits to prec. The value is narrowed to
// int16 without a range check, so callers must pass a value that fits.
96 func (r *RoundingContext) SetPrecision(prec int) {
97 r.MaxSignificantDigits = int16(prec)
// isScientific reports whether MinExponentDigits is greater than zero, i.e.
// whether at least one exponent digit is required.
100 func (r *RoundingContext) isScientific() bool {
101 return r.MinExponentDigits > 0
// needsSep presumably decides whether a grouping separator belongs at
// position pos, using the pattern's two GroupingSize entries. The local p
// and every return statement are on lines missing from this excerpt, so only
// the visible conditions are described:
//   - nothing further is checked when GroupingSize[0] or p is zero;
//   - p is reduced by GroupingSize[0] and tested for going negative;
//   - GroupingSize[1] is consulted only when it is non-zero.
104 func (f *Pattern) needsSep(pos int) bool {
106 size := int(f.GroupingSize[0])
107 if size == 0 || p == 0 {
113 if p -= size; p < 0 {
116 // TODO: make second groupingsize the same as first if 0 so that we can
118 if x := int(f.GroupingSize[1]); x != 0 {
124 // A PatternFlag is a bit mask for the flag field of a Pattern.
// The flag constants are declared with 1 << iota, so each occupies its own
// bit; they are combined with | and tested with &.
125 type PatternFlag uint8
128 AlwaysSign PatternFlag = 1 << iota
129 ElideSign // Use space instead of plus sign. AlwaysSign must be true.
131 AlwaysDecimalSeparator
132 ParenthesisForNegative // Common pattern. Saves space.
// The pad position is expressed through the two flags PadAfterNumber and
// PadAfterAffix (declared on lines missing from this excerpt). The names
// below cover the four combinations of those flags, and PadMask selects
// both of them.
137 PadBeforePrefix = 0 // Default
138 PadAfterPrefix = PadAfterAffix
139 PadBeforeSuffix = PadAfterNumber
140 PadAfterSuffix = PadAfterNumber | PadAfterAffix
141 PadMask = PadAfterNumber | PadAfterAffix
157 func (p *parser) setError(err error) {
// updateGrouping shifts the grouping sizes: the previous GroupingSize[0]
// moves to GroupingSize[1] and the current groupingCount becomes the new
// GroupingSize[0]. The visible part of the guard requires
// 0 < groupingCount < 255, so the uint8 conversion cannot truncate; the
// first half of the condition is on a line missing from this excerpt.
163 func (p *parser) updateGrouping() {
165 0 < p.groupingCount && p.groupingCount < 255 {
166 p.GroupingSize[1] = p.GroupingSize[0]
167 p.GroupingSize[0] = uint8(p.groupingCount)
174 // TODO: more sensible and localizable error messages.
175 errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
176 errInvalidPadSpecifier = errors.New("format: invalid pad specifier")
177 errInvalidQuote = errors.New("format: invalid quote")
178 errAffixTooLarge = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
179 errDuplicatePercentSign = errors.New("format: duplicate percent sign")
180 errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
181 errUnexpectedEnd = errors.New("format: unexpected end of pattern")
184 // ParsePattern extracts formatting information from a CLDR number pattern.
186 // See http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
//
// The first subpattern is parsed into the returned Pattern. A second
// subpattern is parsed with a throwaway parser and only its affix bytes are
// kept: they are appended to the first parser's affix buffer, and NegOffset
// records the position at which they start. On the visible success path the
// function returns the populated Pattern and a nil error; the error returns
// are on lines missing from this excerpt.
187 func ParsePattern(s string) (f *Pattern, err error) {
188 p := parser{Pattern: &Pattern{}}
190 s = p.parseSubPattern(s)
193 // Parse negative sub pattern.
195 p.setError(errors.New("format: error parsing first sub pattern"))
198 neg := parser{Pattern: &Pattern{}} // just for extracting the affixes.
// Skip the ';' separator before parsing the second subpattern.
199 s = neg.parseSubPattern(s[len(";"):])
// The negative affixes start where the positive ones end.
200 p.NegOffset = uint16(len(p.buf))
201 p.buf = append(p.buf, neg.buf...)
204 p.setError(errors.New("format: spurious characters at end of pattern"))
// parseAffix reserves one length byte per affix, so a buffer holding only
// two (or, with a second subpattern, four) zero bytes presumably means every
// affix is empty.
209 if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
210 // No prefix or suffixes.
215 if p.Increment == 0 {
218 return p.Pattern, nil
// parseSubPattern parses a single subpattern from the front of s. In the
// visible steps it probes for a pad specifier at each of the four pad
// positions and runs the number state machine in between. Each step
// reassigns s with the result of the call. The remaining statements,
// including the return, are on lines missing from this excerpt.
221 func (p *parser) parseSubPattern(s string) string {
222 s = p.parsePad(s, PadBeforePrefix)
224 s = p.parsePad(s, PadAfterPrefix)
226 s = p.parse(p.number, s)
229 s = p.parsePad(s, PadBeforeSuffix)
231 s = p.parsePad(s, PadAfterSuffix)
// parsePad checks for a pad specifier at the start of s: a '*' followed by
// at least one more byte, from which the pad rune is decoded. f names the
// pad position being probed. errMultiplePadSpecifiers is recorded under a
// condition that is on a line missing from this excerpt.
// NOTE(review): the error is assigned to p.err directly rather than through
// setError as elsewhere in this file — confirm the difference is intended.
235 func (p *parser) parsePad(s string, f PatternFlag) (tail string) {
236 if len(s) >= 2 && s[0] == '*' {
237 r, sz := utf8.DecodeRuneInString(s[1:])
239 p.err = errMultiplePadSpecifiers
// parseAffix appends one affix to p.buf in length-prefixed form: a zero
// placeholder byte is reserved for the length, then the affix state machine
// appends the affix bytes. n is computed as len(p.buf) - x - 1; x is
// assigned on a line missing from this excerpt (presumably the placeholder's
// index, which would make n the affix length in bytes — confirm).
// errAffixTooLarge is reported under a condition that is likewise not
// visible here.
249 func (p *parser) parseAffix(s string) string {
251 p.buf = append(p.buf, 0) // placeholder for affix length
253 s = p.parse(p.affix, s)
255 n := len(p.buf) - x - 1
257 p.setError(errAffixTooLarge)
263 // state implements a state transition. It returns the new state. A state
264 // function may set an error on the parser or may simply return on an incorrect
265 // token and let the next phase fail.
// parser.parse checks for a nil state, or a recorded error, after every
// rune it feeds to a state function.
266 type state func(r rune) state
268 // parse repeatedly applies a state function on the given string until a
269 // termination condition is reached.
// After each rune the loop checks for a nil next state or a recorded error.
// doNotTerminate is cleared before every rune, so it is only still set if
// the most recently run state function set it; in that case
// errUnexpectedEnd is reported. The return statements are on lines missing
// from this excerpt.
270 func (p *parser) parse(fn state, s string) (tail string) {
271 for i, r := range s {
272 p.doNotTerminate = false
273 if fn = fn(r); fn == nil || p.err != nil {
278 if p.doNotTerminate {
279 p.setError(errUnexpectedEnd)
// affix is the state function for affix text. Several case labels, case
// bodies and all returns are on lines missing from this excerpt; the
// visible parts show:
//   - ASCII digits and the runes '#', '@', '.', '*', ',', ';' share one case;
//   - two branches (the second one for '\u2030', per mille) test DigitShift
//     and report a duplicate-sign error when it is already non-zero;
//   - a rune is appended to p.buf in its UTF-8 encoding.
// NOTE(review): both duplicate checks test the same DigitShift field, so a
// pattern that mixes the two signs would be reported with the error of
// whichever sign comes second — confirm that message is acceptable.
284 func (p *parser) affix(r rune) state {
286 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
287 '#', '@', '.', '*', ',', ';':
293 if p.DigitShift != 0 {
294 p.setError(errDuplicatePercentSign)
297 case '\u2030': // ‰ Per mille
298 if p.DigitShift != 0 {
299 p.setError(errDuplicatePermilleSign)
302 // TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
304 p.buf = append(p.buf, string(r)...)
// escapeFirst is a state function, presumably for the rune that directly
// follows an opening quote (inferred from the name — confirm). In the
// visible statements one branch appends the two bytes `\'` to p.buf, and
// another appends a single quote followed by the current rune. The branch
// conditions and the returned states are on lines missing from this excerpt.
308 func (p *parser) escapeFirst(r rune) state {
311 p.buf = append(p.buf, "\\'"...)
314 p.buf = append(p.buf, '\'')
315 p.buf = append(p.buf, string(r)...)
// escape is a state function, presumably for runes inside a quoted section
// (inferred from the name — confirm). The visible statements append a single
// quote to p.buf in one place and the current rune in another; the
// conditions and the returned states are on lines missing from this excerpt.
320 func (p *parser) escape(r rune) state {
324 p.buf = append(p.buf, '\'')
327 p.buf = append(p.buf, string(r)...)
332 // number parses a number. The BNF says the integer part should always have
333 // a '0', but that does not appear to be the case according to the rest of the
334 // documentation. We will allow having only '#' numbers.
//
// Visible transitions: one case hands the rune to sigDigits, '.' is accepted
// without any preceding integer digits, and ASCII digits have their own
// case. MaxIntegerDigits is set from leadingSharps in one branch. The other
// case labels and the returned states are on lines missing from this
// excerpt.
335 func (p *parser) number(r rune) state {
343 return p.sigDigits(r)
345 if p.leadingSharps == 0 { // no leading commas
350 p.MaxIntegerDigits = uint8(p.leadingSharps)
352 case '.': // allow ".##" etc.
355 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
// integer is the state function for the digits of the integer part. A rune
// outside '0'..'9' takes the non-digit branch, whose body is mostly on lines
// missing from this excerpt. Where leading '#' were seen, MaxIntegerDigits
// is set to leadingSharps + MinIntegerDigits. Each digit is folded into
// Increment as the next base-10 digit.
// NOTE(review): the MaxIntegerDigits sum is computed in uint8 and the
// Increment update in uint32; both wrap silently on overflow — confirm the
// inputs are bounded.
363 func (p *parser) integer(r rune) state {
364 if !('0' <= r && r <= '9') {
368 if p.leadingSharps > 0 {
369 p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
380 p.Increment = p.Increment*10 + uint32(r-'0')
// sigDigits is the state function for the first part of a significant-digit
// specification. In the visible statements one branch increments both
// MaxSignificantDigits and MinSignificantDigits (presumably for '@', per the
// BNF at the top of the file — confirm), another hands the rune to
// sigDigitsFinal, and another returns the result of
// normalizeSigDigitsWithExponent. The case labels are on lines missing from
// this excerpt.
386 func (p *parser) sigDigits(r rune) state {
390 p.MaxSignificantDigits++
391 p.MinSignificantDigits++
393 return p.sigDigitsFinal(r)
396 return p.normalizeSigDigitsWithExponent()
// sigDigitsFinal is the state function for the trailing part of a
// significant-digit specification. In the visible statements one branch
// increments only MaxSignificantDigits (presumably for a trailing '#', per
// the BNF at the top of the file — confirm), one returns the result of
// normalizeSigDigitsWithExponent, and one stays in this state. The case
// labels are on lines missing from this excerpt.
404 func (p *parser) sigDigitsFinal(r rune) state {
408 p.MaxSignificantDigits++
411 return p.normalizeSigDigitsWithExponent()
416 return p.sigDigitsFinal
// normalizeSigDigitsWithExponent rewrites a significant-digit specification
// as integer and fraction digit counts: exactly one integer digit, fraction
// digit bounds equal to the significant-digit bounds minus one, and the
// significant-digit fields reset to zero. The returned state is on a line
// missing from this excerpt.
// NOTE(review): MinSignificantDigits - 1 is uint8 arithmetic and would wrap
// to 255 if the field were still zero — the visible callers appear to run
// after at least one increment, but confirm.
419 func (p *parser) normalizeSigDigitsWithExponent() state {
420 p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
421 p.MinFractionDigits = p.MinSignificantDigits - 1
422 p.MaxFractionDigits = p.MaxSignificantDigits - 1
423 p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
// fraction is the state function for the fraction part of a decimal number.
// ASCII digits are folded into Increment as base-10 digits and appear to
// count towards both MinFractionDigits and MaxFractionDigits; a later
// visible statement raises only MaxFractionDigits. Where leading '#' were
// seen, MaxIntegerDigits is set to leadingSharps + MinIntegerDigits. The
// remaining case labels and the returned states are on lines missing from
// this excerpt.
427 func (p *parser) fraction(r rune) state {
429 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
430 p.Increment = p.Increment*10 + uint32(r-'0')
432 p.MinFractionDigits++
433 p.MaxFractionDigits++
435 p.MaxFractionDigits++
437 if p.leadingSharps > 0 {
438 p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
447 func (p *parser) exponent(r rune) state {
450 // Set mode and check it wasn't already set.
451 if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
454 p.Flags |= AlwaysExpSign
455 p.doNotTerminate = true
458 p.MinExponentDigits++
461 // termination condition
462 if p.MinExponentDigits == 0 {
463 p.setError(errors.New("format: need at least one digit"))