// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
||
|
|
||
|
package number
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
// This file contains a parser for the CLDR number patterns as described in
// http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
//
// The following BNF is derived from this standard.
//
// pattern    := subpattern (';' subpattern)?
// subpattern := affix? number exponent? affix?
// number     := decimal | sigDigits
// decimal    := '#'* '0'* ('.' fraction)? | '#' | '0'
// fraction   := '0'* '#'*
// sigDigits  := '#'* '@' '@'* '#'*
// exponent   := 'E' '+'? '0'* '0'
// padSpec    := '*' \L
//
// Notes:
// - An affix pattern may contain any runes, but runes with special meaning
//   should be escaped.
// - Sequences of digits, '#', and '@' in decimal and sigDigits may have
//   interstitial commas.

// TODO: replace special characters in affixes (-, +, ¤) with control codes.
|
||
|
|
||
|
// Format holds information for formatting numbers. It is designed to hold
|
||
|
// information from CLDR number patterns.
|
||
|
//
|
||
|
// This pattern is precompiled for all patterns for all languages. Even though
|
||
|
// the number of patterns is not very large, we want to keep this small.
|
||
|
//
|
||
|
// This type is only intended for internal use.
|
||
|
type Format struct {
|
||
|
// TODO: this struct can be packed a lot better than it is now. Should be
|
||
|
// possible to make it 32 bytes.
|
||
|
|
||
|
Affix string // includes prefix and suffix. First byte is prefix length.
|
||
|
Offset uint16 // Offset into Affix for prefix and suffix
|
||
|
NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.
|
||
|
|
||
|
Multiplier uint32
|
||
|
RoundIncrement uint32 // Use Min*Digits to determine scale
|
||
|
PadRune rune
|
||
|
|
||
|
FormatWidth uint16
|
||
|
|
||
|
GroupingSize [2]uint8
|
||
|
Flags FormatFlag
|
||
|
|
||
|
// Number of digits.
|
||
|
MinIntegerDigits uint8
|
||
|
MaxIntegerDigits uint8
|
||
|
MinFractionDigits uint8
|
||
|
MaxFractionDigits uint8
|
||
|
MinSignificantDigits uint8
|
||
|
MaxSignificantDigits uint8
|
||
|
MinExponentDigits uint8
|
||
|
}
|
||
|
|
||
|
// A FormatFlag is a bit mask for the flag field of a Format.
type FormatFlag uint8

// Individual flag bits. Each constant occupies its own bit.
const (
	AlwaysSign FormatFlag = 1 << iota
	AlwaysExpSign
	AlwaysDecimalSeparator
	ParenthesisForNegative // Common pattern. Saves space.

	// The next two bits together encode where padding is inserted; see the
	// Pad* aliases below.
	PadAfterNumber
	PadAfterAffix
)

// Padding positions, expressed as combinations of the PadAfterNumber and
// PadAfterAffix bits. PadMask selects exactly those two bits.
const (
	PadMask = PadAfterNumber | PadAfterAffix

	PadBeforePrefix = 0 // Default
	PadAfterPrefix  = PadAfterAffix
	PadBeforeSuffix = PadAfterNumber
	PadAfterSuffix  = PadAfterNumber | PadAfterAffix
)
|
||
|
|
||
|
type parser struct {
|
||
|
*Format
|
||
|
|
||
|
leadingSharps int
|
||
|
|
||
|
pos int
|
||
|
err error
|
||
|
doNotTerminate bool
|
||
|
groupingCount uint
|
||
|
hasGroup bool
|
||
|
buf []byte
|
||
|
}
|
||
|
|
||
|
func (p *parser) setError(err error) {
|
||
|
if p.err == nil {
|
||
|
p.err = err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (p *parser) updateGrouping() {
|
||
|
if p.hasGroup && p.groupingCount < 255 {
|
||
|
p.GroupingSize[1] = p.GroupingSize[0]
|
||
|
p.GroupingSize[0] = uint8(p.groupingCount)
|
||
|
}
|
||
|
p.groupingCount = 0
|
||
|
p.hasGroup = true
|
||
|
}
|
||
|
|
||
|
// Sentinel errors reported while parsing a number pattern.
//
// TODO: more sensible and localizeable error messages.
var (
	// Affix errors.
	errAffixTooLarge         = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
	errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
	errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
	errInvalidQuote          = errors.New("format: invalid quote")

	// Pad specifier errors.
	errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")
	errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")

	// Reported when the pattern ends where a state requires more input.
	errUnexpectedEnd = errors.New("format: unexpected end of pattern")
)
|
||
|
|
||
|
// ParsePattern extracts formatting information from a CLDR number pattern.
|
||
|
//
|
||
|
// See http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
|
||
|
func ParsePattern(s string) (f *Format, err error) {
|
||
|
p := parser{Format: &Format{}}
|
||
|
|
||
|
s = p.parseSubPattern(s)
|
||
|
|
||
|
if s != "" {
|
||
|
// Parse negative sub pattern.
|
||
|
if s[0] != ';' {
|
||
|
p.setError(errors.New("format: error parsing first sub pattern"))
|
||
|
return nil, p.err
|
||
|
}
|
||
|
neg := parser{Format: &Format{}} // just for extracting the affixes.
|
||
|
s = neg.parseSubPattern(s[len(";"):])
|
||
|
p.NegOffset = uint16(len(p.buf))
|
||
|
p.buf = append(p.buf, neg.buf...)
|
||
|
}
|
||
|
if s != "" {
|
||
|
p.setError(errors.New("format: spurious characters at end of pattern"))
|
||
|
}
|
||
|
if p.err != nil {
|
||
|
return nil, p.err
|
||
|
}
|
||
|
if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
|
||
|
// No prefix or suffixes.
|
||
|
p.NegOffset = 0
|
||
|
} else {
|
||
|
p.Affix = affix
|
||
|
}
|
||
|
return p.Format, nil
|
||
|
}
|
||
|
|
||
|
func (p *parser) parseSubPattern(s string) string {
|
||
|
s = p.parsePad(s, PadBeforePrefix)
|
||
|
s = p.parseAffix(s)
|
||
|
s = p.parsePad(s, PadAfterPrefix)
|
||
|
|
||
|
s = p.parse(p.number, s)
|
||
|
|
||
|
s = p.parsePad(s, PadBeforeSuffix)
|
||
|
s = p.parseAffix(s)
|
||
|
s = p.parsePad(s, PadAfterSuffix)
|
||
|
return s
|
||
|
}
|
||
|
|
||
|
func (p *parser) parsePad(s string, f FormatFlag) (tail string) {
|
||
|
if len(s) >= 2 && s[0] == '*' {
|
||
|
r, sz := utf8.DecodeRuneInString(s[1:])
|
||
|
if p.PadRune != 0 {
|
||
|
p.err = errMultiplePadSpecifiers
|
||
|
} else {
|
||
|
p.Flags |= f
|
||
|
p.PadRune = r
|
||
|
}
|
||
|
return s[1+sz:]
|
||
|
}
|
||
|
return s
|
||
|
}
|
||
|
|
||
|
func (p *parser) parseAffix(s string) string {
|
||
|
x := len(p.buf)
|
||
|
p.buf = append(p.buf, 0) // placeholder for affix length
|
||
|
|
||
|
s = p.parse(p.affix, s)
|
||
|
|
||
|
n := len(p.buf) - x - 1
|
||
|
if n > 0xFF {
|
||
|
p.setError(errAffixTooLarge)
|
||
|
}
|
||
|
p.buf[x] = uint8(n)
|
||
|
return s
|
||
|
}
|
||
|
|
||
|
// A state is one step of the pattern state machine. It consumes the rune r
// and returns the state to apply to the next rune, or nil if r is not part of
// the construct being parsed. A state may record an error on the parser, or
// simply return on an incorrect token and leave it to the next phase to fail.
type state func(r rune) state
|
||
|
|
||
|
// parse repeatedly applies a state function on the given string until a
|
||
|
// termination condition is reached.
|
||
|
func (p *parser) parse(fn state, s string) (tail string) {
|
||
|
for i, r := range s {
|
||
|
p.doNotTerminate = false
|
||
|
if fn = fn(r); fn == nil || p.err != nil {
|
||
|
return s[i:]
|
||
|
}
|
||
|
p.FormatWidth++
|
||
|
}
|
||
|
if p.doNotTerminate {
|
||
|
p.setError(errUnexpectedEnd)
|
||
|
}
|
||
|
return ""
|
||
|
}
|
||
|
|
||
|
func (p *parser) affix(r rune) state {
|
||
|
switch r {
|
||
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
||
|
'#', '@', '.', '*', ',', ';':
|
||
|
return nil
|
||
|
case '\'':
|
||
|
return p.escape
|
||
|
case '%':
|
||
|
if p.Multiplier != 0 {
|
||
|
p.setError(errDuplicatePercentSign)
|
||
|
}
|
||
|
p.Multiplier = 100
|
||
|
case '\u2030': // ‰ Per mille
|
||
|
if p.Multiplier != 0 {
|
||
|
p.setError(errDuplicatePermilleSign)
|
||
|
}
|
||
|
p.Multiplier = 1000
|
||
|
// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
|
||
|
}
|
||
|
p.buf = append(p.buf, string(r)...)
|
||
|
return p.affix
|
||
|
}
|
||
|
|
||
|
func (p *parser) escape(r rune) state {
|
||
|
switch r {
|
||
|
case '\'':
|
||
|
return p.affix
|
||
|
default:
|
||
|
p.buf = append(p.buf, string(r)...)
|
||
|
}
|
||
|
return p.escape
|
||
|
}
|
||
|
|
||
|
// number parses a number. The BNF says the integer part should always have
|
||
|
// a '0', but that does not appear to be the case according to the rest of the
|
||
|
// documentation. We will allow having only '#' numbers.
|
||
|
func (p *parser) number(r rune) state {
|
||
|
switch r {
|
||
|
case '#':
|
||
|
p.groupingCount++
|
||
|
p.leadingSharps++
|
||
|
case '@':
|
||
|
p.groupingCount++
|
||
|
p.leadingSharps = 0
|
||
|
return p.sigDigits(r)
|
||
|
case ',':
|
||
|
if p.leadingSharps == 0 { // no leading commas
|
||
|
return nil
|
||
|
}
|
||
|
p.updateGrouping()
|
||
|
case 'E':
|
||
|
p.MaxIntegerDigits = uint8(p.leadingSharps)
|
||
|
return p.exponent
|
||
|
case '.': // allow ".##" etc.
|
||
|
p.updateGrouping()
|
||
|
return p.fraction
|
||
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||
|
return p.integer(r)
|
||
|
default:
|
||
|
return nil
|
||
|
}
|
||
|
return p.number
|
||
|
}
|
||
|
|
||
|
func (p *parser) integer(r rune) state {
|
||
|
if !('0' <= r && r <= '9') {
|
||
|
var next state
|
||
|
switch r {
|
||
|
case 'E':
|
||
|
if p.leadingSharps > 0 {
|
||
|
p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
|
||
|
}
|
||
|
next = p.exponent
|
||
|
case '.':
|
||
|
next = p.fraction
|
||
|
}
|
||
|
p.updateGrouping()
|
||
|
return next
|
||
|
}
|
||
|
p.RoundIncrement = p.RoundIncrement*10 + uint32(r-'0')
|
||
|
p.groupingCount++
|
||
|
p.MinIntegerDigits++
|
||
|
return p.integer
|
||
|
}
|
||
|
|
||
|
func (p *parser) sigDigits(r rune) state {
|
||
|
switch r {
|
||
|
case '@':
|
||
|
p.groupingCount++
|
||
|
p.MaxSignificantDigits++
|
||
|
p.MinSignificantDigits++
|
||
|
case '#':
|
||
|
return p.sigDigitsFinal(r)
|
||
|
case 'E':
|
||
|
p.updateGrouping()
|
||
|
return p.normalizeSigDigitsWithExponent()
|
||
|
default:
|
||
|
p.updateGrouping()
|
||
|
return nil
|
||
|
}
|
||
|
return p.sigDigits
|
||
|
}
|
||
|
|
||
|
func (p *parser) sigDigitsFinal(r rune) state {
|
||
|
switch r {
|
||
|
case '#':
|
||
|
p.groupingCount++
|
||
|
p.MaxSignificantDigits++
|
||
|
case 'E':
|
||
|
p.updateGrouping()
|
||
|
return p.normalizeSigDigitsWithExponent()
|
||
|
default:
|
||
|
p.updateGrouping()
|
||
|
return nil
|
||
|
}
|
||
|
return p.sigDigitsFinal
|
||
|
}
|
||
|
|
||
|
func (p *parser) normalizeSigDigitsWithExponent() state {
|
||
|
p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
|
||
|
p.MinFractionDigits = p.MinSignificantDigits - 1
|
||
|
p.MaxFractionDigits = p.MaxSignificantDigits - 1
|
||
|
p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
|
||
|
return p.exponent
|
||
|
}
|
||
|
|
||
|
func (p *parser) fraction(r rune) state {
|
||
|
switch r {
|
||
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||
|
p.RoundIncrement = p.RoundIncrement*10 + uint32(r-'0')
|
||
|
p.MinFractionDigits++
|
||
|
p.MaxFractionDigits++
|
||
|
case '#':
|
||
|
p.MaxFractionDigits++
|
||
|
case 'E':
|
||
|
if p.leadingSharps > 0 {
|
||
|
p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
|
||
|
}
|
||
|
return p.exponent
|
||
|
default:
|
||
|
return nil
|
||
|
}
|
||
|
return p.fraction
|
||
|
}
|
||
|
|
||
|
func (p *parser) exponent(r rune) state {
|
||
|
switch r {
|
||
|
case '+':
|
||
|
// Set mode and check it wasn't already set.
|
||
|
if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
|
||
|
break
|
||
|
}
|
||
|
p.Flags |= AlwaysExpSign
|
||
|
p.doNotTerminate = true
|
||
|
return p.exponent
|
||
|
case '0':
|
||
|
p.MinExponentDigits++
|
||
|
return p.exponent
|
||
|
}
|
||
|
// termination condition
|
||
|
if p.MinExponentDigits == 0 {
|
||
|
p.setError(errors.New("format: need at least one digit"))
|
||
|
}
|
||
|
return nil
|
||
|
}
|