12 tokInvalid token = iota
// String returns a textual form of the token t.
// NOTE(review): only the final fallback is visible in this excerpt; it
// yields "unknown token" when no earlier return applies.
22 func (t token) String() string {
41 return "unknown token"
44 // A scanner holds the scanner's internal state while processing
51 ch rune // current character
52 offset int // character offset
53 rdOffset int // reading offset (position after current character)
// init prepares s for scanning src and finishes by calling next, so that
// s.ch holds the first rune of the input (or the end-of-file marker).
// NOTE(review): the setup between the signature and the s.next() call is not
// visible here; presumably it stores src and resets the offsets — confirm.
56 func (s *scanner) init(src []byte) {
60 s.next() // advance onto the first input rune
// bom is the Unicode byte order mark, U+FEFF; next reports it with an
// "illegal byte order mark" error.
63 const bom = 0xFEFF // byte order mark, always prohibited
65 // next reads the next Unicode char into s.ch.
66 // s.ch < 0 means end-of-file.
// Malformed input (a NUL byte, an invalid UTF-8 sequence, or a byte order
// mark) is reported through s.error.
67 func (s *scanner) next() {
68 if s.rdOffset < len(s.src) {
// Optimistically treat the next byte as a one-byte rune of width 1.
70 r, w := rune(s.src[s.rdOffset]), 1
// NOTE(review): this error is positioned at s.offset+1, whereas the two
// errors below, raised for the same character, use s.offset. Confirm
// whether the differing position is intended or an off-by-one.
73 s.error(s.offset+1, "illegal character NUL")
74 case r >= utf8.RuneSelf:
// Not a single-byte rune: decode the full sequence starting at rdOffset.
76 r, w = utf8.DecodeRune(s.src[s.rdOffset:])
// utf8.DecodeRune signals an invalid encoding by returning (RuneError, 1).
77 if r == utf8.RuneError && w == 1 {
78 s.error(s.offset, "illegal UTF-8 encoding")
80 s.error(s.offset, "illegal byte order mark")
// error reports a scanning failure at offset offs by panicking with a
// parseError that carries the offset and msg; it never returns normally.
// NOTE(review): presumably a caller recovers this panic and turns it into an
// ordinary error value — confirm against the code that drives the scanner.
91 func (s *scanner) error(offs int, msg string) {
92 panic(parseError{pos: offs, msg: msg})
// isLetter reports whether ch counts as a letter for identifier scanning:
// ASCII a-z, A-Z, the underscore, or any rune at or above utf8.RuneSelf for
// which unicode.IsLetter is true. The RuneSelf guard means unicode.IsLetter
// is only consulted for non-ASCII runes.
95 func isLetter(ch rune) bool {
96 return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
// isDigit reports whether ch is a digit: ASCII 0-9, or any rune at or above
// utf8.RuneSelf for which unicode.IsDigit is true. The RuneSelf guard means
// unicode.IsDigit is only consulted for non-ASCII runes.
99 func isDigit(ch rune) bool {
100 return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
// scanIdentifier scans for as long as the current character is a letter or a
// digit (per isLetter and isDigit) and returns the source text from offs up
// to, but not including, the scanner's current offset.
// NOTE(review): the assignment of offs and the loop body are not visible
// here; presumably offs is the offset of the identifier's first character
// and the loop advances with s.next() — confirm.
103 func (s *scanner) scanIdentifier() string {
105 for isLetter(s.ch) || isDigit(s.ch) {
108 return string(s.src[offs:s.offset])
// digitVal returns the numeric value of ch read as a digit. The letters a-f
// and A-F map to 10 through 15, and anything unrecognized yields 16, so a
// comparison of the form digitVal(ch) < base fails for it in every base up
// to 16.
// NOTE(review): the return for the '0'-'9' case is on a line not visible
// here; presumably it is the decimal value of the digit — confirm.
111 func digitVal(ch rune) int {
113 case '0' <= ch && ch <= '9':
115 case 'a' <= ch && ch <= 'f':
116 return int(ch - 'a' + 10)
117 case 'A' <= ch && ch <= 'F':
118 return int(ch - 'A' + 10)
120 return 16 // larger than any legal digit val
// scanMantissa loops for as long as the current character is a digit whose
// value (per digitVal) is below base.
// NOTE(review): the loop body is not visible here; presumably it advances
// the scanner with s.next() — confirm.
123 func (s *scanner) scanMantissa(base int) {
124 for digitVal(s.ch) < base {
// scanNumber scans a numeric literal. Malformed literals are reported through
// s.error at offs.
// NOTE(review): offs and the enclosing conditions are set on lines not
// visible here; presumably offs is the offset where the number starts and
// the checks below apply to literals beginning with '0' — confirm.
129 func (s *scanner) scanNumber() {
// Precondition: the current character is a decimal digit.
130 // digitVal(s.ch) < 10
// An 'x' or 'X' at this point marks a hexadecimal literal.
135 if s.ch == 'x' || s.ch == 'X' {
// No more than two characters consumed since offs means no hex digit
// followed the prefix.
139 if s.offset-offs <= 2 {
140 // only scanned "0x" or "0X"
141 s.error(offs, "illegal hexadecimal number")
// A decimal digit at this point is rejected as a leading zero.
143 } else if digitVal(s.ch) < 10 {
144 s.error(offs, "illegal leading 0 in number")
// scanString scans a string literal whose opening quote has already been
// consumed by the caller. It reports an unterminated literal or an illegal
// backslash through s.error at offs.
// NOTE(review): the scanning loop, the assignment of offs, and the exact
// conditions guarding the two errors are not visible here — confirm.
152 func (s *scanner) scanString() {
153 // "'" opening already consumed
159 s.error(offs, "string literal not terminated")
167 s.error(offs, "illegal backslash in string literal")
// skipWhitespace loops for as long as the current character is a space or a
// horizontal tab; no other character (newline included) matches the loop
// condition.
// NOTE(review): the loop body is not visible here; presumably it advances
// the scanner with s.next() — confirm.
172 func (s *scanner) skipWhitespace() {
173 for s.ch == ' ' || s.ch == '\t' {
// Scan scans one token and returns its start offset pos, its kind tok, and
// its literal text lit. One return path yields tokEOF with an empty literal.
// Illegal input is reported through s.error at pos.
// NOTE(review): much of the dispatch logic is on lines not visible in this
// excerpt; the comments added below describe only what the visible lines show.
178 func (s *scanner) Scan() (pos int, tok token, lit string) {
181 // current token start
184 // determine token value
187 lit = s.scanIdentifier()
195 case '0' <= ch && ch <= '9':
199 s.next() // always make progress
202 return pos, tokEOF, ""
206 case '.', '(', ')', '=':
// No more than one character consumed since pos: report the '$' as illegal.
210 if s.offset-pos <= 1 {
211 s.error(pos, "illegal $ character")
215 s.error(pos, fmt.Sprintf("illegal character %q", ch))
// The literal is the source text from the token start up to the current offset.
218 lit = string(s.src[pos:s.offset])