OSDN Git Service

Hulk did something
[bytom/vapor.git] / vendor / github.com / pelletier / go-toml / lexer.go
1 // TOML lexer.
2 //
3 // Written using the principles developed by Rob Pike in
4 // http://www.youtube.com/watch?v=HxaD_trXwRE
5
6 package toml
7
8 import (
9         "bytes"
10         "errors"
11         "fmt"
12         "regexp"
13         "strconv"
14         "strings"
15 )
16
// dateRegexp matches an RFC 3339 datetime at the start of a string;
// it is compiled once in init().
var dateRegexp *regexp.Regexp

// Define state functions: each state consumes some input and returns the
// next state, or nil when lexing is finished (lexer design after Rob Pike's
// "Lexical Scanning in Go" talk).
type tomlLexStateFn func() tomlLexStateFn
21
// Define lexer. The lexer walks the rune slice with a read cursor
// (inputIdx) while tracking the half-open span of the token currently
// being built and two line/column pairs: one frozen at the token start,
// one following the read cursor.
type tomlLexer struct {
	inputIdx          int     // index of the next rune to read
	input             []rune  // Textual source
	currentTokenStart int     // rune index where the current token begins
	currentTokenStop  int     // one past the last rune consumed into the current token
	tokens            []token // tokens emitted so far
	depth             int     // '['/']' nesting depth; >0 while inside an array value
	line              int     // line of the current token start (1-based)
	col               int     // column of the current token start (1-based)
	endbufferLine     int     // line of the read cursor
	endbufferCol      int     // column of the read cursor
}
35
36 // Basic read operations on input
37
38 func (l *tomlLexer) read() rune {
39         r := l.peek()
40         if r == '\n' {
41                 l.endbufferLine++
42                 l.endbufferCol = 1
43         } else {
44                 l.endbufferCol++
45         }
46         l.inputIdx++
47         return r
48 }
49
50 func (l *tomlLexer) next() rune {
51         r := l.read()
52
53         if r != eof {
54                 l.currentTokenStop++
55         }
56         return r
57 }
58
59 func (l *tomlLexer) ignore() {
60         l.currentTokenStart = l.currentTokenStop
61         l.line = l.endbufferLine
62         l.col = l.endbufferCol
63 }
64
// skip consumes one rune and immediately discards it from the pending token.
func (l *tomlLexer) skip() {
	l.next()
	l.ignore()
}
69
70 func (l *tomlLexer) fastForward(n int) {
71         for i := 0; i < n; i++ {
72                 l.next()
73         }
74 }
75
76 func (l *tomlLexer) emitWithValue(t tokenType, value string) {
77         l.tokens = append(l.tokens, token{
78                 Position: Position{l.line, l.col},
79                 typ:      t,
80                 val:      value,
81         })
82         l.ignore()
83 }
84
85 func (l *tomlLexer) emit(t tokenType) {
86         l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
87 }
88
89 func (l *tomlLexer) peek() rune {
90         if l.inputIdx >= len(l.input) {
91                 return eof
92         }
93         return l.input[l.inputIdx]
94 }
95
96 func (l *tomlLexer) peekString(size int) string {
97         maxIdx := len(l.input)
98         upperIdx := l.inputIdx + size // FIXME: potential overflow
99         if upperIdx > maxIdx {
100                 upperIdx = maxIdx
101         }
102         return string(l.input[l.inputIdx:upperIdx])
103 }
104
105 func (l *tomlLexer) follow(next string) bool {
106         return next == l.peekString(len(next))
107 }
108
109 // Error management
110
111 func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
112         l.tokens = append(l.tokens, token{
113                 Position: Position{l.line, l.col},
114                 typ:      tokenError,
115                 val:      fmt.Sprintf(format, args...),
116         })
117         return nil
118 }
119
120 // State functions
121
// lexVoid lexes the top level of a TOML document — table headers, keys,
// comments and blank lines. It is the lexer's initial state.
func (l *tomlLexer) lexVoid() tomlLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '[':
			return l.lexTableKey
		case '#':
			return l.lexComment(l.lexVoid)
		case '=':
			return l.lexEqual
		case '\r':
			fallthrough
		case '\n':
			l.skip()
			continue
		}

		if isSpace(next) {
			l.skip()
		}

		// NOTE(review): there is no `continue` after skipping a space, so
		// the checks below still see the pre-skip `next`; for a space they
		// all fail and the loop re-peeks — confirm this is intentional.
		if l.depth > 0 {
			// Still inside an unterminated array value.
			return l.lexRvalue
		}

		if isKeyStartChar(next) {
			return l.lexKey
		}

		if next == eof {
			l.next()
			break
		}
	}

	l.emit(tokenEOF)
	return nil
}
160
// lexRvalue lexes the right-hand side of an assignment, or an element
// inside an array / inline table: strings, numbers, dates, booleans,
// brackets, braces, commas and comments.
func (l *tomlLexer) lexRvalue() tomlLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '.':
			return l.errorf("cannot start float with a dot")
		case '=':
			return l.lexEqual
		case '[':
			// Entering an array: newlines no longer end the value.
			l.depth++
			return l.lexLeftBracket
		case ']':
			l.depth--
			return l.lexRightBracket
		case '{':
			return l.lexLeftCurlyBrace
		case '}':
			return l.lexRightCurlyBrace
		case '#':
			return l.lexComment(l.lexRvalue)
		case '"':
			return l.lexString
		case '\'':
			return l.lexLiteralString
		case ',':
			return l.lexComma
		case '\r':
			fallthrough
		case '\n':
			l.skip()
			// Outside any array, a newline terminates the value position.
			if l.depth == 0 {
				return l.lexVoid
			}
			return l.lexRvalue
		case '_':
			return l.errorf("cannot start number with underscore")
		}

		if l.follow("true") {
			return l.lexTrue
		}

		if l.follow("false") {
			return l.lexFalse
		}

		if isSpace(next) {
			l.skip()
			continue
		}

		if next == eof {
			l.next()
			break
		}

		// Datetimes are tried before numbers since both start with digits;
		// 35 runes of lookahead covers the longest supported form.
		possibleDate := l.peekString(35)
		dateMatch := dateRegexp.FindString(possibleDate)
		if dateMatch != "" {
			l.fastForward(len(dateMatch))
			return l.lexDate
		}

		if next == '+' || next == '-' || isDigit(next) {
			return l.lexNumber
		}

		// Bare keys can appear in value position (e.g. inline tables).
		if isAlphanumeric(next) {
			return l.lexKey
		}

		return l.errorf("no value can start with %c", next)
	}

	l.emit(tokenEOF)
	return nil
}
238
// lexLeftCurlyBrace emits a '{' (inline table start) and stays in rvalue mode.
func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn {
	l.next()
	l.emit(tokenLeftCurlyBrace)
	return l.lexRvalue
}
244
// lexRightCurlyBrace emits a '}' (inline table end) and stays in rvalue mode.
func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn {
	l.next()
	l.emit(tokenRightCurlyBrace)
	return l.lexRvalue
}
250
// lexDate emits the datetime already consumed by the caller's fastForward
// (see lexRvalue) as a tokenDate.
func (l *tomlLexer) lexDate() tomlLexStateFn {
	l.emit(tokenDate)
	return l.lexRvalue
}
255
256 func (l *tomlLexer) lexTrue() tomlLexStateFn {
257         l.fastForward(4)
258         l.emit(tokenTrue)
259         return l.lexRvalue
260 }
261
262 func (l *tomlLexer) lexFalse() tomlLexStateFn {
263         l.fastForward(5)
264         l.emit(tokenFalse)
265         return l.lexRvalue
266 }
267
// lexEqual emits '=' and switches to lexing the right-hand side.
func (l *tomlLexer) lexEqual() tomlLexStateFn {
	l.next()
	l.emit(tokenEqual)
	return l.lexRvalue
}
273
// lexComma emits ',' (array / inline-table separator) and stays in rvalue mode.
func (l *tomlLexer) lexComma() tomlLexStateFn {
	l.next()
	l.emit(tokenComma)
	return l.lexRvalue
}
279
280 func (l *tomlLexer) lexKey() tomlLexStateFn {
281         growingString := ""
282
283         for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
284                 if r == '"' {
285                         l.next()
286                         str, err := l.lexStringAsString(`"`, false, true)
287                         if err != nil {
288                                 return l.errorf(err.Error())
289                         }
290                         growingString += `"` + str + `"`
291                         l.next()
292                         continue
293                 } else if r == '\n' {
294                         return l.errorf("keys cannot contain new lines")
295                 } else if isSpace(r) {
296                         break
297                 } else if !isValidBareChar(r) {
298                         return l.errorf("keys cannot contain %c character", r)
299                 }
300                 growingString += string(r)
301                 l.next()
302         }
303         l.emitWithValue(tokenKey, growingString)
304         return l.lexVoid
305 }
306
307 func (l *tomlLexer) lexComment(previousState tomlLexStateFn) tomlLexStateFn {
308         return func() tomlLexStateFn {
309                 for next := l.peek(); next != '\n' && next != eof; next = l.peek() {
310                         if next == '\r' && l.follow("\r\n") {
311                                 break
312                         }
313                         l.next()
314                 }
315                 l.ignore()
316                 return previousState
317         }
318 }
319
// lexLeftBracket emits '[' (array start) and stays in rvalue mode.
func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
	l.next()
	l.emit(tokenLeftBracket)
	return l.lexRvalue
}
325
326 func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) {
327         growingString := ""
328
329         if discardLeadingNewLine {
330                 if l.follow("\r\n") {
331                         l.skip()
332                         l.skip()
333                 } else if l.peek() == '\n' {
334                         l.skip()
335                 }
336         }
337
338         // find end of string
339         for {
340                 if l.follow(terminator) {
341                         return growingString, nil
342                 }
343
344                 next := l.peek()
345                 if next == eof {
346                         break
347                 }
348                 growingString += string(l.next())
349         }
350
351         return "", errors.New("unclosed string")
352 }
353
354 func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
355         l.skip()
356
357         // handle special case for triple-quote
358         terminator := "'"
359         discardLeadingNewLine := false
360         if l.follow("''") {
361                 l.skip()
362                 l.skip()
363                 terminator = "'''"
364                 discardLeadingNewLine = true
365         }
366
367         str, err := l.lexLiteralStringAsString(terminator, discardLeadingNewLine)
368         if err != nil {
369                 return l.errorf(err.Error())
370         }
371
372         l.emitWithValue(tokenString, str)
373         l.fastForward(len(terminator))
374         l.ignore()
375         return l.lexRvalue
376 }
377
// Lex a string and return the results as a string.
// Terminator is the substring indicating the end of the token.
// The resulting string does not include the terminator.
// Backslash escapes (\n, \t, \uXXXX, \UXXXXXXXX, ...) are decoded; bare
// control characters are rejected unless acceptNewLines permits CR/LF.
func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) {
	growingString := ""

	// A newline immediately after the opening triple-quote is dropped
	// (CRLF checked first, then bare LF).
	if discardLeadingNewLine {
		if l.follow("\r\n") {
			l.skip()
			l.skip()
		} else if l.peek() == '\n' {
			l.skip()
		}
	}

	for {
		if l.follow(terminator) {
			return growingString, nil
		}

		if l.follow("\\") {
			l.next()
			switch l.peek() {
			case '\r':
				fallthrough
			case '\n':
				fallthrough
			case '\t':
				fallthrough
			case ' ':
				// skip all whitespace chars following backslash
				// (the "line-ending backslash" of multi-line strings)
				for strings.ContainsRune("\r\n\t ", l.peek()) {
					l.next()
				}
			case '"':
				growingString += "\""
				l.next()
			case 'n':
				growingString += "\n"
				l.next()
			case 'b':
				growingString += "\b"
				l.next()
			case 'f':
				growingString += "\f"
				l.next()
			case '/':
				growingString += "/"
				l.next()
			case 't':
				growingString += "\t"
				l.next()
			case 'r':
				growingString += "\r"
				l.next()
			case '\\':
				growingString += "\\"
				l.next()
			case 'u':
				// \uXXXX — exactly four hex digits.
				l.next()
				code := ""
				for i := 0; i < 4; i++ {
					c := l.peek()
					if !isHexDigit(c) {
						return "", errors.New("unfinished unicode escape")
					}
					l.next()
					code = code + string(c)
				}
				intcode, err := strconv.ParseInt(code, 16, 32)
				if err != nil {
					return "", errors.New("invalid unicode escape: \\u" + code)
				}
				growingString += string(rune(intcode))
			case 'U':
				// \UXXXXXXXX — exactly eight hex digits.
				l.next()
				code := ""
				for i := 0; i < 8; i++ {
					c := l.peek()
					if !isHexDigit(c) {
						return "", errors.New("unfinished unicode escape")
					}
					l.next()
					code = code + string(c)
				}
				intcode, err := strconv.ParseInt(code, 16, 64)
				if err != nil {
					return "", errors.New("invalid unicode escape: \\U" + code)
				}
				growingString += string(rune(intcode))
			default:
				return "", errors.New("invalid escape sequence: \\" + string(l.peek()))
			}
		} else {
			r := l.peek()

			// Raw control characters must be escaped; multi-line strings
			// are additionally allowed to contain bare CR/LF.
			if 0x00 <= r && r <= 0x1F && !(acceptNewLines && (r == '\n' || r == '\r')) {
				return "", fmt.Errorf("unescaped control character %U", r)
			}
			l.next()
			growingString += string(r)
		}

		if l.peek() == eof {
			break
		}
	}

	return "", errors.New("unclosed string")
}
488
489 func (l *tomlLexer) lexString() tomlLexStateFn {
490         l.skip()
491
492         // handle special case for triple-quote
493         terminator := `"`
494         discardLeadingNewLine := false
495         acceptNewLines := false
496         if l.follow(`""`) {
497                 l.skip()
498                 l.skip()
499                 terminator = `"""`
500                 discardLeadingNewLine = true
501                 acceptNewLines = true
502         }
503
504         str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines)
505
506         if err != nil {
507                 return l.errorf(err.Error())
508         }
509
510         l.emitWithValue(tokenString, str)
511         l.fastForward(len(terminator))
512         l.ignore()
513         return l.lexRvalue
514 }
515
516 func (l *tomlLexer) lexTableKey() tomlLexStateFn {
517         l.next()
518
519         if l.peek() == '[' {
520                 // token '[[' signifies an array of tables
521                 l.next()
522                 l.emit(tokenDoubleLeftBracket)
523                 return l.lexInsideTableArrayKey
524         }
525         // vanilla table key
526         l.emit(tokenLeftBracket)
527         return l.lexInsideTableKey
528 }
529
530 func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
531         for r := l.peek(); r != eof; r = l.peek() {
532                 switch r {
533                 case ']':
534                         if l.currentTokenStop > l.currentTokenStart {
535                                 l.emit(tokenKeyGroupArray)
536                         }
537                         l.next()
538                         if l.peek() != ']' {
539                                 break
540                         }
541                         l.next()
542                         l.emit(tokenDoubleRightBracket)
543                         return l.lexVoid
544                 case '[':
545                         return l.errorf("table array key cannot contain ']'")
546                 default:
547                         l.next()
548                 }
549         }
550         return l.errorf("unclosed table array key")
551 }
552
553 func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
554         for r := l.peek(); r != eof; r = l.peek() {
555                 switch r {
556                 case ']':
557                         if l.currentTokenStop > l.currentTokenStart {
558                                 l.emit(tokenKeyGroup)
559                         }
560                         l.next()
561                         l.emit(tokenRightBracket)
562                         return l.lexVoid
563                 case '[':
564                         return l.errorf("table key cannot contain ']'")
565                 default:
566                         l.next()
567                 }
568         }
569         return l.errorf("unclosed table key")
570 }
571
// lexRightBracket emits ']' (array end) and stays in rvalue mode.
func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
	l.next()
	l.emit(tokenRightBracket)
	return l.lexRvalue
}
577
// lexNumber lexes an integer or a float. An optional leading sign is
// consumed first; '_' separators are accepted here (their placement is
// validated later); a '.' or an exponent makes the token a tokenFloat,
// otherwise it is a tokenInteger.
func (l *tomlLexer) lexNumber() tomlLexStateFn {
	r := l.peek()
	if r == '+' || r == '-' {
		l.next()
	}
	pointSeen := false
	expSeen := false
	digitSeen := false
	for {
		next := l.peek()
		if next == '.' {
			if pointSeen {
				return l.errorf("cannot have two dots in one float")
			}
			l.next()
			// A dot must be followed by at least one digit.
			if !isDigit(l.peek()) {
				return l.errorf("float cannot end with a dot")
			}
			pointSeen = true
		} else if next == 'e' || next == 'E' {
			expSeen = true
			l.next()
			r := l.peek()
			if r == '+' || r == '-' {
				l.next()
			}
		} else if isDigit(next) {
			digitSeen = true
			l.next()
		} else if next == '_' {
			l.next()
		} else {
			break
		}
		// A '.' seen before any digit means the number started with a dot.
		if pointSeen && !digitSeen {
			return l.errorf("cannot start float with a dot")
		}
	}

	if !digitSeen {
		return l.errorf("no digit in that number")
	}
	if pointSeen || expSeen {
		l.emit(tokenFloat)
	} else {
		l.emit(tokenInteger)
	}
	return l.lexRvalue
}
627
628 func (l *tomlLexer) run() {
629         for state := l.lexVoid; state != nil; {
630                 state = state()
631         }
632 }
633
func init() {
	// RFC 3339 datetime anchored at the start: date, 'T', time, optional
	// fractional seconds, then 'Z' or a numeric UTC offset.
	dateRegexp = regexp.MustCompile(`^\d{1,4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,9})?(Z|[+-]\d{2}:\d{2})`)
}
637
638 // Entry point
639 func lexToml(inputBytes []byte) []token {
640         runes := bytes.Runes(inputBytes)
641         l := &tomlLexer{
642                 input:         runes,
643                 tokens:        make([]token, 0, 256),
644                 line:          1,
645                 col:           1,
646                 endbufferLine: 1,
647                 endbufferCol:  1,
648         }
649         l.run()
650         return l.tokens
651 }