vendor/github.com/magiconair/properties/lex.go

   1 // Copyright 2017 Frank Schroeder. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4 //
   5 // Parts of the lexer are from the template/text/parser package
   6 // For these parts the following applies:
   7 //
   8 // Copyright 2011 The Go Authors. All rights reserved.
   9 // Use of this source code is governed by a BSD-style
  10 // license that can be found in the LICENSE file of the go 1.2
  11 // distribution.
  12
  13 package properties
  14
  15 import (
  16         "fmt"
  17         "strconv"
  18         "strings"
  19         "unicode/utf8"
  20 )
  21
// item represents a token or text string returned from the scanner.
// Items are produced by the lexer's emit and errorf methods and handed
// to the consumer through nextItem.
type item struct {
	typ itemType // The type of this item.
	pos int      // The starting position, in bytes, of this item in the input string.
	val string   // The value of this item; for itemError this is the error text.
}
  28
  29 func (i item) String() string {
  30         switch {
  31         case i.typ == itemEOF:
  32                 return "EOF"
  33         case i.typ == itemError:
  34                 return i.val
  35         case len(i.val) > 10:
  36                 return fmt.Sprintf("%.10q...", i.val)
  37         }
  38         return fmt.Sprintf("%q", i.val)
  39 }
  40
// itemType identifies the type of lex items.
type itemType int

// The item types emitted by the lexer. itemError is the zero value;
// itemEOF is emitted once the end of the input has been reached.
const (
	itemError itemType = iota // error occurred; value is text of error
	itemEOF
	itemKey     // a key
	itemValue   // a value
	itemComment // a comment
)
  51
// eof is the sentinel value returned by lexer.next once the input is
// exhausted. It is negative, so it never collides with a decoded rune.
const eof = -1

// whitespace lists the permitted whitespace characters: space, FF and TAB.
// Line terminators are deliberately not part of this set.
const whitespace = " \f\t"
  57
// stateFn represents the state of the scanner as a function that returns the next state.
// A nil stateFn terminates the state machine driven by lexer.run.
type stateFn func(*lexer) stateFn
  60
// lexer holds the state of the scanner. The state functions run in the
// goroutine started by lex and hand finished items to the consumer over
// the items channel.
type lexer struct {
	input   string    // the string being scanned
	state   stateFn   // the next lexing function to enter
	pos     int       // current position in the input, as a byte offset
	start   int       // start position of this item, as a byte offset
	width   int       // width in bytes of last rune read from input; 0 at EOF
	lastPos int       // position of most recent item returned by nextItem
	runes   []rune    // scanned (already unescaped) runes for this item; reset by emit
	items   chan item // channel of scanned items
}
  72
  73 // next returns the next rune in the input.
  74 func (l *lexer) next() rune {
  75         if l.pos >= len(l.input) {
  76                 l.width = 0
  77                 return eof
  78         }
  79         r, w := utf8.DecodeRuneInString(l.input[l.pos:])
  80         l.width = w
  81         l.pos += l.width
  82         return r
  83 }
  84
  85 // peek returns but does not consume the next rune in the input.
  86 func (l *lexer) peek() rune {
  87         r := l.next()
  88         l.backup()
  89         return r
  90 }
  91
  92 // backup steps back one rune. Can only be called once per call of next.
  93 func (l *lexer) backup() {
  94         l.pos -= l.width
  95 }
  96
  97 // emit passes an item back to the client.
  98 func (l *lexer) emit(t itemType) {
  99         i := item{t, l.start, string(l.runes)}
 100         l.items <- i
 101         l.start = l.pos
 102         l.runes = l.runes[:0]
 103 }
 104
// ignore skips over the pending input before this point by moving the
// start of the current item up to the current position. It does not
// touch the runes already accumulated for the item.
func (l *lexer) ignore() {
	l.start = l.pos
}
 109
// appendRune appends the rune to the value of the current item.
func (l *lexer) appendRune(r rune) {
	l.runes = append(l.runes, r)
}
 114
 115 // accept consumes the next rune if it's from the valid set.
 116 func (l *lexer) accept(valid string) bool {
 117         if strings.ContainsRune(valid, l.next()) {
 118                 return true
 119         }
 120         l.backup()
 121         return false
 122 }
 123
 124 // acceptRun consumes a run of runes from the valid set.
 125 func (l *lexer) acceptRun(valid string) {
 126         for strings.ContainsRune(valid, l.next()) {
 127         }
 128         l.backup()
 129 }
 130
 131 // acceptRunUntil consumes a run of runes up to a terminator.
 132 func (l *lexer) acceptRunUntil(term rune) {
 133         for term != l.next() {
 134         }
 135         l.backup()
 136 }
 137
 138 // hasText returns true if the current parsed text is not empty.
 139 func (l *lexer) isNotEmpty() bool {
 140         return l.pos > l.start
 141 }
 142
 143 // lineNumber reports which line we're on, based on the position of
 144 // the previous item returned by nextItem. Doing it this way
 145 // means we don't have to worry about peek double counting.
 146 func (l *lexer) lineNumber() int {
 147         return 1 + strings.Count(l.input[:l.lastPos], "\n")
 148 }
 149
 150 // errorf returns an error token and terminates the scan by passing
 151 // back a nil pointer that will be the next state, terminating l.nextItem.
 152 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
 153         l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
 154         return nil
 155 }
 156
 157 // nextItem returns the next item from the input.
 158 func (l *lexer) nextItem() item {
 159         i := <-l.items
 160         l.lastPos = i.pos
 161         return i
 162 }
 163
 164 // lex creates a new scanner for the input string.
 165 func lex(input string) *lexer {
 166         l := &lexer{
 167                 input: input,
 168                 items: make(chan item),
 169                 runes: make([]rune, 0, 32),
 170         }
 171         go l.run()
 172         return l
 173 }
 174
 175 // run runs the state machine for the lexer.
 176 func (l *lexer) run() {
 177         for l.state = lexBeforeKey(l); l.state != nil; {
 178                 l.state = l.state(l)
 179         }
 180 }
 181
 182 // state functions
 183
 184 // lexBeforeKey scans until a key begins.
 185 func lexBeforeKey(l *lexer) stateFn {
 186         switch r := l.next(); {
 187         case isEOF(r):
 188                 l.emit(itemEOF)
 189                 return nil
 190
 191         case isEOL(r):
 192                 l.ignore()
 193                 return lexBeforeKey
 194
 195         case isComment(r):
 196                 return lexComment
 197
 198         case isWhitespace(r):
 199                 l.acceptRun(whitespace)
 200                 l.ignore()
 201                 return lexKey
 202
 203         default:
 204                 l.backup()
 205                 return lexKey
 206         }
 207 }
 208
 209 // lexComment scans a comment line. The comment character has already been scanned.
 210 func lexComment(l *lexer) stateFn {
 211         l.acceptRun(whitespace)
 212         l.ignore()
 213         for {
 214                 switch r := l.next(); {
 215                 case isEOF(r):
 216                         l.ignore()
 217                         l.emit(itemEOF)
 218                         return nil
 219                 case isEOL(r):
 220                         l.emit(itemComment)
 221                         return lexBeforeKey
 222                 default:
 223                         l.appendRune(r)
 224                 }
 225         }
 226 }
 227
 228 // lexKey scans the key up to a delimiter
 229 func lexKey(l *lexer) stateFn {
 230         var r rune
 231
 232 Loop:
 233         for {
 234                 switch r = l.next(); {
 235
 236                 case isEscape(r):
 237                         err := l.scanEscapeSequence()
 238                         if err != nil {
 239                                 return l.errorf(err.Error())
 240                         }
 241
 242                 case isEndOfKey(r):
 243                         l.backup()
 244                         break Loop
 245
 246                 case isEOF(r):
 247                         break Loop
 248
 249                 default:
 250                         l.appendRune(r)
 251                 }
 252         }
 253
 254         if len(l.runes) > 0 {
 255                 l.emit(itemKey)
 256         }
 257
 258         if isEOF(r) {
 259                 l.emit(itemEOF)
 260                 return nil
 261         }
 262
 263         return lexBeforeValue
 264 }
 265
 266 // lexBeforeValue scans the delimiter between key and value.
 267 // Leading and trailing whitespace is ignored.
 268 // We expect to be just after the key.
 269 func lexBeforeValue(l *lexer) stateFn {
 270         l.acceptRun(whitespace)
 271         l.accept(":=")
 272         l.acceptRun(whitespace)
 273         l.ignore()
 274         return lexValue
 275 }
 276
 277 // lexValue scans text until the end of the line. We expect to be just after the delimiter.
 278 func lexValue(l *lexer) stateFn {
 279         for {
 280                 switch r := l.next(); {
 281                 case isEscape(r):
 282                         if isEOL(l.peek()) {
 283                                 l.next()
 284                                 l.acceptRun(whitespace)
 285                         } else {
 286                                 err := l.scanEscapeSequence()
 287                                 if err != nil {
 288                                         return l.errorf(err.Error())
 289                                 }
 290                         }
 291
 292                 case isEOL(r):
 293                         l.emit(itemValue)
 294                         l.ignore()
 295                         return lexBeforeKey
 296
 297                 case isEOF(r):
 298                         l.emit(itemValue)
 299                         l.emit(itemEOF)
 300                         return nil
 301
 302                 default:
 303                         l.appendRune(r)
 304                 }
 305         }
 306 }
 307
 308 // scanEscapeSequence scans either one of the escaped characters
 309 // or a unicode literal. We expect to be after the escape character.
 310 func (l *lexer) scanEscapeSequence() error {
 311         switch r := l.next(); {
 312
 313         case isEscapedCharacter(r):
 314                 l.appendRune(decodeEscapedCharacter(r))
 315                 return nil
 316
 317         case atUnicodeLiteral(r):
 318                 return l.scanUnicodeLiteral()
 319
 320         case isEOF(r):
 321                 return fmt.Errorf("premature EOF")
 322
 323         // silently drop the escape character and append the rune as is
 324         default:
 325                 l.appendRune(r)
 326                 return nil
 327         }
 328 }
 329
 330 // scans a unicode literal in the form \uXXXX. We expect to be after the \u.
 331 func (l *lexer) scanUnicodeLiteral() error {
 332         // scan the digits
 333         d := make([]rune, 4)
 334         for i := 0; i < 4; i++ {
 335                 d[i] = l.next()
 336                 if d[i] == eof || !strings.ContainsRune("0123456789abcdefABCDEF", d[i]) {
 337                         return fmt.Errorf("invalid unicode literal")
 338                 }
 339         }
 340
 341         // decode the digits into a rune
 342         r, err := strconv.ParseInt(string(d), 16, 0)
 343         if err != nil {
 344                 return err
 345         }
 346
 347         l.appendRune(rune(r))
 348         return nil
 349 }
 350
 351 // decodeEscapedCharacter returns the unescaped rune. We expect to be after the escape character.
 352 func decodeEscapedCharacter(r rune) rune {
 353         switch r {
 354         case 'f':
 355                 return '\f'
 356         case 'n':
 357                 return '\n'
 358         case 'r':
 359                 return '\r'
 360         case 't':
 361                 return '\t'
 362         default:
 363                 return r
 364         }
 365 }
 366
 367 // atUnicodeLiteral reports whether we are at a unicode literal.
 368 // The escape character has already been consumed.
 369 func atUnicodeLiteral(r rune) bool {
 370         return r == 'u'
 371 }
 372
 373 // isComment reports whether we are at the start of a comment.
 374 func isComment(r rune) bool {
 375         return r == '#' || r == '!'
 376 }
 377
 378 // isEndOfKey reports whether the rune terminates the current key.
 379 func isEndOfKey(r rune) bool {
 380         return strings.ContainsRune(" \f\t\r\n:=", r)
 381 }
 382
 383 // isEOF reports whether we are at EOF.
 384 func isEOF(r rune) bool {
 385         return r == eof
 386 }
 387
 388 // isEOL reports whether we are at a new line character.
 389 func isEOL(r rune) bool {
 390         return r == '\n' || r == '\r'
 391 }
 392
 393 // isEscape reports whether the rune is the escape character which
 394 // prefixes unicode literals and other escaped characters.
 395 func isEscape(r rune) bool {
 396         return r == '\\'
 397 }
 398
 399 // isEscapedCharacter reports whether we are at one of the characters that need escaping.
 400 // The escape character has already been consumed.
 401 func isEscapedCharacter(r rune) bool {
 402         return strings.ContainsRune(" :=fnrt", r)
 403 }
 404
 405 // isWhitespace reports whether the rune is a whitespace character.
 406 func isWhitespace(r rune) bool {
 407         return strings.ContainsRune(whitespace, r)
 408 }