// TOML JSONPath lexer.
//
// Written using the principles developed by Rob Pike in
// http://www.youtube.com/watch?v=HxaD_trXwRE
package query

import (
	"fmt"
	"strconv"
	"strings"
	"unicode/utf8"

	"github.com/pelletier/go-toml"
)

// Lexer state function: each state returns the next state, or nil to stop.
type queryLexStateFn func() queryLexStateFn

// Lexer definition
type queryLexer struct {
	input      string
	start      int
	pos        int
	width      int
	tokens     chan token
	depth      int
	line       int
	col        int
	stringTerm string
}

// run drives the state machine until a state function returns nil, then
// closes the token channel.
func (l *queryLexer) run() {
	for state := l.lexVoid; state != nil; {
		state = state()
	}
	close(l.tokens)
}

func (l *queryLexer) nextStart() {
	// iterate by runes (utf8 characters),
	// searching for newlines to advance the line/col counts
	for i := l.start; i < l.pos; {
		r, width := utf8.DecodeRuneInString(l.input[i:])
		if r == '\n' {
			l.line++
			l.col = 1
		} else {
			l.col++
		}
		i += width
	}
	// advance start position to the next token
	l.start = l.pos
}

// emit sends the text between start and pos as a token of type t.
func (l *queryLexer) emit(t tokenType) {
	l.tokens <- token{
		Position: toml.Position{Line: l.line, Col: l.col},
		typ:      t,
		val:      l.input[l.start:l.pos],
	}
	l.nextStart()
}

// emitWithValue sends a token of type t carrying an explicit value, used
// when the token text differs from the raw input (e.g. unescaped strings).
func (l *queryLexer) emitWithValue(t tokenType, value string) {
	l.tokens <- token{
		Position: toml.Position{Line: l.line, Col: l.col},
		typ:      t,
		val:      value,
	}
	l.nextStart()
}

// next consumes and returns the next rune, or eof at the end of input.
func (l *queryLexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	var r rune
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// ignore discards the pending input between start and pos.
func (l *queryLexer) ignore() {
	l.nextStart()
}

// backup steps back over the last rune read by next.
func (l *queryLexer) backup() {
	l.pos -= l.width
}

// errorf emits an error token and terminates the lexer by returning nil.
func (l *queryLexer) errorf(format string, args ...interface{}) queryLexStateFn {
	l.tokens <- token{
		Position: toml.Position{Line: l.line, Col: l.col},
		typ:      tokenError,
		val:      fmt.Sprintf(format, args...),
	}
	return nil
}

// peek returns the next rune without consuming it.
func (l *queryLexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// accept consumes the next rune if it is one of the runes in valid.
func (l *queryLexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

// follow reports whether the remaining input starts with next.
func (l *queryLexer) follow(next string) bool {
	return strings.HasPrefix(l.input[l.pos:], next)
}

// lexVoid is the top-level state: it scans punctuation, keys, numbers and
// string openers.
func (l *queryLexer) lexVoid() queryLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '$':
			l.pos++
			l.emit(tokenDollar)
			continue
		case '.':
			if l.follow("..") {
				l.pos += 2
				l.emit(tokenDotDot)
			} else {
				l.pos++
				l.emit(tokenDot)
			}
			continue
		case '[':
			l.pos++
			l.emit(tokenLeftBracket)
			continue
		case ']':
			l.pos++
			l.emit(tokenRightBracket)
			continue
		case ',':
			l.pos++
			l.emit(tokenComma)
			continue
		case '*':
			l.pos++
			l.emit(tokenStar)
			continue
		case '(':
			l.pos++
			l.emit(tokenLeftParen)
			continue
		case ')':
			l.pos++
			l.emit(tokenRightParen)
			continue
		case '?':
			l.pos++
			l.emit(tokenQuestion)
			continue
		case ':':
			l.pos++
			l.emit(tokenColon)
			continue
		case '\'', '"':
			l.ignore()
			l.stringTerm = string(next)
			return l.lexString
		}

		if isSpace(next) {
			l.next()
			l.ignore()
			continue
		}

		if isAlphanumeric(next) {
			return l.lexKey
		}

		if next == '+' || next == '-' || isDigit(next) {
			return l.lexNumber
		}

		if l.next() == eof {
			break
		}

		return l.errorf("unexpected char: '%c'", next)
	}
	l.emit(tokenEOF)
	return nil
}

// lexKey scans a bare (unquoted) key made of alphanumeric runes.
func (l *queryLexer) lexKey() queryLexStateFn {
	for {
		next := l.peek()
		if !isAlphanumeric(next) {
			l.emit(tokenKey)
			return l.lexVoid
		}

		if l.next() == eof {
			break
		}
	}
	l.emit(tokenEOF)
	return nil
}

// lexString scans a quoted string, resolving escape sequences, until the
// terminator stored in stringTerm is found.
func (l *queryLexer) lexString() queryLexStateFn {
	l.pos++
	l.ignore()
	growingString := ""

	for {
		if l.follow(l.stringTerm) {
			l.emitWithValue(tokenString, growingString)
			l.pos++
			l.ignore()
			return l.lexVoid
		}

		if l.follow("\\\"") {
			l.pos++
			growingString += "\""
		} else if l.follow("\\'") {
			l.pos++
growingString += "'" } else if l.follow("\\n") { l.pos++ growingString += "\n" } else if l.follow("\\b") { l.pos++ growingString += "\b" } else if l.follow("\\f") { l.pos++ growingString += "\f" } else if l.follow("\\/") { l.pos++ growingString += "/" } else if l.follow("\\t") { l.pos++ growingString += "\t" } else if l.follow("\\r") { l.pos++ growingString += "\r" } else if l.follow("\\\\") { l.pos++ growingString += "\\" } else if l.follow("\\u") { l.pos += 2 code := "" for i := 0; i < 4; i++ { c := l.peek() l.pos++ if !isHexDigit(c) { return l.errorf("unfinished unicode escape") } code = code + string(c) } l.pos-- intcode, err := strconv.ParseInt(code, 16, 32) if err != nil { return l.errorf("invalid unicode escape: \\u" + code) } growingString += string(rune(intcode)) } else if l.follow("\\U") { l.pos += 2 code := "" for i := 0; i < 8; i++ { c := l.peek() l.pos++ if !isHexDigit(c) { return l.errorf("unfinished unicode escape") } code = code + string(c) } l.pos-- intcode, err := strconv.ParseInt(code, 16, 32) if err != nil { return l.errorf("invalid unicode escape: \\u" + code) } growingString += string(rune(intcode)) } else if l.follow("\\") { l.pos++ return l.errorf("invalid escape sequence: \\" + string(l.peek())) } else { growingString += string(l.peek()) } if l.next() == eof { break } } return l.errorf("unclosed string") } func (l *queryLexer) lexNumber() queryLexStateFn { l.ignore() if !l.accept("+") { l.accept("-") } pointSeen := false digitSeen := false for { next := l.next() if next == '.' { if pointSeen { return l.errorf("cannot have two dots in one float") } if !isDigit(l.peek()) { return l.errorf("float cannot end with a dot") } pointSeen = true } else if isDigit(next) { digitSeen = true } else { l.backup() break } if pointSeen && !digitSeen { return l.errorf("cannot start float with a dot") } } if !digitSeen { return l.errorf("no digit in that number") } if pointSeen { l.emit(tokenFloat) } else { l.emit(tokenInteger) } return l.lexVoid } // Entry point func lexQuery(input string) chan token { l := &queryLexer{ input: input, tokens: make(chan token), line: 1, col: 1, } go l.run() return l.tokens }