1 // Package parser implements a parser for HCL (HashiCorp Configuration
11 "github.com/hashicorp/hcl/hcl/ast"
12 "github.com/hashicorp/hcl/hcl/scanner"
13 "github.com/hashicorp/hcl/hcl/token"
23 comments []*ast.CommentGroup
24 leadComment *ast.CommentGroup // last lead comment
25 lineComment *ast.CommentGroup // last line comment
29 n int // buffer size (max = 1)
32 func newParser(src []byte) *Parser {
38 // Parse parses the given source and returns the abstract syntax tree.
39 func Parse(src []byte) (*ast.File, error) {
40 // Normalize all line endings.
41 // Since the scanner and output only work with "\n" line endings, we would
42 // otherwise end up with dangling "\r" characters in the parsed data.
43 src = bytes.Replace(src, []byte("\r\n"), []byte("\n"), -1)
49 var errEofToken = errors.New("EOF token found")
51 // Parse parses the source and returns the abstract syntax tree.
52 func (p *Parser) Parse() (*ast.File, error) {
55 p.sc.Error = func(pos token.Pos, msg string) {
56 scerr = &PosError{Pos: pos, Err: errors.New(msg)}
59 f.Node, err = p.objectList(false)
67 f.Comments = p.comments
71 // objectList parses a list of items within an object (generally k/v pairs).
72 // The parameter "obj" tells this whether we are within an object (braces:
73 // '{', '}') or just at the top level. If we're within an object, we end
75 func (p *Parser) objectList(obj bool) (*ast.ObjectList, error) {
76 defer un(trace(p, "ParseObjectList"))
77 node := &ast.ObjectList{}
83 if tok.Type == token.RBRACE {
88 n, err := p.objectItem()
89 if err == errEofToken {
90 break // we are finished
93 // we don't return a nil node, because we might want to use already
101 // object lists can be optionally comma-delimited e.g. when a list of maps
102 // is being expressed, so a comma is allowed here - it's simply consumed
104 if tok.Type != token.COMMA {
111 func (p *Parser) consumeComment() (comment *ast.Comment, endline int) {
112 endline = p.tok.Pos.Line
114 // count the endline if it's a multiline comment, i.e. starting with /*
115 if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' {
116 // don't use range here - no need to decode Unicode code points
117 for i := 0; i < len(p.tok.Text); i++ {
118 if p.tok.Text[i] == '\n' {
124 comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text}
129 func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) {
130 var list []*ast.Comment
131 endline = p.tok.Pos.Line
133 for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n {
134 var comment *ast.Comment
135 comment, endline = p.consumeComment()
136 list = append(list, comment)
139 // add comment group to the comments list
140 comments = &ast.CommentGroup{List: list}
141 p.comments = append(p.comments, comments)
146 // objectItem parses a single object item
147 func (p *Parser) objectItem() (*ast.ObjectItem, error) {
148 defer un(trace(p, "ParseObjectItem"))
150 keys, err := p.objectKey()
151 if len(keys) > 0 && err == errEofToken {
152 // We ignore eof token here since it is an error if we didn't
153 // receive a value (but we did receive a key) for the item.
156 if len(keys) > 0 && err != nil && p.tok.Type == token.RBRACE {
157 // This is a strange boolean statement, but what it means is:
158 // We have keys with no value, and we're likely in an object
159 // (since RBrace ends an object). For this, we set err to nil so
160 // we continue and get the error below of having the wrong value
164 // Reset the token type so we don't think it completed fine. See
165 // objectType which uses p.tok.Type to check if we're done with
167 p.tok.Type = token.EOF
173 o := &ast.ObjectItem{
177 if p.leadComment != nil {
178 o.LeadComment = p.leadComment
185 o.Val, err = p.object()
190 o.Val, err = p.objectType()
195 keyStr := make([]string, 0, len(keys))
196 for _, k := range keys {
197 keyStr = append(keyStr, k.Token.Text)
200 return nil, &PosError{
203 "key '%s' expected start of object ('{') or assignment ('=')",
204 strings.Join(keyStr, " ")),
208 // do a look-ahead for line comment
210 if len(keys) > 0 && o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil {
211 o.LineComment = p.lineComment
218 // objectKey parses an object key and returns a list of ObjectKey AST nodes
219 func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
221 keys := make([]*ast.ObjectKey, 0)
227 // It is very important to also return the keys here as well as
228 // the error. This is because we need to be able to tell if we
229 // did parse keys prior to finding the EOF, or if we just found
231 return keys, errEofToken
233 // assignment or object only, but not nested objects. this is not
234 // allowed: `foo bar = {}`
236 return nil, &PosError{
238 Err: fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type),
243 return nil, &PosError{
245 Err: errors.New("no object keys found!"),
253 // If we have no keys, then it is a syntax error. i.e. {{}} is not
258 Err: fmt.Errorf("expected: IDENT | STRING got: %s", p.tok.Type),
264 case token.IDENT, token.STRING:
266 keys = append(keys, &ast.ObjectKey{Token: p.tok})
268 return keys, &PosError{
270 Err: fmt.Errorf("illegal character"),
273 return keys, &PosError{
275 Err: fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type),
281 // object parses any type of object, such as number, bool, string, object or
283 func (p *Parser) object() (ast.Node, error) {
284 defer un(trace(p, "ParseType"))
288 case token.NUMBER, token.FLOAT, token.BOOL, token.STRING, token.HEREDOC:
289 return p.literalType()
291 return p.objectType()
297 return nil, errEofToken
300 return nil, &PosError{
302 Err: fmt.Errorf("Unknown token: %+v", tok),
306 // objectType parses an object type and returns an ObjectType AST
307 func (p *Parser) objectType() (*ast.ObjectType, error) {
308 defer un(trace(p, "ParseObjectType"))
310 // we assume that the currently scanned token is a LBRACE
311 o := &ast.ObjectType{
315 l, err := p.objectList(true)
317 // if we hit RBRACE, we are good to go (means we parsed all Items), if it's
318 // not a RBRACE, it's a syntax error and we just return it.
319 if err != nil && p.tok.Type != token.RBRACE {
323 // No error, scan and expect the ending to be a brace
324 if tok := p.scan(); tok.Type != token.RBRACE {
325 return nil, &PosError{
327 Err: fmt.Errorf("object expected closing RBRACE got: %s", tok.Type),
332 o.Rbrace = p.tok.Pos // advanced via parseObjectList
336 // listType parses a list type and returns a ListType AST
337 func (p *Parser) listType() (*ast.ListType, error) {
338 defer un(trace(p, "ParseListType"))
340 // we assume that the currently scanned token is a LBRACK
350 case token.COMMA, token.RBRACK:
352 return nil, &PosError{
355 "error parsing list, expected comma or list end, got: %s",
361 case token.BOOL, token.NUMBER, token.FLOAT, token.STRING, token.HEREDOC:
362 node, err := p.literalType()
367 // If there is a lead comment, apply it
368 if p.leadComment != nil {
369 node.LeadComment = p.leadComment
376 // get next list item or we are at the end
377 // do a look-ahead for line comment
379 if p.lineComment != nil && len(l.List) > 0 {
380 lit, ok := l.List[len(l.List)-1].(*ast.LiteralType)
382 lit.LineComment = p.lineComment
383 l.List[len(l.List)-1] = lit
392 // Looks like a nested object, so parse it out
393 node, err := p.objectType()
395 return nil, &PosError{
398 "error while trying to parse object within list: %s", err),
404 node, err := p.listType()
406 return nil, &PosError{
409 "error while trying to parse list within list: %s", err),
418 return nil, &PosError{
420 Err: fmt.Errorf("unexpected token while parsing list: %s", tok.Type),
426 // literalType parses a literal type and returns a LiteralType AST
427 func (p *Parser) literalType() (*ast.LiteralType, error) {
428 defer un(trace(p, "ParseLiteral"))
430 return &ast.LiteralType{
435 // scan returns the next token from the underlying scanner. If a token has
436 // been unscanned then read that instead. In the process, it collects any
437 // comment groups encountered, and remembers the last lead and line comments.
438 func (p *Parser) scan() token.Token {
439 // If we have a token on the buffer, then return it.
445 // Otherwise read the next token from the scanner and save it to the buffer
446 // in case we unscan later.
450 if p.tok.Type == token.COMMENT {
451 var comment *ast.CommentGroup
454 // fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n",
455 // p.tok.Pos.Line, prev.Pos.Line, endline)
456 if p.tok.Pos.Line == prev.Pos.Line {
457 // The comment is on same line as the previous token; it
458 // cannot be a lead comment but may be a line comment.
459 comment, endline = p.consumeCommentGroup(0)
460 if p.tok.Pos.Line != endline {
461 // The next token is on a different line, thus
462 // the last comment group is a line comment.
463 p.lineComment = comment
467 // consume successor comments, if any
469 for p.tok.Type == token.COMMENT {
470 comment, endline = p.consumeCommentGroup(1)
473 if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE {
475 case token.RBRACE, token.RBRACK:
476 // Do not count for these cases
478 // The next token is following on the line immediately after the
479 // comment group, thus the last comment group is a lead comment.
480 p.leadComment = comment
489 // unscan pushes the previously read token back onto the buffer.
490 func (p *Parser) unscan() {
494 // ----------------------------------------------------------------------------
497 func (p *Parser) printTrace(a ...interface{}) {
502 const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
504 fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
516 func trace(p *Parser, msg string) *Parser {
517 p.printTrace(msg, "(")
522 // Usage pattern: defer un(trace(p, "..."))