vendor/github.com/hashicorp/hcl/hcl/parser/parser.go

   1 // Package parser implements a parser for HCL (HashiCorp Configuration
   2 // Language)
   3 package parser
   4
   5 import (
   6         "bytes"
   7         "errors"
   8         "fmt"
   9         "strings"
  10
  11         "github.com/hashicorp/hcl/hcl/ast"
  12         "github.com/hashicorp/hcl/hcl/scanner"
  13         "github.com/hashicorp/hcl/hcl/token"
  14 )
  15
  16 type Parser struct {
  17         sc *scanner.Scanner
  18
  19         // Last read token
  20         tok       token.Token
  21         commaPrev token.Token
  22
  23         comments    []*ast.CommentGroup
  24         leadComment *ast.CommentGroup // last lead comment
  25         lineComment *ast.CommentGroup // last line comment
  26
  27         enableTrace bool
  28         indent      int
  29         n           int // buffer size (max = 1)
  30 }
  31
  32 func newParser(src []byte) *Parser {
  33         return &Parser{
  34                 sc: scanner.New(src),
  35         }
  36 }
  37
  38 // Parse returns the fully parsed source and returns the abstract syntax tree.
  39 func Parse(src []byte) (*ast.File, error) {
  40         // normalize all line endings
  41         // since the scanner and output only work with "\n" line endings, we may
  42         // end up with dangling "\r" characters in the parsed data.
  43         src = bytes.Replace(src, []byte("\r\n"), []byte("\n"), -1)
  44
  45         p := newParser(src)
  46         return p.Parse()
  47 }
  48
  49 var errEofToken = errors.New("EOF token found")
  50
  51 // Parse returns the fully parsed source and returns the abstract syntax tree.
  52 func (p *Parser) Parse() (*ast.File, error) {
  53         f := &ast.File{}
  54         var err, scerr error
  55         p.sc.Error = func(pos token.Pos, msg string) {
  56                 scerr = &PosError{Pos: pos, Err: errors.New(msg)}
  57         }
  58
  59         f.Node, err = p.objectList(false)
  60         if scerr != nil {
  61                 return nil, scerr
  62         }
  63         if err != nil {
  64                 return nil, err
  65         }
  66
  67         f.Comments = p.comments
  68         return f, nil
  69 }
  70
  71 // objectList parses a list of items within an object (generally k/v pairs).
  72 // The parameter" obj" tells this whether to we are within an object (braces:
  73 // '{', '}') or just at the top level. If we're within an object, we end
  74 // at an RBRACE.
  75 func (p *Parser) objectList(obj bool) (*ast.ObjectList, error) {
  76         defer un(trace(p, "ParseObjectList"))
  77         node := &ast.ObjectList{}
  78
  79         for {
  80                 if obj {
  81                         tok := p.scan()
  82                         p.unscan()
  83                         if tok.Type == token.RBRACE {
  84                                 break
  85                         }
  86                 }
  87
  88                 n, err := p.objectItem()
  89                 if err == errEofToken {
  90                         break // we are finished
  91                 }
  92
  93                 // we don't return a nil node, because might want to use already
  94                 // collected items.
  95                 if err != nil {
  96                         return node, err
  97                 }
  98
  99                 node.Add(n)
 100
 101                 // object lists can be optionally comma-delimited e.g. when a list of maps
 102                 // is being expressed, so a comma is allowed here - it's simply consumed
 103                 tok := p.scan()
 104                 if tok.Type != token.COMMA {
 105                         p.unscan()
 106                 }
 107         }
 108         return node, nil
 109 }
 110
 111 func (p *Parser) consumeComment() (comment *ast.Comment, endline int) {
 112         endline = p.tok.Pos.Line
 113
 114         // count the endline if it's multiline comment, ie starting with /*
 115         if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' {
 116                 // don't use range here - no need to decode Unicode code points
 117                 for i := 0; i < len(p.tok.Text); i++ {
 118                         if p.tok.Text[i] == '\n' {
 119                                 endline++
 120                         }
 121                 }
 122         }
 123
 124         comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text}
 125         p.tok = p.sc.Scan()
 126         return
 127 }
 128
 129 func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) {
 130         var list []*ast.Comment
 131         endline = p.tok.Pos.Line
 132
 133         for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n {
 134                 var comment *ast.Comment
 135                 comment, endline = p.consumeComment()
 136                 list = append(list, comment)
 137         }
 138
 139         // add comment group to the comments list
 140         comments = &ast.CommentGroup{List: list}
 141         p.comments = append(p.comments, comments)
 142
 143         return
 144 }
 145
 146 // objectItem parses a single object item
 147 func (p *Parser) objectItem() (*ast.ObjectItem, error) {
 148         defer un(trace(p, "ParseObjectItem"))
 149
 150         keys, err := p.objectKey()
 151         if len(keys) > 0 && err == errEofToken {
 152                 // We ignore eof token here since it is an error if we didn't
 153                 // receive a value (but we did receive a key) for the item.
 154                 err = nil
 155         }
 156         if len(keys) > 0 && err != nil && p.tok.Type == token.RBRACE {
 157                 // This is a strange boolean statement, but what it means is:
 158                 // We have keys with no value, and we're likely in an object
 159                 // (since RBrace ends an object). For this, we set err to nil so
 160                 // we continue and get the error below of having the wrong value
 161                 // type.
 162                 err = nil
 163
 164                 // Reset the token type so we don't think it completed fine. See
 165                 // objectType which uses p.tok.Type to check if we're done with
 166                 // the object.
 167                 p.tok.Type = token.EOF
 168         }
 169         if err != nil {
 170                 return nil, err
 171         }
 172
 173         o := &ast.ObjectItem{
 174                 Keys: keys,
 175         }
 176
 177         if p.leadComment != nil {
 178                 o.LeadComment = p.leadComment
 179                 p.leadComment = nil
 180         }
 181
 182         switch p.tok.Type {
 183         case token.ASSIGN:
 184                 o.Assign = p.tok.Pos
 185                 o.Val, err = p.object()
 186                 if err != nil {
 187                         return nil, err
 188                 }
 189         case token.LBRACE:
 190                 o.Val, err = p.objectType()
 191                 if err != nil {
 192                         return nil, err
 193                 }
 194         default:
 195                 keyStr := make([]string, 0, len(keys))
 196                 for _, k := range keys {
 197                         keyStr = append(keyStr, k.Token.Text)
 198                 }
 199
 200                 return nil, &PosError{
 201                         Pos: p.tok.Pos,
 202                         Err: fmt.Errorf(
 203                                 "key '%s' expected start of object ('{') or assignment ('=')",
 204                                 strings.Join(keyStr, " ")),
 205                 }
 206         }
 207
 208         // do a look-ahead for line comment
 209         p.scan()
 210         if len(keys) > 0 && o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil {
 211                 o.LineComment = p.lineComment
 212                 p.lineComment = nil
 213         }
 214         p.unscan()
 215         return o, nil
 216 }
 217
 218 // objectKey parses an object key and returns a ObjectKey AST
 219 func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
 220         keyCount := 0
 221         keys := make([]*ast.ObjectKey, 0)
 222
 223         for {
 224                 tok := p.scan()
 225                 switch tok.Type {
 226                 case token.EOF:
 227                         // It is very important to also return the keys here as well as
 228                         // the error. This is because we need to be able to tell if we
 229                         // did parse keys prior to finding the EOF, or if we just found
 230                         // a bare EOF.
 231                         return keys, errEofToken
 232                 case token.ASSIGN:
 233                         // assignment or object only, but not nested objects. this is not
 234                         // allowed: `foo bar = {}`
 235                         if keyCount > 1 {
 236                                 return nil, &PosError{
 237                                         Pos: p.tok.Pos,
 238                                         Err: fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type),
 239                                 }
 240                         }
 241
 242                         if keyCount == 0 {
 243                                 return nil, &PosError{
 244                                         Pos: p.tok.Pos,
 245                                         Err: errors.New("no object keys found!"),
 246                                 }
 247                         }
 248
 249                         return keys, nil
 250                 case token.LBRACE:
 251                         var err error
 252
 253                         // If we have no keys, then it is a syntax error. i.e. {{}} is not
 254                         // allowed.
 255                         if len(keys) == 0 {
 256                                 err = &PosError{
 257                                         Pos: p.tok.Pos,
 258                                         Err: fmt.Errorf("expected: IDENT | STRING got: %s", p.tok.Type),
 259                                 }
 260                         }
 261
 262                         // object
 263                         return keys, err
 264                 case token.IDENT, token.STRING:
 265                         keyCount++
 266                         keys = append(keys, &ast.ObjectKey{Token: p.tok})
 267                 case token.ILLEGAL:
 268                         return keys, &PosError{
 269                                 Pos: p.tok.Pos,
 270                                 Err: fmt.Errorf("illegal character"),
 271                         }
 272                 default:
 273                         return keys, &PosError{
 274                                 Pos: p.tok.Pos,
 275                                 Err: fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type),
 276                         }
 277                 }
 278         }
 279 }
 280
 281 // object parses any type of object, such as number, bool, string, object or
 282 // list.
 283 func (p *Parser) object() (ast.Node, error) {
 284         defer un(trace(p, "ParseType"))
 285         tok := p.scan()
 286
 287         switch tok.Type {
 288         case token.NUMBER, token.FLOAT, token.BOOL, token.STRING, token.HEREDOC:
 289                 return p.literalType()
 290         case token.LBRACE:
 291                 return p.objectType()
 292         case token.LBRACK:
 293                 return p.listType()
 294         case token.COMMENT:
 295                 // implement comment
 296         case token.EOF:
 297                 return nil, errEofToken
 298         }
 299
 300         return nil, &PosError{
 301                 Pos: tok.Pos,
 302                 Err: fmt.Errorf("Unknown token: %+v", tok),
 303         }
 304 }
 305
 306 // objectType parses an object type and returns a ObjectType AST
 307 func (p *Parser) objectType() (*ast.ObjectType, error) {
 308         defer un(trace(p, "ParseObjectType"))
 309
 310         // we assume that the currently scanned token is a LBRACE
 311         o := &ast.ObjectType{
 312                 Lbrace: p.tok.Pos,
 313         }
 314
 315         l, err := p.objectList(true)
 316
 317         // if we hit RBRACE, we are good to go (means we parsed all Items), if it's
 318         // not a RBRACE, it's an syntax error and we just return it.
 319         if err != nil && p.tok.Type != token.RBRACE {
 320                 return nil, err
 321         }
 322
 323         // No error, scan and expect the ending to be a brace
 324         if tok := p.scan(); tok.Type != token.RBRACE {
 325                 return nil, &PosError{
 326                         Pos: tok.Pos,
 327                         Err: fmt.Errorf("object expected closing RBRACE got: %s", tok.Type),
 328                 }
 329         }
 330
 331         o.List = l
 332         o.Rbrace = p.tok.Pos // advanced via parseObjectList
 333         return o, nil
 334 }
 335
 336 // listType parses a list type and returns a ListType AST
 337 func (p *Parser) listType() (*ast.ListType, error) {
 338         defer un(trace(p, "ParseListType"))
 339
 340         // we assume that the currently scanned token is a LBRACK
 341         l := &ast.ListType{
 342                 Lbrack: p.tok.Pos,
 343         }
 344
 345         needComma := false
 346         for {
 347                 tok := p.scan()
 348                 if needComma {
 349                         switch tok.Type {
 350                         case token.COMMA, token.RBRACK:
 351                         default:
 352                                 return nil, &PosError{
 353                                         Pos: tok.Pos,
 354                                         Err: fmt.Errorf(
 355                                                 "error parsing list, expected comma or list end, got: %s",
 356                                                 tok.Type),
 357                                 }
 358                         }
 359                 }
 360                 switch tok.Type {
 361                 case token.BOOL, token.NUMBER, token.FLOAT, token.STRING, token.HEREDOC:
 362                         node, err := p.literalType()
 363                         if err != nil {
 364                                 return nil, err
 365                         }
 366
 367                         // If there is a lead comment, apply it
 368                         if p.leadComment != nil {
 369                                 node.LeadComment = p.leadComment
 370                                 p.leadComment = nil
 371                         }
 372
 373                         l.Add(node)
 374                         needComma = true
 375                 case token.COMMA:
 376                         // get next list item or we are at the end
 377                         // do a look-ahead for line comment
 378                         p.scan()
 379                         if p.lineComment != nil && len(l.List) > 0 {
 380                                 lit, ok := l.List[len(l.List)-1].(*ast.LiteralType)
 381                                 if ok {
 382                                         lit.LineComment = p.lineComment
 383                                         l.List[len(l.List)-1] = lit
 384                                         p.lineComment = nil
 385                                 }
 386                         }
 387                         p.unscan()
 388
 389                         needComma = false
 390                         continue
 391                 case token.LBRACE:
 392                         // Looks like a nested object, so parse it out
 393                         node, err := p.objectType()
 394                         if err != nil {
 395                                 return nil, &PosError{
 396                                         Pos: tok.Pos,
 397                                         Err: fmt.Errorf(
 398                                                 "error while trying to parse object within list: %s", err),
 399                                 }
 400                         }
 401                         l.Add(node)
 402                         needComma = true
 403                 case token.LBRACK:
 404                         node, err := p.listType()
 405                         if err != nil {
 406                                 return nil, &PosError{
 407                                         Pos: tok.Pos,
 408                                         Err: fmt.Errorf(
 409                                                 "error while trying to parse list within list: %s", err),
 410                                 }
 411                         }
 412                         l.Add(node)
 413                 case token.RBRACK:
 414                         // finished
 415                         l.Rbrack = p.tok.Pos
 416                         return l, nil
 417                 default:
 418                         return nil, &PosError{
 419                                 Pos: tok.Pos,
 420                                 Err: fmt.Errorf("unexpected token while parsing list: %s", tok.Type),
 421                         }
 422                 }
 423         }
 424 }
 425
 426 // literalType parses a literal type and returns a LiteralType AST
 427 func (p *Parser) literalType() (*ast.LiteralType, error) {
 428         defer un(trace(p, "ParseLiteral"))
 429
 430         return &ast.LiteralType{
 431                 Token: p.tok,
 432         }, nil
 433 }
 434
 435 // scan returns the next token from the underlying scanner. If a token has
 436 // been unscanned then read that instead. In the process, it collects any
 437 // comment groups encountered, and remembers the last lead and line comments.
 438 func (p *Parser) scan() token.Token {
 439         // If we have a token on the buffer, then return it.
 440         if p.n != 0 {
 441                 p.n = 0
 442                 return p.tok
 443         }
 444
 445         // Otherwise read the next token from the scanner and Save it to the buffer
 446         // in case we unscan later.
 447         prev := p.tok
 448         p.tok = p.sc.Scan()
 449
 450         if p.tok.Type == token.COMMENT {
 451                 var comment *ast.CommentGroup
 452                 var endline int
 453
 454                 // fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n",
 455                 // p.tok.Pos.Line, prev.Pos.Line, endline)
 456                 if p.tok.Pos.Line == prev.Pos.Line {
 457                         // The comment is on same line as the previous token; it
 458                         // cannot be a lead comment but may be a line comment.
 459                         comment, endline = p.consumeCommentGroup(0)
 460                         if p.tok.Pos.Line != endline {
 461                                 // The next token is on a different line, thus
 462                                 // the last comment group is a line comment.
 463                                 p.lineComment = comment
 464                         }
 465                 }
 466
 467                 // consume successor comments, if any
 468                 endline = -1
 469                 for p.tok.Type == token.COMMENT {
 470                         comment, endline = p.consumeCommentGroup(1)
 471                 }
 472
 473                 if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE {
 474                         switch p.tok.Type {
 475                         case token.RBRACE, token.RBRACK:
 476                                 // Do not count for these cases
 477                         default:
 478                                 // The next token is following on the line immediately after the
 479                                 // comment group, thus the last comment group is a lead comment.
 480                                 p.leadComment = comment
 481                         }
 482                 }
 483
 484         }
 485
 486         return p.tok
 487 }
 488
 489 // unscan pushes the previously read token back onto the buffer.
 490 func (p *Parser) unscan() {
 491         p.n = 1
 492 }
 493
 494 // ----------------------------------------------------------------------------
 495 // Parsing support
 496
 497 func (p *Parser) printTrace(a ...interface{}) {
 498         if !p.enableTrace {
 499                 return
 500         }
 501
 502         const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
 503         const n = len(dots)
 504         fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
 505
 506         i := 2 * p.indent
 507         for i > n {
 508                 fmt.Print(dots)
 509                 i -= n
 510         }
 511         // i <= n
 512         fmt.Print(dots[0:i])
 513         fmt.Println(a...)
 514 }
 515
 516 func trace(p *Parser, msg string) *Parser {
 517         p.printTrace(msg, "(")
 518         p.indent++
 519         return p
 520 }
 521
 522 // Usage pattern: defer un(trace(p, "..."))
 523 func un(p *Parser) {
 524         p.indent--
 525         p.printTrace(")")
 526 }