compiler/parse.go

   1 package compiler
   2
   3 import (
   4         "bytes"
   5         "encoding/hex"
   6         "fmt"
   7         "strconv"
   8         "unicode"
   9 )
  10
  11 // We have some function naming conventions.
  12 //
  13 // For terminals:
  14 //   scanX     takes buf and position, returns new position (and maybe a value)
  15 //   peekX     takes *parser, returns bool or string
  16 //   consumeX  takes *parser and maybe a required literal, maybe returns value
  17 //             also updates the parser position
  18 //
  19 // For nonterminals:
  20 //   parseX    takes *parser, returns AST node, updates parser position
  21
  22 type parser struct {
  23         buf []byte
  24         pos int
  25 }
  26
  27 func (p *parser) errorf(format string, args ...interface{}) {
  28         panic(parserErr{buf: p.buf, offset: p.pos, format: format, args: args})
  29 }
  30
  31 // parse is the main entry point to the parser
  32 func parse(buf []byte) (contracts []*Contract, err error) {
  33         defer func() {
  34                 if val := recover(); val != nil {
  35                         if e, ok := val.(parserErr); ok {
  36                                 err = e
  37                         } else {
  38                                 panic(val)
  39                         }
  40                 }
  41         }()
  42         p := &parser{buf: buf}
  43         contracts = parseContracts(p)
  44         return
  45 }
  46
  47 // parse contracts
  48 func parseContracts(p *parser) []*Contract {
  49         var result []*Contract
  50         contracts := parseImportDirectives(p)
  51         for _, c := range contracts {
  52                 result = append(result, c)
  53         }
  54
  55         if pos := scanKeyword(p.buf, p.pos, "contract"); pos < 0 {
  56                 p.errorf("expected contract")
  57         }
  58         for peekKeyword(p) == "contract" {
  59                 contract := parseContract(p)
  60                 result = append(result, contract)
  61         }
  62         return result
  63 }
  64
  65 // contract name(p1, p2: t1, p3: t2) locks value { ... }
  66 func parseContract(p *parser) *Contract {
  67         consumeKeyword(p, "contract")
  68         name := consumeIdentifier(p)
  69         params := parseParams(p)
  70         // locks amount of asset
  71         consumeKeyword(p, "locks")
  72         value := ValueInfo{}
  73         value.Amount = consumeIdentifier(p)
  74         consumeKeyword(p, "of")
  75         value.Asset = consumeIdentifier(p)
  76         consumeTok(p, "{")
  77         clauses := parseClauses(p)
  78         consumeTok(p, "}")
  79         return &Contract{Name: name, Params: params, Clauses: clauses, Value: value}
  80 }
  81
  82 // (p1, p2: t1, p3: t2)
  83 func parseParams(p *parser) []*Param {
  84         var params []*Param
  85         consumeTok(p, "(")
  86         first := true
  87         for !peekTok(p, ")") {
  88                 if first {
  89                         first = false
  90                 } else {
  91                         consumeTok(p, ",")
  92                 }
  93                 pt := parseParamsType(p)
  94                 params = append(params, pt...)
  95         }
  96         consumeTok(p, ")")
  97         return params
  98 }
  99
 100 func parseClauses(p *parser) []*Clause {
 101         var clauses []*Clause
 102         for !peekTok(p, "}") {
 103                 c := parseClause(p)
 104                 clauses = append(clauses, c)
 105         }
 106         return clauses
 107 }
 108
 109 func parseParamsType(p *parser) []*Param {
 110         firstName := consumeIdentifier(p)
 111         params := []*Param{&Param{Name: firstName}}
 112         for peekTok(p, ",") {
 113                 consumeTok(p, ",")
 114                 name := consumeIdentifier(p)
 115                 params = append(params, &Param{Name: name})
 116         }
 117         consumeTok(p, ":")
 118         typ := consumeIdentifier(p)
 119         for _, parm := range params {
 120                 if tdesc, ok := types[typ]; ok {
 121                         parm.Type = tdesc
 122                 } else {
 123                         p.errorf("unknown type %s", typ)
 124                 }
 125         }
 126         return params
 127 }
 128
 129 func parseClause(p *parser) *Clause {
 130         var c Clause
 131         consumeKeyword(p, "clause")
 132         c.Name = consumeIdentifier(p)
 133         c.Params = parseParams(p)
 134         consumeTok(p, "{")
 135         c.statements = parseStatements(p)
 136         consumeTok(p, "}")
 137         return &c
 138 }
 139
 140 func parseStatements(p *parser) []statement {
 141         var statements []statement
 142         for !peekTok(p, "}") {
 143                 s := parseStatement(p)
 144                 statements = append(statements, s)
 145         }
 146         return statements
 147 }
 148
 149 func parseStatement(p *parser) statement {
 150         switch peekKeyword(p) {
 151         case "if":
 152                 return parseIfStmt(p)
 153         case "define":
 154                 return parseDefineStmt(p)
 155         case "assign":
 156                 return parseAssignStmt(p)
 157         case "verify":
 158                 return parseVerifyStmt(p)
 159         case "lock":
 160                 return parseLockStmt(p)
 161         case "unlock":
 162                 return parseUnlockStmt(p)
 163         }
 164         panic(parseErr(p.buf, p.pos, "unknown keyword \"%s\"", peekKeyword(p)))
 165 }
 166
 167 func parseIfStmt(p *parser) *ifStatement {
 168         consumeKeyword(p, "if")
 169         condition := parseExpr(p)
 170         body := &IfStatmentBody{}
 171         consumeTok(p, "{")
 172         body.trueBody = parseStatements(p)
 173         consumeTok(p, "}")
 174         if peekKeyword(p) == "else" {
 175                 consumeKeyword(p, "else")
 176                 consumeTok(p, "{")
 177                 body.falseBody = parseStatements(p)
 178                 consumeTok(p, "}")
 179         }
 180         return &ifStatement{condition: condition, body: body}
 181 }
 182
 183 func parseDefineStmt(p *parser) *defineStatement {
 184         defineStat := &defineStatement{}
 185         consumeKeyword(p, "define")
 186         param := &Param{}
 187         param.Name = consumeIdentifier(p)
 188         consumeTok(p, ":")
 189         variableType := consumeIdentifier(p)
 190         if tdesc, ok := types[variableType]; ok {
 191                 param.Type = tdesc
 192         } else {
 193                 p.errorf("unknown type %s", variableType)
 194         }
 195         defineStat.variable = param
 196         if peekTok(p, "=") {
 197                 consumeTok(p, "=")
 198                 defineStat.expr = parseExpr(p)
 199         }
 200         return defineStat
 201 }
 202
 203 func parseAssignStmt(p *parser) *assignStatement {
 204         consumeKeyword(p, "assign")
 205         varName := consumeIdentifier(p)
 206         consumeTok(p, "=")
 207         expr := parseExpr(p)
 208         return &assignStatement{variable: &Param{Name: varName}, expr: expr}
 209 }
 210
 211 func parseVerifyStmt(p *parser) *verifyStatement {
 212         consumeKeyword(p, "verify")
 213         expr := parseExpr(p)
 214         return &verifyStatement{expr: expr}
 215 }
 216
 217 func parseLockStmt(p *parser) *lockStatement {
 218         consumeKeyword(p, "lock")
 219         lockedAmount := parseExpr(p)
 220         consumeKeyword(p, "of")
 221         lockedAsset := parseExpr(p)
 222         consumeKeyword(p, "with")
 223         program := parseExpr(p)
 224         return &lockStatement{lockedAmount: lockedAmount, lockedAsset: lockedAsset, program: program}
 225 }
 226
 227 func parseUnlockStmt(p *parser) *unlockStatement {
 228         consumeKeyword(p, "unlock")
 229         unlockedAmount := parseExpr(p)
 230         consumeKeyword(p, "of")
 231         unlockedAsset := parseExpr(p)
 232         return &unlockStatement{unlockedAmount: unlockedAmount, unlockedAsset: unlockedAsset}
 233 }
 234
 235 func parseExpr(p *parser) expression {
 236         // Uses the precedence-climbing algorithm
 237         // <https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method>
 238         expr := parseUnaryExpr(p)
 239         expr2, pos := parseExprCont(p, expr, 0)
 240         if pos < 0 {
 241                 p.errorf("expected expression")
 242         }
 243         p.pos = pos
 244         return expr2
 245 }
 246
 247 func parseUnaryExpr(p *parser) expression {
 248         op, pos := scanUnaryOp(p.buf, p.pos)
 249         if pos < 0 {
 250                 return parseExpr2(p)
 251         }
 252         p.pos = pos
 253         expr := parseUnaryExpr(p)
 254         return &unaryExpr{op: op, expr: expr}
 255 }
 256
 257 func parseExprCont(p *parser, lhs expression, minPrecedence int) (expression, int) {
 258         for {
 259                 op, pos := scanBinaryOp(p.buf, p.pos)
 260                 if pos < 0 || op.precedence < minPrecedence {
 261                         break
 262                 }
 263                 p.pos = pos
 264
 265                 rhs := parseUnaryExpr(p)
 266
 267                 for {
 268                         op2, pos2 := scanBinaryOp(p.buf, p.pos)
 269                         if pos2 < 0 || op2.precedence <= op.precedence {
 270                                 break
 271                         }
 272                         rhs, p.pos = parseExprCont(p, rhs, op2.precedence)
 273                         if p.pos < 0 {
 274                                 return nil, -1 // or is this an error?
 275                         }
 276                 }
 277                 lhs = &binaryExpr{left: lhs, right: rhs, op: op}
 278         }
 279         return lhs, p.pos
 280 }
 281
 282 func parseExpr2(p *parser) expression {
 283         if expr, pos := scanLiteralExpr(p.buf, p.pos); pos >= 0 {
 284                 p.pos = pos
 285                 return expr
 286         }
 287         return parseExpr3(p)
 288 }
 289
 290 func parseExpr3(p *parser) expression {
 291         e := parseExpr4(p)
 292         if peekTok(p, "(") {
 293                 args := parseArgs(p)
 294                 return &callExpr{fn: e, args: args}
 295         }
 296         return e
 297 }
 298
 299 func parseExpr4(p *parser) expression {
 300         if peekTok(p, "(") {
 301                 consumeTok(p, "(")
 302                 e := parseExpr(p)
 303                 consumeTok(p, ")")
 304                 return e
 305         }
 306         if peekTok(p, "[") {
 307                 var elts []expression
 308                 consumeTok(p, "[")
 309                 first := true
 310                 for !peekTok(p, "]") {
 311                         if first {
 312                                 first = false
 313                         } else {
 314                                 consumeTok(p, ",")
 315                         }
 316                         e := parseExpr(p)
 317                         elts = append(elts, e)
 318                 }
 319                 consumeTok(p, "]")
 320                 return listExpr(elts)
 321         }
 322         name := consumeIdentifier(p)
 323         return varRef(name)
 324 }
 325
 326 func parseArgs(p *parser) []expression {
 327         var exprs []expression
 328         consumeTok(p, "(")
 329         first := true
 330         for !peekTok(p, ")") {
 331                 if first {
 332                         first = false
 333                 } else {
 334                         consumeTok(p, ",")
 335                 }
 336                 e := parseExpr(p)
 337                 exprs = append(exprs, e)
 338         }
 339         consumeTok(p, ")")
 340         return exprs
 341 }
 342
 343 // peek functions
 344
 345 func peekKeyword(p *parser) string {
 346         name, _ := scanIdentifier(p.buf, p.pos)
 347         return name
 348 }
 349
 350 func peekTok(p *parser, token string) bool {
 351         pos := scanTok(p.buf, p.pos, token)
 352         return pos >= 0
 353 }
 354
 355 // consume functions
 356
 357 var keywords = []string{
 358         "contract", "clause", "verify", "locks", "of",
 359         "lock", "with", "unlock", "if", "else",
 360         "define", "assign", "true", "false",
 361 }
 362
 363 func consumeKeyword(p *parser, keyword string) {
 364         pos := scanKeyword(p.buf, p.pos, keyword)
 365         if pos < 0 {
 366                 p.errorf("expected keyword %s", keyword)
 367         }
 368         p.pos = pos
 369 }
 370
 371 func consumeIdentifier(p *parser) string {
 372         name, pos := scanIdentifier(p.buf, p.pos)
 373         if pos < 0 {
 374                 p.errorf("expected identifier")
 375         }
 376         p.pos = pos
 377         return name
 378 }
 379
 380 func consumeTok(p *parser, token string) {
 381         pos := scanTok(p.buf, p.pos, token)
 382         if pos < 0 {
 383                 p.errorf("expected %s token", token)
 384         }
 385         p.pos = pos
 386 }
 387
 388 // scan functions
 389
 390 func scanUnaryOp(buf []byte, offset int) (*unaryOp, int) {
 391         // Maximum munch. Make sure "-3" scans as ("-3"), not ("-", "3").
 392         if _, pos := scanIntLiteral(buf, offset); pos >= 0 {
 393                 return nil, -1
 394         }
 395         for _, op := range unaryOps {
 396                 newOffset := scanTok(buf, offset, op.op)
 397                 if newOffset >= 0 {
 398                         return &op, newOffset
 399                 }
 400         }
 401         return nil, -1
 402 }
 403
 404 func scanBinaryOp(buf []byte, offset int) (*binaryOp, int) {
 405         offset = skipWsAndComments(buf, offset)
 406         var (
 407                 found     *binaryOp
 408                 newOffset = -1
 409         )
 410         for i, op := range binaryOps {
 411                 offset2 := scanTok(buf, offset, op.op)
 412                 if offset2 >= 0 {
 413                         if found == nil || len(op.op) > len(found.op) {
 414                                 found = &binaryOps[i]
 415                                 newOffset = offset2
 416                         }
 417                 }
 418         }
 419         return found, newOffset
 420 }
 421
 422 // TODO(bobg): boolean literals?
 423 func scanLiteralExpr(buf []byte, offset int) (expression, int) {
 424         offset = skipWsAndComments(buf, offset)
 425         intliteral, newOffset := scanIntLiteral(buf, offset)
 426         if newOffset >= 0 {
 427                 return intliteral, newOffset
 428         }
 429         strliteral, newOffset := scanStrLiteral(buf, offset)
 430         if newOffset >= 0 {
 431                 return strliteral, newOffset
 432         }
 433         bytesliteral, newOffset := scanBytesLiteral(buf, offset) // 0x6c249a...
 434         if newOffset >= 0 {
 435                 return bytesliteral, newOffset
 436         }
 437         booleanLiteral, newOffset := scanBoolLiteral(buf, offset) // true or false
 438         if newOffset >= 0 {
 439                 return booleanLiteral, newOffset
 440         }
 441         return nil, -1
 442 }
 443
 444 func scanIdentifier(buf []byte, offset int) (string, int) {
 445         offset = skipWsAndComments(buf, offset)
 446         i := offset
 447         for ; i < len(buf) && isIDChar(buf[i], i == offset); i++ {
 448         }
 449         if i == offset {
 450                 return "", -1
 451         }
 452         return string(buf[offset:i]), i
 453 }
 454
 455 func scanTok(buf []byte, offset int, s string) int {
 456         offset = skipWsAndComments(buf, offset)
 457         prefix := []byte(s)
 458         if bytes.HasPrefix(buf[offset:], prefix) {
 459                 return offset + len(prefix)
 460         }
 461         return -1
 462 }
 463
 464 func scanKeyword(buf []byte, offset int, keyword string) int {
 465         id, newOffset := scanIdentifier(buf, offset)
 466         if newOffset < 0 {
 467                 return -1
 468         }
 469         if id != keyword {
 470                 return -1
 471         }
 472         return newOffset
 473 }
 474
 475 func scanIntLiteral(buf []byte, offset int) (integerLiteral, int) {
 476         offset = skipWsAndComments(buf, offset)
 477         start := offset
 478         if offset < len(buf) && buf[offset] == '-' {
 479                 offset++
 480         }
 481         i := offset
 482         for ; i < len(buf) && unicode.IsDigit(rune(buf[i])); i++ {
 483                 // the literal is BytesLiteral when it starts with 0x/0X
 484                 if buf[i] == '0' && i < len(buf)-1 && (buf[i+1] == 'x' || buf[i+1] == 'X') {
 485                         return 0, -1
 486                 }
 487         }
 488         if i > offset {
 489                 n, err := strconv.ParseInt(string(buf[start:i]), 10, 64)
 490                 if err != nil {
 491                         return 0, -1
 492                 }
 493                 return integerLiteral(n), i
 494         }
 495         return 0, -1
 496 }
 497
 498 func scanStrLiteral(buf []byte, offset int) (bytesLiteral, int) {
 499         offset = skipWsAndComments(buf, offset)
 500         if offset >= len(buf) || !(buf[offset] == '\'' || buf[offset] == '"') {
 501                 return bytesLiteral{}, -1
 502         }
 503         var byteBuf bytesLiteral
 504         for i := offset + 1; i < len(buf); i++ {
 505                 if (buf[offset] == '\'' && buf[i] == '\'') || (buf[offset] == '"' && buf[i] == '"') {
 506                         return byteBuf, i + 1
 507                 }
 508                 if buf[i] == '\\' && i < len(buf)-1 {
 509                         if c, ok := scanEscape(buf[i+1]); ok {
 510                                 byteBuf = append(byteBuf, c)
 511                                 i++
 512                                 continue
 513                         }
 514                 }
 515                 byteBuf = append(byteBuf, buf[i])
 516         }
 517         panic(parseErr(buf, offset, "unterminated string literal"))
 518 }
 519
 520 func scanBytesLiteral(buf []byte, offset int) (bytesLiteral, int) {
 521         offset = skipWsAndComments(buf, offset)
 522         if offset+4 >= len(buf) {
 523                 return nil, -1
 524         }
 525         if buf[offset] != '0' || (buf[offset+1] != 'x' && buf[offset+1] != 'X') {
 526                 return nil, -1
 527         }
 528         if !isHexDigit(buf[offset+2]) || !isHexDigit(buf[offset+3]) {
 529                 return nil, -1
 530         }
 531         i := offset + 4
 532         for ; i < len(buf); i += 2 {
 533                 if i == len(buf)-1 {
 534                         panic(parseErr(buf, offset, "odd number of digits in hex literal"))
 535                 }
 536                 if !isHexDigit(buf[i]) {
 537                         break
 538                 }
 539                 if !isHexDigit(buf[i+1]) {
 540                         panic(parseErr(buf, offset, "odd number of digits in hex literal"))
 541                 }
 542         }
 543         decoded := make([]byte, hex.DecodedLen(i-(offset+2)))
 544         _, err := hex.Decode(decoded, buf[offset+2:i])
 545         if err != nil {
 546                 return bytesLiteral{}, -1
 547         }
 548         return bytesLiteral(decoded), i
 549 }
 550
 551 func scanBoolLiteral(buf []byte, offset int) (booleanLiteral, int) {
 552         offset = skipWsAndComments(buf, offset)
 553         if offset >= len(buf) {
 554                 return false, -1
 555         }
 556
 557         newOffset := scanKeyword(buf, offset, "true")
 558         if newOffset < 0 {
 559                 if newOffset = scanKeyword(buf, offset, "false"); newOffset < 0 {
 560                         return false, -1
 561                 }
 562                 return false, newOffset
 563         }
 564         return true, newOffset
 565 }
 566
 567 func skipWsAndComments(buf []byte, offset int) int {
 568         var inComment bool
 569         for ; offset < len(buf); offset++ {
 570                 c := buf[offset]
 571                 if inComment {
 572                         if c == '\n' {
 573                                 inComment = false
 574                         }
 575                 } else {
 576                         if c == '/' && offset < len(buf)-1 && buf[offset+1] == '/' {
 577                                 inComment = true
 578                                 offset++ // skip two chars instead of one
 579                         } else if !unicode.IsSpace(rune(c)) {
 580                                 break
 581                         }
 582                 }
 583         }
 584         return offset
 585 }
 586
 587 func isHexDigit(b byte) bool {
 588         return (b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')
 589 }
 590
 591 func isIDChar(c byte, initial bool) bool {
 592         if c >= 'a' && c <= 'z' {
 593                 return true
 594         }
 595         if c >= 'A' && c <= 'Z' {
 596                 return true
 597         }
 598         if c == '_' {
 599                 return true
 600         }
 601         if initial {
 602                 return false
 603         }
 604         return unicode.IsDigit(rune(c))
 605 }
 606
 607 type parserErr struct {
 608         buf    []byte
 609         offset int
 610         format string
 611         args   []interface{}
 612 }
 613
 614 func parseErr(buf []byte, offset int, format string, args ...interface{}) error {
 615         return parserErr{buf: buf, offset: offset, format: format, args: args}
 616 }
 617
 618 func (p parserErr) Error() string {
 619         // Lines start at 1, columns start at 0, like nature intended.
 620         line := 1
 621         col := 0
 622         for i := 0; i < p.offset; i++ {
 623                 if p.buf[i] == '\n' {
 624                         line++
 625                         col = 0
 626                 } else {
 627                         col++
 628                 }
 629         }
 630         args := []interface{}{line, col}
 631         args = append(args, p.args...)
 632         return fmt.Sprintf("line %d, col %d: "+p.format, args...)
 633 }
 634
 635 func scanEscape(c byte) (byte, bool) {
 636         escapeFlag := true
 637         switch c {
 638         case '\'', '"', '\\':
 639         case 'b':
 640                 c = '\b'
 641         case 'f':
 642                 c = '\f'
 643         case 'n':
 644                 c = '\n'
 645         case 'r':
 646                 c = '\r'
 647         case 't':
 648                 c = '\t'
 649         case 'v':
 650                 c = '\v'
 651         default:
 652                 escapeFlag = false
 653         }
 654         return c, escapeFlag
 655 }