equity/compiler/parse.go

   1 package compiler
   2
   3 import (
   4         "bytes"
   5         "encoding/hex"
   6         "fmt"
   7         "strconv"
   8         "unicode"
   9 )
  10
  11 // We have some function naming conventions.
  12 //
  13 // For terminals:
  14 //   scanX     takes buf and position, returns new position (and maybe a value)
  15 //   peekX     takes *parser, returns bool or string
  16 //   consumeX  takes *parser and maybe a required literal, maybe returns value
  17 //             also updates the parser position
  18 //
  19 // For nonterminals:
  20 //   parseX    takes *parser, returns AST node, updates parser position
  21
  22 type parser struct {
  23         buf []byte
  24         pos int
  25 }
  26
  27 func (p *parser) errorf(format string, args ...interface{}) {
  28         panic(parserErr{buf: p.buf, offset: p.pos, format: format, args: args})
  29 }
  30
  31 // parse is the main entry point to the parser
  32 func parse(buf []byte) (contracts []*Contract, err error) {
  33         defer func() {
  34                 if val := recover(); val != nil {
  35                         if e, ok := val.(parserErr); ok {
  36                                 err = e
  37                         } else {
  38                                 panic(val)
  39                         }
  40                 }
  41         }()
  42         p := &parser{buf: buf}
  43         contracts = parseContracts(p)
  44         return
  45 }
  46
  47 // parse functions
  48
  49 func parseContracts(p *parser) []*Contract {
  50         var result []*Contract
  51         if pos := scanKeyword(p.buf, p.pos, "contract"); pos < 0 {
  52                 p.errorf("expected contract")
  53         }
  54
  55         for peekKeyword(p) == "contract" {
  56                 contract := parseContract(p)
  57                 result = append(result, contract)
  58         }
  59         return result
  60 }
  61
  62 // contract name(p1, p2: t1, p3: t2) locks value { ... }
  63 func parseContract(p *parser) *Contract {
  64         consumeKeyword(p, "contract")
  65         name := consumeIdentifier(p)
  66         params := parseParams(p)
  67         // locks amount of asset
  68         consumeKeyword(p, "locks")
  69         value := ValueInfo{}
  70         value.Amount = consumeIdentifier(p)
  71         consumeKeyword(p, "of")
  72         value.Asset = consumeIdentifier(p)
  73         consumeTok(p, "{")
  74         clauses := parseClauses(p)
  75         consumeTok(p, "}")
  76         return &Contract{Name: name, Params: params, Clauses: clauses, Value: value}
  77 }
  78
  79 // (p1, p2: t1, p3: t2)
  80 func parseParams(p *parser) []*Param {
  81         var params []*Param
  82         consumeTok(p, "(")
  83         first := true
  84         for !peekTok(p, ")") {
  85                 if first {
  86                         first = false
  87                 } else {
  88                         consumeTok(p, ",")
  89                 }
  90                 pt := parseParamsType(p)
  91                 params = append(params, pt...)
  92         }
  93         consumeTok(p, ")")
  94         return params
  95 }
  96
  97 func parseClauses(p *parser) []*Clause {
  98         var clauses []*Clause
  99         for !peekTok(p, "}") {
 100                 c := parseClause(p)
 101                 clauses = append(clauses, c)
 102         }
 103         return clauses
 104 }
 105
 106 func parseParamsType(p *parser) []*Param {
 107         firstName := consumeIdentifier(p)
 108         params := []*Param{&Param{Name: firstName}}
 109         for peekTok(p, ",") {
 110                 consumeTok(p, ",")
 111                 name := consumeIdentifier(p)
 112                 params = append(params, &Param{Name: name})
 113         }
 114         consumeTok(p, ":")
 115         typ := consumeIdentifier(p)
 116         for _, parm := range params {
 117                 if tdesc, ok := types[typ]; ok {
 118                         parm.Type = tdesc
 119                 } else {
 120                         p.errorf("unknown type %s", typ)
 121                 }
 122         }
 123         return params
 124 }
 125
 126 func parseClause(p *parser) *Clause {
 127         var c Clause
 128         consumeKeyword(p, "clause")
 129         c.Name = consumeIdentifier(p)
 130         c.Params = parseParams(p)
 131         consumeTok(p, "{")
 132         c.statements = parseStatements(p)
 133         consumeTok(p, "}")
 134         return &c
 135 }
 136
 137 func parseStatements(p *parser) []statement {
 138         var statements []statement
 139         for !peekTok(p, "}") {
 140                 s := parseStatement(p)
 141                 statements = append(statements, s)
 142         }
 143         return statements
 144 }
 145
 146 func parseStatement(p *parser) statement {
 147         switch peekKeyword(p) {
 148         case "if":
 149                 return parseIfStmt(p)
 150         case "define":
 151                 return parseDefineStmt(p)
 152         case "assign":
 153                 return parseAssignStmt(p)
 154         case "verify":
 155                 return parseVerifyStmt(p)
 156         case "lock":
 157                 return parseLockStmt(p)
 158         case "unlock":
 159                 return parseUnlockStmt(p)
 160         }
 161         panic(parseErr(p.buf, p.pos, "unknown keyword \"%s\"", peekKeyword(p)))
 162 }
 163
 164 func parseIfStmt(p *parser) *ifStatement {
 165         consumeKeyword(p, "if")
 166         condition := parseExpr(p)
 167         body := &IfStatmentBody{}
 168         consumeTok(p, "{")
 169         body.trueBody = parseStatements(p)
 170         consumeTok(p, "}")
 171         if peekKeyword(p) == "else" {
 172                 consumeKeyword(p, "else")
 173                 consumeTok(p, "{")
 174                 body.falseBody = parseStatements(p)
 175                 consumeTok(p, "}")
 176         }
 177         return &ifStatement{condition: condition, body: body}
 178 }
 179
 180 func parseDefineStmt(p *parser) *defineStatement {
 181         defineStat := &defineStatement{}
 182         consumeKeyword(p, "define")
 183         param := &Param{}
 184         param.Name = consumeIdentifier(p)
 185         consumeTok(p, ":")
 186         variableType := consumeIdentifier(p)
 187         if tdesc, ok := types[variableType]; ok {
 188                 param.Type = tdesc
 189         } else {
 190                 p.errorf("unknown type %s", variableType)
 191         }
 192         defineStat.variable = param
 193         if peekTok(p, "=") {
 194                 consumeTok(p, "=")
 195                 defineStat.expr = parseExpr(p)
 196         }
 197         return defineStat
 198 }
 199
 200 func parseAssignStmt(p *parser) *assignStatement {
 201         consumeKeyword(p, "assign")
 202         varName := consumeIdentifier(p)
 203         consumeTok(p, "=")
 204         expr := parseExpr(p)
 205         return &assignStatement{variable: &Param{Name: varName}, expr: expr}
 206 }
 207
 208 func parseVerifyStmt(p *parser) *verifyStatement {
 209         consumeKeyword(p, "verify")
 210         expr := parseExpr(p)
 211         return &verifyStatement{expr: expr}
 212 }
 213
 214 func parseLockStmt(p *parser) *lockStatement {
 215         consumeKeyword(p, "lock")
 216         lockedAmount := parseExpr(p)
 217         consumeKeyword(p, "of")
 218         lockedAsset := parseExpr(p)
 219         consumeKeyword(p, "with")
 220         program := parseExpr(p)
 221         return &lockStatement{lockedAmount: lockedAmount, lockedAsset: lockedAsset, program: program}
 222 }
 223
 224 func parseUnlockStmt(p *parser) *unlockStatement {
 225         consumeKeyword(p, "unlock")
 226         unlockedAmount := parseExpr(p)
 227         consumeKeyword(p, "of")
 228         unlockedAsset := parseExpr(p)
 229         return &unlockStatement{unlockedAmount: unlockedAmount, unlockedAsset: unlockedAsset}
 230 }
 231
 232 func parseExpr(p *parser) expression {
 233         // Uses the precedence-climbing algorithm
 234         // <https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method>
 235         expr := parseUnaryExpr(p)
 236         expr2, pos := parseExprCont(p, expr, 0)
 237         if pos < 0 {
 238                 p.errorf("expected expression")
 239         }
 240         p.pos = pos
 241         return expr2
 242 }
 243
 244 func parseUnaryExpr(p *parser) expression {
 245         op, pos := scanUnaryOp(p.buf, p.pos)
 246         if pos < 0 {
 247                 return parseExpr2(p)
 248         }
 249         p.pos = pos
 250         expr := parseUnaryExpr(p)
 251         return &unaryExpr{op: op, expr: expr}
 252 }
 253
 254 func parseExprCont(p *parser, lhs expression, minPrecedence int) (expression, int) {
 255         for {
 256                 op, pos := scanBinaryOp(p.buf, p.pos)
 257                 if pos < 0 || op.precedence < minPrecedence {
 258                         break
 259                 }
 260                 p.pos = pos
 261
 262                 rhs := parseUnaryExpr(p)
 263
 264                 for {
 265                         op2, pos2 := scanBinaryOp(p.buf, p.pos)
 266                         if pos2 < 0 || op2.precedence <= op.precedence {
 267                                 break
 268                         }
 269                         rhs, p.pos = parseExprCont(p, rhs, op2.precedence)
 270                         if p.pos < 0 {
 271                                 return nil, -1 // or is this an error?
 272                         }
 273                 }
 274                 lhs = &binaryExpr{left: lhs, right: rhs, op: op}
 275         }
 276         return lhs, p.pos
 277 }
 278
 279 func parseExpr2(p *parser) expression {
 280         if expr, pos := scanLiteralExpr(p.buf, p.pos); pos >= 0 {
 281                 p.pos = pos
 282                 return expr
 283         }
 284         return parseExpr3(p)
 285 }
 286
 287 func parseExpr3(p *parser) expression {
 288         e := parseExpr4(p)
 289         if peekTok(p, "(") {
 290                 args := parseArgs(p)
 291                 return &callExpr{fn: e, args: args}
 292         }
 293         return e
 294 }
 295
 296 func parseExpr4(p *parser) expression {
 297         if peekTok(p, "(") {
 298                 consumeTok(p, "(")
 299                 e := parseExpr(p)
 300                 consumeTok(p, ")")
 301                 return e
 302         }
 303         if peekTok(p, "[") {
 304                 var elts []expression
 305                 consumeTok(p, "[")
 306                 first := true
 307                 for !peekTok(p, "]") {
 308                         if first {
 309                                 first = false
 310                         } else {
 311                                 consumeTok(p, ",")
 312                         }
 313                         e := parseExpr(p)
 314                         elts = append(elts, e)
 315                 }
 316                 consumeTok(p, "]")
 317                 return listExpr(elts)
 318         }
 319         name := consumeIdentifier(p)
 320         return varRef(name)
 321 }
 322
 323 func parseArgs(p *parser) []expression {
 324         var exprs []expression
 325         consumeTok(p, "(")
 326         first := true
 327         for !peekTok(p, ")") {
 328                 if first {
 329                         first = false
 330                 } else {
 331                         consumeTok(p, ",")
 332                 }
 333                 e := parseExpr(p)
 334                 exprs = append(exprs, e)
 335         }
 336         consumeTok(p, ")")
 337         return exprs
 338 }
 339
 340 // peek functions
 341
 342 func peekKeyword(p *parser) string {
 343         name, _ := scanIdentifier(p.buf, p.pos)
 344         return name
 345 }
 346
 347 func peekTok(p *parser, token string) bool {
 348         pos := scanTok(p.buf, p.pos, token)
 349         return pos >= 0
 350 }
 351
 352 // consume functions
 353
 354 var keywords = []string{
 355         "contract", "clause", "verify", "locks", "of",
 356         "lock", "with", "unlock", "if", "else",
 357         "define", "assign", "true", "false",
 358 }
 359
 360 func consumeKeyword(p *parser, keyword string) {
 361         pos := scanKeyword(p.buf, p.pos, keyword)
 362         if pos < 0 {
 363                 p.errorf("expected keyword %s", keyword)
 364         }
 365         p.pos = pos
 366 }
 367
 368 func consumeIdentifier(p *parser) string {
 369         name, pos := scanIdentifier(p.buf, p.pos)
 370         if pos < 0 {
 371                 p.errorf("expected identifier")
 372         }
 373         p.pos = pos
 374         return name
 375 }
 376
 377 func consumeTok(p *parser, token string) {
 378         pos := scanTok(p.buf, p.pos, token)
 379         if pos < 0 {
 380                 p.errorf("expected %s token", token)
 381         }
 382         p.pos = pos
 383 }
 384
 385 // scan functions
 386
 387 func scanUnaryOp(buf []byte, offset int) (*unaryOp, int) {
 388         // Maximum munch. Make sure "-3" scans as ("-3"), not ("-", "3").
 389         if _, pos := scanIntLiteral(buf, offset); pos >= 0 {
 390                 return nil, -1
 391         }
 392         for _, op := range unaryOps {
 393                 newOffset := scanTok(buf, offset, op.op)
 394                 if newOffset >= 0 {
 395                         return &op, newOffset
 396                 }
 397         }
 398         return nil, -1
 399 }
 400
 401 func scanBinaryOp(buf []byte, offset int) (*binaryOp, int) {
 402         offset = skipWsAndComments(buf, offset)
 403         var (
 404                 found     *binaryOp
 405                 newOffset = -1
 406         )
 407         for i, op := range binaryOps {
 408                 offset2 := scanTok(buf, offset, op.op)
 409                 if offset2 >= 0 {
 410                         if found == nil || len(op.op) > len(found.op) {
 411                                 found = &binaryOps[i]
 412                                 newOffset = offset2
 413                         }
 414                 }
 415         }
 416         return found, newOffset
 417 }
 418
 419 // TODO(bobg): boolean literals?
 420 func scanLiteralExpr(buf []byte, offset int) (expression, int) {
 421         offset = skipWsAndComments(buf, offset)
 422         intliteral, newOffset := scanIntLiteral(buf, offset)
 423         if newOffset >= 0 {
 424                 return intliteral, newOffset
 425         }
 426         strliteral, newOffset := scanStrLiteral(buf, offset)
 427         if newOffset >= 0 {
 428                 return strliteral, newOffset
 429         }
 430         bytesliteral, newOffset := scanBytesLiteral(buf, offset) // 0x6c249a...
 431         if newOffset >= 0 {
 432                 return bytesliteral, newOffset
 433         }
 434         booleanLiteral, newOffset := scanBoolLiteral(buf, offset) // true or false
 435         if newOffset >= 0 {
 436                 return booleanLiteral, newOffset
 437         }
 438         return nil, -1
 439 }
 440
 441 func scanIdentifier(buf []byte, offset int) (string, int) {
 442         offset = skipWsAndComments(buf, offset)
 443         i := offset
 444         for ; i < len(buf) && isIDChar(buf[i], i == offset); i++ {
 445         }
 446         if i == offset {
 447                 return "", -1
 448         }
 449         return string(buf[offset:i]), i
 450 }
 451
 452 func scanTok(buf []byte, offset int, s string) int {
 453         offset = skipWsAndComments(buf, offset)
 454         prefix := []byte(s)
 455         if bytes.HasPrefix(buf[offset:], prefix) {
 456                 return offset + len(prefix)
 457         }
 458         return -1
 459 }
 460
 461 func scanKeyword(buf []byte, offset int, keyword string) int {
 462         id, newOffset := scanIdentifier(buf, offset)
 463         if newOffset < 0 {
 464                 return -1
 465         }
 466         if id != keyword {
 467                 return -1
 468         }
 469         return newOffset
 470 }
 471
 472 func scanIntLiteral(buf []byte, offset int) (integerLiteral, int) {
 473         offset = skipWsAndComments(buf, offset)
 474         start := offset
 475         if offset < len(buf) && buf[offset] == '-' {
 476                 offset++
 477         }
 478         i := offset
 479         for ; i < len(buf) && unicode.IsDigit(rune(buf[i])); i++ {
 480                 // the literal is BytesLiteral when it starts with 0x/0X
 481                 if buf[i] == '0' && i < len(buf)-1 && (buf[i+1] == 'x' || buf[i+1] == 'X') {
 482                         return 0, -1
 483                 }
 484         }
 485         if i > offset {
 486                 n, err := strconv.ParseInt(string(buf[start:i]), 10, 64)
 487                 if err != nil {
 488                         return 0, -1
 489                 }
 490                 return integerLiteral(n), i
 491         }
 492         return 0, -1
 493 }
 494
 495 func scanStrLiteral(buf []byte, offset int) (bytesLiteral, int) {
 496         offset = skipWsAndComments(buf, offset)
 497         if offset >= len(buf) || buf[offset] != '\'' {
 498                 return bytesLiteral{}, -1
 499         }
 500         var byteBuf bytesLiteral
 501         for i := offset + 1; i < len(buf); i++ {
 502                 if buf[i] == '\'' {
 503                         return byteBuf, i + 1
 504                 }
 505                 if buf[i] == '\\' && i < len(buf)-1 {
 506                         if c, ok := scanEscape(buf[i+1]); ok {
 507                                 byteBuf = append(byteBuf, c)
 508                                 i++
 509                                 continue
 510                         }
 511                 }
 512                 byteBuf = append(byteBuf, buf[i])
 513         }
 514         panic(parseErr(buf, offset, "unterminated string literal"))
 515 }
 516
 517 func scanBytesLiteral(buf []byte, offset int) (bytesLiteral, int) {
 518         offset = skipWsAndComments(buf, offset)
 519         if offset+4 >= len(buf) {
 520                 return nil, -1
 521         }
 522         if buf[offset] != '0' || (buf[offset+1] != 'x' && buf[offset+1] != 'X') {
 523                 return nil, -1
 524         }
 525         if !isHexDigit(buf[offset+2]) || !isHexDigit(buf[offset+3]) {
 526                 return nil, -1
 527         }
 528         i := offset + 4
 529         for ; i < len(buf); i += 2 {
 530                 if i == len(buf)-1 {
 531                         panic(parseErr(buf, offset, "odd number of digits in hex literal"))
 532                 }
 533                 if !isHexDigit(buf[i]) {
 534                         break
 535                 }
 536                 if !isHexDigit(buf[i+1]) {
 537                         panic(parseErr(buf, offset, "odd number of digits in hex literal"))
 538                 }
 539         }
 540         decoded := make([]byte, hex.DecodedLen(i-(offset+2)))
 541         _, err := hex.Decode(decoded, buf[offset+2:i])
 542         if err != nil {
 543                 return bytesLiteral{}, -1
 544         }
 545         return bytesLiteral(decoded), i
 546 }
 547
 548 func scanBoolLiteral(buf []byte, offset int) (booleanLiteral, int) {
 549         offset = skipWsAndComments(buf, offset)
 550         if offset >= len(buf) {
 551                 return false, -1
 552         }
 553
 554         newOffset := scanKeyword(buf, offset, "true")
 555         if newOffset < 0 {
 556                 if newOffset = scanKeyword(buf, offset, "false"); newOffset < 0 {
 557                         return false, -1
 558                 }
 559                 return false, newOffset
 560         }
 561         return true, newOffset
 562 }
 563
 564 func skipWsAndComments(buf []byte, offset int) int {
 565         var inComment bool
 566         for ; offset < len(buf); offset++ {
 567                 c := buf[offset]
 568                 if inComment {
 569                         if c == '\n' {
 570                                 inComment = false
 571                         }
 572                 } else {
 573                         if c == '/' && offset < len(buf)-1 && buf[offset+1] == '/' {
 574                                 inComment = true
 575                                 offset++ // skip two chars instead of one
 576                         } else if !unicode.IsSpace(rune(c)) {
 577                                 break
 578                         }
 579                 }
 580         }
 581         return offset
 582 }
 583
 584 func isHexDigit(b byte) bool {
 585         return (b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')
 586 }
 587
 588 func isIDChar(c byte, initial bool) bool {
 589         if c >= 'a' && c <= 'z' {
 590                 return true
 591         }
 592         if c >= 'A' && c <= 'Z' {
 593                 return true
 594         }
 595         if c == '_' {
 596                 return true
 597         }
 598         if initial {
 599                 return false
 600         }
 601         return unicode.IsDigit(rune(c))
 602 }
 603
 604 type parserErr struct {
 605         buf    []byte
 606         offset int
 607         format string
 608         args   []interface{}
 609 }
 610
 611 func parseErr(buf []byte, offset int, format string, args ...interface{}) error {
 612         return parserErr{buf: buf, offset: offset, format: format, args: args}
 613 }
 614
 615 func (p parserErr) Error() string {
 616         // Lines start at 1, columns start at 0, like nature intended.
 617         line := 1
 618         col := 0
 619         for i := 0; i < p.offset; i++ {
 620                 if p.buf[i] == '\n' {
 621                         line++
 622                         col = 0
 623                 } else {
 624                         col++
 625                 }
 626         }
 627         args := []interface{}{line, col}
 628         args = append(args, p.args...)
 629         return fmt.Sprintf("line %d, col %d: "+p.format, args...)
 630 }
 631
 632 func scanEscape(c byte) (byte, bool) {
 633         escapeFlag := true
 634         switch c {
 635         case '\'', '"', '\\':
 636         case 'b':
 637                 c = '\b'
 638         case 'f':
 639                 c = '\f'
 640         case 'n':
 641                 c = '\n'
 642         case 'r':
 643                 c = '\r'
 644         case 't':
 645                 c = '\t'
 646         case 'v':
 647                 c = '\v'
 648         default:
 649                 escapeFlag = false
 650         }
 651         return c, escapeFlag
 652 }