From c9db589887bbf5b08504277c25fbe298f66ff051 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Mon, 12 Nov 2018 20:15:01 +0000 Subject: [PATCH] [WebAssembly] Added WasmAsmParser. Summary: This is to replace the ELFAsmParser that WebAssembly was using, which so far was a stub that didn't do anything, and couldn't work correctly with wasm. This new class is there to implement generic directives related to wasm as a binary format. Wasm target specific directives are still parsed in WebAssemblyAsmParser as before. The two classes now cooperate more correctly too. Also implemented .result which was missing. Any unknown directives will now result in errors. Reviewers: dschuff, sbc100 Subscribers: mgorny, jgravelle-google, eraman, aheejin, sunfish, llvm-commits Differential Revision: https://reviews.llvm.org/D54360 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346700 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 8 +- lib/MC/MCParser/CMakeLists.txt | 1 + lib/MC/MCParser/WasmAsmParser.cpp | 145 +++++++++++++++++++++ .../WebAssembly/AsmParser/WebAssemblyAsmParser.cpp | 105 +++++---------- test/CodeGen/WebAssembly/inline-asm-roundtrip.ll | 1 + test/MC/WebAssembly/basic-assembly.s | 3 + 6 files changed, 184 insertions(+), 79 deletions(-) create mode 100644 lib/MC/MCParser/WasmAsmParser.cpp diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 3f7b507791e..7299ed29e4b 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -675,6 +675,7 @@ namespace llvm { extern MCAsmParserExtension *createDarwinAsmParser(); extern MCAsmParserExtension *createELFAsmParser(); extern MCAsmParserExtension *createCOFFAsmParser(); +extern MCAsmParserExtension *createWasmAsmParser(); } // end namespace llvm @@ -705,10 +706,7 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, PlatformParser.reset(createELFAsmParser()); break; case MCObjectFileInfo::IsWasm: - // TODO: WASM will 
need its own MCAsmParserExtension implementation, but - // for now we can re-use the ELF one, since the directives can be the - // same for now. - PlatformParser.reset(createELFAsmParser()); + PlatformParser.reset(createWasmAsmParser()); break; } @@ -3921,7 +3919,7 @@ bool AsmParser::parseDirectiveCFIStartProc() { parseToken(AsmToken::EndOfStatement)) return addErrorSuffix(" in '.cfi_startproc' directive"); } - + // TODO(kristina): Deal with a corner case of incorrect diagnostic context // being produced if this directive is emitted as part of preprocessor macro // expansion which can *ONLY* happen if Clang's cc1as is the API consumer. diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt index 99fdd016799..0c54e8e9019 100644 --- a/lib/MC/MCParser/CMakeLists.txt +++ b/lib/MC/MCParser/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMMCParser MCAsmParser.cpp MCAsmParserExtension.cpp MCTargetAsmParser.cpp + WasmAsmParser.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/MC/MCParser diff --git a/lib/MC/MCParser/WasmAsmParser.cpp b/lib/MC/MCParser/WasmAsmParser.cpp new file mode 100644 index 00000000000..93bb0cb3c72 --- /dev/null +++ b/lib/MC/MCParser/WasmAsmParser.cpp @@ -0,0 +1,145 @@ +//===- WasmAsmParser.cpp - Wasm Assembly Parser -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// -- +// +// Note, this is for wasm, the binary format (analogous to ELF), not wasm, +// the instruction set (analogous to x86), for which parsing code lives in +// WebAssemblyAsmParser. +// +// This file contains processing for generic directives implemented using +// MCTargetStreamer, the ones that depend on WebAssemblyTargetStreamer are in +// WebAssemblyAsmParser. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/Support/MachineValueType.h" + +using namespace llvm; + +namespace { + +class WasmAsmParser : public MCAsmParserExtension { + MCAsmParser *Parser; + MCAsmLexer *Lexer; + + template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)> + void addDirectiveHandler(StringRef Directive) { + MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair( + this, HandleDirective<WasmAsmParser, HandlerMethod>); + + getParser().addDirectiveHandler(Directive, Handler); + } + +public: + WasmAsmParser() : Parser(nullptr), Lexer(nullptr) { + BracketExpressionsSupported = true; + } + + void Initialize(MCAsmParser &P) override { + Parser = &P; + Lexer = &Parser->getLexer(); + // Call the base implementation. + this->MCAsmParserExtension::Initialize(*Parser); + + addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text"); + addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section"); + addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size"); + addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type"); + } + + bool Error(const StringRef &msg, const AsmToken &tok) { + return Parser->Error(tok.getLoc(), msg + tok.getString()); + } + + bool IsNext(AsmToken::TokenKind Kind) { + auto ok = Lexer->is(Kind); + if (ok) Lex(); + return ok; + } + + bool Expect(AsmToken::TokenKind Kind, const char *KindName) { + if (!IsNext(Kind)) + return Error(std::string("Expected ") + KindName + ", instead got: ", + Lexer->getTok()); + return false; + } + + bool parseSectionDirectiveText(StringRef, SMLoc) { + // FIXME: .text currently no-op. + return false; + } + + bool parseSectionDirective(StringRef, SMLoc) { + // FIXME: .section currently no-op. 
+ while (Lexer->isNot(AsmToken::EndOfStatement)) Parser->Lex(); + return false; + } + + // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize + // so maybe could be shared somehow. + bool parseDirectiveSize(StringRef, SMLoc) { + StringRef Name; + if (Parser->parseIdentifier(Name)) + return TokError("expected identifier in directive"); + auto Sym = getContext().getOrCreateSymbol(Name); + if (Lexer->isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + const MCExpr *Expr; + if (Parser->parseExpression(Expr)) + return true; + if (Lexer->isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + Lex(); + // MCWasmStreamer implements this. + getStreamer().emitELFSize(Sym, Expr); + return false; + } + + bool parseDirectiveType(StringRef, SMLoc) { + // This could be the start of a function, check if followed by + // "label,@function" + if (!Lexer->is(AsmToken::Identifier)) + return Error("Expected label after .type directive, got: ", + Lexer->getTok()); + auto WasmSym = cast<MCSymbolWasm>( + getStreamer().getContext().getOrCreateSymbol( + Lexer->getTok().getString())); + Lex(); + if (!(IsNext(AsmToken::Comma) && IsNext(AsmToken::At) && + Lexer->is(AsmToken::Identifier))) + return Error("Expected label,@type declaration, got: ", Lexer->getTok()); + auto TypeName = Lexer->getTok().getString(); + if (TypeName == "function") + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + else if (TypeName == "global") + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); + else + return Error("Unknown WASM symbol type: ", Lexer->getTok()); + Lex(); + return Expect(AsmToken::EndOfStatement, "EOL"); + } +}; + +} // end anonymous namespace + +namespace llvm { + +MCAsmParserExtension *createWasmAsmParser() { + return new WasmAsmParser; +} + +} // end namespace llvm diff --git a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 
efa6793cff2..703ea2d7d02 100644 --- a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -132,13 +132,12 @@ struct WebAssemblyOperand : public MCParsedAsmOperand { class WebAssemblyAsmParser final : public MCTargetAsmParser { MCAsmParser &Parser; MCAsmLexer &Lexer; - MCSymbolWasm *LastSymbol; public: WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, STI, MII), Parser(Parser), - Lexer(Parser.getLexer()), LastSymbol(nullptr) { + Lexer(Parser.getLexer()) { setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } @@ -191,6 +190,19 @@ public: .Default({MVT::INVALID_SIMPLE_VALUE_TYPE, wasm::WASM_TYPE_NORESULT}); } + bool ParseRegTypeList(std::vector<MVT> &Types) { + while (Lexer.is(AsmToken::Identifier)) { + auto RegType = ParseRegType(Lexer.getTok().getString()).first; + if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE) + return true; + Types.push_back(RegType); + Parser.Lex(); + if (!IsNext(AsmToken::Comma)) + break; + } + return Expect(AsmToken::EndOfStatement, "EOL"); + } + void ParseSingleInteger(bool IsNegative, OperandVector &Operands) { auto &Int = Lexer.getTok(); int64_t Val = Int.getIntVal(); @@ -314,10 +326,9 @@ public: return false; } - void onLabelParsed(MCSymbol *Symbol) override { - LastSymbol = cast<MCSymbolWasm>(Symbol); - } - + // This function processes wasm-specific directives streamed to + // WebAssemblyTargetStreamer, all others go to the generic parser + // (see WasmAsmParser). bool ParseDirective(AsmToken DirectiveID) override { // This function has a really weird return value behavior that is different // from all the other parsing functions: @@ -331,44 +342,7 @@ public: reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer()); // TODO: any time we return an error, at least one token must have been // consumed, otherwise this will not signal an error to the caller. 
- if (DirectiveID.getString() == ".type") { - // This could be the start of a function, check if followed by - // "label,@function" - if (!Lexer.is(AsmToken::Identifier)) - return Error("Expected label after .type directive, got: ", - Lexer.getTok()); - auto WasmSym = cast<MCSymbolWasm>( - TOut.getStreamer().getContext().getOrCreateSymbol( - Lexer.getTok().getString())); - Parser.Lex(); - if (!(IsNext(AsmToken::Comma) && IsNext(AsmToken::At) && - Lexer.is(AsmToken::Identifier))) - return Error("Expected label,@type declaration, got: ", Lexer.getTok()); - auto TypeName = Lexer.getTok().getString(); - if (TypeName == "function") - WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); - else if (TypeName == "global") - WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); - else - return Error("Unknown WASM symbol type: ", Lexer.getTok()); - Parser.Lex(); - return Expect(AsmToken::EndOfStatement, "EOL"); - } else if (DirectiveID.getString() == ".size") { - if (!Lexer.is(AsmToken::Identifier)) - return Error("Expected label after .size directive, got: ", - Lexer.getTok()); - auto WasmSym = cast<MCSymbolWasm>( - TOut.getStreamer().getContext().getOrCreateSymbol( - Lexer.getTok().getString())); - Parser.Lex(); - if (!IsNext(AsmToken::Comma)) - return Error("Expected `,`, got: ", Lexer.getTok()); - const MCExpr *Exp; - if (Parser.parseExpression(Exp)) - return Error("Cannot parse .size expression: ", Lexer.getTok()); - WasmSym->setSize(Exp); - return Expect(AsmToken::EndOfStatement, "EOL"); - } else if (DirectiveID.getString() == ".globaltype") { + if (DirectiveID.getString() == ".globaltype") { if (!Lexer.is(AsmToken::Identifier)) return Error("Expected symbol name after .globaltype directive, got: ", Lexer.getTok()); @@ -392,40 +366,23 @@ public: // And emit the directive again. 
TOut.emitGlobalType(WasmSym); return Expect(AsmToken::EndOfStatement, "EOL"); - } else if (DirectiveID.getString() == ".param" || - DirectiveID.getString() == ".local") { - // Track the number of locals, needed for correct virtual register - // assignment elsewhere. - // Also output a directive to the streamer. + } else if (DirectiveID.getString() == ".param") { std::vector<MVT> Params; + if (ParseRegTypeList(Params)) return true; + TOut.emitParam(nullptr /* unused */, Params); + return false; + } else if (DirectiveID.getString() == ".result") { + std::vector<MVT> Results; + if (ParseRegTypeList(Results)) return true; + TOut.emitResult(nullptr /* unused */, Results); + return false; + } else if (DirectiveID.getString() == ".local") { std::vector<MVT> Locals; - while (Lexer.is(AsmToken::Identifier)) { - auto RegType = ParseRegType(Lexer.getTok().getString()).first; - if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE) - return true; - if (DirectiveID.getString() == ".param") { - Params.push_back(RegType); - } else { - Locals.push_back(RegType); - } - Parser.Lex(); - if (!IsNext(AsmToken::Comma)) - break; - } - assert(LastSymbol); - // TODO: LastSymbol isn't even used by emitParam, so could be removed. - TOut.emitParam(LastSymbol, Params); + if (ParseRegTypeList(Locals)) return true; TOut.emitLocal(Locals); - return Expect(AsmToken::EndOfStatement, "EOL"); - } else { - // TODO: remove. - while (Lexer.isNot(AsmToken::EndOfStatement)) - Parser.Lex(); - return Expect(AsmToken::EndOfStatement, "EOL"); + return false; } - // TODO: current ELF directive parsing is broken, fix this is a followup. - //return true; // We didn't process this directive. - return false; + return true; // We didn't process this directive. 
} bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/, diff --git a/test/CodeGen/WebAssembly/inline-asm-roundtrip.ll b/test/CodeGen/WebAssembly/inline-asm-roundtrip.ll index 7fcc3cf276a..831fd44d804 100644 --- a/test/CodeGen/WebAssembly/inline-asm-roundtrip.ll +++ b/test/CodeGen/WebAssembly/inline-asm-roundtrip.ll @@ -22,6 +22,7 @@ target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: main: ; CHECK-NEXT: .param i32, i32 +; CHECK-NEXT: .result i32 ; CHECK-NEXT: .local i32 ; CHECK-NEXT: i32.const 1 ; CHECK-NEXT: set_local [[SRC:[0-9]+]] diff --git a/test/MC/WebAssembly/basic-assembly.s b/test/MC/WebAssembly/basic-assembly.s index c2b316c9243..9e09ab2e042 100644 --- a/test/MC/WebAssembly/basic-assembly.s +++ b/test/MC/WebAssembly/basic-assembly.s @@ -3,10 +3,12 @@ # RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s .text + .section .text.main,"",@ .type test0,@function test0: # Test all types: .param i32, i64 + .result i32 .local f32, f64, v128, v128 # Explicit getlocal/setlocal: get_local 2 @@ -65,6 +67,7 @@ test0: # CHECK: .text # CHECK-LABEL: test0: # CHECK-NEXT: .param i32, i64 +# CHECK-NEXT: .result i32 # CHECK-NEXT: .local f32, f64 # CHECK-NEXT: get_local 2 # CHECK-NEXT: set_local 2 -- 2.11.0