1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "InstPrinter/X86IntelInstPrinter.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSection.h"
30 #include "llvm/MC/MCStreamer.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
// Validates the scale factor of a memory operand. When Scale is not one of
// 1, 2, 4 or 8, ErrMsg is set to a diagnostic string.
// NOTE(review): the return statements are not visible in this excerpt. Callers
// treat the result as an error flag (CheckBaseRegAndIndexRegAndScale returns
// it directly), so this presumably yields true for an invalid scale -- confirm.
41 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
42 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
43 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
// Operator precedence table. InfixCalculator::pushOperator indexes it with
// InfixCalculatorTok values and compares the entries numerically; a larger
// entry is treated as higher precedence.
51 static const char OpPrecedence[] = {
// Target assembly parser for X86, derived from MCTargetAsmParser.
70 class X86AsmParser : public MCTargetAsmParser {
// Instruction info supplied to the constructor.
71 const MCInstrInfo &MII;
// Starts out null (see the constructor); CreateMemForInlineAsm records
// rewrites through InstInfo->AsmRewrites.
72 ParseInstructionInfo *InstInfo;
// Instrumentation helper created in the constructor; SetFrameRegister
// forwards to it.
73 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Captures the source location of the current token into Result.
// NOTE(review): the rest of the body is not visible here; presumably it then
// lexes past the token and returns Result -- confirm.
77 SMLoc consumeToken() {
78 MCAsmParser &Parser = getParser();
79 SMLoc Result = Parser.getTok().getLoc();
// Wrapper around the generated MatchInstructionImpl. The parser is switched
// to 32-bit mode before matching and back to 16-bit mode afterwards.
// NOTE(review): the conditions guarding the two SwitchMode calls and the
// final return are not visible in this excerpt; per the comment below they
// presumably apply only in Code16GCC mode, with rv being returned -- confirm.
84 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
85 uint64_t &ErrorInfo, bool matchingInlineAsm,
86 unsigned VariantID = 0) {
87 // In Code16GCC mode, match as 32-bit.
89 SwitchMode(X86::Mode32Bit);
90 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
91 matchingInlineAsm, VariantID);
93 SwitchMode(X86::Mode16Bit);
// Token kinds used by InfixCalculator: operand kinds (IC_IMM, IC_REGISTER),
// parentheses (IC_LPAREN, IC_RPAREN) and the arithmetic/bitwise operators.
// The enumerator list itself is not visible in this excerpt.
97 enum InfixCalculatorTok {
// Kinds of Intel inline-asm operators.
// NOTE(review): enumerators are not visible here; IOK_OFFSET is compared
// against an operator kind in ParseIntelExpression, so it presumably belongs
// to this enum -- confirm.
116 enum IntelOperatorKind {
// Evaluates an infix expression by converting it to postfix form: operators
// wait on InfixOperatorStack while operands and resolved operators are
// appended to PostfixStack.
124 class InfixCalculator {
// A token kind paired with its value (operators are stored with value 0).
125 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
126 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
127 SmallVector<ICToken, 4> PostfixStack;
// Returns true when Op is one of the two unary operators, IC_NEG or IC_NOT.
129 bool isUnaryOperator(const InfixCalculatorTok Op) {
130 return Op == IC_NEG || Op == IC_NOT;
// Pops the most recent entry from PostfixStack (asserting it is non-empty).
// If the popped entry is not an operand (IC_IMM or IC_REGISTER), -1 is
// returned as a deliberately invalid value.
// NOTE(review): the return for the operand case is not visible here;
// presumably the operand's value (Op.second) -- confirm.
134 int64_t popOperand() {
135 assert (!PostfixStack.empty() && "Poped an empty stack!");
136 ICToken Op = PostfixStack.pop_back_val();
137 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
138 return -1; // The invalid Scale value will be caught later by checkScale
// Appends an operand to PostfixStack. Op must be IC_IMM or IC_REGISTER
// (checked by assert); Val is the operand's value and defaults to 0.
141 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
142 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
143 "Unexpected operand!");
144 PostfixStack.push_back(std::make_pair(Op, Val));
// Discards the operator on top of InfixOperatorStack. There is no emptiness
// check here, so the caller must ensure the stack is non-empty.
147 void popOperator() { InfixOperatorStack.pop_back(); }
// Adds operator Op to the pending-operator stack, first moving operators of
// greater or equal precedence from InfixOperatorStack to PostfixStack.
// NOTE(review): the loop header, the early returns/breaks and the ParenCount
// updates are not visible in this excerpt; the comments describe only the
// lines shown.
148 void pushOperator(InfixCalculatorTok Op) {
149 // Push the new operator if the stack is empty.
150 if (InfixOperatorStack.empty()) {
151 InfixOperatorStack.push_back(Op);
155 // Push the new operator if it has a higher precedence than the operator
156 // on the top of the stack or the operator on the top of the stack is a
158 unsigned Idx = InfixOperatorStack.size() - 1;
159 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
160 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
161 InfixOperatorStack.push_back(Op);
165 // The operator on the top of the stack has higher precedence than the
167 unsigned ParenCount = 0;
169 // Nothing to process.
170 if (InfixOperatorStack.empty())
// Re-read the current top of the stack.
173 Idx = InfixOperatorStack.size() - 1;
174 StackOp = InfixOperatorStack[Idx];
// Tested when the top operator has lower precedence than Op and we are not
// inside a parenthesized group.
175 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
178 // If the parenthesis count is zero and we see a left parenthesis,
179 // then stop processing.
180 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are popped without being written to the postfix stack; any
// other operator is popped and appended to it.
183 if (StackOp == IC_RPAREN) {
185 InfixOperatorStack.pop_back();
186 } else if (StackOp == IC_LPAREN) {
188 InfixOperatorStack.pop_back();
190 InfixOperatorStack.pop_back();
191 PostfixStack.push_back(std::make_pair(StackOp, 0));
194 // Push the new operator.
195 InfixOperatorStack.push_back(Op);
// Body of the evaluation routine (invoked as IC.execute() by
// IntelExprStateMachine::getImm; its signature line is not visible in this
// excerpt). It flushes the pending operators to the postfix stack, evaluates
// the postfix sequence on a local operand stack and returns the single
// remaining value.
// NOTE(review): the switch statements and case labels selecting each
// operation are not visible here; the operation performed by each group of
// lines is taken from the adjacent assert messages and expressions.
199 // Push any remaining operators onto the postfix stack.
200 while (!InfixOperatorStack.empty()) {
201 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
// Parentheses only group; they never reach the postfix stack.
202 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
203 PostfixStack.push_back(std::make_pair(StackOp, 0));
// NOTE(review): the statement executed for an empty expression is not
// visible here.
206 if (PostfixStack.empty())
209 SmallVector<ICToken, 16> OperandStack;
210 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
211 ICToken Op = PostfixStack[i];
// Operands are pushed as they are encountered.
212 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
213 OperandStack.push_back(Op);
// Unary operators consume one immediate operand.
214 } else if (isUnaryOperator(Op.first)) {
215 assert (OperandStack.size() > 0 && "Too few operands.");
216 ICToken Operand = OperandStack.pop_back_val();
217 assert (Operand.first == IC_IMM &&
218 "Unary operation with a register!");
221 report_fatal_error("Unexpected operator!");
// Arithmetic negation.
224 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
// Bitwise complement.
227 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
// Binary operators consume two operands; Op2 is the right-hand side because
// it was pushed last.
231 assert (OperandStack.size() > 1 && "Too few operands.");
233 ICToken Op2 = OperandStack.pop_back_val();
234 ICToken Op1 = OperandStack.pop_back_val();
237 report_fatal_error("Unexpected operator!");
// Addition and subtraction carry no operand-kind assert, unlike the
// operations below.
240 Val = Op1.second + Op2.second;
241 OperandStack.push_back(std::make_pair(IC_IMM, Val));
244 Val = Op1.second - Op2.second;
245 OperandStack.push_back(std::make_pair(IC_IMM, Val));
248 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
249 "Multiply operation with an immediate and a register!");
250 Val = Op1.second * Op2.second;
251 OperandStack.push_back(std::make_pair(IC_IMM, Val));
254 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
255 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is rejected only by this assert, which is
// compiled out when asserts are disabled.
256 assert (Op2.second != 0 && "Division by zero!");
257 Val = Op1.second / Op2.second;
258 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// NOTE(review): no zero check on the modulo divisor is visible here.
261 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
262 "Modulo operation with an immediate and a register!");
263 Val = Op1.second % Op2.second;
264 OperandStack.push_back(std::make_pair(IC_IMM, Val));
267 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
268 "Or operation with an immediate and a register!");
269 Val = Op1.second | Op2.second;
270 OperandStack.push_back(std::make_pair(IC_IMM, Val));
273 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
274 "Xor operation with an immediate and a register!");
275 Val = Op1.second ^ Op2.second;
276 OperandStack.push_back(std::make_pair(IC_IMM, Val));
279 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
280 "And operation with an immediate and a register!");
281 Val = Op1.second & Op2.second;
282 OperandStack.push_back(std::make_pair(IC_IMM, Val));
285 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
286 "Left shift operation with an immediate and a register!");
287 Val = Op1.second << Op2.second;
288 OperandStack.push_back(std::make_pair(IC_IMM, Val));
291 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
292 "Right shift operation with an immediate and a register!");
293 Val = Op1.second >> Op2.second;
294 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// A well-formed expression leaves exactly one value behind.
299 assert (OperandStack.size() == 1 && "Expected a single result.");
300 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine. The enumerator list is not visible in this
// excerpt; values used elsewhere in the file include IES_INIT, IES_ERROR,
// IES_INTEGER, IES_REGISTER, IES_MULTIPLY, IES_RBRAC and IES_RPAREN.
304 enum IntelExprState {
// State machine driven by ParseIntelExpression while it walks the tokens of
// an Intel-syntax expression. It tracks the current and previous state, the
// base/index registers and scale seen so far, an optional symbol and an
// accumulated immediate.
// NOTE(review): several member declarations (e.g. Imm, Sym, SymName,
// BracCount, MemExpr, IC) are not visible in this excerpt.
327 class IntelExprStateMachine {
328 IntelExprState State, PrevState;
329 unsigned BaseReg, IndexReg, TmpReg, Scale;
334 InlineAsmIdentifierInfo Info;
// Starts in IES_INIT with no registers, scale 1, zero immediate, no symbol.
339 IntelExprStateMachine()
340 : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
341 TmpReg(0), Scale(1), Imm(0), Sym(nullptr), BracCount(0),
// Adds imm to the accumulated immediate.
346 void addImm(int64_t imm) { Imm += imm; }
347 short getBracCount() { return BracCount; }
348 bool isMemExpr() { return MemExpr; }
349 unsigned getBaseReg() { return BaseReg; }
350 unsigned getIndexReg() { return IndexReg; }
351 unsigned getScale() { return Scale; }
352 const MCExpr *getSym() { return Sym; }
353 StringRef getSymName() { return SymName; }
// The immediate is the accumulated Imm plus the value computed by IC.execute().
354 int64_t getImm() { return Imm + IC.execute(); }
// An expression may only end after a closing bracket or an integer.
355 bool isValidEndState() {
356 return State == IES_RBRAC || State == IES_INTEGER;
358 bool hadError() { return State == IES_ERROR; }
359 InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }
// Fragments of the five binary bitwise/shift operator handlers. Their
// signatures and state checks are not visible in this excerpt;
// ParseIntelExpression calls SM.onOr(), SM.onXor(), SM.onAnd(), SM.onLShift()
// and SM.onRShift(), which presumably correspond to the groups below in that
// order. Each visible group snapshots the current state, pushes the matching
// operator onto the calculator, and records the snapshot as PrevState.
362 IntelExprState CurrState = State;
371 IC.pushOperator(IC_OR);
374 PrevState = CurrState;
377 IntelExprState CurrState = State;
386 IC.pushOperator(IC_XOR);
389 PrevState = CurrState;
392 IntelExprState CurrState = State;
401 IC.pushOperator(IC_AND);
404 PrevState = CurrState;
407 IntelExprState CurrState = State;
416 IC.pushOperator(IC_LSHIFT);
419 PrevState = CurrState;
422 IntelExprState CurrState = State;
431 IC.pushOperator(IC_RSHIFT);
434 PrevState = CurrState;
// Handles a '+' token: pushes IC_PLUS onto the calculator and, when the token
// follows a register that was not part of a 'Scale * Register' product,
// assigns that register as base or index. ErrMsg is set when both register
// slots are already taken.
// NOTE(review): the state switch, the register assignments and the return
// statements are not visible in this excerpt; ParseIntelExpression reports an
// error when this returns true.
436 bool onPlus(StringRef &ErrMsg) {
437 IntelExprState CurrState = State;
446 IC.pushOperator(IC_PLUS);
447 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
448 // If we already have a BaseReg, then assume this is the IndexReg with
454 ErrMsg = "BaseReg/IndexReg already set!";
463 PrevState = CurrState;
// Handles a '-' token, which is either binary subtraction or unary negation
// depending on the preceding state. A negation directly after
// 'Register *' would negate the scale and is rejected through ErrMsg.
// NOTE(review): the state switch, the register assignments and the return
// statements are not visible in this excerpt; ParseIntelExpression reports an
// error when this returns true.
466 bool onMinus(StringRef &ErrMsg) {
467 IntelExprState CurrState = State;
490 // push minus operator if it is not a negate operator
491 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
492 CurrState == IES_INTEGER || CurrState == IES_RBRAC)
493 IC.pushOperator(IC_MINUS);
494 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
495 // We have negate operator for Scale: it's illegal
496 ErrMsg = "Scale can't be negative";
499 IC.pushOperator(IC_NEG);
500 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
501 // If we already have a BaseReg, then assume this is the IndexReg with
507 ErrMsg = "BaseReg/IndexReg already set!";
516 PrevState = CurrState;
// Fragment of the bitwise-NOT handler (signature not visible in this excerpt;
// ParseIntelExpression calls SM.onNot() for AsmToken::Tilde). The visible
// lines snapshot the state, push IC_NOT and record the snapshot as PrevState.
520 IntelExprState CurrState = State;
540 IC.pushOperator(IC_NOT);
543 PrevState = CurrState;
// Handles a register token. In the plain case the register is pushed as an
// IC_REGISTER operand. In the 'Scale * Register' case (previous state was an
// integer) the integer is popped back off the calculator as the scale,
// validated with checkScale, and replaced by a zero immediate.
// NOTE(review): the state switch, the assignments of Reg to the register
// slots and the return statements are not visible in this excerpt;
// ParseIntelExpression reports an error when this returns true.
546 bool onRegister(unsigned Reg, StringRef &ErrMsg) {
547 IntelExprState CurrState = State;
555 State = IES_REGISTER;
557 IC.pushOperand(IC_REGISTER);
560 // Index Register - Scale * Register
561 if (PrevState == IES_INTEGER) {
563 ErrMsg = "BaseReg/IndexReg already set!";
566 State = IES_REGISTER;
568 // Get the scale and replace the 'Scale * Register' with '0'.
569 Scale = IC.popOperand();
570 if (checkScale(Scale, ErrMsg))
572 IC.pushOperand(IC_IMM);
579 PrevState = CurrState;
// Handles a symbolic identifier. The expression is flagged as a memory
// expression unless SymRef is a constant expression, the symbol name is
// recorded, and a zero immediate is pushed as a placeholder operand. Only one
// symbol is allowed per memory operand; ErrMsg reports a second one.
// NOTE(review): the remaining parameters, the state checks and the return
// statements are not visible in this excerpt.
582 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
585 bool HasSymbol = Sym != nullptr;
595 MemExpr = !(SymRef->getKind() == MCExpr::Constant);
598 SymName = SymRefName;
599 IC.pushOperand(IC_IMM);
603 ErrMsg = "cannot use more than one symbol in memory operand";
// Handles an integer token. After 'Register *' the integer is the scale of an
// index register and is validated with checkScale; otherwise it is pushed
// onto the calculator as an immediate operand.
// NOTE(review): the state switch, the register/scale assignments and the
// return statements are not visible in this excerpt; ParseIntelExpression
// reports an error when this returns true.
606 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
607 IntelExprState CurrState = State;
627 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
628 // Index Register - Register * Scale
630 ErrMsg = "BaseReg/IndexReg already set!";
635 if (checkScale(Scale, ErrMsg))
637 // Get the scale and replace the 'Register * Scale' with '0'.
640 IC.pushOperand(IC_IMM, TmpInt);
644 PrevState = CurrState;
// Fragments of the multiply, divide and modulo handlers (signatures and state
// checks not visible in this excerpt; ParseIntelExpression calls SM.onStar()
// and SM.onDivide() for '*' and '/'). Each pushes its operator onto the
// calculator; the multiply handler also moves to IES_MULTIPLY.
656 State = IES_MULTIPLY;
657 IC.pushOperator(IC_MULTIPLY);
670 IC.pushOperator(IC_DIVIDE);
683 IC.pushOperator(IC_MOD);
// Fragments of the bracket handling (signatures and state checks not visible
// in this excerpt).
// NOTE(review): which enclosing function each line belongs to cannot be
// determined from here; comments describe the individual lines only.
// A plus operator is pushed onto the calculator.
699 IC.pushOperator(IC_PLUS);
702 assert(!BracCount && "BracCount should be zero on parsing's start");
711 IntelExprState CurrState = State;
// Tests whether the bracket count was anything other than 1, then decrements it.
719 if (BracCount-- != 1)
722 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
723 // If we already have a BaseReg, then assume this is the IndexReg with
728 assert (!IndexReg && "BaseReg/IndexReg already set!");
735 PrevState = CurrState;
// Fragments of the parenthesis handlers (signatures and state checks not
// visible in this excerpt). The visible lines push IC_LPAREN and IC_RPAREN
// onto the calculator's operator stack.
739 IntelExprState CurrState = State;
759 IC.pushOperator(IC_LPAREN);
762 PrevState = CurrState;
774 IC.pushOperator(IC_RPAREN);
// Reports a parse error at location L. When MatchingInlineAsm is set, the
// rest of the current statement is skipped unless the lexer is already at the
// start of a statement; otherwise the error is forwarded to the generic
// parser and its result returned.
// NOTE(review): the return statement of the inline-asm branch is not visible
// in this excerpt.
780 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
781 bool MatchingInlineAsm = false) {
782 MCAsmParser &Parser = getParser();
783 if (MatchingInlineAsm) {
784 if (!getLexer().isAtStartOfStatement())
785 Parser.eatToEndOfStatement();
788 return Parser.Error(L, Msg, Range);
// Error helper whose return type is std::nullptr_t.
// NOTE(review): the body is not visible in this excerpt; presumably it
// reports Msg at Loc and returns nullptr so operand parsers can write
// `return ErrorOperand(...)` -- confirm.
791 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// --- Helpers for the implicit SI/DI memory operands (defined later in this
// file).
796 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
797 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
798 bool IsSIReg(unsigned Reg);
799 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
801 AddDefaultSrcDestOperands(OperandVector &Operands,
802 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
803 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
804 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
805 OperandVector &FinalOperands);
// --- Operand parsing. ParseOperand dispatches to the AT&T or Intel variant
// based on the active syntax.
806 std::unique_ptr<X86Operand> ParseOperand();
807 std::unique_ptr<X86Operand> ParseATTOperand();
808 std::unique_ptr<X86Operand> ParseIntelOperand();
809 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
810 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
811 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
812 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
813 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
814 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
815 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
817 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
818 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
819 InlineAsmIdentifierInfo &Info,
820 bool IsUnevaluatedOperand, SMLoc &End);
822 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
824 bool ParseIntelMemoryOperandSize(unsigned &Size);
825 std::unique_ptr<X86Operand>
826 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
827 unsigned IndexReg, unsigned Scale, SMLoc Start,
828 SMLoc End, unsigned Size, StringRef Identifier,
829 const InlineAsmIdentifierInfo &Info);
// --- Directive parsing.
831 bool parseDirectiveEven(SMLoc L);
832 bool ParseDirectiveWord(unsigned Size, SMLoc L);
833 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
835 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
837 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
838 /// instrumentation around Inst.
839 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
// --- Instruction matching and emission, with separate AT&T and Intel
// variants.
841 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
842 OperandVector &Operands, MCStreamer &Out,
844 bool MatchingInlineAsm) override;
846 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
847 MCStreamer &Out, bool MatchingInlineAsm);
849 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
850 bool MatchingInlineAsm);
852 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
853 OperandVector &Operands, MCStreamer &Out,
855 bool MatchingInlineAsm);
857 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
858 OperandVector &Operands, MCStreamer &Out,
860 bool MatchingInlineAsm);
862 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
864 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
865 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
866 /// Returns false if no parsing errors occurred, true otherwise.
867 bool HandleAVX512Operand(OperandVector &Operands,
868 const MCParsedAsmOperand &Op);
870 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
// Mode predicates: each reports whether the corresponding mode feature bit
// (Mode64Bit, Mode32Bit, Mode16Bit) is set in the current subtarget info.
872 bool is64BitMode() const {
873 // FIXME: Can tablegen auto-generate this?
874 return getSTI().getFeatureBits()[X86::Mode64Bit];
876 bool is32BitMode() const {
877 // FIXME: Can tablegen auto-generate this?
878 return getSTI().getFeatureBits()[X86::Mode32Bit];
880 bool is16BitMode() const {
881 // FIXME: Can tablegen auto-generate this?
882 return getSTI().getFeatureBits()[X86::Mode16Bit];
// Switches the parser to the given mode feature (one of X86::Mode64Bit,
// X86::Mode32Bit, X86::Mode16Bit) on a copy of the subtarget info, then
// recomputes the available-features mask.
884 void SwitchMode(unsigned mode) {
885 MCSubtargetInfo &STI = copySTI();
886 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// The mode bits that are currently set.
887 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Flipping `mode` inside OldMode gives the set of bits to toggle.
// NOTE(review): presumably this is the old mode bit plus the new one, or
// nothing when the mode is unchanged -- confirm against FeatureBitset::flip.
888 unsigned FB = ComputeAvailableFeatures(
889 STI.ToggleFeature(OldMode.flip(mode)));
890 setAvailableFeatures(FB);
// Postcondition: `mode` is the only mode bit left set.
892 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
// Returns the pointer width in bits for the active mode: 16, 32 or 64.
// Reaching the end means no mode bit is set, which is treated as unreachable.
895 unsigned getPointerWidth() {
896 if (is16BitMode()) return 16;
897 if (is32BitMode()) return 32;
898 if (is64BitMode()) return 64;
899 llvm_unreachable("invalid mode");
// Returns true when the parser's assembler dialect is non-zero; the rest of
// this file treats that as Intel syntax and a zero dialect as AT&T syntax.
902 bool isParsingIntelSyntax() {
903 return getParser().getAssemblerDialect();
906 /// @name Auto-generated Matcher Functions
909 #define GET_ASSEMBLER_HEADER
910 #include "X86GenAsmMatcher.inc"
// Constructs the parser: forwards Options and sti to MCTargetAsmParser,
// stores the instruction info, starts with no InstInfo, computes the
// available-features mask from the subtarget's feature bits and creates the
// instrumentation object.
// NOTE(review): the end of the initializer list and any statements between
// the visible lines are not shown in this excerpt.
916 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
917 const MCInstrInfo &mii, const MCTargetOptions &Options)
918 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
921 // Initialize the set of available features.
922 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
923 Instrumentation.reset(
924 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// Overrides of the MCTargetAsmParser interface. ParseRegister and
// SetFrameRegister are defined later in this file.
927 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
929 void SetFrameRegister(unsigned RegNo) override;
931 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
932 SMLoc NameLoc, OperandVector &Operands) override;
934 bool ParseDirective(AsmToken DirectiveID) override;
936 } // end anonymous namespace
938 /// @name Auto-generated Match Functions
// Maps a register name to its register number. ParseRegister treats a result
// of 0 as "no match" and retries with the lower-cased name.
941 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index/scale combination for a memory operand and sets
// ErrMsg to a diagnostic when it is invalid. The final result is whatever
// checkScale returns for Scale.
// NOTE(review): the return statements after each ErrMsg assignment are not
// visible in this excerpt; presumably each returns true -- confirm.
945 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
946 unsigned Scale, StringRef &ErrMsg) {
947 // If we have both a base register and an index register make sure they are
948 // both 64-bit or 32-bit registers.
949 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// RIP may not be combined with an index register, nor be an index itself.
951 if ((BaseReg == X86::RIP && IndexReg != 0) || (IndexReg == X86::RIP)) {
952 ErrMsg = "invalid base+index expression";
955 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit index (other than RIZ).
956 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
957 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
958 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
959 IndexReg != X86::RIZ) {
960 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit index (other than EIZ).
963 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
964 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
965 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
966 IndexReg != X86::EIZ){
967 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit ...
970 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
971 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
972 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
973 ErrMsg = "base register is 16-bit, but index register is not";
// ... and BX/BP must pair with SI/DI, SI/DI with BX/BP.
976 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
977 IndexReg != X86::SI && IndexReg != X86::DI) ||
978 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
979 IndexReg != X86::BX && IndexReg != X86::BP)) {
980 ErrMsg = "invalid 16-bit base/index register combination";
985 return checkScale(Scale, ErrMsg);
// Parses a register name at the current token, with or without a leading
// '%'. On the failure paths shown, Intel syntax returns true silently, while
// other paths return the result of Error(...). StartLoc and EndLoc are set to
// the source range of what was examined.
// Besides names known to MatchRegisterName, the "st"/"st(N)" x87 stack
// registers and the "db0".."db7" aliases for DR0..DR7 are recognized.
// NOTE(review): several guards, early returns and the final return are not
// visible in this excerpt; callers in this file treat a false result as
// success.
988 bool X86AsmParser::ParseRegister(unsigned &RegNo,
989 SMLoc &StartLoc, SMLoc &EndLoc) {
990 MCAsmParser &Parser = getParser();
992 const AsmToken &PercentTok = Parser.getTok();
993 StartLoc = PercentTok.getLoc();
995 // If we encounter a %, ignore it. This code handles registers with and
996 // without the prefix, unprefixed registers can occur in cfi directives.
997 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
998 Parser.Lex(); // Eat percent token.
1000 const AsmToken &Tok = Parser.getTok();
1001 EndLoc = Tok.getEndLoc();
1003 if (Tok.isNot(AsmToken::Identifier)) {
1004 if (isParsingIntelSyntax()) return true;
1005 return Error(StartLoc, "invalid register name",
1006 SMRange(StartLoc, EndLoc));
1009 RegNo = MatchRegisterName(Tok.getString());
1011 // If the match failed, try the register name as lowercase.
1013 RegNo = MatchRegisterName(Tok.getString().lower());
1015 // The "flags" register cannot be referenced directly.
1016 // Treat it as an identifier instead.
1017 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
// Outside 64-bit mode, reject registers that exist only in 64-bit mode.
1020 if (!is64BitMode()) {
1021 // FIXME: This should be done using Requires<Not64BitMode> and
1022 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1024 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
1026 if (RegNo == X86::RIZ ||
1027 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1028 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1029 X86II::isX86_64ExtendedReg(RegNo))
1030 return Error(StartLoc, "register %"
1031 + Tok.getString() + " is only available in 64-bit mode",
1032 SMRange(StartLoc, EndLoc));
// In 64-bit mode without AVX512, reject the registers that require it.
1033 } else if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) {
1034 if (X86II::is32ExtendedReg(RegNo))
1035 return Error(StartLoc, "register %"
1036 + Tok.getString() + " is only available with AVX512",
1037 SMRange(StartLoc, EndLoc));
1040 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1041 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
1043 Parser.Lex(); // Eat 'st'
1045 // Check to see if we have '(4)' after %st.
1046 if (getLexer().isNot(AsmToken::LParen))
1051 const AsmToken &IntTok = Parser.getTok();
1052 if (IntTok.isNot(AsmToken::Integer))
1053 return Error(IntTok.getLoc(), "expected stack index");
// Map the stack index 0..7 to ST0..ST7; anything else is an error.
1054 switch (IntTok.getIntVal()) {
1055 case 0: RegNo = X86::ST0; break;
1056 case 1: RegNo = X86::ST1; break;
1057 case 2: RegNo = X86::ST2; break;
1058 case 3: RegNo = X86::ST3; break;
1059 case 4: RegNo = X86::ST4; break;
1060 case 5: RegNo = X86::ST5; break;
1061 case 6: RegNo = X86::ST6; break;
1062 case 7: RegNo = X86::ST7; break;
1063 default: return Error(IntTok.getLoc(), "invalid stack index");
1066 if (getParser().Lex().isNot(AsmToken::RParen))
1067 return Error(Parser.getTok().getLoc(), "expected ')'");
1069 EndLoc = Parser.getTok().getEndLoc();
1070 Parser.Lex(); // Eat ')'
1074 EndLoc = Parser.getTok().getEndLoc();
1076 // If this is "db[0-7]", match it as an alias
1078 if (RegNo == 0 && Tok.getString().size() == 3 &&
1079 Tok.getString().startswith("db")) {
1080 switch (Tok.getString()[2]) {
1081 case '0': RegNo = X86::DR0; break;
1082 case '1': RegNo = X86::DR1; break;
1083 case '2': RegNo = X86::DR2; break;
1084 case '3': RegNo = X86::DR3; break;
1085 case '4': RegNo = X86::DR4; break;
1086 case '5': RegNo = X86::DR5; break;
1087 case '6': RegNo = X86::DR6; break;
1088 case '7': RegNo = X86::DR7; break;
1092 EndLoc = Parser.getTok().getEndLoc();
1093 Parser.Lex(); // Eat it.
// NOTE(review): the condition guarding this failure path is not visible
// here; presumably it is taken when no register matched.
1099 if (isParsingIntelSyntax()) return true;
1100 return Error(StartLoc, "invalid register name",
1101 SMRange(StartLoc, EndLoc));
1104 Parser.Lex(); // Eat identifier token.
// Forwards the frame register number to the instrumentation object as its
// initial frame register.
1108 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1109 Instrumentation->SetInitialFrameRegister(RegNo);
// Builds a memory operand whose base is the source-index register for the
// current mode (RSI in 64-bit mode, ESI in 32-bit or Code16GCC mode, SI
// otherwise), with zero displacement, no segment or index register and
// scale 1.
// NOTE(review): the trailing arguments of the CreateMem call are not visible
// in this excerpt.
1112 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1113 bool Parse32 = is32BitMode() || Code16GCC;
1114 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1115 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1116 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1117 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
// Builds a memory operand whose base is the destination-index register for
// the current mode (RDI in 64-bit mode, EDI in 32-bit or Code16GCC mode, DI
// otherwise), with zero displacement, no segment or index register and
// scale 1.
// NOTE(review): the trailing arguments of the CreateMem call are not visible
// in this excerpt.
1121 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1122 bool Parse32 = is32BitMode() || Code16GCC;
1123 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1124 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1125 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1126 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
// Classifies Reg as a source-index (SI-family) or destination-index
// (DI-family) register; any other register is treated as unreachable.
// NOTE(review): the switch header and the case labels are not visible in
// this excerpt; presumably SI-family registers yield true -- confirm.
1130 bool X86AsmParser::IsSIReg(unsigned Reg) {
1132 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
// Returns the SI or DI register of the width implied by RegClassID (GR64,
// GR32 or GR16), choosing SI when IsSIReg is true and DI otherwise. Any other
// register class is treated as unreachable.
// NOTE(review): the remainder of the parameter list is not visible in this
// excerpt.
1144 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1146 switch (RegClassID) {
1147 default: llvm_unreachable("Unexpected register class");
1148 case X86::GR64RegClassID:
1149 return IsSIReg ? X86::RSI : X86::RDI;
1150 case X86::GR32RegClassID:
1151 return IsSIReg ? X86::ESI : X86::EDI;
1152 case X86::GR16RegClassID:
1153 return IsSIReg ? X86::SI : X86::DI;
// Appends the given source and destination operands to Operands in the order
// the active syntax expects: destination first for Intel syntax, source
// first otherwise.
// NOTE(review): the else line between the two branches is not visible in
// this excerpt.
1157 void X86AsmParser::AddDefaultSrcDestOperands(
1158 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1159 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1160 if (isParsingIntelSyntax()) {
1161 Operands.push_back(std::move(Dst));
1162 Operands.push_back(std::move(Src));
1165 Operands.push_back(std::move(Src));
1166 Operands.push_back(std::move(Dst));
// Reconciles user-written operands (OrigOperands, which also holds the
// instruction name at index 0) with the default operands the instruction
// implies (FinalOperands). Each explicit operand must be compatible with its
// default counterpart. Memory operands keep the user's size and segment but
// take the SI/DI base of the matching register class, and a warning is queued
// when the user's base register differs. On success the explicit operands are
// replaced by the adjusted defaults.
// NOTE(review): several return statements are not visible in this excerpt.
// Per the inline comments the silent mismatch paths return false, while the
// register-class mismatch returns the result of Error(...).
1170 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1171 OperandVector &FinalOperands) {
1173 if (OrigOperands.size() > 1) {
1174 // Check if sizes match, OrigOperands also contains the instruction name
1175 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1176 "Operand size mismatch");
1178 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1179 // Verify types match
1180 int RegClassID = -1;
1181 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
// i + 1 skips the instruction name stored at OrigOperands[0].
1182 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1183 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1185 if (FinalOp.isReg() &&
1186 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1187 // Return false and let a normal complaint about bogus operands happen
1190 if (FinalOp.isMem()) {
1192 if (!OrigOp.isMem())
1193 // Return false and let a normal complaint about bogus operands happen
1196 unsigned OrigReg = OrigOp.Mem.BaseReg;
1197 unsigned FinalReg = FinalOp.Mem.BaseReg;
1199 // If we've already encountered a register class, make sure all register
1200 // bases are of the same register class
1201 if (RegClassID != -1 &&
1202 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1203 return Error(OrigOp.getStartLoc(),
1204 "mismatching source and destination index registers");
// Determine the register class of the user-written base register.
1207 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1208 RegClassID = X86::GR64RegClassID;
1209 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1210 RegClassID = X86::GR32RegClassID;
1211 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1212 RegClassID = X86::GR16RegClassID;
1214 // Unexpected register class type
1215 // Return false and let a normal complaint about bogus operands happen
// Pick the SI/DI register of the user's register class.
1218 bool IsSI = IsSIReg(FinalReg);
1219 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1221 if (FinalReg != OrigReg) {
1222 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1223 Warnings.push_back(std::make_pair(
1224 OrigOp.getStartLoc(),
1225 "memory operand is only for determining the size, " + RegName +
1226 " will be used for the location"));
// Keep the user's size and segment override; use the adjusted base register.
1229 FinalOp.Mem.Size = OrigOp.Mem.Size;
1230 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1231 FinalOp.Mem.BaseReg = FinalReg;
1235 // Produce warnings only if all the operands passed the adjustment - prevent
1236 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1237 for (auto &WarningMsg : Warnings) {
1238 Warning(WarningMsg.first, WarningMsg.second);
1241 // Remove old operands
1242 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1243 OrigOperands.pop_back();
1245 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1246 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1247 OrigOperands.push_back(std::move(FinalOperands[i]));
// Parses one operand using the parser for the active syntax: Intel when
// isParsingIntelSyntax() is true, AT&T otherwise.
1252 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1253 if (isParsingIntelSyntax())
1254 return ParseIntelOperand();
1255 return ParseATTOperand();
// Creates the memory operand for an identifier referenced from inline
// assembly. A symbol reference whose declaration is not a variable is treated
// as a function or label and becomes an absolute memory reference. Otherwise
// a regular memory operand is built, with the front-end type size used as a
// fallback size and a placeholder base register when none is known.
// NOTE(review): the condition guarding the explicit-size rewrite, the
// remaining emplace_back arguments and the declaration of IsSymRef are not
// visible in this excerpt.
1258 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1259 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1260 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1261 const InlineAsmIdentifierInfo &Info) {
1262 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1263 // some other label reference.
1264 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1265 // Insert an explicit size if the user didn't have one.
1267 Size = getPointerWidth();
1268 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1272 // Create an absolute memory reference in order to match against
1273 // instructions taking a PC relative operand.
1274 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1275 Identifier, Info.OpDecl);
1279 // We either have a direct symbol reference, or an offset from a symbol. The
1280 // parser always puts the symbol on the LHS, so look there for size
1281 // calculation purposes.
1282 unsigned FrontendSize = 0;
1283 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1285 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
// Only fall back to the front-end type size when no explicit size was given.
1286 if (IsSymRef && !Size && Info.Type)
1287 FrontendSize = Info.Type * 8; // Size is in terms of bits in this context.
1289 // When parsing inline assembly we set the base register to a non-zero value
1290 // if we don't know the actual value at this time. This is necessary to
1291 // get the matching correct in some cases.
1292 BaseReg = BaseReg ? BaseReg : 1;
1293 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1294 IndexReg, Scale, Start, End, Size, Identifier,
1295 Info.OpDecl, FrontendSize);
1298 // Some bitwise operators also have a named synonym.
1299 // Query a candidate string for being such a named operator
1300 // and, if so, invoke the appropriate handler.
// Recognized names are "not", "or", "shl", "shr", "xor", "and" and "mod",
// spelled entirely in lower case or entirely in upper case.
// NOTE(review): the handler calls and the return statements are not visible
// in this excerpt. ParseIntelExpression treats a true result as "operator
// consumed", so mixed-case and unrecognized names presumably yield false --
// confirm.
1301 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1302 // A named operator should be either lower or upper case, but not a mix
// compare() is non-zero on a mismatch, so this is true only for mixed case.
1303 if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
1305 if (Name.equals_lower("not"))
1307 else if (Name.equals_lower("or"))
1309 else if (Name.equals_lower("shl"))
1311 else if (Name.equals_lower("shr"))
1313 else if (Name.equals_lower("xor"))
1315 else if (Name.equals_lower("and"))
1317 else if (Name.equals_lower("mod"))
/// Feed the tokens of an Intel-syntax expression into the state machine \p SM,
/// one lexer token at a time, dispatching on the kind of the current token.
/// The loop finishes once `Done` is set (for example when an otherwise
/// unhandled token is seen while \p SM is in a valid end state). \p End is
/// updated as tokens are consumed. Failures are reported through Error().
1324 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1325 MCAsmParser &Parser = getParser();
1326 const AsmToken &Tok = Parser.getTok();
// Consulted below to recognise a '.' field access that directly follows a ']'.
1329 AsmToken::TokenKind PrevTK = AsmToken::Error;
// Handlers that advance the lexer themselves clear this flag so the token is
// not consumed a second time at the bottom of the loop.
1332 bool UpdateLocLex = true;
1333 AsmToken::TokenKind TK = getLexer().getKind();
// A token without a dedicated handler is acceptable only when the state
// machine is already in a valid end state.
1337 if ((Done = SM.isValidEndState()))
1339 return Error(Tok.getLoc(), "unknown token in expression");
1340 case AsmToken::EndOfStatement:
1343 case AsmToken::Real:
1344 // DotOperator: [ebx].0
1345 UpdateLocLex = false;
1346 if (ParseIntelDotOperator(SM, End))
1349 case AsmToken::String:
1350 case AsmToken::Identifier: {
1351 // This could be a register or a symbolic displacement.
1354 SMLoc IdentLoc = Tok.getLoc();
1355 StringRef Identifier = Tok.getString();
1356 UpdateLocLex = false;
// A string token is never tried as a register name.
1357 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1358 if (SM.onRegister(TmpReg, ErrMsg))
1359 return Error(Tok.getLoc(), ErrMsg);
// Named operators are applied to SM by ParseIntelNamedOperator; the operator
// token itself is then consumed at the bottom of the loop.
1360 } else if (ParseIntelNamedOperator(Identifier, SM)) {
1361 UpdateLocLex = true;
// Stand-alone assembly: let the generic parser evaluate a primary expression.
// Constants are fed in as integers, anything else as an identifier expression.
1362 } else if (!isParsingInlineAsm()) {
1363 if (getParser().parsePrimaryExpr(Val, End))
1364 return Error(Tok.getLoc(), "Unexpected identifier!");
1365 if (auto *CE = dyn_cast<MCConstantExpr>(Val)) {
1366 if (SM.onInteger(CE->getValue(), ErrMsg))
1367 return Error(IdentLoc, ErrMsg);
1368 } else if (SM.onIdentifierExpr(Val, Identifier, ErrMsg))
1369 return Error(IdentLoc, ErrMsg);
// Inline assembly: operators recognised by IdentifyIntelInlineAsmOperator.
1370 } else if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
1371 if (OpKind == IOK_OFFSET)
// The two literals below are concatenated by the compiler, so the first one
// must end with a space to keep "of" and "a" apart in the diagnostic.
1372 return Error(IdentLoc, "Dealing OFFSET operator as part of "
1373 "a compound immediate expression is yet to be supported");
1374 int64_t Val = ParseIntelInlineAsmOperator(OpKind);
1377 if (SM.onInteger(Val, ErrMsg))
1378 return Error(IdentLoc, ErrMsg);
// An identifier containing '.' immediately after a ']' is parsed as a dot
// operator.
1379 } else if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
1380 if (ParseIntelDotOperator(SM, End))
// Otherwise resolve the identifier through the inline-asm identifier lookup.
1382 } else if (ParseIntelInlineAsmIdentifier(Val, Identifier,
1383 SM.getIdentifierInfo(),
1384 /*Unevaluated=*/false, End)) {
1386 } else if (SM.onIdentifierExpr(Val, Identifier, ErrMsg)) {
1387 return Error(IdentLoc, ErrMsg);
1391 case AsmToken::Integer: {
1392 // Look for 'b' or 'f' following an Integer as a directional label
1393 SMLoc Loc = getTok().getLoc();
1394 int64_t IntVal = getTok().getIntVal();
1395 End = consumeToken();
1396 UpdateLocLex = false;
1397 if (getLexer().getKind() == AsmToken::Identifier) {
1398 StringRef IDVal = getTok().getString();
1399 if (IDVal == "f" || IDVal == "b") {
1401 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1402 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1404 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward reference ("b") must name a label that is already defined.
1405 if (IDVal == "b" && Sym->isUndefined())
1406 return Error(Loc, "invalid reference to undefined symbol");
1407 StringRef Identifier = Sym->getName();
1408 if (SM.onIdentifierExpr(Val, Identifier, ErrMsg))
1409 return Error(Loc, ErrMsg);
1410 End = consumeToken();
// Integer followed by some other identifier: treat it as a plain integer.
1412 if (SM.onInteger(IntVal, ErrMsg))
1413 return Error(Loc, ErrMsg);
// Integer not followed by an identifier: plain integer.
1416 if (SM.onInteger(IntVal, ErrMsg))
1417 return Error(Loc, ErrMsg);
1421 case AsmToken::Plus:
1422 if (SM.onPlus(ErrMsg))
1423 return Error(getTok().getLoc(), ErrMsg);
1425 case AsmToken::Minus:
1426 if (SM.onMinus(ErrMsg))
1427 return Error(getTok().getLoc(), ErrMsg);
// The remaining operators are forwarded to the state machine unconditionally.
1429 case AsmToken::Tilde: SM.onNot(); break;
1430 case AsmToken::Star: SM.onStar(); break;
1431 case AsmToken::Slash: SM.onDivide(); break;
1432 case AsmToken::Pipe: SM.onOr(); break;
1433 case AsmToken::Caret: SM.onXor(); break;
1434 case AsmToken::Amp: SM.onAnd(); break;
1435 case AsmToken::LessLess:
1436 SM.onLShift(); break;
1437 case AsmToken::GreaterGreater:
1438 SM.onRShift(); break;
1439 case AsmToken::LBrac:
1441 return Error(Tok.getLoc(), "unexpected bracket encountered");
1443 case AsmToken::RBrac:
1445 return Error(Tok.getLoc(), "unexpected bracket encountered");
1447 case AsmToken::LParen: SM.onLParen(); break;
1448 case AsmToken::RParen: SM.onRParen(); break;
1451 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the current token unless a handler above already advanced the lexer
// or the expression is finished.
1453 if (!Done && UpdateLocLex)
1454 End = consumeToken();
/// Record in InstInfo->AsmRewrites the rewrites for the Intel expression that
/// spans [\p Start, \p End) and was parsed into \p SM.
1461 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
1462 SMLoc Start, SMLoc End) {
1464 unsigned ExprLen = End.getPointer() - Start.getPointer();
1465 // Skip everything before a symbol displacement (if we have one)
1467 StringRef SymName = SM.getSymName();
// Emit a skip only when there is text between Start and the symbol name.
1468 if (unsigned Len = SymName.data() - Start.getPointer())
1469 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
// From here on Loc/ExprLen describe the text that follows the symbol name.
1470 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
1471 ExprLen = End.getPointer() - (SymName.data() + SymName.size());
1472 // If we have only a symbol then there's no need for a complex rewrite,
1473 // simply skip everything after it
1474 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
1476 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
1480 // Build an Intel Expression rewrite
1481 StringRef BaseRegStr;
1482 StringRef IndexRegStr;
// Register names are taken from the Intel instruction printer; a string is
// left empty when the corresponding register is absent.
1483 if (SM.getBaseReg())
1484 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
1485 if (SM.getIndexReg())
1486 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
1488 IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr());
1489 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
1492 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an identifier that appears in MS-style inline assembly by asking the
// frontend (SemaCallback) how much of the source text it covers, advance the
// token stream past that text, and produce a symbol reference in \p Val.
// \p Identifier is narrowed to the text the frontend claimed and \p End is set
// to the end of the last token consumed. Must only be called while parsing
// inline assembly (asserted below).
1493 bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
1494 StringRef &Identifier,
1495 InlineAsmIdentifierInfo &Info,
1496 bool IsUnevaluatedOperand,
1498 MCAsmParser &Parser = getParser();
1499 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from a bare pointer, so it starts at the identifier and is
// not limited to the identifier's own length; the lookup callback receives it
// by name and Identifier is later reassigned from it.
1502 StringRef LineBuf(Identifier.data());
1504 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1506 const AsmToken &Tok = Parser.getTok();
1507 SMLoc Loc = Tok.getLoc();
1509 // Advance the token stream until the end of the current token is
1510 // after the end of what the frontend claimed.
1511 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1513 End = Tok.getEndLoc();
1515 } while (End.getPointer() < EndPtr);
1516 Identifier = LineBuf;
1518 // The frontend should end parsing on an assembler token boundary, unless it
1520 assert((End.getPointer() == EndPtr || !Result) &&
1521 "frontend claimed part of a token?");
1523 // If the identifier lookup was unsuccessful, assume that we are dealing with
1526 StringRef InternalName =
1527 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1529 assert(InternalName.size() && "We should have an internal name here.");
1530 // Push a rewrite for replacing the identifier name with the internal name.
1531 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1535 // Create the symbol reference.
1536 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1537 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1538 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1542 // ParseRoundingModeOp - Parse an AVX-512 rounding mode operand.
// Two spellings are accepted after the opening '{':
//   {rn-sae} / {rd-sae} / {ru-sae} / {rz-sae}  -> immediate operand holding the
//                                                 X86::STATIC_ROUNDING value
//   {sae}                                      -> the token operand "{sae}"
// Anything else yields an error operand.
1543 std::unique_ptr<X86Operand>
1544 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1545 MCAsmParser &Parser = getParser();
1546 const AsmToken &Tok = Parser.getTok();
1547 // Eat "{" and mark the current place.
1548 const SMLoc consumedToken = consumeToken();
// NOTE(review): Tok.getIdentifier() is called without first checking the token
// kind - confirm this is safe when '{' is followed by a non-identifier token.
1549 if (Tok.getIdentifier().startswith("r")){
1550 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1551 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1552 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1553 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1554 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1557 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1558 Parser.Lex(); // Eat "r*" of r*-sae
1559 if (!getLexer().is(AsmToken::Minus))
1560 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1561 Parser.Lex(); // Eat "-"
// NOTE(review): the token after '-' is consumed without being compared against
// "sae" - confirm whether that leniency is intended.
1562 Parser.Lex(); // Eat the sae
1563 if (!getLexer().is(AsmToken::RCurly))
1564 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1565 Parser.Lex(); // Eat "}"
1566 const MCExpr *RndModeOp =
1567 MCConstantExpr::create(rndMode, Parser.getContext());
1568 return X86Operand::CreateImm(RndModeOp, Start, End);
// Exception-suppression only: "{sae}" with no rounding mode.
1570 if(Tok.getIdentifier().equals("sae")){
1571 Parser.Lex(); // Eat the sae
1572 if (!getLexer().is(AsmToken::RCurly))
1573 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1574 Parser.Lex(); // Eat "}"
1575 return X86Operand::CreateToken("{sae}", consumedToken);
1577 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1580 /// Parse the '.' operator.
/// The current token is either a Real (".Imm" is lexed that way) whose digits
/// give the offset directly, or - in inline assembly only - an identifier of
/// the form "base.member" whose offset is looked up through SemaCallback. Any
/// other token is an error. \p End is set to the start of the text following
/// the optional leading '.'.
1581 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
1582 const AsmToken &Tok = getTok();
1585 // Drop the optional '.'.
1586 StringRef DotDispStr = Tok.getString();
1587 if (DotDispStr.startswith("."))
1588 DotDispStr = DotDispStr.drop_front(1);
1590 // .Imm gets lexed as a real.
1591 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here - confirm a
// malformed number cannot reach this point.
1593 DotDispStr.getAsInteger(10, DotDisp);
1594 Offset = DotDisp.getZExtValue();
1595 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "base.member" at the first '.' and ask the frontend for the field.
1596 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1597 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1599 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1601 return Error(Tok.getLoc(), "Unexpected token type!");
1603 // Eat the DotExpression and update End
1604 End = SMLoc::getFromPointer(DotDispStr.data());
1605 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
1606 while (Tok.getLoc().getPointer() < DotExprEndLoc)
1612 /// Parse the 'offset' operator. This operator is used to specify the
1613 /// location rather than the content of a variable.
/// The operator keyword is consumed, the following identifier is resolved via
/// ParseIntelInlineAsmIdentifier, and the result is returned as a register
/// operand created with GetAddress set.
1614 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1615 MCAsmParser &Parser = getParser();
1616 const AsmToken &Tok = Parser.getTok();
1617 SMLoc OffsetOfLoc = Tok.getLoc();
1618 Parser.Lex(); // Eat offset.
1621 InlineAsmIdentifierInfo Info;
1622 SMLoc Start = Tok.getLoc(), End;
1623 StringRef Identifier = Tok.getString();
1624 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1625 /*Unevaluated=*/false, End))
1628 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// character - confirm against inputs with different spacing.
1629 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1631 // The offset operator will have an 'r' constraint, thus we need to create
1632 // register operand to ensure proper matching. Just pick a GPR based on
1633 // the size of a pointer.
1634 bool Parse32 = is32BitMode() || Code16GCC;
1635 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1637 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1638 OffsetOfLoc, Identifier, Info.OpDecl);
1641 // Query a candidate string for being an Intel inline-assembly operator.
1642 // Returns its kind, or IOK_INVALID if it is not one of the known operators.
// Only the all-upper-case and all-lower-case spellings listed below match.
1643 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
1644 return StringSwitch<unsigned>(Name)
1645 .Cases("TYPE","type",IOK_TYPE)
1646 .Cases("SIZE","size",IOK_SIZE)
1647 .Cases("LENGTH","length",IOK_LENGTH)
1648 .Cases("OFFSET","offset",IOK_OFFSET)
1649 .Default(IOK_INVALID);
1652 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1653 /// returns the number of elements in an array. It returns the value 1 for
1654 /// non-array variables. The SIZE operator returns the size of a C or C++
1655 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1656 /// TYPE operator returns the size of a C or C++ type or variable. If the
1657 /// variable is an array, TYPE returns the size of a single element.
///
/// \p OpKind selects which field of the identifier's lookup result is used
/// (IOK_LENGTH, IOK_SIZE or IOK_TYPE); any other kind is unreachable.
1658 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
1659 MCAsmParser &Parser = getParser();
1660 const AsmToken &Tok = Parser.getTok();
1661 Parser.Lex(); // Eat operator.
1663 const MCExpr *Val = nullptr;
1664 InlineAsmIdentifierInfo Info;
1665 SMLoc Start = Tok.getLoc(), End;
1666 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand.
1667 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1668 /*Unevaluated=*/true, End))
1672 Error(Start, "unable to lookup expression");
1678 default: llvm_unreachable("Unexpected operand kind!");
1679 case IOK_LENGTH: CVal = Info.Length; break;
1680 case IOK_SIZE: CVal = Info.Size; break;
1681 case IOK_TYPE: CVal = Info.Type; break;
/// Map an Intel size keyword at the current token (BYTE, WORD, ... ZMMWORD,
/// OPAQUE; all-upper-case or all-lower-case only) to its width in bits, stored
/// in \p Size. When a keyword was recognised, the following token must be
/// "PTR" or "ptr", otherwise an error is reported.
1687 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
1688 Size = StringSwitch<unsigned>(getTok().getString())
1689 .Cases("BYTE", "byte", 8)
1690 .Cases("WORD", "word", 16)
1691 .Cases("DWORD", "dword", 32)
1692 .Cases("FWORD", "fword", 48)
1693 .Cases("QWORD", "qword", 64)
1694 .Cases("MMWORD","mmword", 64)
1695 .Cases("XWORD", "xword", 80)
1696 .Cases("TBYTE", "tbyte", 80)
1697 .Cases("XMMWORD", "xmmword", 128)
1698 .Cases("YMMWORD", "ymmword", 256)
1699 .Cases("ZMMWORD", "zmmword", 512)
1700 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
1703 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
1704 if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
1705 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
/// Parse one operand in Intel syntax. In order, the operand is tried as:
///   1. a stand-alone inline-asm OFFSET operator,
///   2. an AVX-512 rounding-mode operand (after an optional size directive),
///   3. a plain register, or a segment register followed by ':',
///   4. an immediate or memory expression handled by ParseIntelExpression.
1711 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1712 MCAsmParser &Parser = getParser();
1713 const AsmToken &Tok = Parser.getTok();
1716 // FIXME: Offset operator
1717 // Should be handled as part of immediate expression, as other operators
1718 // Currently, only supported as a stand-alone operand
1719 if (isParsingInlineAsm())
1720 if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
1721 return ParseIntelOffsetOfOperator();
1723 // Parse optional Size directive.
1725 if (ParseIntelMemoryOperandSize(Size))
// A non-zero Size records that a "<size> ptr" directive preceded the operand.
1727 bool PtrInOperand = bool(Size);
1729 Start = Tok.getLoc();
1731 // Rounding mode operand.
1732 if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
1733 getLexer().is(AsmToken::LCurly))
1734 return ParseRoundingModeOp(Start, End);
1736 // Register operand.
1738 if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
1739 if (RegNo == X86::RIP)
1740 return ErrorOperand(Start, "rip can only be used as a base register");
1741 // A Register followed by ':' is considered a segment override
1742 if (Tok.isNot(AsmToken::Colon))
// A bare register is rejected when it follows a size directive.
1743 return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
1744 ErrorOperand(Start, "expected memory operand after 'ptr', "
1745 "found register operand instead");
1746 // An alleged segment override. check if we have a valid segment register
1747 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1748 return ErrorOperand(Start, "invalid segment register");
1749 // Eat ':' and update Start location
1750 Start = Lex().getLoc();
1753 // Immediates and Memory
1754 IntelExprStateMachine SM;
1755 if (ParseIntelExpression(SM, End))
// Inline assembly additionally records rewrites for the parsed expression.
1758 if (isParsingInlineAsm())
1759 RewriteIntelExpression(SM, Start, Tok.getLoc());
1761 int64_t Imm = SM.getImm();
1762 const MCExpr *Disp = SM.getSym();
1763 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
1765 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
1769 // RegNo != 0 specifies a valid segment register,
1770 // and we are parsing a segment override
1771 if (!SM.isMemExpr() && !RegNo)
1772 return X86Operand::CreateImm(Disp, Start, End);
1775 unsigned BaseReg = SM.getBaseReg();
1776 unsigned IndexReg = SM.getIndexReg();
1777 unsigned Scale = SM.getScale();
// The base/index/scale combination is validated only when a register is used.
1779 if ((BaseReg || IndexReg) &&
1780 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, ErrMsg))
1781 return ErrorOperand(Start, ErrMsg);
1782 if (isParsingInlineAsm())
1783 return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
1784 Scale, Start, End, Size, SM.getSymName(),
1785 SM.getIdentifierInfo());
// No segment, base or index register: displacement-only memory operand.
1786 if (!(BaseReg || IndexReg || RegNo))
1787 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1788 return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
1789 BaseReg, IndexReg, Scale, Start, End, Size);
/// Parse one operand in AT&T syntax, dispatching on the current token kind:
/// '%' starts a register (or a segment-prefixed memory reference), '$' starts
/// an immediate, '{' starts an AVX-512 rounding-mode operand, and the remaining
/// path parses a memory operand without a segment register.
1792 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1793 MCAsmParser &Parser = getParser();
1794 switch (getLexer().getKind()) {
1796 // Parse a memory operand with no segment register.
1797 return ParseMemOperand(0, Parser.getTok().getLoc());
1798 case AsmToken::Percent: {
1799 // Read the register.
1802 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo index registers and %rip are not valid as stand-alone operands.
1803 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1804 Error(Start, "%eiz and %riz can only be used as index registers",
1805 SMRange(Start, End));
1808 if (RegNo == X86::RIP) {
1809 Error(Start, "%rip can only be used as a base register",
1810 SMRange(Start, End));
1814 // If this is a segment register followed by a ':', then this is the start
1815 // of a memory reference, otherwise this is a normal register reference.
1816 if (getLexer().isNot(AsmToken::Colon))
1817 return X86Operand::CreateReg(RegNo, Start, End);
1819 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1820 return ErrorOperand(Start, "invalid segment register");
1822 getParser().Lex(); // Eat the colon.
1823 return ParseMemOperand(RegNo, Start);
1825 case AsmToken::Dollar: {
1826 // $42 -> immediate.
1827 SMLoc Start = Parser.getTok().getLoc(), End;
1830 if (getParser().parseExpression(Val, End))
1832 return X86Operand::CreateImm(Val, Start, End);
1834 case AsmToken::LCurly:{
1835 SMLoc Start = Parser.getTok().getLoc(), End;
// '{' is only meaningful here when the subtarget has AVX-512.
1836 if (getSTI().getFeatureBits()[X86::FeatureAVX512])
1837 return ParseRoundingModeOp(Start, End);
1838 return ErrorOperand(Start, "Unexpected '{' in expression");
1843 // true on failure, false otherwise
1844 // If no {z} mark was found - Parser doesn't advance
// On success \p Z receives a "{z}" token operand located at \p StartLoc; the
// caller is expected to have consumed the opening '{' already.
1845 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
1846 const SMLoc &StartLoc) {
1847 MCAsmParser &Parser = getParser();
1848 // Assuming we are just past the '{' mark, querying the next token
1849 // Searched for {z}, but none was found. Return false, as no parsing error was
1851 if (!(getLexer().is(AsmToken::Identifier) &&
1852 (getLexer().getTok().getIdentifier() == "z")))
1854 Parser.Lex(); // Eat z
1855 // Query and eat the '}' mark
1856 if (!getLexer().is(AsmToken::RCurly))
1857 return Error(getLexer().getLoc(), "Expected } at this point");
1858 Parser.Lex(); // Eat '}'
1859 // Assign Z with the {z} mark operand
1860 Z = X86Operand::CreateToken("{z}", StartLoc);
1864 // true on failure, false otherwise
// Parse the AVX-512 decorations that may follow an operand and append the
// resulting token/register operands to \p Operands:
//   {1to2} / {1to4} / {1to8} / {1to16}   memory broadcast
//   {%k<NUM>} and/or {z}                 write mask and zeroing mark
// Nothing is parsed unless the subtarget has AVX-512 and the next token is '{'.
1865 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1866 const MCParsedAsmOperand &Op) {
1867 MCAsmParser &Parser = getParser();
1868 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
1869 if (getLexer().is(AsmToken::LCurly)) {
1870 // Eat "{" and mark the current place.
1871 const SMLoc consumedToken = consumeToken();
1872 // Distinguish {1to<NUM>} from {%k<NUM>}.
1873 if(getLexer().is(AsmToken::Integer)) {
1874 // Parse memory broadcasting ({1to<NUM>}).
1875 if (getLexer().getTok().getIntVal() != 1)
1876 return TokError("Expected 1to<NUM> at this point");
1877 Parser.Lex(); // Eat "1" of 1to8
1878 if (!getLexer().is(AsmToken::Identifier) ||
1879 !getLexer().getTok().getIdentifier().startswith("to"))
1880 return TokError("Expected 1to<NUM> at this point");
1881 // Recognize only reasonable suffixes.
1882 const char *BroadcastPrimitive =
1883 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1884 .Case("to2", "{1to2}")
1885 .Case("to4", "{1to4}")
1886 .Case("to8", "{1to8}")
1887 .Case("to16", "{1to16}")
1889 if (!BroadcastPrimitive)
1890 return TokError("Invalid memory broadcast primitive.");
1891 Parser.Lex(); // Eat "toN" of 1toN
1892 if (!getLexer().is(AsmToken::RCurly))
1893 return TokError("Expected } at this point");
1894 Parser.Lex(); // Eat "}"
1895 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1897 // No AVX512 specific primitives can pass
1898 // after memory broadcasting, so return.
1901 // Parse either {k}{z}, {z}{k}, {k} or {z}
1902 // the last one has no meaning, but GCC accepts it
1903 // Currently, we're just past a '{' mark
1904 std::unique_ptr<X86Operand> Z;
1905 if (ParseZ(Z, consumedToken))
1907 // Reaching here means that parsing of the allegedly '{z}' mark yielded
1909 // Query for the need of further parsing for a {%k<NUM>} mark
1910 if (!Z || getLexer().is(AsmToken::LCurly)) {
// If {z} was just parsed, the '{' of the mask still has to be consumed;
// otherwise the '{' eaten at the top belongs to the mask.
1911 SMLoc StartLoc = Z ? consumeToken() : consumedToken;
1912 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
1916 if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
1917 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
1918 if (RegNo == X86::K0)
1919 return Error(RegLoc, "Register k0 can't be used as write mask");
1920 if (!getLexer().is(AsmToken::RCurly))
1921 return Error(getLexer().getLoc(), "Expected } at this point");
// The mask is emitted as three operands: "{", the register, "}".
1922 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
1924 X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
1925 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1927 return Error(getLexer().getLoc(),
1928 "Expected an op-mask register at this point");
1929 // {%k<NUM>} mark is found, inquire for {z}
1930 if (getLexer().is(AsmToken::LCurly) && !Z) {
1931 // If we have found a parsing error, or found no (expected) {z} mark
1932 // - report an error
1933 if (ParseZ(Z, consumeToken()) || !Z)
1934 return Error(getLexer().getLoc(),
1935 "Expected a {z} mark at this point");
1938 // '{z}' on its own is meaningless, hence should be ignored.
1939 // on the contrary - had it been accompanied by a K register,
1942 Operands.push_back(std::move(Z));
1950 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1951 /// has already been parsed if present.
/// \p SegReg is the segment register parsed by the caller (0 when there is
/// none). Returns the memory operand, or nullptr after reporting an error.
1952 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1955 MCAsmParser &Parser = getParser();
1956 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1957 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1958 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0.
1960 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
1961 if (getLexer().isNot(AsmToken::LParen)) {
1963 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1965 // After parsing the base expression we could either have a parenthesized
1966 // memory address or not. If not, return now. If so, eat the (.
1967 if (getLexer().isNot(AsmToken::LParen)) {
1968 // Unless we have a segment register, treat this as an immediate.
1970 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1971 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1978 // Okay, we have a '('. We don't know if this is an expression or not,
1979 // so we have to eat the ( to see beyond it.
1980 SMLoc LParenLoc = Parser.getTok().getLoc();
1981 Parser.Lex(); // Eat the '('.
1983 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1984 // Nothing to do here, fall into the code below with the '(' part of the
1985 // memory operand consumed.
1989 // It must be a parenthesized expression, parse it now.
1990 if (getParser().parseParenExpression(Disp, ExprEnd))
1993 // After parsing the base expression we could either have a parenthesized
1994 // memory address or not. If not, return now. If so, eat the (.
1995 if (getLexer().isNot(AsmToken::LParen)) {
1996 // Unless we have a segment register, treat this as an immediate.
1998 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
2000 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2009 // If we reached here, then we just ate the ( of the memory operand. Process
2010 // the rest of the memory operand.
2011 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2012 SMLoc IndexLoc, BaseLoc;
// Optional base register.
2014 if (getLexer().is(AsmToken::Percent)) {
2015 SMLoc StartLoc, EndLoc;
2016 BaseLoc = Parser.getTok().getLoc();
2017 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
2018 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2019 Error(StartLoc, "eiz and riz can only be used as index registers",
2020 SMRange(StartLoc, EndLoc));
2025 if (getLexer().is(AsmToken::Comma)) {
2026 Parser.Lex(); // Eat the comma.
2027 IndexLoc = Parser.getTok().getLoc();
2029 // Following the comma we should have either an index register, or a scale
2030 // value. We don't support the latter form, but we want to parse it
2033 // Note that even though it would be completely consistent to support syntax
2034 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2035 if (getLexer().is(AsmToken::Percent)) {
2037 if (ParseRegister(IndexReg, L, L))
// %rip may be used neither together with an index register nor as one.
2039 if (BaseReg == X86::RIP) {
2040 Error(IndexLoc, "%rip as base register can not have an index register");
2043 if (IndexReg == X86::RIP) {
2044 Error(IndexLoc, "%rip is not allowed as an index register");
2048 if (getLexer().isNot(AsmToken::RParen)) {
2049 // Parse the scale amount:
2050 // ::= ',' [scale-expression]
2051 if (getLexer().isNot(AsmToken::Comma)) {
2052 Error(Parser.getTok().getLoc(),
2053 "expected comma in scale expression");
2056 Parser.Lex(); // Eat the comma.
2058 if (getLexer().isNot(AsmToken::RParen)) {
2059 SMLoc Loc = Parser.getTok().getLoc();
2062 if (getParser().parseAbsoluteExpression(ScaleVal)){
2063 Error(Loc, "expected scale expression");
2067 // Validate the scale amount.
2068 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2070 Error(Loc, "scale factor in 16-bit address must be 1");
2073 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
2075 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2078 Scale = (unsigned)ScaleVal;
2081 } else if (getLexer().isNot(AsmToken::RParen)) {
2082 // A scale amount without an index is ignored.
2084 SMLoc Loc = Parser.getTok().getLoc();
2087 if (getParser().parseAbsoluteExpression(Value))
2091 Warning(Loc, "scale factor without index register is ignored");
2096 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2097 if (getLexer().isNot(AsmToken::RParen)) {
2098 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2101 SMLoc MemEnd = Parser.getTok().getEndLoc();
2102 Parser.Lex(); // Eat the ')'.
2104 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2105 // and then only in non-64-bit modes. Except for DX, which is a special case
2106 // because an unofficial form of in/out instructions uses it.
2107 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2108 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2109 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2110 BaseReg != X86::DX) {
2111 Error(BaseLoc, "invalid 16-bit base register");
2115 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2116 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final consistency check of the base/index/scale combination.
2121 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, ErrMsg)) {
2122 Error(BaseLoc, ErrMsg);
// Use the full form when any register is involved, otherwise the
// displacement-only form.
2126 if (SegReg || BaseReg || IndexReg)
2127 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2128 IndexReg, Scale, MemStart, MemEnd);
2129 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2132 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2133 SMLoc NameLoc, OperandVector &Operands) {
2134 MCAsmParser &Parser = getParser();
2136 StringRef PatchedName = Name;
2138 if ((Name.equals("jmp") || Name.equals("jc") || Name.equals("jz")) &&
2139 isParsingIntelSyntax() && isParsingInlineAsm()) {
2140 StringRef NextTok = Parser.getTok().getString();
2141 if (NextTok == "short") {
2143 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2144 // Eat the short keyword
2146 // MS ignores the short keyword, it determines the jmp type based
2147 // on the distance of the label
2148 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2149 NextTok.size() + 1);
2153 // FIXME: Hack to recognize setneb as setne.
2154 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2155 PatchedName != "setb" && PatchedName != "setnb")
2156 PatchedName = PatchedName.substr(0, Name.size()-1);
2158 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2159 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2160 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2161 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2162 bool IsVCMP = PatchedName[0] == 'v';
2163 unsigned CCIdx = IsVCMP ? 4 : 3;
2164 unsigned ComparisonCode = StringSwitch<unsigned>(
2165 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2167 .Case("eq_oq", 0x00)
2169 .Case("lt_os", 0x01)
2171 .Case("le_os", 0x02)
2172 .Case("unord", 0x03)
2173 .Case("unord_q", 0x03)
2175 .Case("neq_uq", 0x04)
2177 .Case("nlt_us", 0x05)
2179 .Case("nle_us", 0x06)
2181 .Case("ord_q", 0x07)
2182 /* AVX only from here */
2183 .Case("eq_uq", 0x08)
2185 .Case("nge_us", 0x09)
2187 .Case("ngt_us", 0x0A)
2188 .Case("false", 0x0B)
2189 .Case("false_oq", 0x0B)
2190 .Case("neq_oq", 0x0C)
2192 .Case("ge_os", 0x0D)
2194 .Case("gt_os", 0x0E)
2196 .Case("true_uq", 0x0F)
2197 .Case("eq_os", 0x10)
2198 .Case("lt_oq", 0x11)
2199 .Case("le_oq", 0x12)
2200 .Case("unord_s", 0x13)
2201 .Case("neq_us", 0x14)
2202 .Case("nlt_uq", 0x15)
2203 .Case("nle_uq", 0x16)
2204 .Case("ord_s", 0x17)
2205 .Case("eq_us", 0x18)
2206 .Case("nge_uq", 0x19)
2207 .Case("ngt_uq", 0x1A)
2208 .Case("false_os", 0x1B)
2209 .Case("neq_os", 0x1C)
2210 .Case("ge_oq", 0x1D)
2211 .Case("gt_oq", 0x1E)
2212 .Case("true_us", 0x1F)
2214 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2216 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2219 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2220 getParser().getContext());
2221 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2223 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2227 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2228 if (PatchedName.startswith("vpcmp") &&
2229 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2230 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2231 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2232 unsigned ComparisonCode = StringSwitch<unsigned>(
2233 PatchedName.slice(5, PatchedName.size() - CCIdx))
2234 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2237 //.Case("false", 0x3) // Not a documented alias.
2241 //.Case("true", 0x7) // Not a documented alias.
2243 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2244 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2246 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2247 getParser().getContext());
2248 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2250 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2254 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2255 if (PatchedName.startswith("vpcom") &&
2256 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2257 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2258 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2259 unsigned ComparisonCode = StringSwitch<unsigned>(
2260 PatchedName.slice(5, PatchedName.size() - CCIdx))
2270 if (ComparisonCode != ~0U) {
2271 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2273 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2274 getParser().getContext());
2275 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2277 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2281 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2283 // Determine whether this is an instruction prefix.
2285 // Enhace prefixes integrity robustness. for example, following forms
2286 // are currently tolerated:
2287 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
2288 // lock addq %rax, %rbx ; Destination operand must be of memory type
2289 // xacquire <insn> ; xacquire must be accompanied by 'lock'
2290 bool isPrefix = StringSwitch<bool>(Name)
2295 "data32", "data16", true)
2296 .Cases("xacquire", "xrelease", true)
2297 .Cases("acquire", "release", isParsingIntelSyntax())
2300 bool CurlyAsEndOfStatement = false;
2301 // This does the actual operand parsing. Don't parse any more if we have a
2302 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2303 // just want to parse the "lock" as the first instruction and the "incl" as
2305 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2307 // Parse '*' modifier.
2308 if (getLexer().is(AsmToken::Star))
2309 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2311 // Read the operands.
2313 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2314 Operands.push_back(std::move(Op));
2315 if (HandleAVX512Operand(Operands, *Operands.back()))
2320 // check for comma and eat it
2321 if (getLexer().is(AsmToken::Comma))
2327 // In MS inline asm curly braces mark the beginning/end of a block,
2328 // therefore they should be interepreted as end of statement
2329 CurlyAsEndOfStatement =
2330 isParsingIntelSyntax() && isParsingInlineAsm() &&
2331 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2332 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2333 return TokError("unexpected token in argument list");
2336 // Consume the EndOfStatement or the prefix separator Slash
2337 if (getLexer().is(AsmToken::EndOfStatement) ||
2338 (isPrefix && getLexer().is(AsmToken::Slash)))
2340 else if (CurlyAsEndOfStatement)
2341 // Add an actual EndOfStatement before the curly brace
2342 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2343 getLexer().getTok().getLoc(), 0);
2345 // This is for gas compatibility and cannot be done in td.
2346 // Adding "p" for some floating point with no argument.
2347 // For example: fsub --> fsubp
2349 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2350 if (IsFp && Operands.size() == 1) {
2351 const char *Repl = StringSwitch<const char *>(Name)
2352 .Case("fsub", "fsubp")
2353 .Case("fdiv", "fdivp")
2354 .Case("fsubr", "fsubrp")
2355 .Case("fdivr", "fdivrp");
2356 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2359 // Moving a 32 or 16 bit value into a segment register has the same
2360 // behavior. Modify such instructions to always take shorter form.
2361 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2362 (Operands.size() == 3)) {
2363 X86Operand &Op1 = (X86Operand &)*Operands[1];
2364 X86Operand &Op2 = (X86Operand &)*Operands[2];
2365 SMLoc Loc = Op1.getEndLoc();
2366 if (Op1.isReg() && Op2.isReg() &&
2367 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2369 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2370 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2371 // Change instruction name to match new instruction.
2372 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2373 Name = is16BitMode() ? "movw" : "movl";
2374 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2376 // Select the correct equivalent 16-/32-bit source register.
2378 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2379 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2383 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2384 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2385 // documented form in various unofficial manuals, so a lot of code uses it.
2386 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2387 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2388 Operands.size() == 3) {
2389 X86Operand &Op = (X86Operand &)*Operands.back();
2390 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2391 isa<MCConstantExpr>(Op.Mem.Disp) &&
2392 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2393 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2394 SMLoc Loc = Op.getEndLoc();
2395 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2398 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2399 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2400 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2401 Operands.size() == 3) {
2402 X86Operand &Op = (X86Operand &)*Operands[1];
2403 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2404 isa<MCConstantExpr>(Op.Mem.Disp) &&
2405 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2406 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2407 SMLoc Loc = Op.getEndLoc();
2408 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2412 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2413 bool HadVerifyError = false;
2415 // Append default arguments to "ins[bwld]"
2416 if (Name.startswith("ins") &&
2417 (Operands.size() == 1 || Operands.size() == 3) &&
2418 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2421 AddDefaultSrcDestOperands(TmpOperands,
2422 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2423 DefaultMemDIOperand(NameLoc));
2424 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2427 // Append default arguments to "outs[bwld]"
2428 if (Name.startswith("outs") &&
2429 (Operands.size() == 1 || Operands.size() == 3) &&
2430 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2431 Name == "outsd" || Name == "outs")) {
2432 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2433 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2434 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2437 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2438 // values of $SIREG according to the mode. It would be nice if this
2439 // could be achieved with InstAlias in the tables.
2440 if (Name.startswith("lods") &&
2441 (Operands.size() == 1 || Operands.size() == 2) &&
2442 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2443 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2444 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2445 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2448 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2449 // values of $DIREG according to the mode. It would be nice if this
2450 // could be achieved with InstAlias in the tables.
2451 if (Name.startswith("stos") &&
2452 (Operands.size() == 1 || Operands.size() == 2) &&
2453 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2454 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2455 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2456 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2459 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2460 // values of $DIREG according to the mode. It would be nice if this
2461 // could be achieved with InstAlias in the tables.
2462 if (Name.startswith("scas") &&
2463 (Operands.size() == 1 || Operands.size() == 2) &&
2464 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2465 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2466 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2467 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2470 // Add default SI and DI operands to "cmps[bwlq]".
2471 if (Name.startswith("cmps") &&
2472 (Operands.size() == 1 || Operands.size() == 3) &&
2473 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2474 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2475 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2476 DefaultMemSIOperand(NameLoc));
2477 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2480 // Add default SI and DI operands to "movs[bwlq]".
2481 if (((Name.startswith("movs") &&
2482 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2483 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2484 (Name.startswith("smov") &&
2485 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2486 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2487 (Operands.size() == 1 || Operands.size() == 3)) {
2488 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2489 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2490 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2491 DefaultMemDIOperand(NameLoc));
2492 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2495 // Check if we encountered an error for one of the string instructions
2496 if (HadVerifyError) {
2497 return HadVerifyError;
2500 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2502 if ((Name.startswith("shr") || Name.startswith("sar") ||
2503 Name.startswith("shl") || Name.startswith("sal") ||
2504 Name.startswith("rcl") || Name.startswith("rcr") ||
2505 Name.startswith("rol") || Name.startswith("ror")) &&
2506 Operands.size() == 3) {
2507 if (isParsingIntelSyntax()) {
2509 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2510 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2511 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2512 Operands.pop_back();
2514 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2515 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2516 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2517 Operands.erase(Operands.begin() + 1);
2521 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2522 // instalias with an immediate operand yet.
2523 if (Name == "int" && Operands.size() == 2) {
2524 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2526 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2527 if (CE->getValue() == 3) {
2528 Operands.erase(Operands.begin() + 1);
2529 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2533 // Transforms "xlat mem8" into "xlatb"
2534 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2535 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2537 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2538 "size, (R|E)BX will be used for the location");
2539 Operands.pop_back();
2540 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
// Post-match hook. The MatchAndEmit*Instruction routines call it in a loop for
// as long as it returns true, so that individual instruction tweaks can chain
// off each other. The body is not visible in this listing.
2547 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2551 static const char *getSubtargetFeatureName(uint64_t Val);
// Emits Inst by forwarding it, together with the parsed operands and the MC
// context, to the X86AsmInstrumentation object held in the Instrumentation
// member.
2553 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2555 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
// Entry point for matching a parsed instruction. Dispatches on the current
// syntax: the Intel matcher when the parser is in Intel syntax, the AT&T
// matcher otherwise. Returns whatever the selected matcher returns.
2559 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2560 OperandVector &Operands,
2561 MCStreamer &Out, uint64_t &ErrorInfo,
2562 bool MatchingInlineAsm) {
2563 if (isParsingIntelSyntax())
2564 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2566 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
// Handles FPU mnemonics that stand for a WAIT instruction followed by the
// no-wait ("fn*") form of the same operation.
//
// Op is the mnemonic token. The table below maps each waiting mnemonic to its
// replacement. A WAIT instruction is built and, unless MatchingInlineAsm is
// set, emitted to Out; Operands[0] is then replaced by a token holding the
// replacement mnemonic.
// NOTE(review): the lines that guard this rewrite on a successful table lookup
// are not visible in this listing -- confirm it only runs when a match exists.
2570 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2571 OperandVector &Operands, MCStreamer &Out,
2572 bool MatchingInlineAsm) {
2573 // FIXME: This should be replaced with a real .td file alias mechanism.
2574 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2576 const char *Repl = StringSwitch<const char *>(Op.getToken())
2577 .Case("finit", "fninit")
2578 .Case("fsave", "fnsave")
2579 .Case("fstcw", "fnstcw")
2580 .Case("fstcww", "fnstcw")
2581 .Case("fstenv", "fnstenv")
2582 .Case("fstsw", "fnstsw")
2583 .Case("fstsww", "fnstsw")
2584 .Case("fclex", "fnclex")
// Emit the leading WAIT (skipped when only matching inline asm) ...
2588 Inst.setOpcode(X86::WAIT);
2590 if (!MatchingInlineAsm)
2591 EmitInstruction(Inst, Operands, Out);
// ... then swap the mnemonic token for the no-wait form.
2592 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Reports a "missing feature" match failure at IDLoc.
//
// ErrorInfo is treated as a bit mask and must be non-zero. For every tested
// bit that is set, the name returned by getSubtargetFeatureName() is appended
// to the message "instruction requires:". Returns the result of Error().
// NOTE(review): the loop runs sizeof(ErrorInfo)*8-1 (i.e. 63) times. Mask's
// declaration and update are not visible in this listing, so confirm whether
// the top bit of ErrorInfo is intentionally never reported.
2596 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2597 bool MatchingInlineAsm) {
2598 assert(ErrorInfo && "Unknown missing feature!");
2599 SmallString<126> Msg;
2600 raw_svector_ostream OS(Msg);
2601 OS << "instruction requires:";
2603 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2604 if (ErrorInfo & Mask)
2605 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2608 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
// Matches and emits an instruction written in AT&T syntax.
//
// Strategy, as visible below:
//   1. Expand FPU wait aliases (MatchFPUWaitAlias).
//   2. Try a direct match of the mnemonic as written.
//   3. If that does not succeed, retry with each candidate size suffix
//      appended to the mnemonic ("slt" for mnemonics starting with 'f',
//      "bwlq" otherwise) and accept the result only when exactly one
//      suffixed form matches.
//   4. Otherwise emit the most specific diagnostic available.
// On success Opcode receives the matched opcode and the instruction is
// emitted to Out unless MatchingInlineAsm is set.
2611 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2612 OperandVector &Operands,
2614 uint64_t &ErrorInfo,
2615 bool MatchingInlineAsm) {
2616 assert(!Operands.empty() && "Unexpect empty operand list!");
2617 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2618 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2619 SMRange EmptyRange = None;
2621 // First, handle aliases that expand to multiple instructions.
2622 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers whether the direct match failed on an operand rather than on the
// mnemonic; consulted when choosing the final diagnostic.
2624 bool WasOriginallyInvalidOperand = false;
2627 // First, try a direct match.
2628 switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2629 isParsingIntelSyntax())) {
2630 default: llvm_unreachable("Unexpected match result!");
2632 // Some instructions need post-processing to, for example, tweak which
2633 // encoding is selected. Loop on it while changes happen so the
2634 // individual transformations can chain off each other.
2635 if (!MatchingInlineAsm)
2636 while (processInstruction(Inst, Operands))
2640 if (!MatchingInlineAsm)
2641 EmitInstruction(Inst, Operands, Out);
2642 Opcode = Inst.getOpcode();
2644 case Match_MissingFeature:
2645 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2646 case Match_InvalidOperand:
2647 WasOriginallyInvalidOperand = true;
2649 case Match_MnemonicFail:
2653 // FIXME: Ideally, we would only attempt suffix matches for things which are
2654 // valid prefixes, and we could just infer the right unambiguous
2655 // type. However, that requires substantially more matcher support than the
2658 // Change the operand to point to a temporary token.
2659 StringRef Base = Op.getToken();
2660 SmallString<16> Tmp;
2663 Op.setTokenValue(Tmp);
2665 // If this instruction starts with an 'f', then it is a floating point stack
2666 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2667 // 80-bit floating point, which use the suffixes s,l,t respectively.
2669 // Otherwise, we assume that this may be an integer instruction, which comes
2670 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The explicit "\0" gives the floating point list a fourth entry so it can be
// indexed the same way as "bwlq" (Match presumably has four slots -- its
// declaration is not visible in this listing).
2671 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2673 // Check for the various suffix matches.
2674 uint64_t ErrorInfoIgnore;
2675 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Try each suffix in turn by overwriting the last character of Tmp.
2678 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2679 Tmp.back() = Suffixes[I];
2680 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2681 MatchingInlineAsm, isParsingIntelSyntax());
2682 // If this returned as a missing feature failure, remember that.
2683 if (Match[I] == Match_MissingFeature)
2684 ErrorInfoMissingFeature = ErrorInfoIgnore;
2687 // Restore the old token.
2688 Op.setTokenValue(Base);
2690 // If exactly one matched, then we treat that as a successful match (and the
2691 // instruction will already have been filled in correctly, since the failing
2692 // matches won't have modified it).
2693 unsigned NumSuccessfulMatches =
2694 std::count(std::begin(Match), std::end(Match), Match_Success);
2695 if (NumSuccessfulMatches == 1) {
2697 if (!MatchingInlineAsm)
2698 EmitInstruction(Inst, Operands, Out);
2699 Opcode = Inst.getOpcode();
2703 // Otherwise, the match failed, try to produce a decent error message.
2705 // If we had multiple suffix matches, then identify this as an ambiguous
2707 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2709 unsigned NumMatches = 0;
2710 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2711 if (Match[I] == Match_Success)
2712 MatchChars[NumMatches++] = Suffixes[I];
2714 SmallString<126> Msg;
2715 raw_svector_ostream OS(Msg);
2716 OS << "ambiguous instructions require an explicit suffix (could be ";
2717 for (unsigned i = 0; i != NumMatches; ++i) {
2720 if (i + 1 == NumMatches)
2722 OS << "'" << Base << MatchChars[i] << "'";
2725 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
2729 // Okay, we know that none of the variants matched successfully.
2731 // If all of the instructions reported an invalid mnemonic, then the original
2732 // mnemonic was invalid.
2733 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2734 if (!WasOriginallyInvalidOperand) {
2735 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2736 Op.getLocRange(), MatchingInlineAsm);
2739 // Recover location info for the operand if we know which was the problem.
// Here ErrorInfo (from the direct match) is used as an operand index; ~0ULL
// is treated as "no operand identified".
2740 if (ErrorInfo != ~0ULL) {
2741 if (ErrorInfo >= Operands.size())
2742 return Error(IDLoc, "too few operands for instruction", EmptyRange,
2745 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2746 if (Operand.getStartLoc().isValid()) {
2747 SMRange OperandRange = Operand.getLocRange();
2748 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2749 OperandRange, MatchingInlineAsm);
2753 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2757 // If one instruction matched with a missing feature, report this as a
2759 if (std::count(std::begin(Match), std::end(Match),
2760 Match_MissingFeature) == 1) {
2761 ErrorInfo = ErrorInfoMissingFeature;
2762 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2766 // If one instruction matched with an invalid operand, report this as an
2768 if (std::count(std::begin(Match), std::end(Match),
2769 Match_InvalidOperand) == 1) {
2770 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2774 // If all of these were an outright failure, report it in a useless way.
2775 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2776 EmptyRange, MatchingInlineAsm);
// Matches and emits an instruction written in Intel syntax.
//
// Intel syntax carries no size suffix on the mnemonic, so the size has to come
// from the operands. Strategy, as visible below:
//   1. Expand FPU wait aliases (MatchFPUWaitAlias).
//   2. Locate an unsized memory operand, if any; for call/jmp/push give it the
//      pointer width.
//   3. For "push <constant imm>", try an AT&T-style match with a mode-derived
//      suffix appended to the mnemonic.
//   4. If a memory operand is still unsized, try every size in MopSizes and
//      record the results in Match.
//   5. If nothing has been tried yet, match the operands as written.
//   6. On ambiguity, retry with the frontend-provided size when one exists.
// Exactly one successful match is emitted (unless MatchingInlineAsm) and its
// opcode stored in Opcode; anything else is reported as an error.
2780 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2781 OperandVector &Operands,
2783 uint64_t &ErrorInfo,
2784 bool MatchingInlineAsm) {
2785 assert(!Operands.empty() && "Unexpect empty operand list!");
2786 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2787 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2788 StringRef Mnemonic = Op.getToken();
2789 SMRange EmptyRange = None;
// Base is read from the same token as Mnemonic; it is used below to restore
// the token after the temporary suffixed mnemonic is tried.
2790 StringRef Base = Op.getToken();
2792 // First, handle aliases that expand to multiple instructions.
2793 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2797 // Find one unsized memory operand, if present.
2798 X86Operand *UnsizedMemOp = nullptr;
2799 for (const auto &Op : Operands) {
2800 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2801 if (X86Op->isMemUnsized()) {
2802 UnsizedMemOp = X86Op;
2803 // Have we found an unqualified memory operand,
2804 // break. IA allows only one memory operand.
2809 // Allow some instructions to have implicitly pointer-sized operands. This is
2810 // compatible with gas.
2812 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2813 for (const char *Instr : PtrSizedInstrs) {
2814 if (Mnemonic == Instr) {
2815 UnsizedMemOp->Mem.Size = getPointerWidth();
// One entry per match attempt made below.
2821 SmallVector<unsigned, 8> Match;
2822 uint64_t ErrorInfoMissingFeature = 0;
2824 // If an unsized push has an immediate operand, default the operand size to
2825 // the pointer width.
2826 if (Mnemonic == "push" && Operands.size() == 2) {
2827 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
2828 if (X86Op->isImm()) {
2829 // If it's not a constant fall through and let remainder take care of it.
2830 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
2831 unsigned Size = getPointerWidth();
2833 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
2834 SmallString<16> Tmp;
2836 Tmp += (is64BitMode())
2838 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
2839 Op.setTokenValue(Tmp);
2840 // Do match in ATT mode to allow explicit suffix usage.
2841 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
2843 false /*isParsingIntelSyntax()*/));
2844 Op.setTokenValue(Base);
2849 // If an unsized memory operand is present, try to match with each memory
2850 // operand size. In Intel assembly, the size is not part of the instruction
2852 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2853 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2854 for (unsigned Size : MopSizes) {
2855 UnsizedMemOp->Mem.Size = Size;
2856 uint64_t ErrorInfoIgnore;
2857 unsigned LastOpcode = Inst.getOpcode();
2858 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2859 MatchingInlineAsm, isParsingIntelSyntax());
// The condition holds for the first attempt or when this size selected a
// different opcode than the previous one (the guarded statement is not
// visible in this listing).
2860 if (Match.empty() || LastOpcode != Inst.getOpcode())
2863 // If this returned as a missing feature failure, remember that.
2864 if (Match.back() == Match_MissingFeature)
2865 ErrorInfoMissingFeature = ErrorInfoIgnore;
2868 // Restore the size of the unsized memory operand if we modified it.
2869 UnsizedMemOp->Mem.Size = 0;
2872 // If we haven't matched anything yet, this is not a basic integer or FPU
2873 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2874 // matching with the unsized operand.
2875 if (Match.empty()) {
2876 Match.push_back(MatchInstruction(
2877 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
2878 // If this returned as a missing feature failure, remember that.
2879 if (Match.back() == Match_MissingFeature)
2880 ErrorInfoMissingFeature = ErrorInfo;
2883 // Restore the size of the unsized memory operand if we modified it.
2885 UnsizedMemOp->Mem.Size = 0;
2887 // If it's a bad mnemonic, all results will be the same.
2888 if (Match.back() == Match_MnemonicFail) {
2889 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2890 Op.getLocRange(), MatchingInlineAsm);
2893 unsigned NumSuccessfulMatches =
2894 std::count(std::begin(Match), std::end(Match), Match_Success);
2896 // If matching was ambiguous and we had size information from the frontend,
2897 // try again with that. This handles cases like "movzx eax, m8/m16".
2898 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
2899 UnsizedMemOp->getMemFrontendSize()) {
2900 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
2901 unsigned M = MatchInstruction(
2902 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
2903 if (M == Match_Success)
2904 NumSuccessfulMatches = 1;
2906 // Add a rewrite that encodes the size information we used from the
2908 InstInfo->AsmRewrites->emplace_back(
2909 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
2910 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
2913 // If exactly one matched, then we treat that as a successful match (and the
2914 // instruction will already have been filled in correctly, since the failing
2915 // matches won't have modified it).
2916 if (NumSuccessfulMatches == 1) {
2917 // Some instructions need post-processing to, for example, tweak which
2918 // encoding is selected. Loop on it while changes happen so the individual
2919 // transformations can chain off each other.
2920 if (!MatchingInlineAsm)
2921 while (processInstruction(Inst, Operands))
2924 if (!MatchingInlineAsm)
2925 EmitInstruction(Inst, Operands, Out);
2926 Opcode = Inst.getOpcode();
2928 } else if (NumSuccessfulMatches > 1) {
2929 assert(UnsizedMemOp &&
2930 "multiple matches only possible with unsized memory operands");
2931 return Error(UnsizedMemOp->getStartLoc(),
2932 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2933 UnsizedMemOp->getLocRange());
2936 // If one instruction matched with a missing feature, report this as a
2938 if (std::count(std::begin(Match), std::end(Match),
2939 Match_MissingFeature) == 1) {
2940 ErrorInfo = ErrorInfoMissingFeature;
2941 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2945 // If one instruction matched with an invalid operand, report this as an
2947 if (std::count(std::begin(Match), std::end(Match),
2948 Match_InvalidOperand) == 1) {
2949 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2953 // If all of these were an outright failure, report it in a useless way.
2954 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
// Returns true when RegNo belongs to the X86 segment register class.
2958 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2959 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
// Handles the target-specific directives recognized by this parser:
//   .word          -> ParseDirectiveWord with a size of 2 bytes
//   .code*         -> ParseDirectiveCode
//   .att_syntax    -> assembler dialect 0; an optional "prefix" argument is
//                     recognized, "noprefix" is rejected with an error
//   .intel_syntax  -> assembler dialect 1; an optional "noprefix" argument is
//                     recognized, "prefix" is rejected with an error
//   .even          -> parseDirectiveEven
// The return statements for the remaining paths are not visible in this
// listing.
2962 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2963 MCAsmParser &Parser = getParser();
2964 StringRef IDVal = DirectiveID.getIdentifier();
2965 if (IDVal == ".word")
2966 return ParseDirectiveWord(2, DirectiveID.getLoc());
2967 else if (IDVal.startswith(".code"))
2968 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2969 else if (IDVal.startswith(".att_syntax")) {
// NOTE(review): switching syntax also toggles the parser's inline-asm flag
// (false here, true for .intel_syntax below) -- confirm this is intended.
2970 getParser().setParsingInlineAsm(false);
2971 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2972 if (Parser.getTok().getString() == "prefix")
2974 else if (Parser.getTok().getString() == "noprefix")
2975 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2976 "supported: registers must have a "
2977 "'%' prefix in .att_syntax");
2979 getParser().setAssemblerDialect(0);
2981 } else if (IDVal.startswith(".intel_syntax")) {
2982 getParser().setAssemblerDialect(1);
2983 getParser().setParsingInlineAsm(true);
2984 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2985 if (Parser.getTok().getString() == "noprefix")
2987 else if (Parser.getTok().getString() == "prefix")
2988 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2989 "supported: registers must not have "
2990 "a '%' prefix in .intel_syntax");
2993 } else if (IDVal == ".even")
2994 return parseDirectiveEven(DirectiveID.getLoc());
2998 /// parseDirectiveEven
/// Handles the ".even" directive, which takes no arguments: any token other
/// than end-of-statement is diagnosed. The current section is then aligned to
/// a 2-byte boundary, using code alignment when the section reports
/// UseCodeAlign() and EmitValueToAlignment() otherwise (presumably the else
/// branch; the line between them is not visible in this listing).
3000 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3001 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3002 TokError("unexpected token in directive");
3005 const MCSection *Section = getStreamer().getCurrentSectionOnly();
// Presumably guarded by a check that no section is current yet (guard not
// visible in this listing): initialize sections and re-read the current one.
3007 getStreamer().InitSections(false);
3008 Section = getStreamer().getCurrentSectionOnly();
3010 if (Section->UseCodeAlign())
3011 getStreamer().EmitCodeAlignment(2, 0);
3013 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3016 /// ParseDirectiveWord
3017 /// ::= .word [ expression (, expression)* ]
/// Size is the width in bytes of each emitted value and L is the location used
/// for the "unexpected token" diagnostic. Each expression is parsed in turn;
/// constant expressions are range-checked against 8*Size bits (accepted if
/// they fit as either a signed or an unsigned value) and emitted with
/// EmitIntValue, all other expressions are emitted with EmitValue.
3018 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
3019 MCAsmParser &Parser = getParser();
3020 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3022 const MCExpr *Value;
3023 SMLoc ExprLoc = getLexer().getLoc();
3024 if (getParser().parseExpression(Value))
3027 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
3028 assert(Size <= 8 && "Invalid size");
3029 uint64_t IntValue = MCE->getValue();
3030 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3031 return Error(ExprLoc, "literal value out of range for directive");
3032 getStreamer().EmitIntValue(IntValue, Size);
3034 getStreamer().EmitValue(Value, Size, ExprLoc);
3037 if (getLexer().is(AsmToken::EndOfStatement))
3040 // FIXME: Improve diagnostic.
3041 if (getLexer().isNot(AsmToken::Comma)) {
3042 Error(L, "unexpected token in directive");
3053 /// ParseDirectiveCode
3054 /// ::= .code16 | .code32 | .code64
/// ".code16gcc" is accepted as well. IDVal is the directive text and L the
/// location used for the "unknown directive" diagnostic. For a recognized
/// directive the parser mode is switched and the matching assembler flag is
/// emitted, but only when the parser is not already in that mode.
3055 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3056 MCAsmParser &Parser = getParser();
3058 if (IDVal == ".code16") {
3060 if (!is16BitMode()) {
3061 SwitchMode(X86::Mode16Bit);
3062 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3064 } else if (IDVal == ".code16gcc") {
3065 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3068 if (!is16BitMode()) {
3069 SwitchMode(X86::Mode16Bit);
3070 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3072 } else if (IDVal == ".code32") {
3074 if (!is32BitMode()) {
3075 SwitchMode(X86::Mode32Bit);
3076 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3078 } else if (IDVal == ".code64") {
3080 if (!is64BitMode()) {
3081 SwitchMode(X86::Mode64Bit);
3082 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Any other ".code*" spelling is rejected.
3085 Error(L, "unknown directive " + IDVal);
3092 // Force static initialization.
// Registers X86AsmParser as the MC assembly parser for both the 32-bit and
// the 64-bit X86 targets. Declared extern "C" so the symbol name is unmangled.
3093 extern "C" void LLVMInitializeX86AsmParser() {
3094 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3095 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3098 #define GET_REGISTER_MATCHER
3099 #define GET_MATCHER_IMPLEMENTATION
3100 #define GET_SUBTARGET_FEATURE_NAME
3101 #include "X86GenAsmMatcher.inc"