//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MC_MCASMLEXER_H
-#define LLVM_MC_MCASMLEXER_H
+#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
+#define LLVM_MC_MCPARSER_MCASMLEXER_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
namespace llvm {
-class MCAsmLexer;
-class MCInst;
-class Target;
-/// AsmToken - Target independent representation for an assembler token.
+/// Target independent representation for an assembler token.
class AsmToken {
public:
enum TokenKind {
// Integer values.
Integer,
+ BigNum, // integer value larger than 64 bits; read it with getAPIntVal()
// Real values.
Real,
- // Register values (stored in IntVal). Only used by MCTargetAsmLexer.
- Register,
-
// No-value.
EndOfStatement,
Colon,
+ Space,
Plus, Minus, Tilde,
Slash, // '/'
BackSlash, // '\'
Greater, GreaterEqual, GreaterGreater, At
};
+private:
TokenKind Kind;
/// A reference to the entire token contents; this is always a pointer into
/// a memory buffer owned by the source manager.
StringRef Str;
- int64_t IntVal;
+ APInt IntVal; // numeric value returned by getIntVal() / getAPIntVal()
public:
AsmToken() {} // NOTE(review): Kind has no initializer here, so it is left unset
- AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
- : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
+ AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
+ : Kind(Kind), Str(Str), IntVal(IntVal) {}
+ AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
+ : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {} // 64-bit, signed APInt
TokenKind getKind() const { return Kind; }
bool is(TokenKind K) const { return Kind == K; }
bool isNot(TokenKind K) const { return Kind != K; }
SMLoc getLoc() const;
+ SMLoc getEndLoc() const;
+ SMRange getLocRange() const;
- /// getStringContents - Get the contents of a string token (without quotes).
+ /// Get the contents of a string token (without quotes).
StringRef getStringContents() const {
assert(Kind == String && "This token isn't a string!");
return Str.slice(1, Str.size() - 1); // drop the first and last character
}
- /// getIdentifier - Get the identifier string for the current token, which
- /// should be an identifier or a string. This gets the portion of the string
- /// which should be used as the identifier, e.g., it does not include the
- /// quotes on strings.
+ /// Get the identifier string for the current token, which should be an
+ /// identifier or a string. This gets the portion of the string which should
+ /// be used as the identifier, e.g., it does not include the quotes on
+ /// strings.
StringRef getIdentifier() const {
if (Kind == Identifier)
return getString();
return getStringContents(); // asserts that Kind == String
}
- /// getString - Get the string for the current token, this includes all
- /// characters (for example, the quotes on strings) in the token.
+ /// Get the string for the current token, this includes all characters (for
+ /// example, the quotes on strings) in the token.
///
/// The returned StringRef points into the source manager's memory buffer, and
/// is safe to store across calls to Lex().
// as a single token, then diagnose as an invalid number).
int64_t getIntVal() const {
assert(Kind == Integer && "This token isn't an integer!");
- return IntVal;
+ return IntVal.getZExtValue(); // uint64_t result, converted to int64_t
}
- /// getRegVal - Get the register number for the current token, which should
- /// be a register.
- unsigned getRegVal() const {
- assert(Kind == Register && "This token isn't a register!");
- return static_cast<unsigned>(IntVal);
+ APInt getAPIntVal() const {
+ assert((Kind == Integer || Kind == BigNum) &&
+ "This token isn't an integer!");
+ return IntVal; // returned by value (a copy of the stored APInt)
}
};
-/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
-/// assembly lexers.
+/// Generic assembler lexer interface, for use by target specific assembly
+/// lexers.
class MCAsmLexer {
/// The current token, stored in the base class for faster access.
- AsmToken CurTok;
+ SmallVector<AsmToken, 1> CurTok; // CurTok[0] is the current token
/// The location and description of the current error
SMLoc ErrLoc;
std::string Err;
- MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT
- void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT
+ MCAsmLexer(const MCAsmLexer &) = delete;
+ void operator=(const MCAsmLexer &) = delete;
protected: // Can only create subclasses.
const char *TokStart;
+ bool SkipSpace; // written by setSkipSpace()
+ bool AllowAtInIdentifier; // accessed via get/setAllowAtInIdentifier()
MCAsmLexer();
virtual AsmToken LexToken() = 0; // called by Lex() when CurTok runs empty
- void SetError(const SMLoc &errLoc, const std::string &err) {
+ void SetError(SMLoc errLoc, const std::string &err) {
ErrLoc = errLoc;
Err = err;
}
public:
virtual ~MCAsmLexer();
- /// Lex - Consume the next token from the input stream and return it.
+ /// Consume the next token from the input stream and return it.
///
/// The lexer will continuously return the end-of-file token once the end of
/// the main input file has been reached.
const AsmToken &Lex() {
- return CurTok = LexToken();
+ assert(!CurTok.empty());
+ CurTok.erase(CurTok.begin()); // discard the current token
+ if (CurTok.empty())
+ CurTok.emplace_back(LexToken()); // nothing buffered: lex a fresh token
+ return CurTok.front();
+ }
+
+ void UnLex(AsmToken const &Token) {
+ CurTok.insert(CurTok.begin(), Token); // Token becomes the current token
}
virtual StringRef LexUntilEndOfStatement() = 0;
- /// getLoc - Get the current source location.
+ /// Get the current source location.
SMLoc getLoc() const;
- /// getTok - Get the current (last) lexed token.
- const AsmToken &getTok() {
- return CurTok;
+ /// Get the current (last) lexed token.
+ const AsmToken &getTok() const {
+ return CurTok[0];
}
- /// getErrLoc - Get the current error location
- const SMLoc &getErrLoc() {
+ /// Look ahead at the next token to be lexed.
+ const AsmToken peekTok(bool ShouldSkipSpace = true) {
+ AsmToken Tok;
+
+ MutableArrayRef<AsmToken> Buf(Tok); // one-element view over Tok
+ size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
+
+ assert(ReadCount == 1);
+ (void)ReadCount; // ReadCount is otherwise unused when the assert is compiled out
+
+ return Tok;
+ }
+
+ /// Look ahead an arbitrary number of tokens.
+ virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
+ bool ShouldSkipSpace = true) = 0;
+
+ /// Get the current error location
+ SMLoc getErrLoc() {
return ErrLoc;
}
- /// getErr - Get the current error string
+ /// Get the current error string
const std::string &getErr() {
return Err;
}
- /// getKind - Get the kind of current token.
- AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
+ /// Get the kind of current token.
+ AsmToken::TokenKind getKind() const { return getTok().getKind(); }
+
+ /// Check if the current token has kind \p K.
+ bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
+
+ /// Check if the current token does not have kind \p K.
+ bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
- /// is - Check if the current token has kind \arg K.
- bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
+ /// Set whether spaces should be ignored by the lexer
+ void setSkipSpace(bool val) { SkipSpace = val; }
- /// isNot - Check if the current token has kind \arg K.
- bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+ bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
+ void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
};
} // End llvm namespace