From a59e8779964992457ada1af6a5f48068523cfd42 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Sun, 21 Jun 2009 07:19:10 +0000
Subject: [PATCH] some baby steps.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73848 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (this section sits after the "---" marker and is not part of the
commit message or of any hunk):

Reconstruction
  * This patch had been flattened onto a handful of physical lines; the line
    structure has been restored.  Hunk sizes and the diffstat below were
    re-counted against the restored text and are unchanged.
  * Text inside angle brackets had been stripped.  AsmLexer.h carried two
    empty "#include" directives; they are restored as <string> and <cassert>
    because the header uses std::string and assert().
  * Presumably from the same stripping: the context line "static cl::opt" is
    restored as "static cl::opt<std::string>", and the two driver strings that
    both read "<>\n" are restored as "<<unknown token>>\n" and "<<error>>\n".
    TODO: confirm these three against the upstream revision.
  * Leading indentation was re-created in the usual two-space style; original
    trailing whitespace could not be recovered.

Observations on the code (left exactly as submitted)
  * AsmLexer's constructor does not set CurKind or CurIntVal, so getKind()
    reads an indeterminate value if it is called before the first Lex().
  * AsmLexer::getLoc() is declared in the header but not defined anywhere in
    this patch.
  * LexToken() calls itself once for every whitespace or NUL character it
    skips, so stack depth grows with the length of a whitespace run unless the
    compiler turns the tail call into a loop.
  * AsmLexer.cpp uses EOF without including <cstdio> itself.
  * AssembleInput() still returns 1 unconditionally after dumping the tokens.

 tools/llvm-mc/AsmLexer.cpp | 87 +++++++++++++++++++++++++++++++++++++++++++
 tools/llvm-mc/AsmLexer.h   | 92 ++++++++++++++++++++++++++++++++++++++++++++++
 tools/llvm-mc/llvm-mc.cpp  | 24 +++++++++++-
 3 files changed, 201 insertions(+), 2 deletions(-)
 create mode 100644 tools/llvm-mc/AsmLexer.cpp
 create mode 100644 tools/llvm-mc/AsmLexer.h

diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp
new file mode 100644
index 00000000000..da86465d7fe
--- /dev/null
+++ b/tools/llvm-mc/AsmLexer.cpp
@@ -0,0 +1,87 @@
+//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AsmLexer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+using namespace llvm;
+
+AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
+  CurBuffer = 0;
+  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+  CurPtr = CurBuf->getBufferStart();
+  TokStart = 0;
+}
+
+void AsmLexer::PrintError(const char *Loc, const std::string &Msg) const {
+  SrcMgr.PrintError(SMLoc::getFromPointer(Loc), Msg);
+}
+
+void AsmLexer::PrintError(SMLoc Loc, const std::string &Msg) const {
+  SrcMgr.PrintError(Loc, Msg);
+}
+
+int AsmLexer::getNextChar() {
+  char CurChar = *CurPtr++;
+  switch (CurChar) {
+  default:
+    return (unsigned char)CurChar;
+  case 0: {
+    // A nul character in the stream is either the end of the current buffer or
+    // a random nul in the file. Disambiguate that here.
+    if (CurPtr-1 != CurBuf->getBufferEnd())
+      return 0; // Just whitespace.
+
+    // If this is the end of an included file, pop the parent file off the
+    // include stack.
+    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+    if (ParentIncludeLoc != SMLoc()) {
+      CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+      CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+      CurPtr = ParentIncludeLoc.getPointer();
+      return getNextChar();
+    }
+
+    // Otherwise, return end of file.
+    --CurPtr; // Another call to lex will return EOF again.
+    return EOF;
+  }
+  }
+}
+
+asmtok::TokKind AsmLexer::LexToken() {
+  TokStart = CurPtr;
+  // This always consumes at least one character.
+  int CurChar = getNextChar();
+
+  switch (CurChar) {
+  default:
+    // Handle letters: [a-zA-Z_]
+//    if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
+//      return LexIdentifier();
+
+    // Unknown character, emit an error.
+    return asmtok::Error;
+  case EOF: return asmtok::Eof;
+  case 0:
+  case ' ':
+  case '\t':
+  case '\n':
+  case '\r':
+    // Ignore whitespace.
+    return LexToken();
+  case ':': return asmtok::Colon;
+  case '+': return asmtok::Plus;
+  case '-': return asmtok::Minus;
+  }
+}
\ No newline at end of file
diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h
new file mode 100644
index 00000000000..08e6f9c6eeb
--- /dev/null
+++ b/tools/llvm-mc/AsmLexer.h
@@ -0,0 +1,92 @@
+//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class declares the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASMLEXER_H
+#define ASMLEXER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+#include <cassert>
+
+namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+class SMLoc;
+
+namespace asmtok {
+  enum TokKind {
+    // Markers
+    Eof, Error,
+
+    Identifier,
+    IntVal,
+
+
+    Colon,
+    Plus,
+    Minus
+  };
+}
+
+/// AsmLexer - Lexer class for assembly files.
+class AsmLexer {
+  SourceMgr &SrcMgr;
+
+  const char *CurPtr;
+  const MemoryBuffer *CurBuf;
+
+  // Information about the current token.
+  const char *TokStart;
+  asmtok::TokKind CurKind;
+  std::string CurStrVal; // This is valid for Identifier.
+  int64_t CurIntVal;
+
+  /// CurBuffer - This is the current buffer index we're lexing from as managed
+  /// by the SourceMgr object.
+  int CurBuffer;
+
+public:
+  AsmLexer(SourceMgr &SrcMgr);
+  ~AsmLexer() {}
+
+  asmtok::TokKind Lex() {
+    return CurKind = LexToken();
+  }
+
+  asmtok::TokKind getKind() const { return CurKind; }
+
+  const std::string &getCurStrVal() const {
+    assert(CurKind == asmtok::Identifier &&
+           "This token doesn't have a string value");
+    return CurStrVal;
+  }
+  int64_t getCurIntVal() const {
+    assert(CurKind == asmtok::IntVal && "This token isn't an integer");
+    return CurIntVal;
+  }
+
+  SMLoc getLoc() const;
+
+  void PrintError(const char *Loc, const std::string &Msg) const;
+  void PrintError(SMLoc Loc, const std::string &Msg) const;
+
+private:
+  int getNextChar();
+
+  /// LexToken - Read the next token and return its code.
+  asmtok::TokKind LexToken();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 74e419ccfca..83642988e37 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/System/Signals.h"
+#include "AsmLexer.h"
 using namespace llvm;

 static cl::opt<std::string>
@@ -63,10 +64,29 @@ static int AssembleInput(const char *ProgName) {
   // Record the location of the include directories so that the lexer can find
   // it later.
   SrcMgr.setIncludeDirs(IncludeDirs);
+
+

-  //TGParser Parser(SrcMgr);
-  //return Parser.ParseFile();
+  AsmLexer Lexer(SrcMgr);
+  asmtok::TokKind Tok = Lexer.Lex();
+  while (Tok != asmtok::Eof) {
+    switch (Tok) {
+    default: outs() << "<<unknown token>>\n"; break;
+    case asmtok::Error: outs() << "<<error>>\n"; break;
+    case asmtok::Identifier:
+      outs() << "identifier: " << Lexer.getCurStrVal() << '\n';
+      break;
+    case asmtok::IntVal:
+      outs() << "int: " << Lexer.getCurIntVal() << '\n';
+      break;
+    case asmtok::Colon: outs() << "Colon\n"; break;
+    case asmtok::Plus: outs() << "Plus\n"; break;
+    case asmtok::Minus: outs() << "Minus\n"; break;
+    }
+
+    Tok = Lexer.Lex();
+  }

   return 1;
 }

-- 
2.11.0