OSDN Git Service

[ORC] Add support for multithreaded compiles to LLJIT and LLLazyJIT.
author Lang Hames <lhames@gmail.com>
Wed, 26 Sep 2018 02:39:42 +0000 (02:39 +0000)
committer Lang Hames <lhames@gmail.com>
Wed, 26 Sep 2018 02:39:42 +0000 (02:39 +0000)
LLJIT and LLLazyJIT can now be constructed with an optional NumCompileThreads
argument. If this is non-zero then a thread-pool will be created with the
given number of threads, and compile tasks will be dispatched to the thread
pool.

To enable testing of this feature, two new flags are added to lli:

(1) -compile-threads=N (N = 0 by default) controls the number of compile threads
to use.

(2) -thread-entry can be used to execute code on additional threads. For each
-thread-entry argument supplied (multiple are allowed) a new thread will be
created and the given symbol called. These additional thread entry points are
called after static constructors are run, but before main.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@343058 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
include/llvm/ExecutionEngine/Orc/LLJIT.h
lib/ExecutionEngine/Orc/LLJIT.cpp
test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll [new file with mode: 0644]
tools/lli/lli.cpp

index f56b403..6a97316 100644 (file)
@@ -67,6 +67,9 @@ public:
   SubtargetFeatures &getFeatures() { return Features; }
   TargetOptions &getOptions() { return Options; }
 
+  Triple& getTargetTriple() { return TT; }
+  const Triple& getTargetTriple() const { return TT; }
+
 private:
   Triple TT;
   std::string Arch;
index 57b991f..4c840da 100644 (file)
@@ -22,7 +22,7 @@
 #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
 #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ThreadPool.h"
 
 namespace llvm {
 namespace orc {
@@ -30,11 +30,19 @@ namespace orc {
 /// A pre-fabricated ORC JIT stack that can serve as an alternative to MCJIT.
 class LLJIT {
 public:
+
+  /// Destruct this instance. If a multi-threaded instance, waits for all
+  /// compile threads to complete.
+  ~LLJIT();
+
   /// Create an LLJIT instance.
+  /// If NumCompileThreads is not equal to zero, creates a multi-threaded
+  /// LLJIT with the given number of compile threads.
   static Expected<std::unique_ptr<LLJIT>>
-  Create(std::unique_ptr<TargetMachine> TM, DataLayout DL);
+  Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+         unsigned NumCompileThreads = 0);
 
-  /// Returns a reference to the ExecutionSession for this JIT instance.
+  /// Returns the ExecutionSession for this instance.
   ExecutionSession &getExecutionSession() { return *ES; }
 
   /// Returns a reference to the JITDylib representing the JIT'd main program.
@@ -91,9 +99,15 @@ public:
   RTDyldObjectLinkingLayer2 &getObjLinkingLayer() { return ObjLinkingLayer; }
 
 protected:
+
+  /// Create an LLJIT instance with a single compile thread.
   LLJIT(std::unique_ptr<ExecutionSession> ES, std::unique_ptr<TargetMachine> TM,
         DataLayout DL);
 
+  /// Create an LLJIT instance with multiple compile threads.
+  LLJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+        DataLayout DL, unsigned NumCompileThreads);
+
   std::unique_ptr<RuntimeDyld::MemoryManager> getMemoryManager(VModuleKey K);
 
   std::string mangle(StringRef UnmangledName);
@@ -105,8 +119,8 @@ protected:
   std::unique_ptr<ExecutionSession> ES;
   JITDylib &Main;
 
-  std::unique_ptr<TargetMachine> TM;
   DataLayout DL;
+  std::unique_ptr<ThreadPool> CompileThreads;
 
   RTDyldObjectLinkingLayer2 ObjLinkingLayer;
   IRCompileLayer2 CompileLayer;
@@ -118,9 +132,13 @@ protected:
 /// compilation of LLVM IR.
 class LLLazyJIT : public LLJIT {
 public:
+
   /// Create an LLLazyJIT instance.
+  /// If NumCompileThreads is not equal to zero, creates a multi-threaded
+  /// LLLazyJIT with the given number of compile threads.
   static Expected<std::unique_ptr<LLLazyJIT>>
-  Create(std::unique_ptr<TargetMachine> TM, DataLayout DL);
+  Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+         unsigned NumCompileThreads = 0);
 
   /// Set an IR transform (e.g. pass manager pipeline) to run on each function
   /// when it is compiled.
@@ -137,11 +155,20 @@ public:
   }
 
 private:
+
+  // Create a single-threaded LLLazyJIT instance.
   LLLazyJIT(std::unique_ptr<ExecutionSession> ES,
             std::unique_ptr<TargetMachine> TM, DataLayout DL,
             std::unique_ptr<JITCompileCallbackManager> CCMgr,
             std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
 
+  // Create a multi-threaded LLLazyJIT instance.
+  LLLazyJIT(std::unique_ptr<ExecutionSession> ES,
+            JITTargetMachineBuilder JTMB, DataLayout DL,
+            unsigned NumCompileThreads,
+            std::unique_ptr<JITCompileCallbackManager> CCMgr,
+            std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
+
   std::unique_ptr<JITCompileCallbackManager> CCMgr;
   std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder;
 
index c79c47a..ecdfd86 100644 (file)
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/Mangler.h"
 
+namespace {
+
+  // A SimpleCompiler that owns its TargetMachine.
+  class TMOwningSimpleCompiler : public llvm::orc::SimpleCompiler {
+  public:
+    TMOwningSimpleCompiler(std::unique_ptr<llvm::TargetMachine> TM)
+      : llvm::orc::SimpleCompiler(*TM), TM(std::move(TM)) {}
+  private:
+    // FIXME: shared because std::functions (and thus
+    // IRCompileLayer2::CompileFunction) are not moveable.
+    std::shared_ptr<llvm::TargetMachine> TM;
+  };
+
+} // end anonymous namespace
+
 namespace llvm {
 namespace orc {
 
+LLJIT::~LLJIT() {
+  if (CompileThreads)
+    CompileThreads->wait();
+}
+
 Expected<std::unique_ptr<LLJIT>>
-LLJIT::Create(std::unique_ptr<TargetMachine> TM, DataLayout DL) {
+LLJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+              unsigned NumCompileThreads) {
+
+  if (NumCompileThreads == 0) {
+    // If NumCompileThreads == 0 then create a single-threaded LLJIT instance.
+    auto TM = JTMB.createTargetMachine();
+    if (!TM)
+      return TM.takeError();
+    return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
+                                            std::move(*TM), std::move(DL)));
+  }
+
   return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
-                                          std::move(TM), std::move(DL)));
+                                          std::move(JTMB), std::move(DL),
+                                          NumCompileThreads));
 }
 
 Error LLJIT::defineAbsolute(StringRef Name, JITEvaluatedSymbol Sym) {
@@ -52,12 +84,35 @@ Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
 LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES,
              std::unique_ptr<TargetMachine> TM, DataLayout DL)
     : ES(std::move(ES)), Main(this->ES->createJITDylib("main")),
-      TM(std::move(TM)), DL(std::move(DL)),
+      DL(std::move(DL)),
       ObjLinkingLayer(*this->ES,
                       [this](VModuleKey K) { return getMemoryManager(K); }),
-      CompileLayer(*this->ES, ObjLinkingLayer, SimpleCompiler(*this->TM)),
+      CompileLayer(*this->ES, ObjLinkingLayer, TMOwningSimpleCompiler(std::move(TM))),
       CtorRunner(Main), DtorRunner(Main) {}
 
+LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES,
+             JITTargetMachineBuilder JTMB, DataLayout DL,
+             unsigned NumCompileThreads)
+    : ES(std::move(ES)), Main(this->ES->createJITDylib("main")),
+      DL(std::move(DL)),
+      ObjLinkingLayer(*this->ES,
+                      [this](VModuleKey K) { return getMemoryManager(K); }),
+      CompileLayer(*this->ES, ObjLinkingLayer, MultiThreadedSimpleCompiler(std::move(JTMB))),
+      CtorRunner(Main), DtorRunner(Main) {
+  assert(NumCompileThreads != 0 &&
+         "Multithreaded LLJIT instance can not be created with 0 threads");
+
+  CompileThreads = llvm::make_unique<ThreadPool>(NumCompileThreads);
+  this->ES->setDispatchMaterialization([this](JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
+      // FIXME: Switch to move capture once we have c++14.
+      auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
+      auto Work = [SharedMU, &JD]() {
+        SharedMU->doMaterialize(JD);
+      };
+      CompileThreads->async(std::move(Work));
+    });
+}
+
 std::unique_ptr<RuntimeDyld::MemoryManager>
 LLJIT::getMemoryManager(VModuleKey K) {
   return llvm::make_unique<SectionMemoryManager>();
@@ -90,10 +145,11 @@ void LLJIT::recordCtorDtors(Module &M) {
 }
 
 Expected<std::unique_ptr<LLLazyJIT>>
-LLLazyJIT::Create(std::unique_ptr<TargetMachine> TM, DataLayout DL) {
+  LLLazyJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+                    unsigned NumCompileThreads) {
   auto ES = llvm::make_unique<ExecutionSession>();
 
-  const Triple &TT = TM->getTargetTriple();
+  const Triple &TT = JTMB.getTargetTriple();
 
   auto CCMgr = createLocalCompileCallbackManager(TT, *ES, 0);
   if (!CCMgr)
@@ -107,9 +163,18 @@ LLLazyJIT::Create(std::unique_ptr<TargetMachine> TM, DataLayout DL) {
         std::string("No indirect stubs manager builder for ") + TT.str(),
         inconvertibleErrorCode());
 
-  return std::unique_ptr<LLLazyJIT>(
-      new LLLazyJIT(std::move(ES), std::move(TM), std::move(DL),
+  if (NumCompileThreads == 0) {
+    auto TM = JTMB.createTargetMachine();
+    if (!TM)
+      return TM.takeError();
+    return std::unique_ptr<LLLazyJIT>(
+      new LLLazyJIT(std::move(ES), std::move(*TM), std::move(DL),
                     std::move(CCMgr), std::move(ISMBuilder)));
+  }
+
+  return std::unique_ptr<LLLazyJIT>(
+      new LLLazyJIT(std::move(ES), std::move(JTMB), std::move(DL),
+                    NumCompileThreads, std::move(CCMgr), std::move(ISMBuilder)));
 }
 
 Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) {
@@ -135,5 +200,14 @@ LLLazyJIT::LLLazyJIT(
       CODLayer(*this->ES, TransformLayer, *this->CCMgr, std::move(ISMBuilder)) {
 }
 
+LLLazyJIT::LLLazyJIT(
+    std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+    DataLayout DL, unsigned NumCompileThreads, std::unique_ptr<JITCompileCallbackManager> CCMgr,
+    std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder)
+    : LLJIT(std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads),
+      CCMgr(std::move(CCMgr)), TransformLayer(*this->ES, CompileLayer),
+      CODLayer(*this->ES, TransformLayer, *this->CCMgr, std::move(ISMBuilder)) {
+}
+
 } // End namespace orc.
 } // End namespace llvm.
diff --git a/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll b/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll
new file mode 100644 (file)
index 0000000..a53f23b
--- /dev/null
@@ -0,0 +1,18 @@
+; RUN: lli -jit-kind=orc-lazy -compile-threads=5 -thread-entry hello %s | FileCheck %s
+;
+; CHECK: Hello
+
+@.str = private unnamed_addr constant [7 x i8] c"Hello\0A\00", align 1
+
+define void @hello() {
+entry:
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0))
+  ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+  ret i32 0
+}
index 2312d77..49d5db1 100644 (file)
@@ -97,6 +97,17 @@ namespace {
                                            "orc-lazy",
                                            "Orc-based lazy JIT.")));
 
+  cl::opt<unsigned>
+  LazyJITCompileThreads("compile-threads",
+                        cl::desc("Choose the number of compile threads "
+                                 "(jit-kind=orc-lazy only)"),
+                        cl::init(0));
+
+  cl::list<std::string>
+  ThreadEntryPoints("thread-entry",
+                    cl::desc("calls the given entry-point on a new thread "
+                             "(jit-kind=orc-lazy only)"));
+
   // The MCJIT supports building for a target address space separate from
   // the JIT compilation process. Use a forked process and a copying
   // memory manager with IPC to execute using this functionality.
@@ -363,6 +374,19 @@ int main(int argc, char **argv, char * const *envp) {
 
   if (UseJITKind == JITKind::OrcLazy)
     return runOrcLazyJIT(argv[0]);
+  else {
+    // Make sure nobody used an orc-lazy specific option accidentally.
+
+    if (LazyJITCompileThreads != 0) {
+      errs() << "-compile-threads requires -jit-kind=orc-lazy\n";
+      exit(1);
+    }
+
+    if (!ThreadEntryPoints.empty()) {
+      errs() << "-thread-entry requires -jit-kind=orc-lazy\n";
+      exit(1);
+    }
+  }
 
   LLVMContext Context;
 
@@ -745,11 +769,11 @@ int runOrcLazyJIT(const char *ProgName) {
     reportError(Err, ProgName);
 
   const auto &TT = MainModule.getModule()->getTargetTriple();
-  orc::JITTargetMachineBuilder TMD =
+  orc::JITTargetMachineBuilder JTMB =
       TT.empty() ? ExitOnErr(orc::JITTargetMachineBuilder::detectHost())
                  : orc::JITTargetMachineBuilder(Triple(TT));
 
-  TMD.setArch(MArch)
+  JTMB.setArch(MArch)
       .setCPU(getCPUStr())
       .addFeatures(getFeatureList())
       .setRelocationModel(RelocModel.getNumOccurrences()
@@ -758,9 +782,13 @@ int runOrcLazyJIT(const char *ProgName) {
       .setCodeModel(CMModel.getNumOccurrences()
                         ? Optional<CodeModel::Model>(CMModel)
                         : None);
-  auto TM = ExitOnErr(TMD.createTargetMachine());
-  auto DL = TM->createDataLayout();
-  auto J = ExitOnErr(orc::LLLazyJIT::Create(std::move(TM), DL));
+  DataLayout DL("");
+  {
+    // Create a throwaway TargetMachine to get the data layout.
+    auto TM = ExitOnErr(JTMB.createTargetMachine());
+    DL = TM->createDataLayout();
+  }
+  auto J = ExitOnErr(orc::LLLazyJIT::Create(std::move(JTMB), DL, LazyJITCompileThreads));
 
   auto Dump = createDebugDumper();
 
@@ -807,6 +835,16 @@ int runOrcLazyJIT(const char *ProgName) {
   // Run any static constructors.
   ExitOnErr(J->runConstructors());
 
+  // Run any -thread-entry points.
+  std::vector<std::thread> AltEntryThreads;
+  for (auto &ThreadEntryPoint : ThreadEntryPoints) {
+    auto EntryPointSym = ExitOnErr(J->lookup(ThreadEntryPoint));
+    typedef void (*EntryPointPtr)();
+    auto EntryPoint =
+      reinterpret_cast<EntryPointPtr>(static_cast<uintptr_t>(EntryPointSym.getAddress()));
+    AltEntryThreads.push_back(std::thread([EntryPoint]() { EntryPoint(); }));
+  }
+
   // Run main.
   auto MainSym = ExitOnErr(J->lookup("main"));
   typedef int (*MainFnPtr)(int, const char *[]);
@@ -817,8 +855,12 @@ int runOrcLazyJIT(const char *ProgName) {
       reinterpret_cast<MainFnPtr>(static_cast<uintptr_t>(MainSym.getAddress()));
   auto Result = Main(ArgV.size(), (const char **)ArgV.data());
 
-  ExitOnErr(J->runDestructors());
+  // Wait for -thread-entry threads.
+  for (auto &AltEntryThread : AltEntryThreads)
+    AltEntryThread.join();
 
+  // Run destructors.
+  ExitOnErr(J->runDestructors());
   CXXRuntimeOverrides.runDestructors();
 
   return Result;