From 40466cc758823d3e50691dfd8c39e0f9c2cc08a1 Mon Sep 17 00:00:00 2001
From: Justin Holewinski
Date: Thu, 22 Sep 2011 16:45:37 +0000
Subject: [PATCH] PTX: Customize codegen passes in backend

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140306 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (not part of the commit message): restored the template
arguments on OwningPtr/getSubtarget, dropped a duplicated #include, and made
CGFT_ObjectFile return true instead of hitting llvm_unreachable.  Hunk
headers and the diffstat were recomputed; the blob ids on the "index" lines
are unchanged from the original submission.

 lib/Target/PTX/PTXTargetMachine.cpp | 297 ++++++++++++++++++++++++++++++++++++
 lib/Target/PTX/PTXTargetMachine.h   |  31 ++++
 2 files changed, 328 insertions(+)

diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 396010234fe..1e384ad0336 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -14,8 +14,31 @@
 #include "PTX.h"
 #include "PTXTargetMachine.h"
 #include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/Debug.h"
+
 
 using namespace llvm;
 
@@ -43,6 +66,25 @@ namespace {
     "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
   const char* DataLayout64 =
     "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+
+  // Copied from LLVMTargetMachine.cpp
+
+  /// printNoVerify - When PrintMachineCode is set, add the machine function
+  /// printer pass, writing to dbgs() with the given banner.
+  void printNoVerify(PassManagerBase &PM, const char *Banner) {
+    if (PrintMachineCode)
+      PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+  }
+
+  /// printAndVerify - Same printing behavior as printNoVerify.  The machine
+  /// verifier step below is commented out, so nothing is verified here.
+  void printAndVerify(PassManagerBase &PM,
+                      const char *Banner) {
+    printNoVerify(PM, Banner);
+
+    //if (VerifyMachineCode)
+    //  PM.add(createMachineVerifierPass(Banner));
+  }
 }
 
 // DataLayout and FrameLowering are filled with dummy data
@@ -82,3 +124,258 @@ bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
   PM.add(createPTXMFInfoExtract(*this, OptLevel));
   return false;
 }
+
+/// addPassesToEmitFile - Add the PTX codegen passes to PM, followed by an
+/// AsmPrinter that writes to Out.  Only CGFT_AssemblyFile and CGFT_Null are
+/// handled.  Returns true on failure (unsupported file type, or the common
+/// passes or the AsmPrinter could not be added) and false on success.
+bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+                                           formatted_raw_ostream &Out,
+                                           CodeGenFileType FileType,
+                                           CodeGenOpt::Level OptLevel,
+                                           bool DisableVerify) {
+  // This is mostly based on LLVMTargetMachine::addPassesToEmitFile
+
+  // Add common CodeGen passes.
+  MCContext *Context = 0;
+  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+    return true;
+  assert(Context != 0 && "Failed to get MCContext");
+
+  if (hasMCSaveTempLabels())
+    Context->setAllowTemporaryLabels(false);
+
+  const MCAsmInfo &MAI = *getMCAsmInfo();
+  const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+  OwningPtr<MCStreamer> AsmStreamer;
+
+  switch (FileType) {
+  default: return true;
+  case CGFT_AssemblyFile: {
+    MCInstPrinter *InstPrinter =
+      getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
+
+    // Create a code emitter if asked to show the encoding.
+    MCCodeEmitter *MCE = 0;
+    MCAsmBackend *MAB = 0;
+
+    MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+                                                  true, /* verbose asm */
+                                                  hasMCUseLoc(),
+                                                  hasMCUseCFI(),
+                                                  InstPrinter,
+                                                  MCE, MAB,
+                                                  false /* show MC encoding */);
+    AsmStreamer.reset(S);
+    break;
+  }
+  case CGFT_ObjectFile:
+    // Object file emission is not supported with PTX.  Report that through
+    // the return value, as the default case does, rather than aborting.
+    return true;
+  case CGFT_Null:
+    // The Null output is intended for use for performance analysis and testing,
+    // not real users.
+    AsmStreamer.reset(createNullStreamer(*Context));
+    break;
+  }
+
+  // MC Logging
+  //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
+  // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+  FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+  if (Printer == 0)
+    return true;
+
+  // If successful, createAsmPrinter took ownership of AsmStreamer.
+  AsmStreamer.take();
+
+  PM.add(Printer);
+
+  PM.add(createGCInfoDeleter());
+  return false;
+}
+
+/// addCommonCodeGenPasses - Add the IR-level and machine-level codegen passes
+/// to PM.  OutContext is set to the MCContext obtained from the
+/// MachineModuleInfo pass added here.  Returns true if addInstSelector
+/// reports failure, false otherwise.
+bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+                                              CodeGenOpt::Level OptLevel,
+                                              bool DisableVerify,
+                                              MCContext *&OutContext) {
+  // Add standard LLVM codegen passes.
+  // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some
+  // modifications for the PTX target.
+
+  // Standard LLVM-Level Passes.
+
+  // Basic AliasAnalysis support.
+  // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+  // BasicAliasAnalysis wins if they disagree. This is intended to help
+  // support "obvious" type-punning idioms.
+  PM.add(createTypeBasedAliasAnalysisPass());
+  PM.add(createBasicAliasAnalysisPass());
+
+  // Before running any passes, run the verifier to determine if the input
+  // coming from the front-end and/or optimizer is valid.
+  if (!DisableVerify)
+    PM.add(createVerifierPass());
+
+  // Run loop strength reduction before anything else.
+  if (OptLevel != CodeGenOpt::None) {
+    PM.add(createLoopStrengthReducePass(getTargetLowering()));
+    //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+  }
+
+  PM.add(createGCLoweringPass());
+
+  // Make sure that no unreachable blocks are instruction selected.
+  PM.add(createUnreachableBlockEliminationPass());
+
+  PM.add(createLowerInvokePass(getTargetLowering()));
+  // The lower invoke pass may create unreachable code. Remove it.
+  PM.add(createUnreachableBlockEliminationPass());
+
+  if (OptLevel != CodeGenOpt::None)
+    PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+  PM.add(createStackProtectorPass(getTargetLowering()));
+
+  addPreISel(PM, OptLevel);
+
+  //PM.add(createPrintFunctionPass("\n\n"
+  //                               "*** Final LLVM Code input to ISel ***\n",
+  //                               &dbgs()));
+
+  // All passes which modify the LLVM IR are now complete; run the verifier
+  // to ensure that the IR is valid.
+  if (!DisableVerify)
+    PM.add(createVerifierPass());
+
+  // Standard Lower-Level Passes.
+
+  // Install a MachineModuleInfo class, which is an immutable pass that holds
+  // all the per-module stuff we're generating, including MCContext.
+  MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+                                                 *getRegisterInfo(),
+                                    &getTargetLowering()->getObjFileLowering());
+  PM.add(MMI);
+  OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
+
+  // Set up a MachineFunction for the rest of CodeGen to work on.
+  PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
+  // Ask the target for an isel.
+  if (addInstSelector(PM, OptLevel))
+    return true;
+
+  // Print the instruction selected machine code...
+  printAndVerify(PM, "After Instruction Selection");
+
+  // Expand pseudo-instructions emitted by ISel.
+  PM.add(createExpandISelPseudosPass());
+
+  // Pre-ra tail duplication.
+  if (OptLevel != CodeGenOpt::None) {
+    PM.add(createTailDuplicatePass(true));
+    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+  }
+
+  // Optimize PHIs before DCE: removing dead PHI cycles may make more
+  // instructions dead.
+  if (OptLevel != CodeGenOpt::None)
+    PM.add(createOptimizePHIsPass());
+
+  // If the target requests it, assign local variables to stack slots relative
+  // to one another and simplify frame index references where possible.
+  PM.add(createLocalStackSlotAllocationPass());
+
+  if (OptLevel != CodeGenOpt::None) {
+    // With optimization, dead code should already be eliminated. However
+    // there is one known exception: lowered code for arguments that are only
+    // used by tail calls, where the tail calls reuse the incoming stack
+    // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+    PM.add(createDeadMachineInstructionElimPass());
+    printAndVerify(PM, "After codegen DCE pass");
+
+    PM.add(createMachineLICMPass());
+    PM.add(createMachineCSEPass());
+    PM.add(createMachineSinkingPass());
+    printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+    PM.add(createPeepholeOptimizerPass());
+    printAndVerify(PM, "After codegen peephole optimization pass");
+  }
+
+  // Run pre-ra passes.
+  if (addPreRegAlloc(PM, OptLevel))
+    printAndVerify(PM, "After PreRegAlloc passes");
+
+  // Perform register allocation.
+  PM.add(createPTXRegisterAllocator());
+  printAndVerify(PM, "After Register Allocation");
+
+  // Perform stack slot coloring and post-ra machine LICM.
+  if (OptLevel != CodeGenOpt::None) {
+    // FIXME: Re-enable coloring with register when it's capable of adding
+    // kill markers.
+    PM.add(createStackSlotColoringPass(false));
+
+    // FIXME: Post-RA LICM has asserts that fire on virtual registers.
+    // Run post-ra machine LICM to hoist reloads / remats.
+    //if (!DisablePostRAMachineLICM)
+    //  PM.add(createMachineLICMPass(false));
+
+    printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
+  }
+
+  // Run post-ra passes.
+  if (addPostRegAlloc(PM, OptLevel))
+    printAndVerify(PM, "After PostRegAlloc passes");
+
+  PM.add(createLowerSubregsPass());
+  printAndVerify(PM, "After LowerSubregs");
+
+  // Insert prolog/epilog code. Eliminate abstract frame index references...
+  PM.add(createPrologEpilogCodeInserter());
+  printAndVerify(PM, "After PrologEpilogCodeInserter");
+
+  // Run pre-sched2 passes.
+  if (addPreSched2(PM, OptLevel))
+    printAndVerify(PM, "After PreSched2 passes");
+
+  // Second pass scheduler.
+  if (OptLevel != CodeGenOpt::None) {
+    PM.add(createPostRAScheduler(OptLevel));
+    printAndVerify(PM, "After PostRAScheduler");
+  }
+
+  // Branch folding must be run after regalloc and prolog/epilog insertion.
+  if (OptLevel != CodeGenOpt::None) {
+    PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+    printNoVerify(PM, "After BranchFolding");
+  }
+
+  // Tail duplication.
+  if (OptLevel != CodeGenOpt::None) {
+    PM.add(createTailDuplicatePass(false));
+    printNoVerify(PM, "After TailDuplicate");
+  }
+
+  PM.add(createGCMachineCodeAnalysisPass());
+
+  //if (PrintGCInfo)
+  //  PM.add(createGCInfoPrinter(dbgs()));
+
+  if (OptLevel != CodeGenOpt::None) {
+    PM.add(createCodePlacementOptPass());
+    printNoVerify(PM, "After CodePlacementOpt");
+  }
+
+  if (addPreEmitPass(PM, OptLevel))
+    printNoVerify(PM, "After PreEmit passes");
+
+  return false;
+}
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 3cf081fc4e1..d5726b9866b 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -56,6 +56,37 @@ class PTXTargetMachine : public LLVMTargetMachine {
                                  CodeGenOpt::Level OptLevel);
     virtual bool addPostRegAlloc(PassManagerBase &PM,
                                  CodeGenOpt::Level OptLevel);
+
+    // We override this method to supply our own set of codegen passes.
+    virtual bool addPassesToEmitFile(PassManagerBase &,
+                                     formatted_raw_ostream &,
+                                     CodeGenFileType,
+                                     CodeGenOpt::Level,
+                                     bool = true);
+
+    // Emission of machine code through JITCodeEmitter is not supported.
+    virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+                                            JITCodeEmitter &,
+                                            CodeGenOpt::Level,
+                                            bool = true) {
+      return true;
+    }
+
+    // Emission of machine code through MCJIT is not supported.
+    virtual bool addPassesToEmitMC(PassManagerBase &,
+                                   MCContext *&,
+                                   raw_ostream &,
+                                   CodeGenOpt::Level,
+                                   bool = true) {
+      return true;
+    }
+
+  private:
+
+    // Used by addPassesToEmitFile: adds the codegen passes to the pass
+    // manager and hands back the MCContext through OutCtx.
+    bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+                                bool DisableVerify, MCContext *&OutCtx);
 }; // class PTXTargetMachine
 
 
-- 
2.11.0