From fe61fb1e1082c81653ed78efd6d471592a2e57ad Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 12 Feb 2010 01:30:21 +0000 Subject: [PATCH] Add a new pass on machine instructions to optimize away PHI cycles that reduce down to a single value. InstCombine already does this transformation but DAG legalization may introduce new opportunities. This has turned out to be important for ARM where 64-bit values are split up during type legalization: InstCombine is not able to remove the PHI cycles on the 64-bit values but the separate 32-bit values can be optimized. I measured the compile time impact of this (running llc on 176.gcc) and it was not significant. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95951 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/Passes.h | 4 + lib/CodeGen/CMakeLists.txt | 1 + lib/CodeGen/LLVMTargetMachine.cpp | 1 + lib/CodeGen/OptimizePHIs.cpp | 141 ++++++++++++++++++++++++++++ test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll | 34 +++++++ 5 files changed, 181 insertions(+) create mode 100644 lib/CodeGen/OptimizePHIs.cpp create mode 100644 test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 7e0da3f6a8d..dbc73cbc545 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -174,6 +174,10 @@ namespace llvm { /// optimization by increasing uses of extended values. FunctionPass *createOptimizeExtsPass(); + /// createOptimizePHIsPass - This pass optimizes machine instruction PHIs + /// to take advantage of opportunities created during DAG legalization. + FunctionPass *createOptimizePHIsPass(); + /// createStackSlotColoringPass - This pass performs stack slot coloring. FunctionPass *createStackSlotColoringPass(bool); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 9fcbea9437f..46d3268259c 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_library(LLVMCodeGen ObjectCodeEmitter.cpp OcamlGC.cpp OptimizeExts.cpp + OptimizePHIs.cpp PHIElimination.cpp Passes.cpp PostRASchedulerList.cpp diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 40e015067f4..3c12afaacf6 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -299,6 +299,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { PM.add(createOptimizeExtsPass()); + PM.add(createOptimizePHIsPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); if (!DisableMachineSink) diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp new file mode 100644 index 00000000000..5b3fa6a68ab --- /dev/null +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -0,0 +1,141 @@ +//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass optimizes machine instruction PHIs to take advantage of +// opportunities created during DAG legalization. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phi-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Function.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); + +namespace { + class OptimizePHIs : public MachineFunctionPass { + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification + OptimizePHIs() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + bool IsSingleValuePHICycle(const MachineInstr *MI, unsigned &SingleValReg, + SmallSet &RegsInCycle); + bool ReplacePHICycles(MachineBasicBlock &MBB); + }; +} + +char OptimizePHIs::ID = 0; +static RegisterPass +X("opt-phis", "Optimize machine instruction PHIs"); + +FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } + +bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + MRI = &Fn.getRegInfo(); + TII = Fn.getTarget().getInstrInfo(); + + // Find PHI cycles that can be replaced by a single value. InstCombine + // does this, but DAG legalization may introduce new opportunities, e.g., + // when i64 values are split up for 32-bit targets. + bool Changed = false; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= ReplacePHICycles(*I); + + return Changed; +} + +/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands +/// are copies of SingleValReg, possibly via copies through other PHIs. If +/// SingleValReg is zero on entry, it is set to the register with the single +/// non-copy value. RegsInCycle is a set used to keep track of the PHIs that +/// have been scanned. +bool OptimizePHIs::IsSingleValuePHICycle(const MachineInstr *MI, + unsigned &SingleValReg, + SmallSet &RegsInCycle) { + assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + + // See if we already saw this register. + if (!RegsInCycle.insert(DstReg)) + return true; + + // Don't scan crazily complex things. + if (RegsInCycle.size() == 16) + return false; + + // Scan the PHI operands. + for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (SrcReg == DstReg) + continue; + const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + // Skip over register-to-register moves. + unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx; + if (SrcMI && + TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) && + SrcSubIdx == 0 && DstSubIdx == 0 && + TargetRegisterInfo::isVirtualRegister(MvSrcReg)) + SrcMI = MRI->getVRegDef(MvSrcReg); + if (!SrcMI) + return false; + + if (SrcMI->isPHI()) { + if (!IsSingleValuePHICycle(SrcMI, SingleValReg, RegsInCycle)) + return false; + } else { + // Fail if there is more than one non-phi/non-move register. + if (SingleValReg != 0) + return false; + SingleValReg = SrcReg; + } + } + return true; +} + +/// ReplacePHICycles - Find PHI cycles that can be replaced by a single +/// value and remove them. +bool OptimizePHIs::ReplacePHICycles(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator + MII = MBB.begin(), E = MBB.end(); MII != E; ) { + MachineInstr *MI = &*MII++; + if (!MI->isPHI()) + break; + + unsigned SingleValReg = 0; + SmallSet RegsInCycle; + if (IsSingleValuePHICycle(MI, SingleValReg, RegsInCycle) && + SingleValReg != 0) { + MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + MI->eraseFromParent(); + ++NumPHICycles; + Changed = true; + } + } + return Changed; +} diff --git a/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll new file mode 100644 index 00000000000..997257885e6 --- /dev/null +++ b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" + +define arm_apcscc i32 @test(i32 %n) nounwind { +; CHECK: test: +; CHECK-NOT: mov +; CHECK: return +entry: + %0 = icmp eq i32 %n, 1 ; [#uses=1] + br i1 %0, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %tmp = add i32 %n, -1 ; [#uses=1] + br label %bb + +bb: ; preds = %bb.nph, %bb + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; [#uses=1] + %u.05 = phi i64 [ undef, %bb.nph ], [ %ins, %bb ] ; [#uses=1] + %1 = tail call arm_apcscc i32 @f() nounwind ; [#uses=1] + %tmp4 = zext i32 %1 to i64 ; [#uses=1] + %mask = and i64 %u.05, -4294967296 ; [#uses=1] + %ins = or i64 %tmp4, %mask ; [#uses=2] + tail call arm_apcscc void @g(i64 %ins) nounwind + %indvar.next = add i32 %indvar, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %tmp ; [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret i32 undef +} + +declare arm_apcscc i32 @f() + +declare arm_apcscc void @g(i64) -- 2.11.0