From: Adam Nemet Date: Fri, 18 Mar 2016 00:27:29 +0000 (+0000) Subject: [Aarch64] Add pass LoopDataPrefetch for Cyclone X-Git-Tag: android-x86-7.1-r4~36420 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=bbb72f5976f13e5197c23e76563beb32db66cbbc;p=android-x86%2Fexternal-llvm.git [Aarch64] Add pass LoopDataPrefetch for Cyclone Summary: This wires up the pass for Cyclone but keeps it off for now because we need a few more TTIs. The getPrefetchMinStride value is not very well tuned right now but it works well with CFP2006/433.milc which motivated this. Tests will be added as part of the upcoming large-stride prefetching patch. Reviewers: t.p.northover Subscribers: llvm-commits, aemerson, hfinkel, rengolin Differential Revision: http://reviews.llvm.org/D17943 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263770 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 67b29830d89..791cb900a3f 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -101,6 +101,11 @@ static cl::opt EnableGlobalMerge("aarch64-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); +static cl::opt<bool> + EnableLoopDataPrefetch("aarch64-loop-data-prefetch", cl::Hidden, + cl::desc("Enable the loop data prefetch pass"), + cl::init(false)); + extern "C" void LLVMInitializeAArch64Target() { // Register the target. RegisterTargetMachine X(TheAArch64leTarget); @@ -236,6 +241,14 @@ void AArch64PassConfig::addIRPasses() { if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass()); + // Run LoopDataPrefetch for Cyclone (the only subtarget that defines a + // non-zero getPrefetchDistance). + // + // Run this before LSR to remove the multiplies involved in computing the + // pointer values N iterations ahead.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch) + addPass(createLoopDataPrefetchPass()); + TargetPassConfig::addIRPasses(); // Match interleaved memory accesses to ldN/stN intrinsics. diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 08b89dc3c67..5803e07dbcf 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -20,6 +20,11 @@ using namespace llvm; #define DEBUG_TYPE "aarch64tti" +static cl::opt<unsigned> CyclonePrefetchDistance( + "cyclone-prefetch-distance", + cl::desc("Number of instructions to prefetch ahead for Cyclone"), + cl::init(280), cl::Hidden); + /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. @@ -573,3 +578,15 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, } return true; } + +unsigned AArch64TTIImpl::getCacheLineSize() { + if (ST->isCyclone()) + return 64; + return BaseT::getCacheLineSize(); +} + +unsigned AArch64TTIImpl::getPrefetchDistance() { + if (ST->isCyclone()) + return CyclonePrefetchDistance; + return BaseT::getPrefetchDistance(); +} diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index ec58c4fe309..ad711f511e5 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -127,6 +127,10 @@ public: int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace); + + unsigned getCacheLineSize(); + + unsigned getPrefetchDistance(); /// @} };