From aab3c8c7b38b28a3cbcb016f2476182dd2a09686 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Tue, 23 Apr 2019 15:25:14 +0000
Subject: [PATCH] [InstCombine] Convert a masked.load of a dereferenceable
 address to an unconditional load

If we have a masked.load from a location we know to be dereferenceable, we
can simply issue a speculative unconditional load against that address. The
key advantage is that it produces IR which is well understood by the
optimizer. The select (cnd, load, passthrough) form produced should be
pattern matchable back to hardware predication if profitable.
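
For illustration, the rewrite turns the masked load below into the
load/select form shown under it. This is a sketch mirroring the updated
CHECK lines in the test; the value names are illustrative, and %ptr is
assumed to carry a dereferenceable(16) guarantee as in the test:

  ; before: masked load from a known-dereferenceable address
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %passthru)

  ; after: speculative unconditional load feeding a select
  %unmaskedload = load <2 x double>, <2 x double>* %ptr, align 4
  %res = select <2 x i1> %mask, <2 x double> %unmaskedload, <2 x double> %passthru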

Differential Revision: https://reviews.llvm.org/D59703

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359000 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineCalls.cpp  | 18 ++++++++++++++----
 test/Transforms/InstCombine/masked_intrinsics.ll |  5 +++--
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0fe52b145fe..51c72eb1837 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -1183,17 +1184,26 @@ static APInt possiblyDemandedEltsInMask(Value *Mask) {
 }
 
 // TODO, Obvious Missing Transforms:
-// * Dereferenceable address -> speculative load/select
 // * Narrow width by halfs excluding zero/undef lanes
 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
+  Value *LoadPtr = II.getArgOperand(0);
+  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+
   // If the mask is all ones or undefs, this is a plain vector load of the 1st
   // argument.
-  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
-    Value *LoadPtr = II.getArgOperand(0);
-    unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+  if (maskIsAllOneOrUndef(II.getArgOperand(2)))
     return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                      "unmaskedload");
+
+  // If we can unconditionally load from this address, replace with a
+  // load/select idiom. TODO: use DT for context sensitive query
+  if (isDereferenceableAndAlignedPointer(LoadPtr, Alignment,
+                                         II.getModule()->getDataLayout(),
+                                         &II, nullptr)) {
+    Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+                                          "unmaskedload");
+    return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
   }
 
   return nullptr;
diff --git a/test/Transforms/InstCombine/masked_intrinsics.ll b/test/Transforms/InstCombine/masked_intrinsics.ll
index d71402e1192..b451724866e 100644
--- a/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -87,8 +87,9 @@ define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) %ptr,
 ; CHECK-LABEL: @load_speculative(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
-; CHECK-NEXT:    ret <2 x double> [[RES]]
+; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   double %pt, <2 x i1> %mask) {
   %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
-- 
2.11.0