From d6671ee90c1423eb18c6fab11819df850ae2200d Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 15 May 2020 14:54:51 -0400
Subject: [PATCH] InferAddressSpaces: Handle ptrmask intrinsic

This one is slightly odd since it counts as an address expression,
which previously could never fail. Allow the existing TTI hook to
return the value to use, and re-use it to handle ptrmask.

Handles the no-op addrspacecasts for AMDGPU. We could probably do
something better by analyzing the mask value for the given address
space, but leave that for now.
---
 llvm/include/llvm/Analysis/TargetTransformInfo.h    | 16 +++--
 .../llvm/Analysis/TargetTransformInfoImpl.h         |  6 +-
 llvm/include/llvm/CodeGen/BasicTTIImpl.h            |  6 +-
 llvm/lib/Analysis/TargetTransformInfo.cpp           |  5 +-
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.cpp     | 30 ++++++---
 llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h  |  4 +-
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp   | 77 ++++++++++++++++++----
 .../InferAddressSpaces/AMDGPU/ptrmask.ll            | 21 +++---
 8 files changed, 113 insertions(+), 52 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c50c696741b..51aa1cb1cb1 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -379,9 +379,10 @@ public:
   /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
   /// NewV, which has a different address space. This should happen for every
   /// operand index that collectFlatAddressOperands returned for the intrinsic.
-  /// \returns true if the intrinsic was handled.
-  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
-                                        Value *NewV) const;
+  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
+  /// new value (which may be the original \p II with modified operands).
+  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+                                          Value *NewV) const;
 
   /// Test whether calls to a function lower to actual program function
   /// calls.
@@ -1236,8 +1237,9 @@ public:
   virtual unsigned getFlatAddressSpace() = 0;
   virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                           Intrinsic::ID IID) const = 0;
-  virtual bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
-                                                Value *NewV) const = 0;
+  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+                                                  Value *OldV,
+                                                  Value *NewV) const = 0;
   virtual bool isLoweredToCall(const Function *F) = 0;
   virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                        UnrollingPreferences &UP) = 0;
@@ -1505,8 +1507,8 @@ public:
     return Impl.collectFlatAddressOperands(OpIndexes, IID);
   }
 
-  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
-                                        Value *NewV) const override {
+  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+                                          Value *NewV) const override {
     return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
   }
 
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index f98b8bf7da2..0e8fc5dd6cf 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -86,9 +86,9 @@ public:
     return false;
   }
 
-  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
-                                        Value *NewV) const {
-    return false;
+  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+                                          Value *NewV) const {
+    return nullptr;
   }
 
   bool isLoweredToCall(const Function *F) {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index cc751a5b478..c751c3703ba 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -222,9 +222,9 @@ public:
     return false;
   }
 
-  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
-                                        Value *OldV, Value *NewV) const {
-    return false;
+  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+                                          Value *NewV) const {
+    return nullptr;
   }
 
   bool isLegalAddImmediate(int64_t imm) {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 9f319c40ae6..0c34050a662 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -290,9 +290,8 @@ bool TargetTransformInfo::collectFlatAddressOperands(
   return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
 }
 
-bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
-                                                           Value *OldV,
-                                                           Value *NewV) const {
+Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
+    IntrinsicInst *II, Value *OldV, Value *NewV) const {
   return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 2405a24dd14..324dcba86c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -851,8 +851,9 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
   }
 }
 
-bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
-    IntrinsicInst *II, Value *OldV, Value *NewV) const {
+Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+                                                    Value *OldV,
+                                                    Value *NewV) const {
   auto IntrID = II->getIntrinsicID();
   switch (IntrID) {
   case Intrinsic::amdgcn_atomic_inc:
@@ -862,7 +863,7 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
   case Intrinsic::amdgcn_ds_fmax: {
     const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));
     if (!IsVolatile->isZero())
-      return false;
+      return nullptr;
     Module *M = II->getParent()->getParent()->getParent();
     Type *DestTy = II->getType();
     Type *SrcTy = NewV->getType();
@@ -870,7 +871,7 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
       Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
     II->setArgOperand(0, NewV);
     II->setCalledFunction(NewDecl);
-    return true;
+    return II;
   }
   case Intrinsic::amdgcn_is_shared:
   case Intrinsic::amdgcn_is_private: {
@@ -880,12 +881,25 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
     LLVMContext &Ctx = NewV->getType()->getContext();
     ConstantInt *NewVal = (TrueAS == NewAS) ? ConstantInt::getTrue(Ctx)
                                             : ConstantInt::getFalse(Ctx);
-    II->replaceAllUsesWith(NewVal);
-    II->eraseFromParent();
-    return true;
+    return NewVal;
+  }
+  case Intrinsic::ptrmask: {
+    unsigned OldAS = OldV->getType()->getPointerAddressSpace();
+    unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+    if (!getTLI()->isNoopAddrSpaceCast(OldAS, NewAS))
+      return nullptr;
+
+    Module *M = II->getParent()->getParent()->getParent();
+    Value *MaskOp = II->getArgOperand(1);
+    Type *MaskTy = MaskOp->getType();
+    Function *NewDecl = Intrinsic::getDeclaration(M, Intrinsic::ptrmask,
+                                                  {NewV->getType(), MaskTy});
+    CallInst *NewCall = CallInst::Create(NewDecl->getFunctionType(), NewDecl,
+                                         {NewV, MaskOp}, "", II);
+    return NewCall;
   }
   default:
-    return false;
+    return nullptr;
   }
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index bc74965c178..72c040fa4d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -211,8 +211,8 @@ public:
   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                   Intrinsic::ID IID) const;
 
-  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
-                                        Value *OldV, Value *NewV) const;
+  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+                                          Value *NewV) const;
 
   unsigned getVectorSplitCost() { return 0; }
 
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 6fd6b841785..d407d0439cd 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -175,6 +175,11 @@ private:
 
   bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
 
+  Value *cloneInstructionWithNewAddressSpace(
+      Instruction *I, unsigned NewAddrSpace,
+      const ValueToValueMapTy &ValueWithNewAddrSpace,
+      SmallVectorImpl<const Use *> *UndefUsesToFix) const;
+
   // Changes the flat address expressions in function F to point to specific
   // address spaces if InferredAddrSpace says so. Postorder is the postorder of
   // all flat expressions in the use-def graph of function F.
@@ -218,20 +223,24 @@ INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
 
 // TODO: Currently, we consider only phi, bitcast, addrspacecast, and
 // getelementptr operators.
 static bool isAddressExpression(const Value &V) {
-  if (!isa<Operator>(V))
+  const Operator *Op = dyn_cast<Operator>(&V);
+  if (!Op)
     return false;
 
-  const Operator &Op = cast<Operator>(V);
-  switch (Op.getOpcode()) {
+  switch (Op->getOpcode()) {
   case Instruction::PHI:
-    assert(Op.getType()->isPointerTy());
+    assert(Op->getType()->isPointerTy());
     return true;
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast:
   case Instruction::GetElementPtr:
     return true;
   case Instruction::Select:
-    return Op.getType()->isPointerTy();
+    return Op->getType()->isPointerTy();
+  case Instruction::Call: {
+    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
+    return II && II->getIntrinsicID() == Intrinsic::ptrmask;
+  }
   default:
     return false;
   }
@@ -254,12 +263,17 @@ static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
     return {Op.getOperand(0)};
   case Instruction::Select:
     return {Op.getOperand(1), Op.getOperand(2)};
+  case Instruction::Call: {
+    const IntrinsicInst &II = cast<IntrinsicInst>(Op);
+    assert(II.getIntrinsicID() == Intrinsic::ptrmask &&
+           "unexpected intrinsic call");
+    return {II.getArgOperand(0)};
+  }
   default:
     llvm_unreachable("Unexpected instruction type.");
   }
 }
 
-// TODO: Move logic to TTI?
 bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                   Value *OldV,
                                                   Value *NewV) const {
@@ -275,8 +289,17 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
     II->setCalledFunction(NewDecl);
     return true;
   }
-  default:
-    return TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
+  case Intrinsic::ptrmask:
+    // This is handled as an address expression, not as a memory use.
+    return false;
+  default: {
+    Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
+    if (!Rewrite)
+      return false;
+    if (Rewrite != II)
+      II->replaceAllUsesWith(Rewrite);
+    return true;
+  }
   }
 }
 
@@ -285,6 +308,7 @@ void InferAddressSpaces::collectRewritableIntrinsicOperands(
     DenseSet<Value *> &Visited) const {
   auto IID = II->getIntrinsicID();
   switch (IID) {
+  case Intrinsic::ptrmask:
   case Intrinsic::objectsize:
     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                  PostorderStack, Visited);
@@ -438,10 +462,13 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
 // Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
 // from a pointer whose type already matches. Therefore, this function returns a
 // Value* instead of an Instruction*.
-static Value *cloneInstructionWithNewAddressSpace(
+//
+// This may also return nullptr in the case that the instruction could not be
+// rewritten.
+Value *InferAddressSpaces::cloneInstructionWithNewAddressSpace(
     Instruction *I, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace,
-    SmallVectorImpl<const Use *> *UndefUsesToFix) {
+    SmallVectorImpl<const Use *> *UndefUsesToFix) const {
   Type *NewPtrType =
       I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
 
@@ -456,6 +483,23 @@ Value *InferAddressSpaces::cloneInstructionWithNewAddressSpace(
     return Src;
   }
 
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    // Technically the intrinsic ID is a pointer typed argument, so specially
+    // handle calls early.
+    assert(II->getIntrinsicID() == Intrinsic::ptrmask);
+    Value *NewPtr = operandWithNewAddressSpaceOrCreateUndef(
+        II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
+        UndefUsesToFix);
+    Value *Rewrite =
+        TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
+    if (Rewrite) {
+      assert(Rewrite != II && "cannot modify this pointer operation in place");
+      return Rewrite;
+    }
+
+    return nullptr;
+  }
+
   // Computes the converted pointer operands.
   SmallVector<Value *, 4> NewPointerOperands;
   for (const Use &OperandUse : I->operands()) {
@@ -591,7 +635,7 @@ Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     Value *NewV = cloneInstructionWithNewAddressSpace(
         I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
-    if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
+    if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) {
       if (NewI->getParent() == nullptr) {
         NewI->insertBefore(I);
         NewI->takeName(I);
@@ -879,8 +923,10 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
   for (Value* V : Postorder) {
     unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
     if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
-      ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
-          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
+      Value *New = cloneValueWithNewAddressSpace(
+          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
+      if (New)
+        ValueWithNewAddrSpace[V] = New;
     }
   }
 
@@ -890,7 +936,10 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
   // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
   for (const Use *UndefUse : UndefUsesToFix) {
     User *V = UndefUse->getUser();
-    User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
+    User *NewV = cast_or_null<User>(ValueWithNewAddrSpace.lookup(V));
+    if (!NewV)
+      continue;
+
     unsigned OperandNo = UndefUse->getOperandNo();
     assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
     NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
index 40daa4877db..6f25a3ec2ad 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
@@ -42,9 +42,8 @@ define i8 @ptrmask_cast_region_to_flat(i8 addrspace(2)* %src.ptr, i64 %mask) {
 
 define i8 @ptrmask_cast_global_to_flat(i8 addrspace(1)* %src.ptr, i64 %mask) {
 ; CHECK-LABEL: @ptrmask_cast_global_to_flat(
-; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(1)* [[SRC_PTR:%.*]] to i8*
-; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[SRC_PTR:%.*]], i64 [[MASK:%.*]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(1)* [[TMP1]], align 1
 ; CHECK-NEXT:    ret i8 [[LOAD]]
 ;
   %cast = addrspacecast i8 addrspace(1)* %src.ptr to i8*
@@ -55,9 +54,8 @@ define i8 @ptrmask_cast_global_to_flat(i8 addrspace(1)* %src.ptr, i64 %mask) {
 
 define i8 @ptrmask_cast_999_to_flat(i8 addrspace(999)* %src.ptr, i64 %mask) {
 ; CHECK-LABEL: @ptrmask_cast_999_to_flat(
-; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(999)* [[SRC_PTR:%.*]] to i8*
-; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(999)* @llvm.ptrmask.p999i8.i64(i8 addrspace(999)* [[SRC_PTR:%.*]], i64 [[MASK:%.*]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(999)* [[TMP1]], align 1
 ; CHECK-NEXT:    ret i8 [[LOAD]]
 ;
   %cast = addrspacecast i8 addrspace(999)* %src.ptr to i8*
@@ -121,8 +119,8 @@ define i8 @ptrmask_cast_local_to_flat_global(i64 %mask) {
 
 define i8 @ptrmask_cast_global_to_flat_global(i64 %mask) {
 ; CHECK-LABEL: @ptrmask_cast_global_to_flat_global(
-; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* addrspacecast (i8 addrspace(1)* @gv to i8*), i64 [[MASK:%.*]])
-; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[MASKED]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* @gv, i64 [[MASK:%.*]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(1)* [[TMP1]], align 1
 ; CHECK-NEXT:    ret i8 [[LOAD]]
 ;
   %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* addrspacecast (i8 addrspace(1)* @gv to i8*), i64 %mask)
@@ -132,10 +130,9 @@ define i8 @ptrmask_cast_global_to_flat_global(i64 %mask) {
 
 define i8 @multi_ptrmask_cast_global_to_flat(i8 addrspace(1)* %src.ptr, i64 %mask) {
 ; CHECK-LABEL: @multi_ptrmask_cast_global_to_flat(
-; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast i8 addrspace(1)* [[SRC_PTR:%.*]] to i8*
-; CHECK-NEXT:    [[LOAD0:%.*]] = load i8, i8 addrspace(1)* [[SRC_PTR]], align 1
-; CHECK-NEXT:    [[MASKED:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i8, i8* [[MASKED]], align 1
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i8, i8 addrspace(1)* [[SRC_PTR:%.*]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[SRC_PTR]], i64 [[MASK:%.*]])
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i8, i8 addrspace(1)* [[TMP1]], align 1
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD0]], [[LOAD1]]
 ; CHECK-NEXT:    ret i8 [[ADD]]
 ;
-- 
2.11.0
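
For illustration, the end-to-end rewrite this enables (a minimal sketch
distilled from the ptrmask_cast_global_to_flat test above; the function name
@example is made up, and it assumes a target such as AMDGPU where the
global-to-flat addrspacecast is a no-op):

Before InferAddressSpaces:

  declare i8* @llvm.ptrmask.p0i8.i64(i8*, i64)

  define i8 @example(i8 addrspace(1)* %src.ptr, i64 %mask) {
    ; The pointer is first cast to the flat address space, then masked.
    %cast = addrspacecast i8 addrspace(1)* %src.ptr to i8*
    %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %cast, i64 %mask)
    %load = load i8, i8* %masked, align 1
    ret i8 %load
  }

After, the mask is applied directly in the original address space, so the
addrspacecast and the flat-pointer load disappear:

  declare i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)*, i64)

  define i8 @example(i8 addrspace(1)* %src.ptr, i64 %mask) {
    %masked = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* %src.ptr, i64 %mask)
    %load = load i8, i8 addrspace(1)* %masked, align 1
    ret i8 %load
  }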