From 19dfb4b388a8199c1c62555ee7d8e88464f9c22e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 5 Jun 2018 19:52:56 +0000 Subject: [PATCH] AMDGPU: Preserve metadata when widening loads Preserves the low bound of the !range. I don't think it's legal to do anything with the top half since it's theoretically reading garbage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334045 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 25 ++++++- .../CodeGen/AMDGPU/widen_extending_scalar_loads.ll | 76 ++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index b1ae1e0cb08..f0b9c9e9455 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -465,7 +465,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) { return Changed; } -bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) { +bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) { if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) && canWidenScalarExtLoad(I)) { @@ -475,7 +475,28 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) { Type *I32Ty = Builder.getInt32Ty(); Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace()); Value *BitCast= Builder.CreateBitCast(I.getPointerOperand(), PT); - Value *WidenLoad = Builder.CreateLoad(BitCast); + LoadInst *WidenLoad = Builder.CreateLoad(BitCast); + WidenLoad->copyMetadata(I); + + // If we have range metadata, we need to convert the type, and not make + // assumptions about the high bits. 
+ if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) { + ConstantInt *Lower = + mdconst::extract<ConstantInt>(Range->getOperand(0)); + + if (Lower->getValue().isNullValue()) { + WidenLoad->setMetadata(LLVMContext::MD_range, nullptr); + } else { + Metadata *LowAndHigh[] = { + ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))), + // Don't make assumptions about the high bits. + ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0)) + }; + + WidenLoad->setMetadata(LLVMContext::MD_range, + MDNode::get(Mod->getContext(), LowAndHigh)); + } + } int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType()); Type *IntNTy = Builder.getIntNTy(TySize); diff --git a/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll b/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll index 7c0dc6f58d8..6c1f9239f2a 100644 --- a/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll +++ b/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll @@ -189,4 +189,80 @@ define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 { ret void } +; OPT-LABEL: @constant_load_i16_align4_range( +; OPT: load i32, i32 addrspace(4)* %1, !range !0 +define amdgpu_kernel void @constant_load_i16_align4_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { + %ld = load i16, i16 addrspace(4)* %in, align 4, !range !0 + %ext = sext i16 %ld to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; OPT-LABEL: @constant_load_i16_align4_range_max( +; OPT: load i32, i32 addrspace(4)* %1, !range !0 +define amdgpu_kernel void @constant_load_i16_align4_range_max(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { + %ld = load i16, i16 addrspace(4)* %in, align 4, !range !1 + %ext = sext i16 %ld to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; OPT-LABEL: @constant_load_i16_align4_complex_range( +; OPT: load i32, i32 addrspace(4)* %1, !range !1 +define amdgpu_kernel void @constant_load_i16_align4_complex_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { + %ld 
= load i16, i16 addrspace(4)* %in, align 4, !range !2 + %ext = sext i16 %ld to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; OPT-LABEL: @constant_load_i16_align4_range_from_0( +; OPT: load i32, i32 addrspace(4)* %1{{$}} +define amdgpu_kernel void @constant_load_i16_align4_range_from_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { + %ld = load i16, i16 addrspace(4)* %in, align 4, !range !3 + %ext = sext i16 %ld to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; OPT-LABEL: @constant_load_i16_align4_range_from_neg( +; OPT: load i32, i32 addrspace(4)* %1, !range !2 +define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { + %ld = load i16, i16 addrspace(4)* %in, align 4, !range !4 + %ext = sext i16 %ld to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0( +; OPT: load i32, i32 addrspace(4)* %1, !range !2 +define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { + %ld = load i16, i16 addrspace(4)* %in, align 4, !range !5 + %ext = sext i16 %ld to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; OPT-LABEL: @constant_load_i16_align4_invariant +; OPT: load i32, i32 addrspace(4)* %1, !invariant.load !3 +define amdgpu_kernel void @constant_load_i16_align4_invariant(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { + %ld = load i16, i16 addrspace(4)* %in, align 4, !invariant.load !6 + %ext = sext i16 %ld to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + attributes #0 = { nounwind } + +; OPT: !0 = !{i32 5, i32 0} +; OPT: !1 = !{i32 8, i32 0} +; OPT: !2 = !{i32 65520, i32 0} +; OPT: !3 = !{} + +!0 = !{i16 5, i16 500} +!1 = !{i16 5, i16 -1} +!2 = !{i16 8, i16 12, i16 42, i16 99} +!3 = !{i16 0, i16 255} +!4 = !{i16 -16, i16 16} +!5 = !{i16 -16, i16 0} +!6 = !{} -- 2.11.0