From: Matt Arsenault
Date: Thu, 29 Mar 2018 19:59:28 +0000 (+0000)
Subject: AMDGPU: Fix selection error on constant loads with < 4 byte alignment
X-Git-Tag: android-x86-7.1-r4~3046
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=3ca9749f0a3cfdb13fbff446b17df5ee1ef68da9;p=android-x86%2Fexternal-llvm.git

AMDGPU: Fix selection error on constant loads with < 4 byte alignment

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328818 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index b034f3d546d..1de2ab4f099 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3464,10 +3464,6 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
   return false;
 }
 
-static bool isDwordAligned(unsigned Alignment) {
-  return Alignment % 4 == 0;
-}
-
 //===----------------------------------------------------------------------===//
 // Custom DAG Lowering Operations
 //===----------------------------------------------------------------------===//
@@ -5385,21 +5381,23 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
 
   unsigned NumElements = MemVT.getVectorNumElements();
+
   if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
       AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
-    if (!Op->isDivergent())
+    if (!Op->isDivergent() && Alignment >= 4)
       return SDValue();
     // Non-uniform loads will be selected to MUBUF instructions, so they
     // have the same legalization requirements as global and private
     // loads.
     //
   }
+
   if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
       AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
      AS == AMDGPUASI.GLOBAL_ADDRESS) {
     if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
         !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
-        isDwordAligned(Alignment))
+        Alignment >= 4)
       return SDValue();
     // Non-uniform loads will be selected to MUBUF instructions, so they
     // have the same legalization requirements as global and private
diff --git a/test/CodeGen/AMDGPU/load-constant-i16.ll b/test/CodeGen/AMDGPU/load-constant-i16.ll
index 68ff90e32e6..31bb2067d76 100644
--- a/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -72,6 +72,18 @@ entry:
   ret void
 }
 
+; FUNC-LABEL: {{^}}constant_load_v16i16_align2:
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
+entry:
+  %ld = load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
+  store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
 ; GCN-NOHSA: buffer_load_ushort
 ; GCN-NOHSA: buffer_store_dword
diff --git a/test/CodeGen/AMDGPU/load-global-i16.ll b/test/CodeGen/AMDGPU/load-global-i16.ll
index 6d243340395..c1dcc1d3126 100644
--- a/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -83,6 +83,18 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}global_load_v16i16_align2:
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+define amdgpu_kernel void @global_load_v16i16_align2(<16 x i16> addrspace(1)* %in, <16 x i16> addrspace(1)* %out) #0 {
+entry:
+  %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in, align 2
+  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out, align 32
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
 ; GCN-NOHSA: buffer_load_ushort
 ; GCN-NOHSA: buffer_store_dword