From 822ea1bfe892e610d0da8de108dbf51020a0d9f6 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 13 Jun 2018 18:52:54 +0000 Subject: [PATCH] [AMDGPU] Corrected computeKnownBits for V_PERM_B32 Differential Revision: https://reviews.llvm.org/D48133 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334640 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 15 ++++++++------- test/CodeGen/AMDGPU/permute.ll | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 148de14dd5e..19106a5ae8d 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4317,18 +4317,19 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( unsigned Sel = CMask->getZExtValue(); for (unsigned I = 0; I < 32; I += 8) { - unsigned ByteMask = 0xff << I; unsigned SelBits = Sel & 0xff; if (SelBits < 4) { - Known.One |= RHSKnown.One & ByteMask; - Known.Zero |= RHSKnown.Zero & ByteMask; + SelBits *= 8; + Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; + Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; } else if (SelBits < 7) { - Known.One |= LHSKnown.One & ByteMask; - Known.Zero |= LHSKnown.Zero & ByteMask; + SelBits = (SelBits & 3) * 8; + Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; + Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; } else if (SelBits == 0x0c) { - Known.Zero |= ByteMask; + Known.Zero |= 0xff << I; } else if (SelBits > 0x0c) { - Known.One |= ByteMask; + Known.One |= 0xff << I; } Sel >>= 8; } diff --git a/test/CodeGen/AMDGPU/permute.ll b/test/CodeGen/AMDGPU/permute.ll index a0354407267..795b466b4d6 100644 --- a/test/CodeGen/AMDGPU/permute.ll +++ b/test/CodeGen/AMDGPU/permute.ll @@ -196,4 +196,26 @@ bb: ret void } +; GCN-LABEL: {{^}}known_ffff8004: +; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500 +; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004 +; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] +; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} +define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +bb: + %id = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id + %load = load i32, i32 addrspace(1)* %gep, align 4 + %mask1 = or i32 %arg1, 4 + %mask2 = or i32 %load, 32768 ; 0x8000 + %and = and i32 %mask1, 16711935 ; 0x00ff00ff + %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00 + %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000 + %tmp3 = or i32 %tmp2, %and + store i32 %tmp3, i32 addrspace(1)* %gep, align 4 + %v = and i32 %tmp3, 4294934532 ; 0xffff8004 + store i32 %v, i32 addrspace(1)* %arg, align 4 + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() -- 2.11.0