From 9506598fb2809614fe963819b86f9a5b533000ee Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Aug 2015 20:48:04 +0000 Subject: [PATCH] AMDGPU: Don't reprocess instructions when splitting i64 bcnt git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246079 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIInstrInfo.cpp | 9 +++++---- test/CodeGen/AMDGPU/ctpop64.ll | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 0bd0aba4954..d6f70a6da46 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2513,18 +2513,19 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl &Worklist MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC); - MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg) + BuildMI(MBB, MII, DL, InstDesc, MidReg) .addOperand(SrcRegSub0) .addImm(0); - MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg) + BuildMI(MBB, MII, DL, InstDesc, ResultReg) .addOperand(SrcRegSub1) .addReg(MidReg); MRI.replaceRegWith(Dest.getReg(), ResultReg); - Worklist.push_back(First); - Worklist.push_back(Second); + // We don't need to legalize operands here. src0 for either instruction can be + // an SGPR, and the second input is unused or determined here. 
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, diff --git a/test/CodeGen/AMDGPU/ctpop64.ll b/test/CodeGen/AMDGPU/ctpop64.ll index e1a0ee3ea21..82cdd52f2ce 100644 --- a/test/CodeGen/AMDGPU/ctpop64.ll +++ b/test/CodeGen/AMDGPU/ctpop64.ll @@ -36,6 +36,25 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali ret void } +; FIXME: We shouldn't emit the v_mov_b32 0 +; FUNC-LABEL: {{^}}v_ctpop_i64_user: +; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, +; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 +; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]] +; GCN-DAG: v_or_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[ZERO]] +; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +; GCN: s_endpgm +define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind { + %val = load i64, i64 addrspace(1)* %in, align 8 + %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone + %or = or i64 %ctpop, %s.val + store i64 %or, i64 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}s_ctpop_v2i64: ; GCN: s_bcnt1_i32_b64 ; GCN: s_bcnt1_i32_b64 -- 2.11.0