From d7173826331eb2c263ecee607e86cebcf1c08ad7 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Tue, 30 Jun 2020 10:57:41 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Select icmp intrinsic

Select into corresponding V_CMP instruction based on CmpInst predicate,
stored as immediate, in last operand.

Differential Revision: https://reviews.llvm.org/D82652
---
 .../Target/AMDGPU/AMDGPUInstructionSelector.cpp    | 30 ++++++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h |  1 +
 .../CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll  | 32 ++++++++++++++++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6380d6b91d6..c3d5e78964c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -889,6 +889,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
     return constrainCopyLikeIntrin(I, AMDGPU::WWM);
   case Intrinsic::amdgcn_div_scale:
     return selectDivScale(I);
+  case Intrinsic::amdgcn_icmp:
+    return selectIntrinsicIcmp(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
@@ -1009,6 +1011,34 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
   return Ret;
 }
 
+bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
+  Register Dst = I.getOperand(0).getReg();
+  if (isVCC(Dst, *MRI))
+    return false;
+
+  if (MRI->getType(Dst).getSizeInBits() != STI.getWavefrontSize())
+    return false;
+
+  MachineBasicBlock *BB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+  Register SrcReg = I.getOperand(2).getReg();
+  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
+  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
+
+  int Opcode = getV_CMPOpcode(Pred, Size);
+  if (Opcode == -1)
+    return false;
+
+  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
+                           .add(I.getOperand(2))
+                           .add(I.getOperand(3));
+  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), *TRI.getBoolRC(),
+                               *MRI);
+  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+  I.eraseFromParent();
+  return Ret;
+}
+
 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
   // SelectionDAG uses for wave32 vs wave64.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 0ac6788c69b..f8a8b5db4b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -106,6 +106,7 @@ private:
 
   bool selectInterpP1F16(MachineInstr &MI) const;
   bool selectDivScale(MachineInstr &MI) const;
+  bool selectIntrinsicIcmp(MachineInstr &MI) const;
   bool selectG_INTRINSIC(MachineInstr &I) const;
 
   bool selectEndCfIntrinsic(MachineInstr &MI) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
new file mode 100644
index 00000000000..dc04f15967a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_ps void @test_intr_icmp_eq_i64(i64 addrspace(1)* %out, i32 %src) #0 {
+; GCN-LABEL: test_intr_icmp_eq_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0x64, v2
+; GCN-NEXT:    v_mov_b32_e32 v3, s1
+; GCN-NEXT:    v_mov_b32_e32 v2, s0
+; GCN-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
+; GCN-NEXT:    s_endpgm
+  %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_ps void @test_intr_icmp_ne_i32(i32 addrspace(1)* %out, i32 %src) #1 {
+; GCN-LABEL: test_intr_icmp_ne_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_cmp_ne_u32_e64 s0, 0x64, v2
+; GCN-NEXT:    ; implicit-def: $vcc_hi
+; GCN-NEXT:    v_mov_b32_e32 v2, s0
+; GCN-NEXT:    global_store_dword v[0:1], v2, off
+; GCN-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 33)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
+declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
+attributes #0 = { "target-features"="+wavefrontsize64" }
+attributes #1 = { "target-features"="+wavefrontsize32" }
-- 
2.11.0