From: Wei Ding Date: Thu, 28 Jul 2016 16:42:13 +0000 (+0000) Subject: AMDGPU : Add intrinsics for compare with the full wavefront result X-Git-Tag: android-x86-7.1-r4~29540 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=ee8c4ca1e1d2ae293d8e5f9ce7c346c3e32deca2;p=android-x86%2Fexternal-llvm.git AMDGPU : Add intrinsics for compare with the full wavefront result Differential Revision: http://reviews.llvm.org/D22482 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276998 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index a0798c5e4bc..7163d0e70fa 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -407,6 +407,14 @@ def int_amdgcn_lerp : GCCBuiltin<"__builtin_amdgcn_lerp">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_amdgcn_icmp : + Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem, IntrConvergent]>; + +def int_amdgcn_fcmp : + Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem, IntrConvergent]>; + //===----------------------------------------------------------------------===// // CI+ Intrinsics //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 5abb10aced6..a014549297f 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2658,6 +2658,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(RETURN) NODE_NAME_CASE(DWORDADDR) NODE_NAME_CASE(FRACT) + NODE_NAME_CASE(SETCC) NODE_NAME_CASE(CLAMP) NODE_NAME_CASE(COS_HW) NODE_NAME_CASE(SIN_HW) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index f1fde8069a1..206c93cd0bc 100644 --- 
a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -223,6 +223,9 @@ enum NodeType : unsigned { DWORDADDR, FRACT, CLAMP, + // This is SETCC with the full mask result which is used for a compare with a + // result bit per item in the wavefront. + SETCC, // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. // Denormals handled on some parts. diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index f16ea8de429..1547d22f59d 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -144,6 +144,11 @@ def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>; // out = (src1 > src0) ? 1 : 0 def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>; +def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc + SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> +]>; + +def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>; def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0", SDTIntToFPOp, []>; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 1b9a3f276bc..c02ac6ccf1a 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" 
@@ -2213,6 +2214,44 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0, Denominator, Numerator); } + case Intrinsic::amdgcn_icmp: { + // Operand 3 is the predicate; without a constant there is no compare + // opcode to select, so fold the result to undef instead of crashing. + const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3)); + if (!CD) + return DAG.getUNDEF(VT); + + // Values outside the integer-predicate range also fold to undef. + int CondCode = CD->getSExtValue(); + if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE || + CondCode >= ICmpInst::Predicate::BAD_ICMP_PREDICATE) + return DAG.getUNDEF(VT); + + ICmpInst::Predicate IcInput = + static_cast<ICmpInst::Predicate>(CondCode); + ISD::CondCode CCOpcode = getICmpCondCode(IcInput); + return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1), + Op.getOperand(2), DAG.getCondCode(CCOpcode)); + } + case Intrinsic::amdgcn_fcmp: { + // Operand 3 is the predicate; without a constant there is no compare + // opcode to select, so fold the result to undef instead of crashing. + const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3)); + if (!CD) + return DAG.getUNDEF(VT); + + // Only values strictly between FCMP_FALSE and FCMP_TRUE are lowered. 
+ int CondCode = CD->getSExtValue(); + if (CondCode <= FCmpInst::Predicate::FCMP_FALSE || + CondCode >= FCmpInst::Predicate::FCMP_TRUE) + return DAG.getUNDEF(VT); + + FCmpInst::Predicate FcInput = + static_cast<FCmpInst::Predicate>(CondCode); + ISD::CondCode CCOpcode = getFCmpCondCode(FcInput); + return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1), + Op.getOperand(2), DAG.getCondCode(CCOpcode)); + } case Intrinsic::amdgcn_fmul_legacy: return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT, Op.getOperand(1), Op.getOperand(2)); diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 387a8a71825..8a8ab045642 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -2366,6 +2366,71 @@ def : Pat < >; //===----------------------------------------------------------------------===// +// V_ICMPIntrinsic Pattern. 
+//===----------------------------------------------------------------------===// +class ICMP_Pattern : Pat < + (AMDGPUsetcc vt:$src0, vt:$src1, cond), + (inst $src0, $src1) +>; + +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; + +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; +def : ICMP_Pattern ; + +class FCMP_Pattern : Pat < + (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), + (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)), + (inst $src0_modifiers, $src0, $src1_modifiers, $src1, + DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; + +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; + +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; + +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; +def : FCMP_Pattern ; + +//===----------------------------------------------------------------------===// // SMRD Patterns //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll new file mode 100644 index 00000000000..67c17890044 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll @@ -0,0 +1,228 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + 
+declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0 +declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0 +declare float @llvm.fabs.f32(float) #0 + +; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs: +; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}| +define void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) { + %temp = call float @llvm.fabs.f32(float %a) + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs: +; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}| +define void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) { + %temp = call float @llvm.fabs.f32(float %a) + %src_input = call float @llvm.fabs.f32(float %src) + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src_input, float %temp, i32 1) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp: +; GCN-NOT: v_cmp_eq_f32_e64 +define void @v_fcmp(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_oeq: +; GCN: v_cmp_eq_f32_e64 +define void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_one: +; GCN: v_cmp_neq_f32_e64 +define void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_ogt: +; GCN: v_cmp_gt_f32_e64 +define void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, 
i32 2) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_oge: +; GCN: v_cmp_ge_f32_e64 +define void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_olt: +; GCN: v_cmp_lt_f32_e64 +define void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_ole: +; GCN: v_cmp_le_f32_e64 +define void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5) + store i64 %result, i64 addrspace(1)* %out + ret void +} + + +; GCN-LABEL: {{^}}v_fcmp_f32_ueq: +; GCN: v_cmp_nlg_f32_e64 +define void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_une: +; GCN: v_cmp_neq_f32_e64 +define void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_ugt: +; GCN: v_cmp_nle_f32_e64 +define void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_uge: +; GCN: v_cmp_nlt_f32_e64 +define void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_ult: +; GCN: v_cmp_nge_f32_e64 +define void 
@v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f32_ule: +; GCN: v_cmp_ngt_f32_e64 +define void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) { + %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_oeq: +; GCN: v_cmp_eq_f64_e64 +define void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_one: +; GCN: v_cmp_neq_f64_e64 +define void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_ogt: +; GCN: v_cmp_gt_f64_e64 +define void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_oge: +; GCN: v_cmp_ge_f64_e64 +define void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_olt: +; GCN: v_cmp_lt_f64_e64 +define void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_ole: +; GCN: v_cmp_le_f64_e64 +define void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5) + 
store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_ueq: +; GCN: v_cmp_nlg_f64_e64 +define void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_une: +; GCN: v_cmp_neq_f64_e64 +define void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_ugt: +; GCN: v_cmp_nle_f64_e64 +define void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_uge: +; GCN: v_cmp_nlt_f64_e64 +define void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_ult: +; GCN: v_cmp_nge_f64_e64 +define void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_fcmp_f64_ule: +; GCN: v_cmp_ngt_f64_e64 +define void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) { + %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone convergent } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll new file mode 100644 index 00000000000..0797e5ead00 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -0,0 +1,172 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | 
FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0 +declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0 + +; GCN-LABEL: {{^}}v_icmp_i32_eq: +; GCN: v_cmp_eq_i32_e64 +define void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp: +; GCN-NOT: v_cmp_eq_i32_e64 +define void @v_icmp(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30) + store i64 %result, i64 addrspace(1)* %out + ret void +} +; GCN-LABEL: {{^}}v_icmp_i32_ne: +; GCN: v_cmp_ne_i32_e64 +define void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_u32_ugt: +; GCN: v_cmp_gt_u32_e64 +define void @v_icmp_u32_ugt(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_u32_uge: +; GCN: v_cmp_ge_u32_e64 +define void @v_icmp_u32_uge(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_u32_ult: +; GCN: v_cmp_lt_u32_e64 +define void @v_icmp_u32_ult(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_u32_ule: +; GCN: v_cmp_le_u32_e64 +define void @v_icmp_u32_ule(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37) + store i64 %result, i64 addrspace(1)* %out + ret void 
+} + +; GCN-LABEL: {{^}}v_icmp_i32_sgt: +; GCN: v_cmp_gt_i32_e64 +define void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_i32_sge: +; GCN: v_cmp_ge_i32_e64 +define void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_i32_slt: +; GCN: v_cmp_lt_i32_e64 +define void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40) + store i64 %result, i64 addrspace(1)* %out + ret void +} +; GCN-LABEL: {{^}}v_icmp_i32_sle: +; GCN: v_cmp_le_i32_e64 +define void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) { + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_i64_eq: +; GCN: v_cmp_eq_i64_e64 +define void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_i64_ne: +; GCN: v_cmp_ne_i64_e64 +define void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_u64_ugt: +; GCN: v_cmp_gt_u64_e64 +define void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_u64_uge: +; GCN: v_cmp_ge_u64_e64 +define void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35) + store i64 
%result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_u64_ult: +; GCN: v_cmp_lt_u64_e64 +define void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_u64_ule: +; GCN: v_cmp_le_u64_e64 +define void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_i64_sgt: +; GCN: v_cmp_gt_i64_e64 +define void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_i64_sge: +; GCN: v_cmp_ge_i64_e64 +define void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_icmp_i64_slt: +; GCN: v_cmp_lt_i64_e64 +define void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) + store i64 %result, i64 addrspace(1)* %out + ret void +} +; GCN-LABEL: {{^}}v_icmp_i64_sle: +; GCN: v_cmp_le_i64_e64 +define void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) { + %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) + store i64 %result, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone convergent }