From: Krzysztof Parzyszek Date: Thu, 14 Dec 2017 21:28:48 +0000 (+0000) Subject: [Hexagon] Generate HVX code for comparisons and selects X-Git-Tag: android-x86-7.1-r4~7210 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=a9bb60c600bfec513cc00b7b126dfade550a3356;p=android-x86%2Fexternal-llvm.git [Hexagon] Generate HVX code for comparisons and selects git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320744 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 0b7765c4391..be6d13a9fd0 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1272,6 +1272,9 @@ SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(LHS))) + return LowerHvxSetCC(Op, DAG); + SDValue Cmp = Op.getOperand(2); ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); @@ -1732,6 +1735,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass); } else if (Subtarget.useHVX128BOps()) { addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass); @@ -1740,6 +1746,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); + 
addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass); } } @@ -2001,10 +2010,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setIndexedLoadAction(ISD::POST_INC, T, Legal); setIndexedStoreAction(ISD::POST_INC, T, Legal); - setOperationAction(ISD::ADD, T, Legal); - setOperationAction(ISD::SUB, T, Legal); - setOperationAction(ISD::MUL, T, Custom); + setOperationAction(ISD::ADD, T, Legal); + setOperationAction(ISD::SUB, T, Legal); + setOperationAction(ISD::VSELECT, T, Legal); + setOperationAction(ISD::MUL, T, Custom); + setOperationAction(ISD::SETCC, T, Custom); setOperationAction(ISD::BUILD_VECTOR, T, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index fca48d4cca6..0a5771ec883 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -347,6 +347,7 @@ namespace HexagonISD { SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const; std::pair<const TargetRegisterClass*, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index cfa0c003e41..652dffce841 100644 --- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -383,3 +383,74 @@ HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const { } return SDValue(); } + +SDValue +HexagonTargetLowering::LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const { + MVT VecTy = ty(Op.getOperand(0)); + assert(VecTy == ty(Op.getOperand(1))); + + SDValue Cmp = Op.getOperand(2); + ISD::CondCode 
CC = cast<CondCodeSDNode>(Cmp)->get(); + bool Negate = false, Swap = false; + + // HVX has instructions for SETEQ, SETGT, SETUGT. The other comparisons + // can be arranged as operand-swapped/negated versions of these. Since + // the generated code will have the original CC expressed as + // (negate (swap-op NewCmp)), + // the condition code for the NewCmp should be calculated from the original + // CC by applying these operations in the reverse order. + + switch (CC) { + case ISD::SETNE: // !eq + case ISD::SETLE: // !gt + case ISD::SETGE: // !lt + case ISD::SETULE: // !ugt + case ISD::SETUGE: // !ult + CC = ISD::getSetCCInverse(CC, true); + Negate = true; + break; + default: + break; + } + + switch (CC) { + case ISD::SETLT: // swap gt + case ISD::SETULT: // swap ugt + CC = ISD::getSetCCSwappedOperands(CC); + Swap = true; + break; + default: + break; + } + + assert(CC == ISD::SETEQ || CC == ISD::SETGT || CC == ISD::SETUGT); + + MVT ElemTy = VecTy.getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert(isPowerOf2_32(ElemWidth)); + + auto getIdx = [] (unsigned Code) { + static const unsigned Idx[] = { ISD::SETEQ, ISD::SETGT, ISD::SETUGT }; + for (unsigned I = 0, E = array_lengthof(Idx); I != E; ++I) + if (Code == Idx[I]) + return I; + llvm_unreachable("Unhandled CondCode"); + }; + + static unsigned OpcTable[3][3] = { + // SETEQ SETGT, SETUGT + /* Byte */ { Hexagon::V6_veqb, Hexagon::V6_vgtb, Hexagon::V6_vgtub }, + /* Half */ { Hexagon::V6_veqh, Hexagon::V6_vgth, Hexagon::V6_vgtuh }, + /* Word */ { Hexagon::V6_veqw, Hexagon::V6_vgtw, Hexagon::V6_vgtuw } + }; + + unsigned CmpOpc = OpcTable[Log2_32(ElemWidth)-3][getIdx(CC)]; + + MVT ResTy = ty(Op); + const SDLoc &dl(Op); + SDValue OpL = Swap ? Op.getOperand(1) : Op.getOperand(0); + SDValue OpR = Swap ? Op.getOperand(0) : Op.getOperand(1); + SDValue CmpV = getNode(CmpOpc, dl, ResTy, {OpL, OpR}, DAG); + return Negate ? 
getNode(Hexagon::V6_pred_not, dl, ResTy, {CmpV}, DAG) + : CmpV; +} diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index e6c8ad4deea..8ed9a8d01ce 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -88,6 +88,10 @@ def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; +def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>; +def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>; +def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>; + def HVI8: PatLeaf<(VecI8 HvxVR:$R)>; def HVI16: PatLeaf<(VecI16 HvxVR:$R)>; def HVI32: PatLeaf<(VecI32 HvxVR:$R)>; @@ -2932,7 +2936,14 @@ let Predicates = [UseHVX] in { def: Pat<(sub HVI16:$Vs, HVI16:$Vt), (V6_vsubh HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(sub HVI32:$Vs, HVI32:$Vt), (V6_vsubw HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(and HVI8:$Vs, HVI8:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(or HVI8:$Vs, HVI8:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(xor HVI8:$Vs, HVI8:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(and HVI8:$Vs, HVI8:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(or HVI8:$Vs, HVI8:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(xor HVI8:$Vs, HVI8:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>; + + def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index aa8ecb8d314..2ceed70c249 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -225,6 +225,15 @@ def VecPI16 def VecPI32 : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], [v32i32, v32i32, v64i32, v64i32, v32i32]>; +def VecQ8 + : 
ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v64i1, v64i1, v128i1, v128i1, v64i1]>; +def VecQ16 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v32i1, v32i1, v64i1, v64i1, v32i1]>; +def VecQ32 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v16i1, v16i1, v32i1, v32i1, v16i1]>; // HVX register classes @@ -263,7 +272,8 @@ def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024, [RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>; } -def HvxQR : RegisterClass<"Hexagon", [VecI1], 512, (add Q0, Q1, Q2, Q3)> { +def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 512, + (add Q0, Q1, Q2, Q3)> { let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>; } diff --git a/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll b/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll new file mode 100644 index 00000000000..b4d78d5e50f --- /dev/null +++ b/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll @@ -0,0 +1,294 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; --- Byte + +; CHECK-LABEL: test_00: +; CHECK: q[[Q000:[0-3]]] = vcmp.eq(v0.b,v1.b) +; CHECK: v0 = vmux(q[[Q000]],v0,v1) +define <128 x i8> @test_00(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp eq <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK-LABEL: test_01: +; CHECK: q[[Q010:[0-3]]] = vcmp.eq(v0.b,v1.b) +; CHECK: q[[Q011:[0-9]]] = not(q[[Q010]]) +; CHECK: v0 = vmux(q[[Q011]],v0,v1) +define <128 x i8> @test_01(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ne <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK-LABEL: test_02: +; CHECK: q[[Q020:[0-3]]] = vcmp.gt(v1.b,v0.b) +; CHECK: v0 = vmux(q[[Q020]],v0,v1) +define <128 x i8> @test_02(<128 x i8> %v0, <128 x i8> %v1) 
#0 { + %t0 = icmp slt <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK-LABEL: test_03: +; CHECK: q[[Q030:[0-3]]] = vcmp.gt(v0.b,v1.b) +; CHECK: q[[Q031:[0-9]]] = not(q[[Q030]]) +; CHECK: v0 = vmux(q[[Q031]],v0,v1) +define <128 x i8> @test_03(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp sle <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK-LABEL: test_04: +; CHECK: q[[Q040:[0-3]]] = vcmp.gt(v0.b,v1.b) +; CHECK: v0 = vmux(q[[Q040]],v0,v1) +define <128 x i8> @test_04(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp sgt <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK-LABEL: test_05: +; CHECK: q[[Q050:[0-3]]] = vcmp.gt(v1.b,v0.b) +; CHECK: q[[Q051:[0-9]]] = not(q[[Q050]]) +; CHECK: v0 = vmux(q[[Q051]],v0,v1) +define <128 x i8> @test_05(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp sge <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK-LABEL: test_06: +; CHECK: q[[Q060:[0-3]]] = vcmp.gt(v1.ub,v0.ub) +; CHECK: v0 = vmux(q[[Q060]],v0,v1) +define <128 x i8> @test_06(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ult <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK-LABEL: test_07: +; CHECK: q[[Q070:[0-3]]] = vcmp.gt(v0.ub,v1.ub) +; CHECK: q[[Q071:[0-9]]] = not(q[[Q070]]) +; CHECK: v0 = vmux(q[[Q071]],v0,v1) +define <128 x i8> @test_07(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ule <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK-LABEL: test_08: +; CHECK: q[[Q080:[0-3]]] = vcmp.gt(v0.ub,v1.ub) +; CHECK: v0 = vmux(q[[Q080]],v0,v1) +define <128 x i8> @test_08(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ugt <128 x i8> %v0, %v1 + %t1 = select <128 x i1> 
%t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK-LABEL: test_09: +; CHECK: q[[Q090:[0-3]]] = vcmp.gt(v1.ub,v0.ub) +; CHECK: q[[Q091:[0-9]]] = not(q[[Q090]]) +; CHECK: v0 = vmux(q[[Q091]],v0,v1) +define <128 x i8> @test_09(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp uge <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; --- Half + +; CHECK-LABEL: test_10: +; CHECK: q[[Q100:[0-3]]] = vcmp.eq(v0.h,v1.h) +; CHECK: v0 = vmux(q[[Q100]],v0,v1) +define <64 x i16> @test_10(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp eq <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK-LABEL: test_11: +; CHECK: q[[Q110:[0-3]]] = vcmp.eq(v0.h,v1.h) +; CHECK: q[[Q111:[0-9]]] = not(q[[Q110]]) +; CHECK: v0 = vmux(q[[Q111]],v0,v1) +define <64 x i16> @test_11(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ne <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK-LABEL: test_12: +; CHECK: q[[Q120:[0-3]]] = vcmp.gt(v1.h,v0.h) +; CHECK: v0 = vmux(q[[Q120]],v0,v1) +define <64 x i16> @test_12(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp slt <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK-LABEL: test_13: +; CHECK: q[[Q130:[0-3]]] = vcmp.gt(v0.h,v1.h) +; CHECK: q[[Q131:[0-9]]] = not(q[[Q130]]) +; CHECK: v0 = vmux(q[[Q131]],v0,v1) +define <64 x i16> @test_13(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp sle <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK-LABEL: test_14: +; CHECK: q[[Q140:[0-3]]] = vcmp.gt(v0.h,v1.h) +; CHECK: v0 = vmux(q[[Q140]],v0,v1) +define <64 x i16> @test_14(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp sgt <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; 
CHECK-LABEL: test_15: +; CHECK: q[[Q150:[0-3]]] = vcmp.gt(v1.h,v0.h) +; CHECK: q[[Q151:[0-9]]] = not(q[[Q150]]) +; CHECK: v0 = vmux(q[[Q151]],v0,v1) +define <64 x i16> @test_15(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp sge <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK-LABEL: test_16: +; CHECK: q[[Q160:[0-3]]] = vcmp.gt(v1.uh,v0.uh) +; CHECK: v0 = vmux(q[[Q160]],v0,v1) +define <64 x i16> @test_16(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ult <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK-LABEL: test_17: +; CHECK: q[[Q170:[0-3]]] = vcmp.gt(v0.uh,v1.uh) +; CHECK: q[[Q171:[0-9]]] = not(q[[Q170]]) +; CHECK: v0 = vmux(q[[Q171]],v0,v1) +define <64 x i16> @test_17(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ule <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK-LABEL: test_18: +; CHECK: q[[Q180:[0-3]]] = vcmp.gt(v0.uh,v1.uh) +; CHECK: v0 = vmux(q[[Q180]],v0,v1) +define <64 x i16> @test_18(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ugt <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK-LABEL: test_19: +; CHECK: q[[Q190:[0-3]]] = vcmp.gt(v1.uh,v0.uh) +; CHECK: q[[Q191:[0-9]]] = not(q[[Q190]]) +; CHECK: v0 = vmux(q[[Q191]],v0,v1) +define <64 x i16> @test_19(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp uge <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; --- Word + +; CHECK-LABEL: test_20: +; CHECK: q[[Q200:[0-3]]] = vcmp.eq(v0.w,v1.w) +; CHECK: v0 = vmux(q[[Q200]],v0,v1) +define <32 x i32> @test_20(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp eq <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK-LABEL: test_21: +; CHECK: q[[Q210:[0-3]]] = 
vcmp.eq(v0.w,v1.w) +; CHECK: q[[Q211:[0-9]]] = not(q[[Q210]]) +; CHECK: v0 = vmux(q[[Q211]],v0,v1) +define <32 x i32> @test_21(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp ne <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK-LABEL: test_22: +; CHECK: q[[Q220:[0-3]]] = vcmp.gt(v1.w,v0.w) +; CHECK: v0 = vmux(q[[Q220]],v0,v1) +define <32 x i32> @test_22(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp slt <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK-LABEL: test_23: +; CHECK: q[[Q230:[0-3]]] = vcmp.gt(v0.w,v1.w) +; CHECK: q[[Q231:[0-9]]] = not(q[[Q230]]) +; CHECK: v0 = vmux(q[[Q231]],v0,v1) +define <32 x i32> @test_23(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp sle <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK-LABEL: test_24: +; CHECK: q[[Q240:[0-3]]] = vcmp.gt(v0.w,v1.w) +; CHECK: v0 = vmux(q[[Q240]],v0,v1) +define <32 x i32> @test_24(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp sgt <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK-LABEL: test_25: +; CHECK: q[[Q250:[0-3]]] = vcmp.gt(v1.w,v0.w) +; CHECK: q[[Q251:[0-9]]] = not(q[[Q250]]) +; CHECK: v0 = vmux(q[[Q251]],v0,v1) +define <32 x i32> @test_25(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp sge <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK-LABEL: test_26: +; CHECK: q[[Q260:[0-3]]] = vcmp.gt(v1.uw,v0.uw) +; CHECK: v0 = vmux(q[[Q260]],v0,v1) +define <32 x i32> @test_26(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp ult <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK-LABEL: test_27: +; CHECK: q[[Q270:[0-3]]] = vcmp.gt(v0.uw,v1.uw) +; CHECK: q[[Q271:[0-9]]] = not(q[[Q270]]) +; CHECK: v0 = 
vmux(q[[Q271]],v0,v1) +define <32 x i32> @test_27(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp ule <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK-LABEL: test_28: +; CHECK: q[[Q280:[0-3]]] = vcmp.gt(v0.uw,v1.uw) +; CHECK: v0 = vmux(q[[Q280]],v0,v1) +define <32 x i32> @test_28(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp ugt <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK-LABEL: test_29: +; CHECK: q[[Q290:[0-3]]] = vcmp.gt(v1.uw,v0.uw) +; CHECK: q[[Q291:[0-9]]] = not(q[[Q290]]) +; CHECK: v0 = vmux(q[[Q291]],v0,v1) +define <32 x i32> @test_29(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp uge <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } diff --git a/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll b/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll new file mode 100644 index 00000000000..4c0e19791d4 --- /dev/null +++ b/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll @@ -0,0 +1,294 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; --- Byte + +; CHECK-LABEL: test_00: +; CHECK: q[[Q000:[0-3]]] = vcmp.eq(v0.b,v1.b) +; CHECK: v0 = vmux(q[[Q000]],v0,v1) +define <64 x i8> @test_00(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp eq <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK-LABEL: test_01: +; CHECK: q[[Q010:[0-3]]] = vcmp.eq(v0.b,v1.b) +; CHECK: q[[Q011:[0-9]]] = not(q[[Q010]]) +; CHECK: v0 = vmux(q[[Q011]],v0,v1) +define <64 x i8> @test_01(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ne <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK-LABEL: test_02: +; CHECK: q[[Q020:[0-3]]] = vcmp.gt(v1.b,v0.b) +; CHECK: v0 = 
vmux(q[[Q020]],v0,v1) +define <64 x i8> @test_02(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp slt <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK-LABEL: test_03: +; CHECK: q[[Q030:[0-3]]] = vcmp.gt(v0.b,v1.b) +; CHECK: q[[Q031:[0-9]]] = not(q[[Q030]]) +; CHECK: v0 = vmux(q[[Q031]],v0,v1) +define <64 x i8> @test_03(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp sle <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK-LABEL: test_04: +; CHECK: q[[Q040:[0-3]]] = vcmp.gt(v0.b,v1.b) +; CHECK: v0 = vmux(q[[Q040]],v0,v1) +define <64 x i8> @test_04(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp sgt <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK-LABEL: test_05: +; CHECK: q[[Q050:[0-3]]] = vcmp.gt(v1.b,v0.b) +; CHECK: q[[Q051:[0-9]]] = not(q[[Q050]]) +; CHECK: v0 = vmux(q[[Q051]],v0,v1) +define <64 x i8> @test_05(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp sge <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK-LABEL: test_06: +; CHECK: q[[Q060:[0-3]]] = vcmp.gt(v1.ub,v0.ub) +; CHECK: v0 = vmux(q[[Q060]],v0,v1) +define <64 x i8> @test_06(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ult <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK-LABEL: test_07: +; CHECK: q[[Q070:[0-3]]] = vcmp.gt(v0.ub,v1.ub) +; CHECK: q[[Q071:[0-9]]] = not(q[[Q070]]) +; CHECK: v0 = vmux(q[[Q071]],v0,v1) +define <64 x i8> @test_07(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ule <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK-LABEL: test_08: +; CHECK: q[[Q080:[0-3]]] = vcmp.gt(v0.ub,v1.ub) +; CHECK: v0 = vmux(q[[Q080]],v0,v1) +define <64 x i8> @test_08(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ugt <64 x i8> %v0, 
%v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK-LABEL: test_09: +; CHECK: q[[Q090:[0-3]]] = vcmp.gt(v1.ub,v0.ub) +; CHECK: q[[Q091:[0-9]]] = not(q[[Q090]]) +; CHECK: v0 = vmux(q[[Q091]],v0,v1) +define <64 x i8> @test_09(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp uge <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; --- Half + +; CHECK-LABEL: test_10: +; CHECK: q[[Q100:[0-3]]] = vcmp.eq(v0.h,v1.h) +; CHECK: v0 = vmux(q[[Q100]],v0,v1) +define <32 x i16> @test_10(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp eq <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK-LABEL: test_11: +; CHECK: q[[Q110:[0-3]]] = vcmp.eq(v0.h,v1.h) +; CHECK: q[[Q111:[0-9]]] = not(q[[Q110]]) +; CHECK: v0 = vmux(q[[Q111]],v0,v1) +define <32 x i16> @test_11(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ne <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK-LABEL: test_12: +; CHECK: q[[Q120:[0-3]]] = vcmp.gt(v1.h,v0.h) +; CHECK: v0 = vmux(q[[Q120]],v0,v1) +define <32 x i16> @test_12(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp slt <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK-LABEL: test_13: +; CHECK: q[[Q130:[0-3]]] = vcmp.gt(v0.h,v1.h) +; CHECK: q[[Q131:[0-9]]] = not(q[[Q130]]) +; CHECK: v0 = vmux(q[[Q131]],v0,v1) +define <32 x i16> @test_13(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp sle <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK-LABEL: test_14: +; CHECK: q[[Q140:[0-3]]] = vcmp.gt(v0.h,v1.h) +; CHECK: v0 = vmux(q[[Q140]],v0,v1) +define <32 x i16> @test_14(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp sgt <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x 
i16> %t1 +} + +; CHECK-LABEL: test_15: +; CHECK: q[[Q150:[0-3]]] = vcmp.gt(v1.h,v0.h) +; CHECK: q[[Q151:[0-9]]] = not(q[[Q150]]) +; CHECK: v0 = vmux(q[[Q151]],v0,v1) +define <32 x i16> @test_15(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp sge <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK-LABEL: test_16: +; CHECK: q[[Q160:[0-3]]] = vcmp.gt(v1.uh,v0.uh) +; CHECK: v0 = vmux(q[[Q160]],v0,v1) +define <32 x i16> @test_16(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ult <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK-LABEL: test_17: +; CHECK: q[[Q170:[0-3]]] = vcmp.gt(v0.uh,v1.uh) +; CHECK: q[[Q171:[0-9]]] = not(q[[Q170]]) +; CHECK: v0 = vmux(q[[Q171]],v0,v1) +define <32 x i16> @test_17(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ule <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK-LABEL: test_18: +; CHECK: q[[Q180:[0-3]]] = vcmp.gt(v0.uh,v1.uh) +; CHECK: v0 = vmux(q[[Q180]],v0,v1) +define <32 x i16> @test_18(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ugt <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK-LABEL: test_19: +; CHECK: q[[Q190:[0-3]]] = vcmp.gt(v1.uh,v0.uh) +; CHECK: q[[Q191:[0-9]]] = not(q[[Q190]]) +; CHECK: v0 = vmux(q[[Q191]],v0,v1) +define <32 x i16> @test_19(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp uge <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; --- Word + +; CHECK-LABEL: test_20: +; CHECK: q[[Q200:[0-3]]] = vcmp.eq(v0.w,v1.w) +; CHECK: v0 = vmux(q[[Q200]],v0,v1) +define <16 x i32> @test_20(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp eq <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK-LABEL: test_21: +; CHECK: 
q[[Q210:[0-3]]] = vcmp.eq(v0.w,v1.w) +; CHECK: q[[Q211:[0-9]]] = not(q[[Q210]]) +; CHECK: v0 = vmux(q[[Q211]],v0,v1) +define <16 x i32> @test_21(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp ne <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK-LABEL: test_22: +; CHECK: q[[Q220:[0-3]]] = vcmp.gt(v1.w,v0.w) +; CHECK: v0 = vmux(q[[Q220]],v0,v1) +define <16 x i32> @test_22(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp slt <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK-LABEL: test_23: +; CHECK: q[[Q230:[0-3]]] = vcmp.gt(v0.w,v1.w) +; CHECK: q[[Q231:[0-9]]] = not(q[[Q230]]) +; CHECK: v0 = vmux(q[[Q231]],v0,v1) +define <16 x i32> @test_23(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp sle <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK-LABEL: test_24: +; CHECK: q[[Q240:[0-3]]] = vcmp.gt(v0.w,v1.w) +; CHECK: v0 = vmux(q[[Q240]],v0,v1) +define <16 x i32> @test_24(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp sgt <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK-LABEL: test_25: +; CHECK: q[[Q250:[0-3]]] = vcmp.gt(v1.w,v0.w) +; CHECK: q[[Q251:[0-9]]] = not(q[[Q250]]) +; CHECK: v0 = vmux(q[[Q251]],v0,v1) +define <16 x i32> @test_25(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp sge <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK-LABEL: test_26: +; CHECK: q[[Q260:[0-3]]] = vcmp.gt(v1.uw,v0.uw) +; CHECK: v0 = vmux(q[[Q260]],v0,v1) +define <16 x i32> @test_26(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp ult <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK-LABEL: test_27: +; CHECK: q[[Q270:[0-3]]] = vcmp.gt(v0.uw,v1.uw) +; CHECK: q[[Q271:[0-9]]] = not(q[[Q270]]) 
+; CHECK: v0 = vmux(q[[Q271]],v0,v1) +define <16 x i32> @test_27(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp ule <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK-LABEL: test_28: +; CHECK: q[[Q280:[0-3]]] = vcmp.gt(v0.uw,v1.uw) +; CHECK: v0 = vmux(q[[Q280]],v0,v1) +define <16 x i32> @test_28(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp ugt <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK-LABEL: test_29: +; CHECK: q[[Q290:[0-3]]] = vcmp.gt(v1.uw,v0.uw) +; CHECK: q[[Q291:[0-9]]] = not(q[[Q290]]) +; CHECK: v0 = vmux(q[[Q291]],v0,v1) +define <16 x i32> @test_29(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp uge <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }