From 81f6df5c35734845b0b84e7b6c8ab79b2676ee4f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 15 Jun 2017 14:39:34 +0000 Subject: [PATCH] Revert r305465: [X86][AVX512] Improve lowering of AVX512 compare intrinsics (remove redundant shift left+right instructions). This is causing windows buildbot failures git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305470 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 95 - lib/Target/X86/X86InstrAVX512.td | 645 +- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll | 32 + test/CodeGen/X86/avx512vl-vec-masked-cmp.ll | 13484 ---------------------- test/CodeGen/X86/compress_expand.ll | 8 +- test/CodeGen/X86/masked_memop.ll | 16 +- 6 files changed, 75 insertions(+), 14205 deletions(-) delete mode 100644 test/CodeGen/X86/avx512vl-vec-masked-cmp.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cfdbbc3ee32..29b438e9bff 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5059,20 +5059,6 @@ static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256); } -// Return true if the instruction zeroes the unused upper part of the -// destination and accepts mask. -static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) { - switch (Opcode) { - default: - return false; - case X86ISD::PCMPEQM: - case X86ISD::PCMPGTM: - case X86ISD::CMPM: - case X86ISD::CMPMU: - return true; - } -} - /// Insert i1-subvector to i1-vector. static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -5105,22 +5091,6 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, // 3. Subvector should be inserted in the middle (for example v2i1 // to v16i1, index 2) - // If this node widens - by concatenating zeroes - the type of the result - // of a node with instruction that zeroes all upper (irrelevant) bits of the - // output register, mark this node as legal to enable replacing them with - // the v8i1 version of the previous instruction during instruction selection. - // For example, VPCMPEQDZ128rr instruction stores its v4i1 result in a k-reg, - // while zeroing all the upper remaining 60 bits of the register. if the - // result of such instruction is inserted into an allZeroVector, then we can - // safely remove insert_vector (in instruction selection) as the cmp instr - // already zeroed the rest of the register. - if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 && - (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) || - (SubVec.getOpcode() == ISD::AND && - (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) || - isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode()))))) - return Op; - // extend to natively supported kshift MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; MVT WideOpVT = OpVT; @@ -7943,60 +7913,6 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl); } -// Return true if all the operands of the given CONCAT_VECTORS node are zeros -// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0) -static bool isExpandWithZeros(const SDValue &Op) { - assert(Op.getOpcode() == ISD::CONCAT_VECTORS && - "Expand with zeros only possible in CONCAT_VECTORS nodes!"); - - for (unsigned i = 1; i < Op.getNumOperands(); i++) - if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode())) - return false; - - return true; -} - -// Returns true if the given node is a type promotion (by concatenating i1 -// zeros) of the result of a node that already zeros all upper bits of -// k-register. -static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) { - unsigned Opc = Op.getOpcode(); - - assert(Opc == ISD::CONCAT_VECTORS && - Op.getSimpleValueType().getVectorElementType() == MVT::i1 && - "Unexpected node to check for type promotion!"); - - // As long as we are concatenating zeros to the upper part of a previous node - // result, climb up the tree until a node with different opcode is - // encountered - while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) { - if (Opc == ISD::INSERT_SUBVECTOR) { - if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) && - Op.getConstantOperandVal(2) == 0) - Op = Op.getOperand(1); - else - return SDValue(); - } else { // Opc == ISD::CONCAT_VECTORS - if (isExpandWithZeros(Op)) - Op = Op.getOperand(0); - else - return SDValue(); - } - Opc = Op.getOpcode(); - } - - // Check if the first inserted node zeroes the upper bits, or an 'and' result - // of a node that zeros the upper bits (its masked version). - if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) || - (Op.getOpcode() == ISD::AND && - (isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) || - isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) { - return Op; - } - - return SDValue(); -} - static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG & DAG) { @@ -8007,17 +7923,6 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, assert(isPowerOf2_32(NumOfOperands) && "Unexpected number of operands in CONCAT_VECTORS"); - // If this node promotes - by concatenating zeroes - the type of the result - // of a node with instruction that zeroes all upper (irrelevant) bits of the - // output register, mark it as legal and catch the pattern in instruction - // selection to avoid emitting extra insturctions (for zeroing upper bits). - if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) { - SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64); - SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted, - ZeroC); - } - SDValue Undef = DAG.getUNDEF(ResVT); if (NumOfOperands > 2) { // Specialize the cases when all, or all but one, of the operands are undef. diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 6fa9fdc73f8..2620679df25 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -185,20 +185,6 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo; -class X86KVectorVTInfo { - RegisterClass KRC = _krc; - RegisterClass KRCWM = _krcwm; - ValueType KVT = _vt; -} - -def v2i1_info : X86KVectorVTInfo; -def v4i1_info : X86KVectorVTInfo; -def v8i1_info : X86KVectorVTInfo; -def v16i1_info : X86KVectorVTInfo; -def v32i1_info : X86KVectorVTInfo; -def v64i1_info : X86KVectorVTInfo; - // This multiclass generates the masking variants from the non-masking // variant. It only provides the assembly pieces for the masking variants. // It assumes custom ISel patterns for masking which can be provided as @@ -1749,217 +1735,17 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; +let Predicates = [HasAVX512, NoVLX] in { +def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (COPY_TO_REGCLASS (VPCMPGTDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; -multiclass avx512_icmp_packed_lowering Preds> { -let Predicates = Preds in { - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rr) _.RC:$src1, _.RC:$src2), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (OpNode (_.VT _.RC:$src1), - (_.VT (bitconvert (_.LdFrag addr:$src2))))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rm) _.RC:$src1, addr:$src2), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and _.KRCWM:$mask, - (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rrk) _.KRCWM:$mask, - _.RC:$src1, _.RC:$src2), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and (_.KVT _.KRCWM:$mask), - (_.KVT (OpNode (_.VT _.RC:$src1), - (_.VT (bitconvert - (_.LdFrag addr:$src2))))))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmk) _.KRCWM:$mask, - _.RC:$src1, addr:$src2), - NewInf.KRC)>; -} -} - -multiclass avx512_icmp_packed_rmb_lowering Preds> - : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> { -let Predicates = Preds in { - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (OpNode (_.VT _.RC:$src1), - (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmb) _.RC:$src1, addr:$src2), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and (_.KVT _.KRCWM:$mask), - (_.KVT (OpNode (_.VT _.RC:$src1), - (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmbk) _.KRCWM:$mask, - _.RC:$src1, addr:$src2), - NewInf.KRC)>; -} -} - -// VPCMPEQB - i8 -defm : avx512_icmp_packed_lowering; -defm : avx512_icmp_packed_lowering; - -defm : avx512_icmp_packed_lowering; - -// VPCMPEQW - i16 -defm : avx512_icmp_packed_lowering; -defm : avx512_icmp_packed_lowering; -defm : avx512_icmp_packed_lowering; - -defm : avx512_icmp_packed_lowering; -defm : avx512_icmp_packed_lowering; - -defm : avx512_icmp_packed_lowering; - -// VPCMPEQD - i32 -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -// VPCMPEQQ - i64 -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -// VPCMPGTB - i8 -defm : avx512_icmp_packed_lowering; -defm : avx512_icmp_packed_lowering; - -defm : avx512_icmp_packed_lowering; - -// VPCMPGTW - i16 -defm : avx512_icmp_packed_lowering; -defm : avx512_icmp_packed_lowering; -defm : avx512_icmp_packed_lowering; - -defm : avx512_icmp_packed_lowering; -defm : avx512_icmp_packed_lowering; - -defm : avx512_icmp_packed_lowering; - -// VPCMPGTD - i32 -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -// VPCMPGTQ - i64 -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; - -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; -defm : avx512_icmp_packed_rmb_lowering; +def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (COPY_TO_REGCLASS (VPCMPEQDZrr + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; +} multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, X86VectorVTInfo _> { @@ -2122,237 +1908,6 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info, defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; -multiclass avx512_icmp_cc_packed_lowering Preds> { -let Predicates = Preds in { - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (OpNode (_.VT _.RC:$src1), - (_.VT _.RC:$src2), - imm:$cc)), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rri) _.RC:$src1, - _.RC:$src2, - imm:$cc), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (OpNode (_.VT _.RC:$src1), - (_.VT (bitconvert (_.LdFrag addr:$src2))), - imm:$cc)), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmi) _.RC:$src1, - addr:$src2, - imm:$cc), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and _.KRCWM:$mask, - (OpNode (_.VT _.RC:$src1), - (_.VT _.RC:$src2), - imm:$cc))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rrik) _.KRCWM:$mask, - _.RC:$src1, - _.RC:$src2, - imm:$cc), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and (_.KVT _.KRCWM:$mask), - (_.KVT (OpNode (_.VT _.RC:$src1), - (_.VT (bitconvert - (_.LdFrag addr:$src2))), - imm:$cc)))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmik) _.KRCWM:$mask, - _.RC:$src1, - addr:$src2, - imm:$cc), - NewInf.KRC)>; -} -} - -multiclass avx512_icmp_cc_packed_rmb_lowering Preds> - : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> { -let Predicates = Preds in { - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (OpNode (_.VT _.RC:$src1), - (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - imm:$cc)), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmib) _.RC:$src1, - addr:$src2, - imm:$cc), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (and (_.KVT _.KRCWM:$mask), - (_.KVT (OpNode (_.VT _.RC:$src1), - (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), - imm:$cc)))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmibk) _.KRCWM:$mask, - _.RC:$src1, - addr:$src2, - imm:$cc), - NewInf.KRC)>; -} -} - -// VPCMPB - i8 -defm : avx512_icmp_cc_packed_lowering; -defm : avx512_icmp_cc_packed_lowering; - -defm : avx512_icmp_cc_packed_lowering; - -// VPCMPW - i16 -defm : avx512_icmp_cc_packed_lowering; -defm : avx512_icmp_cc_packed_lowering; -defm : avx512_icmp_cc_packed_lowering; - -defm : avx512_icmp_cc_packed_lowering; -defm : avx512_icmp_cc_packed_lowering; - -defm : avx512_icmp_cc_packed_lowering; - -// VPCMPD - i32 -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -// VPCMPQ - i64 -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -// VPCMPUB - i8 -defm : avx512_icmp_cc_packed_lowering; -defm : avx512_icmp_cc_packed_lowering; - -defm : avx512_icmp_cc_packed_lowering; - -// VPCMPUW - i16 -defm : avx512_icmp_cc_packed_lowering; -defm : avx512_icmp_cc_packed_lowering; -defm : avx512_icmp_cc_packed_lowering; - -defm : avx512_icmp_cc_packed_lowering; -defm : avx512_icmp_cc_packed_lowering; - -defm : avx512_icmp_cc_packed_lowering; - -// VPCMPUD - i32 -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -// VPCMPUQ - i64 -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; -defm : avx512_icmp_cc_packed_rmb_lowering; - multiclass avx512_vcmp_common { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, @@ -2443,108 +1998,21 @@ defm VCMPPD : avx512_vcmp, defm VCMPPS : avx512_vcmp, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -multiclass avx512_fcmp_cc_packed_lowering Preds> { -let Predicates = Preds in { - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (X86cmpm (_.VT _.RC:$src1), - (_.VT _.RC:$src2), - imm:$cc)), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rri) _.RC:$src1, - _.RC:$src2, - imm:$cc), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (X86cmpm (_.VT _.RC:$src1), - (_.VT (bitconvert (_.LdFrag addr:$src2))), - imm:$cc)), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmi) _.RC:$src1, - addr:$src2, - imm:$cc), - NewInf.KRC)>; - - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (X86cmpm (_.VT _.RC:$src1), - (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - imm:$cc)), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rmbi) _.RC:$src1, - addr:$src2, - imm:$cc), - NewInf.KRC)>; -} -} - -multiclass avx512_fcmp_cc_packed_sae_lowering Preds> - : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> { - -let Predicates = Preds in - def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), - (_.KVT (X86cmpmRnd (_.VT _.RC:$src1), - (_.VT _.RC:$src2), - imm:$cc, - (i32 FROUND_NO_EXC))), - (i64 0)), - (COPY_TO_REGCLASS (!cast(InstrStr##rrib) _.RC:$src1, - _.RC:$src2, - imm:$cc), - NewInf.KRC)>; -} - - -// VCMPPS - f32 -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; - -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; - -defm : avx512_fcmp_cc_packed_sae_lowering; -defm : avx512_fcmp_cc_packed_sae_lowering; - -// VCMPPD - f64 -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; - -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; -defm : avx512_fcmp_cc_packed_lowering; - -defm : avx512_fcmp_cc_packed_sae_lowering; -defm : avx512_fcmp_cc_packed_sae_lowering; -defm : avx512_fcmp_cc_packed_sae_lowering; +def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (VCMPPSZrri + (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; +def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (VPCMPDZrri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; +def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (VPCMPUDZrri + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; // ---------------------------------------------------------------- // FPClass @@ -3030,69 +2498,6 @@ multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>; -multiclass axv512_icmp_packed_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrr) - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; - -def : Pat<(insert_subvector (v16i1 immAllZerosV), - (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (i64 0)), - (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrr) - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), - (i8 8)), (i8 8))>; - -def : Pat<(insert_subvector (v16i1 immAllZerosV), - (v8i1 (and VK8:$mask, - (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))), - (i64 0)), - (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrrk) - (COPY_TO_REGCLASS VK8:$mask, VK16), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), - (i8 8)), (i8 8))>; -} - -multiclass axv512_icmp_packed_cc_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (!cast(InstStr##Zrri) - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; - -def : Pat<(insert_subvector (v16i1 immAllZerosV), - (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), - (i64 0)), - (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrri) - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), - (i8 8)), (i8 8))>; - -def : Pat<(insert_subvector (v16i1 immAllZerosV), - (v8i1 (and VK8:$mask, - (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))), - (i64 0)), - (KSHIFTRWri (KSHIFTLWri (!cast(InstStr##Zrrik) - (COPY_TO_REGCLASS VK8:$mask, VK16), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), - (i8 8)), (i8 8))>; -} - -let Predicates = [HasAVX512, NoVLX] in { - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; -} - // Mask setting all 0s or 1s multiclass avx512_mask_setop { let Predicates = [HasAVX512] in diff --git a/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index 9e0090c26e4..c2d8df6476b 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -1004,6 +1004,8 @@ define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) { ; CHECK-LABEL: test_pcmpeq_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1016,6 +1018,8 @@ define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1054,6 +1058,8 @@ define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) { ; CHECK-LABEL: test_pcmpgt_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1066,6 +1072,8 @@ define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1079,6 +1087,8 @@ define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_pcmpeq_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1091,6 +1101,8 @@ define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1104,6 +1116,10 @@ define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_pcmpeq_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1] +; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] +; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1116,6 +1132,10 @@ define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] +; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] +; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1129,6 +1149,8 @@ define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_pcmpgt_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1141,6 +1163,8 @@ define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1154,6 +1178,10 @@ define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_pcmpgt_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1] +; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] +; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1166,6 +1194,10 @@ define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1] +; CHECK-NEXT: kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e] +; CHECK-NEXT: kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e] +; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c] +; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] diff --git a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll deleted file mode 100644 index 6779c0753c4..00000000000 --- a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ /dev/null @@ -1,13484 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=NoVLX - -define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqb_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp eq <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqb (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp eq <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp eq <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp eq <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqb_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp eq <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqb (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp eq <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp eq <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp eq <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqb_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %1 = bitcast <4 x i64> %__b to <32 x i8> - %2 = icmp eq <32 x i8> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <32 x i8> - %2 = icmp eq <32 x i8> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %1 = bitcast <4 x i64> %__b to <32 x i8> - %2 = icmp eq <32 x i8> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <32 x i8> - %2 = icmp eq <32 x i8> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp eq <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp eq <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp eq <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp eq <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp eq <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp eq <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp eq <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp eq <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp eq <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %1 = bitcast <8 x i64> %__b to <32 x i16> - %2 = icmp eq <32 x i16> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqw (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <32 x i16> - %2 = icmp eq <32 x i16> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %1 = bitcast <8 x i64> %__b to <32 x i16> - %2 = icmp eq <32 x i16> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <32 x i16> - %2 = icmp eq <32 x i16> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp eq <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp eq <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %3, %2 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp eq <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %3, %2 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - -define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - - -define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - - -define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp eq <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp eq <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp eq <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp eq <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp eq <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp eq <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp eq <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp eq <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp eq <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp eq <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp sgt <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp sgt <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp sgt <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp sgt <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp sgt <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp sgt <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp sgt <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp sgt <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %1 = bitcast <4 x i64> %__b to <32 x i8> - %2 = icmp sgt <32 x i8> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <32 x i8> - %2 = icmp sgt <32 x i8> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %1 = bitcast <4 x i64> %__b to <32 x i8> - %2 = icmp sgt <32 x i8> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <32 x i8> - %2 = icmp sgt <32 x i8> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sgt <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp sgt <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp sgt <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp sgt <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp sgt <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp sgt <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp sgt <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp sgt <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp sgt <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %1 = bitcast <8 x i64> %__b to <32 x i16> - %2 = icmp sgt <32 x i16> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtw (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <32 x i16> - %2 = icmp sgt <32 x i16> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %1 = bitcast <8 x i64> %__b to <32 x i16> - %2 = icmp sgt <32 x i16> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <32 x i16> - %2 = icmp sgt <32 x i16> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sgt <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sgt <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %3, %2 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp sgt <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %3, %2 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - -define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - - -define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - - -define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sgt <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sgt <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sgt <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp sge <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp sge <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp sge <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp sge <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp sge <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp sge <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp sge <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp sge <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %1 = bitcast <4 x i64> %__b to <32 x i8> - %2 = icmp sge <32 x i8> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltb (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <32 x i8> - %2 = icmp sge <32 x i8> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %1 = bitcast <4 x i64> %__b to <32 x i8> - %2 = icmp sge <32 x i8> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <32 x i8> - %2 = icmp sge <32 x i8> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp sge <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp sge <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltw (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp sge <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp sge <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp sge <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp sge <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltw (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp sge <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp sge <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp sge <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %1 = bitcast <8 x i64> %__b to <32 x i16> - %2 = icmp sge <32 x i16> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltw (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <32 x i16> - %2 = icmp sge <32 x i16> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %1 = bitcast <8 x i64> %__b to <32 x i16> - %2 = icmp sge <32 x i16> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <32 x i16> - %2 = icmp sge <32 x i16> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp sge <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rdi), %ymm1 -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rsi), %ymm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rdi), %ymm1 -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rsi), %ymm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rdi), %ymm1 -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rsi), %ymm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp sge <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rdi), %zmm1 -; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rsi), %zmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %3, %2 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rdi), %zmm1 -; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastd (%rsi), %zmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp sge <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %3, %2 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - -define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - - -define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - - -define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp sge <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sge <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %zmm1 -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sge <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %zmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sge <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %zmm1 -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sge <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %zmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sge <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp sge <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rdi), %zmm1 -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sge <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpbroadcastq (%rsi), %zmm1 -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp sge <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultb_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp ult <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltub (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp ult <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp ult <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp ult <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultb_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp ult <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltub (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp ult <16 x i8> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %1 = bitcast <2 x i64> %__b to <16 x i8> - %2 = icmp ult <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <16 x i8> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <16 x i8> - %2 = icmp ult <16 x i8> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultb_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %1 = bitcast <4 x i64> %__b to <32 x i8> - %2 = icmp ult <32 x i8> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltub (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <32 x i8> - %2 = icmp ult <32 x i8> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %1 = bitcast <4 x i64> %__b to <32 x i8> - %2 = icmp ult <32 x i8> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <32 x i8> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <32 x i8> - %2 = icmp ult <32 x i8> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %1 = bitcast <2 x i64> %__b to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <8 x i16> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <8 x i16> - %2 = icmp ult <8 x i16> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp ult <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp ult <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp ult <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp ult <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp ult <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp ult <16 x i16> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %1 = bitcast <4 x i64> %__b to <16 x i16> - %2 = icmp ult <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <16 x i16> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <16 x i16> - %2 = icmp ult <16 x i16> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %1 = bitcast <8 x i64> %__b to <32 x i16> - %2 = icmp ult <32 x i16> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuw (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <32 x i16> - %2 = icmp ult <32 x i16> %0, %1 - %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %1 = bitcast <8 x i64> %__b to <32 x i16> - %2 = icmp ult <32 x i16> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <32 x i16> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <32 x i16> - %2 = icmp ult <32 x i16> %0, %1 - %3 = bitcast i32 %__u to <32 x i1> - %4 = and <32 x i1> %2, %3 - %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %1 = bitcast <2 x i64> %__b to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x i32> - %load = load i32, i32* %__b - %vec = insertelement <4 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> - %2 = icmp ult <4 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %1 = bitcast <4 x i64> %__b to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x i32> - %load = load i32, i32* %__b - %vec = insertelement <8 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> - %2 = icmp ult <8 x i32> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %3, %2 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %1 = bitcast <8 x i64> %__b to <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %2, %3 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x i32> - %load = load i32, i32* %__b - %vec = insertelement <16 x i32> undef, i32 %load, i32 0 - %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> - %2 = icmp ult <16 x i32> %0, %1 - %3 = bitcast i16 %__u to <16 x i1> - %4 = and <16 x i1> %3, %2 - %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v4i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - -define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - - -define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> - %6 = bitcast <4 x i1> %5 to i4 - ret i4 %6 -} - - -define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %1 = bitcast <2 x i64> %__b to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x i64> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %2, %extract.i - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x i64> - %load = load i64, i64* %__b - %vec = insertelement <2 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> - %2 = icmp ult <2 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> - %4 = and <2 x i1> %extract.i, %2 - %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - - -define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x i64> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp ult <4 x i64> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %load = load i64, i64* %__b - %vec = insertelement <4 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> - %2 = icmp ult <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %extract.i, %2 - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - -define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp ult <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp ult <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> - %6 = bitcast <16 x i1> %5 to i16 - ret i16 %6 -} - - -define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - -define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp ult <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp ult <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> - %6 = bitcast <32 x i1> %5 to i32 - ret i32 %6 -} - - -define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %1 = bitcast <8 x i64> %__b to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - -define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x i64> - %2 = icmp ult <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %2, %3 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp ult <8 x i64> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { -; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x i64> - %load = load i64, i64* %__b - %vec = insertelement <8 x i64> undef, i64 %load, i32 0 - %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> - %2 = icmp ult <8 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %4 = and <8 x i1> %3, %2 - %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> - %6 = bitcast <64 x i1> %5 to i64 - ret i64 %6 -} - - -declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32) -define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %1 = bitcast <2 x i64> %__b to <4 x float> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x float> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %load = load float, float* %__b - %vec = insertelement <4 x float> undef, float %load, i32 0 - %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - - -define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %1 = bitcast <2 x i64> %__b to <4 x float> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x float> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %load = load float, float* %__b - %vec = insertelement <4 x float> undef, float %load, i32 0 - %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - - -define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %1 = bitcast <2 x i64> %__b to <4 x float> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x float> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %load = load float, float* %__b - %vec = insertelement <4 x float> undef, float %load, i32 0 - %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - - -define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %1 = bitcast <2 x i64> %__b to <4 x float> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <4 x float> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <4 x float> - %load = load float, float* %__b - %vec = insertelement <4 x float> undef, float %load, i32 0 - %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> - %2 = fcmp oeq <4 x float> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - - -define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x float> - %1 = bitcast <4 x i64> %__b to <8 x float> - %2 = fcmp oeq <8 x float> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vmovaps (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x float> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x float> - %2 = fcmp oeq <8 x float> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; -; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b: -; NoVLX: ## BB#0: ## %entry -; NoVLX-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 -; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftlw $8, %k0, %k0 -; NoVLX-NEXT: kshiftrw $8, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x float> - %load = load float, float* %__b - %vec = insertelement <8 x float> undef, float %load, i32 0 - %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> - %2 = fcmp oeq <8 x float> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - - -define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x float> - %1 = bitcast <4 x i64> %__b to <8 x float> - %2 = fcmp oeq <8 x float> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x float> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x float> - %2 = fcmp oeq <8 x float> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x float> - %load = load float, float* %__b - %vec = insertelement <8 x float> undef, float %load, i32 0 - %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> - %2 = fcmp oeq <8 x float> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - - -define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x float> - %1 = bitcast <4 x i64> %__b to <8 x float> - %2 = fcmp oeq <8 x float> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x float> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <8 x float> - %2 = fcmp oeq <8 x float> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <8 x float> - %load = load float, float* %__b - %vec = insertelement <8 x float> undef, float %load, i32 0 - %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> - %2 = fcmp oeq <8 x float> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - - -define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x float> - %1 = bitcast <8 x i64> %__b to <16 x float> - %2 = fcmp oeq <16 x float> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x float> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x float> - %2 = fcmp oeq <16 x float> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x float> - %load = load float, float* %__b - %vec = insertelement <16 x float> undef, float %load, i32 0 - %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> - %2 = fcmp oeq <16 x float> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - - -define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x float> - %1 = bitcast <8 x i64> %__b to <16 x float> - %2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8) - %3 = zext i16 %2 to i32 - ret i32 %3 -} - - -define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x float> - %1 = bitcast <8 x i64> %__b to <16 x float> - %2 = fcmp oeq <16 x float> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x float> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <16 x float> - %2 = fcmp oeq <16 x float> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x float> - %load = load float, float* %__b - %vec = insertelement <16 x float> undef, float %load, i32 0 - %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> - %2 = fcmp oeq <16 x float> %0, %1 - %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - - -define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: movzwl %ax, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <16 x float> - %1 = bitcast <8 x i64> %__b to <16 x float> - %2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8) - %3 = zext i16 %2 to i64 - ret i64 %3 -} - - -declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) -define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %1 = bitcast <2 x i64> %__b to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - -define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load double, double* %__b - %vec = insertelement <2 x double> undef, double %load, i32 0 - %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> - %4 = bitcast <4 x i1> %3 to i4 - ret i4 %4 -} - - -define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %1 = bitcast <2 x i64> %__b to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load double, double* %__b - %vec = insertelement <2 x double> undef, double %load, i32 0 - %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - - -define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %1 = bitcast <2 x i64> %__b to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load double, double* %__b - %vec = insertelement <2 x double> undef, double %load, i32 0 - %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - - -define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %1 = bitcast <2 x i64> %__b to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load double, double* %__b - %vec = insertelement <2 x double> undef, double %load, i32 0 - %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - - -define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %1 = bitcast <2 x i64> %__b to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load <2 x i64>, <2 x i64>* %__b - %1 = bitcast <2 x i64> %load to <2 x double> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: retq -entry: - %0 = bitcast <2 x i64> %__a to <2 x double> - %load = load double, double* %__b - %vec = insertelement <2 x double> undef, double %load, i32 0 - %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> - %2 = fcmp oeq <2 x double> %0, %1 - %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - - -define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %1 = bitcast <4 x i64> %__b to <4 x double> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x double> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - -define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %load = load double, double* %__b - %vec = insertelement <4 x double> undef, double %load, i32 0 - %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> - %4 = bitcast <8 x i1> %3 to i8 - ret i8 %4 -} - - -define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %1 = bitcast <4 x i64> %__b to <4 x double> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x double> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %load = load double, double* %__b - %vec = insertelement <4 x double> undef, double %load, i32 0 - %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - - -define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %1 = bitcast <4 x i64> %__b to <4 x double> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x double> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %load = load double, double* %__b - %vec = insertelement <4 x double> undef, double %load, i32 0 - %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - - -define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %1 = bitcast <4 x i64> %__b to <4 x double> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %load = load <4 x i64>, <4 x i64>* %__b - %1 = bitcast <4 x i64> %load to <4 x double> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x double> - %load = load double, double* %__b - %vec = insertelement <4 x double> undef, double %load, i32 0 - %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> - %2 = fcmp oeq <4 x double> %0, %1 - %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - - -define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %1 = bitcast <8 x i64> %__b to <8 x double> - %2 = fcmp oeq <8 x double> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x double> - %2 = fcmp oeq <8 x double> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - -define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %load = load double, double* %__b - %vec = insertelement <8 x double> undef, double %load, i32 0 - %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> - %2 = fcmp oeq <8 x double> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> - %4 = bitcast <16 x i1> %3 to i16 - ret i16 %4 -} - - -define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %1 = bitcast <8 x i64> %__b to <8 x double> - %2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8) - %3 = zext i8 %2 to i16 - ret i16 %3 -} - - -define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %1 = bitcast <8 x i64> %__b to <8 x double> - %2 = fcmp oeq <8 x double> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x double> - %2 = fcmp oeq <8 x double> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - -define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %load = load double, double* %__b - %vec = insertelement <8 x double> undef, double %load, i32 0 - %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> - %2 = fcmp oeq <8 x double> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> - %4 = bitcast <32 x i1> %3 to i32 - ret i32 %4 -} - - -define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %1 = bitcast <8 x i64> %__b to <8 x double> - %2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8) - %3 = zext i8 %2 to i32 - ret i32 %3 -} - - -define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %1 = bitcast <8 x i64> %__b to <8 x double> - %2 = fcmp oeq <8 x double> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %load = load <8 x i64>, <8 x i64>* %__b - %1 = bitcast <8 x i64> %load to <8 x double> - %2 = fcmp oeq <8 x double> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - -define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %load = load double, double* %__b - %vec = insertelement <8 x double> undef, double %load, i32 0 - %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> - %2 = fcmp oeq <8 x double> %0, %1 - %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> - %4 = bitcast <64 x i1> %3 to i64 - ret i64 %4 -} - - -define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { -; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask: -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__a to <8 x double> - %1 = bitcast <8 x i64> %__b to <8 x double> - %2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8) - %3 = zext i8 %2 to i64 - ret i64 %3 -} - - diff --git a/test/CodeGen/X86/compress_expand.ll b/test/CodeGen/X86/compress_expand.ll index f62e18869a9..e09fcf2a336 100644 --- a/test/CodeGen/X86/compress_expand.ll +++ b/test/CodeGen/X86/compress_expand.ll @@ -265,7 +265,9 @@ define <2 x float> @test13(float* %base, <2 x float> %src0, <2 x i32> %trigger) ; SKX: # BB#0: ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k1 +; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0 +; SKX-NEXT: kshiftlb $6, %k0, %k0 +; SKX-NEXT: kshiftrb $6, %k0, %k1 ; SKX-NEXT: vexpandps (%rdi), %xmm0 {%k1} ; SKX-NEXT: retq ; @@ -293,7 +295,9 @@ define void @test14(float* %base, <2 x float> %V, <2 x i32> %trigger) { ; SKX: # BB#0: ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k1 +; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0 +; SKX-NEXT: kshiftlb $6, %k0, %k0 +; SKX-NEXT: kshiftrb $6, %k0, %k1 ; SKX-NEXT: vcompressps %xmm0, (%rdi) {%k1} ; SKX-NEXT: retq ; diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll index 7a2e41e10a3..3c616e8a9f4 100644 --- a/test/CodeGen/X86/masked_memop.ll +++ b/test/CodeGen/X86/masked_memop.ll @@ -462,7 +462,9 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { ; SKX: ## BB#0: ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1 +; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0 +; SKX-NEXT: kshiftlw $14, %k0, %k0 +; SKX-NEXT: kshiftrw $14, %k0, %k1 ; SKX-NEXT: vmovups %xmm1, (%rdi) {%k1} ; SKX-NEXT: retq %mask = icmp eq <2 x i32> %trigger, zeroinitializer @@ -548,7 +550,9 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> % ; SKX: ## BB#0: ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1 +; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0 +; SKX-NEXT: kshiftlw $14, %k0, %k0 +; SKX-NEXT: kshiftrw $14, %k0, %k1 ; SKX-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %mask = icmp eq <2 x i32> %trigger, zeroinitializer @@ -597,7 +601,9 @@ define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { ; SKX: ## BB#0: ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1 +; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0 +; SKX-NEXT: kshiftlw $14, %k0, %k0 +; SKX-NEXT: kshiftrw $14, %k0, %k1 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] ; SKX-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} ; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 @@ -639,7 +645,9 @@ define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) { ; SKX: ## BB#0: ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 +; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 +; SKX-NEXT: kshiftlw $14, %k0, %k0 +; SKX-NEXT: kshiftrw $14, %k0, %k1 ; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z} ; SKX-NEXT: retq %mask = icmp eq <2 x i32> %trigger, zeroinitializer -- 2.11.0