Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_fpclass_pd_128 :
+ def int_x86_avx512_fpclass_pd_128 :
Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_fpclass_pd_256 :
+ def int_x86_avx512_fpclass_pd_256 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_fpclass_pd_512 :
+ def int_x86_avx512_fpclass_pd_512 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_fpclass_ps_128 :
+ def int_x86_avx512_fpclass_ps_128 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_fpclass_ps_256 :
+ def int_x86_avx512_fpclass_ps_256 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_fpclass_ps_512 :
+ def int_x86_avx512_fpclass_ps_512 :
Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_fpclass_sd :
let TargetPrefix = "x86" in {
// NOTE: These comparison intrinsics are not used by clang as long as the
// distinction in signaling behaviour is not implemented.
- def int_x86_avx512_mask_cmp_ps_512 :
+ def int_x86_avx512_cmp_ps_512 :
Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cmp_pd_512 :
+ def int_x86_avx512_cmp_pd_512 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cmp_ps_256 :
+ def int_x86_avx512_cmp_ps_256 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cmp_pd_256 :
+ def int_x86_avx512_cmp_pd_256 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cmp_ps_128 :
+ def int_x86_avx512_cmp_ps_128 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_cmp_pd_128 :
+ def int_x86_avx512_cmp_pd_128 :
Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
return true;
}
-// Upgrade the declaration of fp compare intrinsics that change return type
-// from scalar to vXi1 mask.
-static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
- Function *&NewFn) {
- // Check if the return type is a vector.
- if (F->getReturnType()->isVectorTy())
- return false;
-
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
- return true;
-}
-
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// All of the intrinsics matches below should be marked with which llvm
// version started autoupgrading them. At some point in the future we would
Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
+ Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
+ Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
if (Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
- if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
- NewFn);
- if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
- NewFn);
- if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
- NewFn);
- if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
- NewFn);
- if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
- NewFn);
- if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
- NewFn);
- if (Name == "avx512.mask.fpclass.pd.128") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_pd_128,
- NewFn);
- if (Name == "avx512.mask.fpclass.pd.256") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_pd_256,
- NewFn);
- if (Name == "avx512.mask.fpclass.pd.512") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_pd_512,
- NewFn);
- if (Name == "avx512.mask.fpclass.ps.128") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_ps_128,
- NewFn);
- if (Name == "avx512.mask.fpclass.ps.256") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_ps_256,
- NewFn);
- if (Name == "avx512.mask.fpclass.ps.512") // Added in 7.0
- return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_ps_512,
- NewFn);
// frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
}
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
-static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask,
- unsigned NumElts) {
+static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
+ Value *Mask) {
+ unsigned NumElts = Vec->getType()->getVectorNumElements();
if (Mask) {
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
- return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts);
+ return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
ICmpInst::Predicate Pred =
Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
Rep = Builder.CreateICmp(Pred, Rep, Zero);
- unsigned NumElts = Op0->getType()->getVectorNumElements();
- Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts);
+ Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
} else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
unsigned NumElts =
CI->getArgOperand(1)->getType()->getVectorNumElements();
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
bool CmpEq = Name[16] == 'e';
Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
- } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
+ } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
+ Type *OpTy = CI->getArgOperand(0)->getType();
+ unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+ unsigned EltWidth = OpTy->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_fpclass_ps_128;
+ else if (VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_fpclass_ps_256;
+ else if (VecWidth == 512 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_fpclass_ps_512;
+ else if (VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_fpclass_pd_128;
+ else if (VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_fpclass_pd_256;
+ else if (VecWidth == 512 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_fpclass_pd_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getOperand(0), CI->getArgOperand(1) });
+ Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
+ Type *OpTy = CI->getArgOperand(0)->getType();
+ unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+ unsigned EltWidth = OpTy->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_cmp_ps_128;
+ else if (VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_cmp_ps_256;
+ else if (VecWidth == 512 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_cmp_ps_512;
+ else if (VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_cmp_pd_128;
+ else if (VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_cmp_pd_256;
+ else if (VecWidth == 512 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_cmp_pd_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ SmallVector<Value *, 4> Args;
+ Args.push_back(CI->getArgOperand(0));
+ Args.push_back(CI->getArgOperand(1));
+ Args.push_back(CI->getArgOperand(2));
+ if (CI->getNumArgOperands() == 5)
+ Args.push_back(CI->getArgOperand(4));
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ Args);
+ Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
+ } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
+ Name[16] != 'p') {
+ // Integer compare intrinsics.
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
- } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
+ } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
} else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
Value *Op = CI->getArgOperand(0);
Value *Zero = llvm::Constant::getNullValue(Op->getType());
Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
- Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr,
- Op->getType()->getVectorNumElements());
+ Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
} else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
Name == "ssse3.pabs.w.128" ||
Name == "ssse3.pabs.d.128" ||
break;
}
- case Intrinsic::x86_avx512_mask_cmp_pd_128:
- case Intrinsic::x86_avx512_mask_cmp_pd_256:
- case Intrinsic::x86_avx512_mask_cmp_pd_512:
- case Intrinsic::x86_avx512_mask_cmp_ps_128:
- case Intrinsic::x86_avx512_mask_cmp_ps_256:
- case Intrinsic::x86_avx512_mask_cmp_ps_512: {
- SmallVector<Value *, 4> Args;
- Args.push_back(CI->getArgOperand(0));
- Args.push_back(CI->getArgOperand(1));
- Args.push_back(CI->getArgOperand(2));
- if (CI->getNumArgOperands() == 5)
- Args.push_back(CI->getArgOperand(4));
-
- NewCall = Builder.CreateCall(NewFn, Args);
- unsigned NumElts = Args[0]->getType()->getVectorNumElements();
- Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, CI->getArgOperand(3),
- NumElts);
-
- std::string Name = CI->getName();
- if (!Name.empty()) {
- CI->setName(Name + ".old");
- NewCall->setName(Name);
- }
- CI->replaceAllUsesWith(Res);
- CI->eraseFromParent();
- return;
- }
-
- case Intrinsic::x86_avx512_mask_fpclass_pd_128:
- case Intrinsic::x86_avx512_mask_fpclass_pd_256:
- case Intrinsic::x86_avx512_mask_fpclass_pd_512:
- case Intrinsic::x86_avx512_mask_fpclass_ps_128:
- case Intrinsic::x86_avx512_mask_fpclass_ps_256:
- case Intrinsic::x86_avx512_mask_fpclass_ps_512: {
- SmallVector<Value *, 4> Args;
- Args.push_back(CI->getArgOperand(0));
- Args.push_back(CI->getArgOperand(1));
-
- NewCall = Builder.CreateCall(NewFn, Args);
- unsigned NumElts = Args[0]->getType()->getVectorNumElements();
- Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, CI->getArgOperand(2),
- NumElts);
-
- std::string Name = CI->getName();
- if (!Name.empty()) {
- CI->setName(Name + ".old");
- NewCall->setName(Name);
- }
- CI->replaceAllUsesWith(Res);
- CI->eraseFromParent();
- return;
- }
-
case Intrinsic::thread_pointer: {
NewCall = Builder.CreateCall(NewFn, {});
break;
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND),
+ X86_INTRINSIC_DATA(avx512_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND),
X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FADDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FADDS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM,
- X86ISD::CMPM_RND),
- X86_INTRINSIC_DATA(avx512_mask_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM,
- X86ISD::CMPM_RND),
X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC,
X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC,
X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_sd, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
- X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASSS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASSS, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
return II;
break;
}
- case Intrinsic::x86_avx512_mask_cmp_pd_128:
- case Intrinsic::x86_avx512_mask_cmp_pd_256:
- case Intrinsic::x86_avx512_mask_cmp_pd_512:
- case Intrinsic::x86_avx512_mask_cmp_ps_128:
- case Intrinsic::x86_avx512_mask_cmp_ps_256:
- case Intrinsic::x86_avx512_mask_cmp_ps_512: {
+ case Intrinsic::x86_avx512_cmp_pd_128:
+ case Intrinsic::x86_avx512_cmp_pd_256:
+ case Intrinsic::x86_avx512_cmp_pd_512:
+ case Intrinsic::x86_avx512_cmp_ps_128:
+ case Intrinsic::x86_avx512_cmp_ps_256:
+ case Intrinsic::x86_avx512_cmp_ps_512: {
// Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
entry:
- %0 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i32 4)
+ %0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i32 4)
%1 = bitcast <16 x i1> %0 to i16
- %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i32 4)
+ %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i32 4)
%3 = bitcast <16 x i1> %2 to i16
- %4 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i32 4)
+ %4 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i32 4)
%5 = bitcast <16 x i1> %4 to i16
- %6 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i32 4)
+ %6 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i32 4)
%7 = bitcast <16 x i1> %6 to i16
%8 = bitcast i16 %1 to <16 x i1>
%9 = bitcast i16 %3 to <16 x i1>
}
; Function Attrs: nounwind readnone
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) #1
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) #1
attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
+ %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
%1 = bitcast <16 x i1> %res to i16
ret i16 %1
}
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: test_cmppd:
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
- %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4)
+ %res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i32 4)
%1 = bitcast <8 x i1> %res to i8
ret i8 %1
}
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
; Function Attrs: nounwind readnone
; CHECK-NEXT: vblendmps %zmm5, %zmm4, %zmm0 {%k1}
; CHECK-NEXT: retq
entry:
- %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
+ %0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
%1 = bitcast <8 x i1> %0 to i8
- %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, i32 4)
+ %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, i32 4)
%3 = bitcast <8 x i1> %2 to i8
%conv = zext i8 %1 to i16
%conv2 = zext i8 %3 to i16
; CHECK-NEXT: vblendmps %zmm5, %zmm4, %zmm0 {%k1}
; CHECK-NEXT: retq
entry:
- %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
+ %0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i32 4)
%1 = bitcast <8 x i1> %0 to i8
%conv = zext i8 %1 to i16
%2 = bitcast i16 %conv to <16 x i1>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
- %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4)
+ %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %__A, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%2 = and <8 x i1> %0, %1
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32)
define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) {
; CHECK-LABEL: test_mm512_fpclass_pd_mask:
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4)
+ %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %__A, i32 4)
%1 = bitcast <8 x i1> %0 to i8
ret i8 %1
}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
- %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4)
+ %0 = tail call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %__A, i32 4)
%1 = bitcast i16 %__U to <16 x i1>
%2 = and <16 x i1> %0, %1
%3 = bitcast <16 x i1> %2 to i16
ret i16 %3
}
-declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32)
+declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32)
define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) {
; CHECK-LABEL: test_mm512_fpclass_ps_mask:
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4)
+ %0 = tail call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %__A, i32 4)
%1 = bitcast <16 x i1> %0 to i16
ret i16 %1
}
ret <2 x double> %res4
}
-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32)
-define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
+define i8 @test_int_x86_avx512_fpclass_pd_512(<8 x double> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4)
- %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2)
+ %res = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 4)
+ %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double> %x0, i32 2)
%1 = and <8 x i1> %res1, %res
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
-declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32)
+declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32)
-define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
+define i16@test_int_x86_avx512_fpclass_ps_512(<16 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4)
- %res1 = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2)
+ %res = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 4)
+ %res1 = call <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float> %x0, i32 2)
%1 = and <16 x i1> %res1, %res
%2 = bitcast <16 x i1> %1 to i16
ret i16 %2
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2)
+ %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = and <2 x i1> %0, %extract
ret i8 %4
}
-declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)
define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
; CHECK-LABEL: test_mm_fpclass_pd_mask:
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2)
+ %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
%1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2)
+ %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = and <4 x i1> %0, %extract
ret i8 %4
}
-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)
define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
; CHECK-LABEL: test_mm256_fpclass_pd_mask:
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2)
+ %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
%1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2)
+ %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = and <4 x i1> %0, %extract
ret i8 %4
}
-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)
define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
; CHECK-LABEL: test_mm_fpclass_ps_mask:
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2)
+ %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
%1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
- %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2)
+ %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
%1 = bitcast i8 %__U to <8 x i1>
%2 = and <8 x i1> %0, %1
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)
define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
; CHECK-LABEL: test_mm256_fpclass_ps_mask:
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2)
+ %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
%1 = bitcast <8 x i1> %0 to i8
ret i8 %1
}
ret <8 x float> %res2
}
-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)
-define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
+define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2)
- %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4)
+ %res = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 2)
+ %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 4)
%1 = and <4 x i1> %res1, %res
%2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)
-define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
+define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2)
- %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4)
+ %res = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 2)
+ %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 4)
%1 = and <8 x i1> %res1, %res
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
-declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)
-define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
+define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4)
- %res1 = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2)
+ %res = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 4)
+ %res1 = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 2)
%1 = and <2 x i1> %res1, %res
%2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)
-define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
+define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2)
- %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4)
+ %res = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 2)
+ %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 4)
%1 = and <4 x i1> %res1, %res
%2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <8 x i1> %2 to i8
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2)
+ %res = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2)
%1 = bitcast <8 x i1> %res to i8
ret i8 %1
}
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)
define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_cmpps_128:
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2)
+ %res = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2)
%1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)
define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: test_cmppd_256:
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2)
+ %res = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2)
%1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)
define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_cmppd_128:
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2)
+ %res = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2)
%1 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
-declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)
define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_max_ps_256:
}
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
- %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+ %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
%3 = bitcast <16 x i1> %2 to i16
%4 = zext i16 %3 to i32
ret i32 %4
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
- %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+ %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = bitcast <16 x i1> %4 to i16
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
- %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+ %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
%3 = bitcast <16 x i1> %2 to i16
%4 = zext i16 %3 to i64
ret i64 %4
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
- %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
+ %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = bitcast <16 x i1> %4 to i16
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
%3 = bitcast <8 x i1> %2 to i8
%4 = zext i8 %3 to i16
ret i16 %4
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = bitcast <8 x i1> %4 to i8
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
%3 = bitcast <8 x i1> %2 to i8
%4 = zext i8 %3 to i32
ret i32 %4
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = bitcast <8 x i1> %4 to i8
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
%3 = bitcast <8 x i1> %2 to i8
%4 = zext i8 %3 to i64
ret i64 %4
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
- %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
+ %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = bitcast <8 x i1> %4 to i8
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
- %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
+ %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
%1 = bitcast <16 x i1> %res to i16
%cast = bitcast i16 %1 to <16 x i1>
%shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
;CHECK-LABEL: stack_fold_cmppd
;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0, i32 4)
+ %res = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0, i32 4)
%2 = bitcast <8 x i1> %res to i8
ret i8 %2
}
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
define i16 @stack_fold_cmpps(<16 x float> %a0, <16 x float> %a1) {
;CHECK-LABEL: stack_fold_cmpps
;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0, i32 4)
+ %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0, i32 4)
%2 = bitcast <16 x i1> %res to i16
ret i16 %2
}
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_divsd_int
;CHECK-LABEL: stack_fold_cmppd
;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a0, <2 x double> %a1, i32 0)
+ %res = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %a0, <2 x double> %a1, i32 0)
%2 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32)
+declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)
define i8 @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_cmppd_ymm
;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
+ %res = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
%2 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)
define i8 @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_cmpps
;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a0, <4 x float> %a1, i32 0)
+ %res = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a0, <4 x float> %a1, i32 0)
%2 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)
define i8 @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_cmpps_ymm
;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0)
+ %res = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0)
%2 = bitcast <8 x i1> %res to i8
ret i8 %2
}
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)
define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_divpd
define i8 @sub_compare_foldingPD128_safe(<2 x double> %a, <2 x double> %b){
; CHECK-LABEL: @sub_compare_foldingPD128_safe(
; CHECK-NEXT: [[SUB_SAFE:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5)
+; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5)
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
; CHECK-NEXT: ret i8 [[T2]]
;
%sub.safe = fsub <2 x double> %a, %b
- %t0 = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.safe, <2 x double> zeroinitializer, i32 5)
+ %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.safe, <2 x double> zeroinitializer, i32 5)
%t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%t2 = bitcast <8 x i1> %t1 to i8
ret i8 %t2
define i8 @sub_compare_foldingPD128(<2 x double> %a, <2 x double> %b){
; CHECK-LABEL: @sub_compare_foldingPD128(
-; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
+; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
; CHECK-NEXT: ret i8 [[T2]]
;
%sub.i = fsub ninf <2 x double> %a, %b
- %t0 = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.i, <2 x double> zeroinitializer, i32 5)
+ %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.i, <2 x double> zeroinitializer, i32 5)
%t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%t2 = bitcast <8 x i1> %t1 to i8
ret i8 %t2
define i8 @sub_compare_foldingPD128_undef_elt(<2 x double> %a, <2 x double> %b){
; CHECK-LABEL: @sub_compare_foldingPD128_undef_elt(
-; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
+; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
; CHECK-NEXT: ret i8 [[T2]]
;
%sub.i = fsub ninf <2 x double> %a, %b
- %t0 = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.i, <2 x double> <double 0.0, double undef>, i32 5)
+ %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.i, <2 x double> <double 0.0, double undef>, i32 5)
%t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%t2 = bitcast <8 x i1> %t1 to i8
ret i8 %t2
define i8 @sub_compare_foldingPD256(<4 x double> %a, <4 x double> %b){
; CHECK-LABEL: @sub_compare_foldingPD256(
-; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5)
+; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5)
; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
; CHECK-NEXT: ret i8 [[T2]]
;
%sub.i1 = fsub ninf <4 x double> %a, %b
- %t0 = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
+ %t0 = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
%t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%t2 = bitcast <8 x i1> %t1 to i8
ret i8 %t2
define i8 @sub_compare_foldingPD512(<8 x double> %a, <8 x double> %b){
; CHECK-LABEL: @sub_compare_foldingPD512(
-; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i32 4)
+; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i32 4)
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
; CHECK-NEXT: ret i8 [[T1]]
;
%sub.i2 = fsub ninf <8 x double> %a, %b
- %t0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i32 4)
+ %t0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i32 4)
%t1 = bitcast <8 x i1> %t0 to i8
ret i8 %t1
}
define i8 @sub_compare_foldingPS128(<4 x float> %a, <4 x float> %b){
; CHECK-LABEL: @sub_compare_foldingPS128(
-; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12)
+; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12)
; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
; CHECK-NEXT: ret i8 [[T2]]
;
%sub.i3 = fsub ninf <4 x float> %a, %b
- %t0 = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12)
+ %t0 = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12)
%t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%t2 = bitcast <8 x i1> %t1 to i8
ret i8 %t2
define i8 @sub_compare_foldingPS256(<8 x float> %a, <8 x float> %b){
; CHECK-LABEL: @sub_compare_foldingPS256(
-; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5)
+; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5)
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
; CHECK-NEXT: ret i8 [[T1]]
;
%sub.i4 = fsub ninf <8 x float> %a, %b
- %t0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5)
+ %t0 = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5)
%t1 = bitcast <8 x i1> %t0 to i8
ret i8 %t1
}
define i16 @sub_compare_foldingPS512(<16 x float> %a, <16 x float> %b){
; CHECK-LABEL: @sub_compare_foldingPS512(
-; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i32 4)
+; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i32 4)
; CHECK-NEXT: [[T1:%.*]] = bitcast <16 x i1> [[T0]] to i16
; CHECK-NEXT: ret i16 [[T1]]
;
%sub.i5 = fsub ninf <16 x float> %a, %b
- %t0 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %sub.i5, <16 x float> zeroinitializer, i32 11, i32 4)
+ %t0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %sub.i5, <16 x float> zeroinitializer, i32 11, i32 4)
%t1 = bitcast <16 x i1> %t0 to i16
ret i16 %t1
}
define i8 @sub_compare_folding_swapPD128(<2 x double> %a, <2 x double> %b){
; CHECK-LABEL: @sub_compare_folding_swapPD128(
-; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5)
+; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5)
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
; CHECK-NEXT: ret i8 [[T2]]
;
%sub.i = fsub ninf <2 x double> %a, %b
- %t0 = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5)
+ %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5)
%t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%t2 = bitcast <8 x i1> %t1 to i8
ret i8 %t2
define i8 @sub_compare_folding_swapPD256(<4 x double> %a, <4 x double> %b){
; CHECK-LABEL: @sub_compare_folding_swapPD256(
-; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5)
+; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5)
; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
; CHECK-NEXT: ret i8 [[T2]]
;
%sub.i = fsub ninf <4 x double> %a, %b
- %t0 = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5)
+ %t0 = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5)
%t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%t2 = bitcast <8 x i1> %t1 to i8
ret i8 %t2
define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @sub_compare_folding_swapPD256_undef(
-; CHECK-NEXT: [[TMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> undef, <4 x double> zeroinitializer, i32 5)
+; CHECK-NEXT: [[TMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> undef, <4 x double> zeroinitializer, i32 5)
; CHECK-NEXT: [[T0:%.*]] = shufflevector <4 x i1> [[TMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
; CHECK-NEXT: ret i8 [[T1]]
;
%sub.i1 = fsub ninf <4 x double> undef, undef
- %tmp = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
+ %tmp = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
%t0 = shufflevector <4 x i1> %tmp, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%t1 = bitcast <8 x i1> %t0 to i8
ret i8 %t1
define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){
; CHECK-LABEL: @sub_compare_folding_swapPD512(
-; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i32 4)
+; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i32 4)
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
; CHECK-NEXT: ret i8 [[T1]]
;
%sub.i = fsub ninf <8 x double> %a, %b
- %t0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i32 4)
+ %t0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i32 4)
%t1 = bitcast <8 x i1> %t0 to i8
ret i8 %t1
}
define i8 @sub_compare_folding_swapPS128(<4 x float> %a, <4 x float> %b){
; CHECK-LABEL: @sub_compare_folding_swapPS128(
-; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12)
+; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12)
; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
; CHECK-NEXT: ret i8 [[T2]]
;
%sub.i = fsub ninf <4 x float> %a, %b
- %t0 = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12)
+ %t0 = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12)
%t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%t2 = bitcast <8 x i1> %t1 to i8
ret i8 %t2
define i8 @sub_compare_folding_swapPS256(<8 x float> %a, <8 x float> %b){
; CHECK-LABEL: @sub_compare_folding_swapPS256(
-; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5)
+; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5)
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
; CHECK-NEXT: ret i8 [[T1]]
;
%sub.i = fsub ninf <8 x float> %a, %b
- %t0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5)
+ %t0 = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5)
%t1 = bitcast <8 x i1> %t0 to i8
ret i8 %t1
}
define i16 @sub_compare_folding_swapPS512(<16 x float> %a, <16 x float> %b){
; CHECK-LABEL: @sub_compare_folding_swapPS512(
-; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i32 4)
+; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i32 4)
; CHECK-NEXT: [[T1:%.*]] = bitcast <16 x i1> [[T0]] to i16
; CHECK-NEXT: ret i16 [[T1]]
;
%sub.i = fsub ninf <16 x float> %a, %b
- %t0 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i32 4)
+ %t0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i32 4)
%t1 = bitcast <16 x i1> %t0 to i16
ret i16 %t1
}
-declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32)
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32)
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
-declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32)
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32)
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
+declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)