// AVX-512 - BLEND using mask
//
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
- let hasSideEffects = 0 in
+ let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
- [(set _.RC:$dst, (vselect _.KRCWM:$mask,
- (_.VT _.RC:$src2),
- (_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K;
- let hasSideEffects = 0 in
+ []>, EVEX_4V, EVEX_K;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ;
- let mayLoad = 1, hasSideEffects = 0 in
+ let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
- [(set _.RC:$dst, (vselect _.KRCWM:$mask,
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
- (_.VT _.RC:$src1)))]>,
- EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
- let mayLoad = 1, hasSideEffects = 0 in
+ []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
}
+ }
}
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
; CHECK-NEXT: vpsrad $1, %zmm2, %zmm2
; CHECK-NEXT: movw $-21846, %ax ## imm = 0xAAAA
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpblendmd {{.*}}(%rip), %zmm1, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa32 {{.*}}(%rip), %zmm1 {%k1}
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
+; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
+; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
+; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
+; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
-; CHECK-NEXT: vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
-; CHECK-NEXT: vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
+; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
-; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
+; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
-; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
+; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
}
; X32-LABEL: test_argRet128Vector:
-; X32: vpblend{{.*}} %xmm0, %xmm1, %xmm0
+; X32: vmovdqa{{.*}} %xmm0, %xmm1
+; X32: vmovdqa{{.*}} %xmm1, %xmm0
; X32: ret{{.*}}
; WIN64-LABEL: test_argRet128Vector:
-; WIN64: vpblend{{.*}} %xmm0, %xmm1, %xmm0
+; WIN64: vmovdqa{{.*}} %xmm0, %xmm1
+; WIN64: vmovdqa{{.*}} %xmm1, %xmm0
; WIN64: ret{{.*}}
; Test regcall when receiving/returning 128 bit vector
; X32-LABEL: test_CallargRet128Vector:
; X32: vmov{{.*}} %xmm0, {{%xmm([0-7])}}
; X32: call{{.*}} {{.*}}test_argRet128Vector
-; X32: vpblend{{.*}} {{%xmm([0-7])}}, %xmm0, %xmm0
+; X32: vmovdqa{{.*}} {{%xmm([0-7])}}, %xmm0
; X32: ret{{.*}}
; WIN64-LABEL: test_CallargRet128Vector:
; WIN64: vmov{{.*}} %xmm0, {{%xmm([0-9]+)}}
; WIN64: call{{.*}} {{.*}}test_argRet128Vector
-; WIN64: vpblend{{.*}} {{%xmm([0-9]+)}}, %xmm0, %xmm0
+; WIN64: vmovdqa{{.*}} {{%xmm([0-9]+)}}, %xmm0
; WIN64: ret{{.*}}
; Test regcall when passing/retrieving 128 bit vector
}
; X32-LABEL: test_argRet256Vector:
-; X32: vpblend{{.*}} %ymm0, %ymm1, %ymm0
+; X32: vmovdqa{{.*}} %ymm0, %ymm1
+; X32: vmovdqa{{.*}} %ymm1, %ymm0
; X32: ret{{.*}}
; WIN64-LABEL: test_argRet256Vector:
-; WIN64: vpblend{{.*}} %ymm0, %ymm1, %ymm0
+; WIN64: vmovdqa{{.*}} %ymm0, %ymm1
+; WIN64: vmovdqa{{.*}} %ymm1, %ymm0
; WIN64: ret{{.*}}
; Test regcall when receiving/returning 256 bit vector
; X32-LABEL: test_CallargRet256Vector:
; X32: vmov{{.*}} %ymm0, %ymm1
; X32: call{{.*}} {{.*}}test_argRet256Vector
-; X32: vpblend{{.*}} %ymm1, %ymm0, %ymm0
+; X32: vmovdqa{{.*}} %ymm1, %ymm0
; X32: ret{{.*}}
; WIN64-LABEL: test_CallargRet256Vector:
; WIN64: vmov{{.*}} %ymm0, %ymm1
; WIN64: call{{.*}} {{.*}}test_argRet256Vector
-; WIN64: vpblend{{.*}} %ymm1, %ymm0, %ymm0
+; WIN64: vmovdqa{{.*}} %ymm1, %ymm0
; WIN64: ret{{.*}}
; Test regcall when passing/retrieving 256 bit vector
}
; X32-LABEL: test_argRet512Vector:
-; X32: vpblend{{.*}} %zmm0, %zmm1, %zmm0
+; X32: vmovdqa{{.*}} %zmm0, %zmm1
+; X32: vmovdqa{{.*}} %zmm1, %zmm0
; X32: ret{{.*}}
; WIN64-LABEL: test_argRet512Vector:
-; WIN64: vpblend{{.*}} %zmm0, %zmm1, %zmm0
+; WIN64: vmovdqa{{.*}} %zmm0, %zmm1
+; WIN64: vmovdqa{{.*}} %zmm1, %zmm0
; WIN64: ret{{.*}}
; Test regcall when receiving/returning 512 bit vector
; X32-LABEL: test_CallargRet512Vector:
; X32: vmov{{.*}} %zmm0, %zmm1
; X32: call{{.*}} {{.*}}test_argRet512Vector
-; X32: vpblend{{.*}} %zmm1, %zmm0, %zmm0
+; X32: movdqa{{.*}} %zmm1, %zmm0
; X32: ret{{.*}}
; WIN64-LABEL: test_CallargRet512Vector:
; WIN64: vmov{{.*}} %zmm0, %zmm1
; WIN64: call{{.*}} {{.*}}test_argRet512Vector
-; WIN64: vpblend{{.*}} %zmm1, %zmm0, %zmm0
+; WIN64: vmovdqa{{.*}} %zmm1, %zmm0
; WIN64: ret{{.*}}
; Test regcall when passing/retrieving 512 bit vector
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %yp, align 4
%mask = icmp eq <16 x i32> %x, %y
; CHECK-LABEL: test4_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
; CHECK-LABEL: test6_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
; SKX: ## BB#0:
; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1
-; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
+; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1}
+; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: retq
%mask = fcmp olt <4 x float> %a, zeroinitializer
; SKX: ## BB#0:
; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1
-; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
+; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1}
+; SKX-NEXT: vmovapd %xmm1, %xmm0
; SKX-NEXT: retq
%mask = fcmp olt <2 x double> %a, zeroinitializer
%c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
-; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; SKX-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; SKX-NEXT: vmovdqa %ymm1, %ymm0
; SKX-NEXT: retq
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; SKX-LABEL: test10:
; SKX: ## BB#0:
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1
-; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
+; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1}
+; SKX-NEXT: vmovaps %ymm1, %ymm0
; SKX-NEXT: retq
%mask = fcmp oeq <8 x float> %x, %y
; CHECK-LABEL: test16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
-; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; CHECK-LABEL: test17:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
; CHECK-LABEL: test18:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
; CHECK-LABEL: test19:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
-; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
; CHECK-LABEL: test24:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
; CHECK-LABEL: test25:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
; SKX-LABEL: test30:
; SKX: ## BB#0:
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
-; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
+; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1}
+; SKX-NEXT: vmovapd %ymm1, %ymm0
; SKX-NEXT: retq
%mask = fcmp oeq <4 x double> %x, %y
; SKX-LABEL: test31:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1
-; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
+; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1}
+; SKX-NEXT: vmovapd %xmm1, %xmm0
; SKX-NEXT: retq
%y = load <2 x double>, <2 x double>* %yp, align 4
; SKX-LABEL: test32:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1
-; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
+; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1}
+; SKX-NEXT: vmovapd %ymm1, %ymm0
; SKX-NEXT: retq
%y = load <4 x double>, <4 x double>* %yp, align 4
; CHECK-LABEL: test33:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <8 x double>, <8 x double>* %yp, align 4
%mask = fcmp olt <8 x double> %x, %y
; SKX-LABEL: test34:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1
-; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
+; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1}
+; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: retq
%y = load <4 x float>, <4 x float>* %yp, align 4
%mask = fcmp olt <4 x float> %x, %y
; SKX-LABEL: test35:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1
-; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
+; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1}
+; SKX-NEXT: vmovaps %ymm1, %ymm0
; SKX-NEXT: retq
%y = load <8 x float>, <8 x float>* %yp, align 4
; CHECK-LABEL: test36:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <16 x float>, <16 x float>* %yp, align 4
%mask = fcmp olt <16 x float> %x, %y
; CHECK-LABEL: test37:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1
-; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%a = load double, double* %ptr
; SKX-LABEL: test38:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1
-; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
+; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1}
+; SKX-NEXT: vmovapd %ymm1, %ymm0
; SKX-NEXT: retq
%a = load double, double* %ptr
; SKX-LABEL: test39:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1
-; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
+; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1}
+; SKX-NEXT: vmovapd %xmm1, %xmm0
; SKX-NEXT: retq
%a = load double, double* %ptr
; CHECK-LABEL: test40:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%a = load float, float* %ptr
; SKX-LABEL: test41:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1
-; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
+; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1}
+; SKX-NEXT: vmovaps %ymm1, %ymm0
; SKX-NEXT: retq
%a = load float, float* %ptr
; SKX-LABEL: test42:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1
-; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
+; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1}
+; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: retq
%a = load float, float* %ptr
; KNL-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
-; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovapd %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test43:
; SKX-NEXT: vpsllw $15, %xmm2, %xmm2
; SKX-NEXT: vpmovw2m %xmm2, %k1
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
-; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
+; SKX-NEXT: vmovapd %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vmovapd %zmm1, %zmm0
; SKX-NEXT: retq
%a = load double, double* %ptr
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqb %zmm2, %zmm1, %k1
-; CHECK-NEXT: vpblendmb (%rdi), %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%mask = icmp ne <64 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <64 x i8>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqw %zmm2, %zmm1, %k1
-; CHECK-NEXT: vpblendmw (%rdi), %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%mask = icmp ne <32 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i16>*
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp eq <64 x i8> %x, %y
%max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sgt <64 x i8> %x, %y
%max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k1
-; CHECK-NEXT: vpblendmw %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sge <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x1, <32 x i16> %y
; CHECK-LABEL: test4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleub %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp ugt <64 x i8> %x, %y
%max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw (%rdi), %zmm0, %k1
-; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <32 x i16>, <32 x i16>* %yp, align 4
%mask = icmp eq <32 x i16> %x, %y
; CHECK-LABEL: test6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw (%rdi), %zmm0, %k1
-; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask = icmp sgt <32 x i16> %x, %y
; CHECK-LABEL: test7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew (%rdi), %zmm0, %k1
-; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask = icmp sle <32 x i16> %x, %y
; CHECK-LABEL: test8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1
-; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask = icmp ule <32 x i16> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpeqw %zmm3, %zmm2, %k1 {%k1}
-; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp eq <32 x i16> %x1, %y1
%mask0 = icmp eq <32 x i16> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpleb %zmm2, %zmm3, %k1 {%k1}
-; CHECK-NEXT: vpblendmb %zmm0, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <64 x i8> %x1, %y1
%mask0 = icmp sle <64 x i8> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpcmpgtb (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <64 x i8> %x1, %y1
%y = load <64 x i8>, <64 x i8>* %y.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <32 x i16> %x1, %y1
%y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqb %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xca,0x04]
-; CHECK-NEXT: vpblendmb (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x66,0x07]
+; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <32 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i8>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqw %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x3f,0xca,0x04]
-; CHECK-NEXT: vpblendmw (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x66,0x07]
+; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i16>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqb %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x3f,0xca,0x04]
-; CHECK-NEXT: vpblendmb (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x66,0x07]
+; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i8>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqw %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3f,0xca,0x04]
-; CHECK-NEXT: vpblendmw (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x66,0x07]
+; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i16>*
; CHECK-LABEL: test256_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
-; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask = icmp eq <32 x i8> %x, %y
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %y
; CHECK-LABEL: test256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
-; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%mask = icmp sgt <32 x i8> %x, %y
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
; CHECK-LABEL: test256_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k1
-; CHECK-NEXT: vpblendmw %ymm2, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %ymm2, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask = icmp sge <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x1, <16 x i16> %y
; CHECK-LABEL: test256_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k1
-; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%mask = icmp ugt <32 x i8> %x, %y
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
; CHECK-LABEL: test256_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <16 x i16>, <16 x i16>* %yp, align 4
%mask = icmp eq <16 x i16> %x, %y
; CHECK-LABEL: test256_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask = icmp sgt <16 x i16> %x, %y
; CHECK-LABEL: test256_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask = icmp sle <16 x i16> %x, %y
; CHECK-LABEL: test256_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask = icmp ule <16 x i16> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpeqw %ymm3, %ymm2, %k1 {%k1}
-; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp eq <16 x i16> %x1, %y1
%mask0 = icmp eq <16 x i16> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpleb %ymm2, %ymm3, %k1 {%k1}
-; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp sge <32 x i8> %x1, %y1
%mask0 = icmp sle <32 x i8> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpcmpgtb (%rdi), %ymm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <32 x i8> %x1, %y1
%y = load <32 x i8>, <32 x i8>* %y.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %ymm1, %ymm2, %k1
; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp sge <16 x i16> %x1, %y1
%y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
; CHECK-LABEL: test128_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask = icmp eq <16 x i8> %x, %y
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %y
; CHECK-LABEL: test128_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%mask = icmp sgt <16 x i8> %x, %y
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
; CHECK-LABEL: test128_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k1
-; CHECK-NEXT: vpblendmw %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %xmm2, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask = icmp sge <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x1, <8 x i16> %y
; CHECK-LABEL: test128_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%mask = icmp ugt <16 x i8> %x, %y
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
; CHECK-LABEL: test128_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <8 x i16>, <8 x i16>* %yp, align 4
%mask = icmp eq <8 x i16> %x, %y
; CHECK-LABEL: test128_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask = icmp sgt <8 x i16> %x, %y
; CHECK-LABEL: test128_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask = icmp sle <8 x i16> %x, %y
; CHECK-LABEL: test128_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask = icmp ule <8 x i16> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpeqw %xmm3, %xmm2, %k1 {%k1}
-; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp eq <8 x i16> %x1, %y1
%mask0 = icmp eq <8 x i16> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpleb %xmm2, %xmm3, %k1 {%k1}
-; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <16 x i8> %x1, %y1
%mask0 = icmp sle <16 x i8> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <16 x i8> %x1, %y1
%y = load <16 x i8>, <16 x i8>* %y.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %xmm1, %xmm2, %k1
; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <8 x i16> %x1, %y1
%y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
+; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
+; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
+; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
+; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
-; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
-; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
-; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
+; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
-; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
+; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
+; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
+; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
+; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
-; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
+; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
-; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
-; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
-; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
+; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
; CHECK: ## BB#0:
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
-; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
+; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
; CHECK-LABEL: test256_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
-; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask = icmp eq <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
; CHECK-LABEL: test256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
-; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask = icmp sgt <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
; CHECK-LABEL: test256_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k1
-; CHECK-NEXT: vpblendmd %ymm2, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm2, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask = icmp sge <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y
; CHECK-LABEL: test256_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1
-; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask = icmp ugt <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
; CHECK-LABEL: test256_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %yp, align 4
%mask = icmp eq <8 x i32> %x, %y
; CHECK-LABEL: test256_5b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %yp, align 4
%mask = icmp eq <8 x i32> %y, %x
; CHECK-LABEL: test256_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp sgt <8 x i32> %x, %y
; CHECK-LABEL: test256_6b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp slt <8 x i32> %y, %x
; CHECK-LABEL: test256_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp sle <8 x i32> %x, %y
; CHECK-LABEL: test256_7b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp sge <8 x i32> %y, %x
; CHECK-LABEL: test256_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp ule <8 x i32> %x, %y
; CHECK-LABEL: test256_8b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp uge <8 x i32> %y, %x
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 {%k1}
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp eq <8 x i32> %x1, %y1
%mask0 = icmp eq <8 x i32> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpleq %ymm2, %ymm3, %k1 {%k1}
-; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp sge <4 x i64> %x1, %y1
%mask0 = icmp sle <4 x i64> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <4 x i64> %x1, %y1
%y = load <4 x i64>, <4 x i64>* %y.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1
; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp sge <8 x i32> %x1, %y1
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
; CHECK-LABEL: test256_13:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k1
-; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
; CHECK-LABEL: test256_14:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi){1to8}, %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1
; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp sge <8 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %ymm1, %ymm2, %k1
; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%mask1 = icmp sge <4 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
; CHECK-LABEL: test256_17:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %yp, align 4
%mask = icmp ne <8 x i32> %x, %y
; CHECK-LABEL: test256_18:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %yp, align 4
%mask = icmp ne <8 x i32> %y, %x
; CHECK-LABEL: test256_19:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnltud (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %yp, align 4
%mask = icmp uge <8 x i32> %x, %y
; CHECK-LABEL: test256_20:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %yp, align 4
%mask = icmp uge <8 x i32> %y, %x
; CHECK-LABEL: test128_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask = icmp eq <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y
; CHECK-LABEL: test128_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask = icmp sgt <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
; CHECK-LABEL: test128_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k1
-; CHECK-NEXT: vpblendmd %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm2, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask = icmp sge <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y
; CHECK-LABEL: test128_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask = icmp ugt <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
; CHECK-LABEL: test128_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %yp, align 4
%mask = icmp eq <4 x i32> %x, %y
; CHECK-LABEL: test128_5b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %yp, align 4
%mask = icmp eq <4 x i32> %y, %x
; CHECK-LABEL: test128_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp sgt <4 x i32> %x, %y
; CHECK-LABEL: test128_6b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp slt <4 x i32> %y, %x
; CHECK-LABEL: test128_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp sle <4 x i32> %x, %y
; CHECK-LABEL: test128_7b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp sge <4 x i32> %y, %x
; CHECK-LABEL: test128_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp ule <4 x i32> %x, %y
; CHECK-LABEL: test128_8b:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp uge <4 x i32> %y, %x
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %k1 {%k1}
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp eq <4 x i32> %x1, %y1
%mask0 = icmp eq <4 x i32> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpleq %xmm2, %xmm3, %k1 {%k1}
-; CHECK-NEXT: vpblendmq %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <2 x i64> %x1, %y1
%mask0 = icmp sle <2 x i64> %x, %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %xmm2, %xmm1, %k1
; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <2 x i64> %x1, %y1
%y = load <2 x i64>, <2 x i64>* %y.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1
; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <4 x i32> %x1, %y1
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
; CHECK-LABEL: test128_13:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k1
-; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
; CHECK-LABEL: test128_14:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi){1to4}, %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1
; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <4 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %xmm1, %xmm2, %k1
; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k1 {%k1}
-; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <2 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
; CHECK-LABEL: test128_17:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp ne <4 x i32> %x, %y
; CHECK-LABEL: test128_18:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp ne <4 x i32> %y, %x
; CHECK-LABEL: test128_19:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnltud (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp uge <4 x i32> %x, %y
; CHECK-LABEL: test128_20:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp uge <4 x i32> %y, %x
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm3
; SKX-NEXT: vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
-; SKX-NEXT: vblendmps %zmm1, %zmm3, %zmm1 {%k1}
-; SKX-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm1 {%k1}
-; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vmovaps %zmm1, %zmm3 {%k1}
+; SKX-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
+; SKX-NEXT: vmovaps %zmm3, %zmm0
; SKX-NEXT: retq
;
; KNL-LABEL: test15:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm3
; KNL-NEXT: vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
-; KNL-NEXT: vblendmps %zmm1, %zmm3, %zmm1 {%k1}
-; KNL-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm1 {%k1}
-; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: vmovaps %zmm1, %zmm3 {%k1}
+; KNL-NEXT: vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
+; KNL-NEXT: vmovaps %zmm3, %zmm0
; KNL-NEXT: retq
entry:
%sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
; CHECK: # BB#0:
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
%mask.cast = bitcast i8 %mask to <8 x i1>
; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask:
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vblendmpd (%rdi){1to2}, %xmm0, %xmm0 {%k1}
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
; CHECK-NEXT: retq
%q = load double, double* %x, align 1
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0