{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
{ X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
- { X86::UNPCKLPDrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
+ { X86::MOVLHPSrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
{ X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
{ X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
{ X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
{ X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
- { X86::VUNPCKLPDrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
+ { X86::VMOVLHPSrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
{ X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
{ X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
{ X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
{ X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
{ X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
{ X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
- { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
+ { X86::VMOVLHPSZrr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
{ X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
{ X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
{ X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm },
; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; X32-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_pd:
; X64: # BB#0:
-; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = insertelement <4 x double> undef, double %a3, i32 0
; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X32-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_pd:
; X64: # BB#0:
-; X64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-NEXT: retq
%res0 = insertelement <4 x double> undef, double %a0, i32 0
; CHECK-LABEL: unpacklopd_not:
; CHECK: # BB#0:
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT: retq
%shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT: retq
%shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT: retq
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT: retq
;
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; NODQ-NEXT: retq
;
; VLDQ-LABEL: slto2f64:
; NODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT: retq
; KNL-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; KNL-NEXT: vpextrq $1, %xmm4, %rax
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
; KNL-NEXT: vmovq %xmm4, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3
; KNL-NEXT: vpextrq $1, %xmm3, %rax
; KNL-NEXT: vmovq %xmm3, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
; KNL-NEXT: vpextrq $1, %xmm0, %rax
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm0
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm3
; KNL-NEXT: vpextrq $1, %xmm3, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm2
; KNL-NEXT: vmovq %xmm3, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; KNL-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; KNL-NEXT: vpextrq $1, %xmm3, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; KNL-NEXT: vmovq %xmm3, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm3
; KNL-NEXT: vpextrq $1, %xmm3, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; KNL-NEXT: vmovq %xmm3, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; KNL-NEXT: vpextrq $1, %xmm1, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; KNL-NEXT: vmovq %xmm1, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm1
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; KNL-NEXT: retq
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3
; VLNODQ-NEXT: vmovq %xmm2, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; VLNODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm4
; VLNODQ-NEXT: vmovq %xmm3, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; VLNODQ-NEXT: vextracti128 $1, %ymm0, %xmm3
; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm3, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm0, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm0
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; VLNODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
; VLNODQ-NEXT: vmovq %xmm2, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm2
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; VLNODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm3, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; VLNODQ-NEXT: vextracti128 $1, %ymm1, %xmm3
; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm3, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; VLNODQ-NEXT: vpextrq $1, %xmm1, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm1, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm1
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; VLNODQ-NEXT: retq
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3
; AVX512BW-NEXT: vmovq %xmm2, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm4
; AVX512BW-NEXT: vmovq %xmm3, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm3, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm0
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
; AVX512BW-NEXT: vmovq %xmm2, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm2
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm3, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm3, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm1, %rax
; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm1
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512BW-NEXT: retq
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0
-; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT: retq
; KNL-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; KNL-NEXT: vpextrq $1, %xmm4, %rax
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
; KNL-NEXT: vmovq %xmm4, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3
; KNL-NEXT: vpextrq $1, %xmm3, %rax
; KNL-NEXT: vmovq %xmm3, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
; KNL-NEXT: vpextrq $1, %xmm0, %rax
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm3
; KNL-NEXT: vpextrq $1, %xmm3, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2
; KNL-NEXT: vmovq %xmm3, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; KNL-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; KNL-NEXT: vpextrq $1, %xmm3, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; KNL-NEXT: vmovq %xmm3, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm3
; KNL-NEXT: vpextrq $1, %xmm3, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; KNL-NEXT: vmovq %xmm3, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; KNL-NEXT: vpextrq $1, %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; KNL-NEXT: vmovq %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm1
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; KNL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; KNL-NEXT: retq
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
; VLNODQ-NEXT: vmovq %xmm2, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; VLNODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm4
; VLNODQ-NEXT: vmovq %xmm3, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; VLNODQ-NEXT: vextracti128 $1, %ymm0, %xmm3
; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm3, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; VLNODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
; VLNODQ-NEXT: vmovq %xmm2, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; VLNODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm3, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; VLNODQ-NEXT: vextracti128 $1, %ymm1, %xmm3
; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm3, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; VLNODQ-NEXT: vpextrq $1, %xmm1, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; VLNODQ-NEXT: vmovq %xmm1, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm1
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; VLNODQ-NEXT: retq
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
; AVX512BW-NEXT: vmovq %xmm2, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm4
; AVX512BW-NEXT: vmovq %xmm3, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm3, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
; AVX512BW-NEXT: vmovq %xmm2, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm3, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm3, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4
; AVX512BW-NEXT: vmovq %xmm1, %rax
; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm1
-; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512BW-NEXT: retq
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm1
; VLNODQ-NEXT: vmovq %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VLNODQ-NEXT: retq
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
%1 = sitofp <2 x i1> %cmpres to <2 x double>
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm1
; VLNODQ-NEXT: vmovq %xmm0, %rax
; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
-; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VLNODQ-NEXT: retq
%mask = icmp ult <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x double>
define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
; CHECK-LABEL: test_insert_128_v8f64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%r = insertelement <8 x double> %x, double %y, i32 1
; CHECK-LABEL: test_4xi64_to_2xi64_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = shufflevector <4 x i64> %vec, <4 x i64> undef, <2 x i32> <i32 2, i32 0>
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 3, i32 0>
; CHECK-NEXT: vmovaps (%rdi), %zmm0
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <8 x i64>, <8 x i64>* %vp
; CHECK-LABEL: test_4xdouble_to_2xdouble_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = shufflevector <4 x double> %vec, <4 x double> undef, <2 x i32> <i32 2, i32 0>
define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) {
; CHECK-LABEL: test_2xdouble_unpack_low_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
%res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
ret <2 x double> %res
;
; SSE-64-LABEL: test_buildvector_v2f64:
; SSE-64: # BB#0:
-; SSE-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT: retq
;
; AVX-32-LABEL: test_buildvector_v2f64:
;
; AVX-64-LABEL: test_buildvector_v2f64:
; AVX-64: # BB#0:
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT: retq
%ins0 = insertelement <2 x double> undef, double %a0, i32 0
%ins1 = insertelement <2 x double> %ins0, double %a1, i32 1
; SSE2-64: # BB#0:
; SSE2-64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-64-NEXT: retq
;
; SSE41-64-LABEL: test_buildvector_v4f32:
;
; AVX-64-LABEL: test_buildvector_v4f64:
; AVX-64: # BB#0:
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: retq
%ins0 = insertelement <4 x double> undef, double %a0, i32 0
;
; AVX-64-LABEL: test_buildvector_v8f64:
; AVX-64: # BB#0:
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
; AVX-64-NEXT: retq
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_negative_zero_1:
; SSE2: # BB#0:
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_register:
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_load:
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_partial_load:
; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
; SSE-NEXT: cvtss2sd %xmm5, %xmm4
; SSE-NEXT: andps %xmm8, %xmm4
; SSE-NEXT: orps %xmm0, %xmm4
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm4[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: andps %xmm7, %xmm0
; SSE-NEXT: cvtss2sd %xmm6, %xmm0
; SSE-NEXT: andps %xmm8, %xmm0
; SSE-NEXT: orps %xmm0, %xmm1
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test14:
; CHECK: # BB#0:
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test16:
; CHECK: # BB#0:
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
; CHECK-LABEL: test:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: movaps %xmm0, (%eax)
; CHECK-NEXT: retl
entry:
; SSE-NEXT: subss %xmm4, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: not_a_hsub_2:
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: addss %xmm2, %xmm0
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; CHECK-LIBCALL-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-LIBCALL-NEXT: addq $48, %rsp
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: retq
; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-I686-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-I686-NEXT: addl $56, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: retl
; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
-; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm2
; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm1
-; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-LIBCALL-NEXT: addq $16, %rsp
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: retq
; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3
; BWON-F16C-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; BWON-F16C-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; BWON-F16C-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; BWON-F16C-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; BWON-F16C-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; BWON-F16C-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BWON-F16C-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; BWON-F16C-NEXT: retq
;
; X32: ## BB#0:
; X32-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; X32-NEXT: vhaddps %xmm3, %xmm2, %xmm1
-; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_unpackl_fhadd_128:
; X64: ## BB#0:
; X64-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; X64-NEXT: vhaddps %xmm3, %xmm2, %xmm1
-; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
%2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a2, <4 x float> %a3)
; X64-SSE-NEXT: movq %xmm1, %rax
; X64-SSE-NEXT: xorps %xmm1, %xmm1
; X64-SSE-NEXT: cvtsi2sdq %rax, %xmm1
-; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: clamp_sitofp_2i64_2f64:
; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
; X64-AVX-NEXT: vmovq %xmm0, %rax
; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; X64-AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-AVX-NEXT: retq
%clo = icmp slt <2 x i64> %a, <i64 -255, i64 -255>
%lo = select <2 x i1> %clo, <2 x i64> <i64 -255, i64 -255>, <2 x i64> %a
; AVX: ## BB#0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: retq
;
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: merge_4f32_f32_012u:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: merge_4f32_f32_019u:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: merge_2i64_i64_12_volatile:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: merge_4f32_f32_2345_volatile:
define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
; GENERIC-LABEL: test_movlhps:
; GENERIC: # BB#0:
-; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movlhps:
; ATOM: # BB#0:
-; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movlhps:
; SLM: # BB#0:
-; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movlhps:
; SANDY: # BB#0:
-; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movlhps:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_movlhps:
; SKYLAKE: # BB#0:
-; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movlhps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movlhps:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi32:
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi64x:
; X32: # BB#0:
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_pd:
; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
%res0 = insertelement <2 x double> undef, double %a1, i32 0
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi32:
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi64x:
; X32: # BB#0:
; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_pd:
; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res0 = insertelement <2 x double> undef, double %a0, i32 0
%res1 = insertelement <2 x double> %res0, double %a1, i32 1
define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi64:
; X32: # BB#0:
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi64:
; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %res
define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpacklo_pd:
; X32: # BB#0:
-; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_pd:
; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
ret <2 x double> %res
define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
; GENERIC-LABEL: test_movsd_reg:
; GENERIC: # BB#0:
-; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; GENERIC-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movsd_reg:
; ATOM: # BB#0:
-; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; ATOM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
;
; SLM-LABEL: test_movsd_reg:
; SLM: # BB#0:
-; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SLM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movsd_reg:
; SANDY: # BB#0:
-; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movsd_reg:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_movsd_reg:
; SKYLAKE: # BB#0:
-; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movsd_reg:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
+; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movsd_reg:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
+; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 0>
ret <2 x double> %1
; X64-LABEL: test2:
; X64: # BB#0:
; X64-NEXT: movaps (%rsi), %xmm1
-; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movaps %xmm1, (%rdi)
; X64-NEXT: retq
%tmp3 = load <2 x double>, <2 x double>* %A, align 16
; X64: # BB#0:
; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
%tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
; X64: # BB#0:
; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
%tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
;
; X64-LABEL: test11:
; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
; X86-NEXT: movaps %xmm2, %xmm0
; X86-NEXT: addps %xmm1, %xmm0
; X86-NEXT: subps %xmm1, %xmm2
-; X86-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X86-NEXT: retl
;
; X64-LABEL: test14:
; X64-NEXT: movaps %xmm2, %xmm0
; X64-NEXT: addps %xmm1, %xmm0
; X64-NEXT: subps %xmm1, %xmm2
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
%tmp = load <4 x float>, <4 x float>* %y ; <<4 x float>> [#uses=2]
%tmp5 = load <4 x float>, <4 x float>* %x ; <<4 x float>> [#uses=2]
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: subss %xmm1, %xmm0
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X32-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: insertps_with_undefs:
; X64: ## BB#0:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
%1 = load float, float* %b, align 4
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSE-NEXT: retq
; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX-NEXT: retq
%cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
;
; X64-LABEL: t3:
; X64: # BB#0:
-; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
%tmp1 = insertelement <2 x double> %tmp, double %s, i32 1
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_2f64:
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_2f64:
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: cvtsi2sdq %rax, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: retq
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_2f64:
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_2f64:
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB47_12:
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2sdq %rax, %xmm1
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; VEX-LABEL: sitofp_load_2i64_to_2f64:
; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2sdq %rax, %xmm1
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2sdq %rax, %xmm1
; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: cvtsi2sdq %rax, %xmm2
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_load_4i64_to_4f64:
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_load_2i64_to_2f64:
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64:
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_load_4i64_to_4f32:
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE-NEXT: movq %xmm3, %rax
; SSE-NEXT: xorps %xmm4, %xmm4
; SSE-NEXT: cvtsi2ssq %rax, %xmm4
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_load_8i64_to_8f32:
; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB76_12:
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_load_4i64_to_4f32:
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: addss %xmm1, %xmm1
; SSE-NEXT: .LBB80_21:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB80_24:
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm5[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_load_8i64_to_8f32:
; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: cvt_2i16_to_2f64:
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: cvt_2i16_to_2f64:
; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
%1 = bitcast <2 x i16> %a0 to <2 x half>
%2 = fpext <2 x half> %1 to <2 x double>
; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3
; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = bitcast <4 x i16> %a0 to <4 x half>
; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: cvt_8i16_to_2f64:
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: cvt_8i16_to_2f64:
; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
%2 = bitcast <2 x i16> %1 to <2 x half>
; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3
; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7
; AVX1-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
; AVX1-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; AVX1-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm5[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
; AVX1-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7
; AVX2-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
; AVX2-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; AVX2-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm5[0],xmm0[0]
; AVX2-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
; AVX2-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7
; AVX512F-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
; AVX512F-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; AVX512F-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm4 = xmm5[0],xmm4[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512F-NEXT: retq
; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
; AVX512VL-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
; AVX512VL-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm4 = xmm5[0],xmm4[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512VL-NEXT: retq
; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_cvt_2i16_to_2f64:
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: load_cvt_2i16_to_2f64:
; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
%1 = load <2 x i16>, <2 x i16>* %a0
%2 = bitcast <2 x i16> %1 to <2 x half>
; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3
; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = load <4 x i16>, <4 x i16>* %a0
; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3
; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0
; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7
; AVX1-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
; AVX1-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; AVX1-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm5[0]
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
; AVX1-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7
; AVX2-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
; AVX2-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; AVX2-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm5[0]
; AVX2-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0
; AVX2-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7
; AVX512F-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
; AVX512F-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; AVX512F-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512F-NEXT: retq
; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7
; AVX512VL-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7
; AVX512VL-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512VL-NEXT: retq
define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm2[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
; SSE-LABEL: shuffle_v2f64_z0:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_z0:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_z0:
; AVX2: # BB#0:
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_z0:
; SSE-LABEL: insert_mem_hi_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_hi_v2f64:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_hi_v2f64:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_0145:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_0145:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %shuffle
define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0145:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0145:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i32> %shuffle
define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_4501:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_4501:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x i32> %shuffle
; AVX512VL-LABEL: shuffle_v4f32_bitcast_4401:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
%1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
%2 = bitcast <4 x i32> %1 to <2 x double>
; SSE-LABEL: insert_mem_hi_v4i32:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: insert_mem_hi_v4i32:
; AVX1OR2: # BB#0:
; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1OR2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1OR2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: insert_mem_hi_v4i32:
define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) {
; SSE-LABEL: insert_reg_hi_v4f32:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_hi_v4f32:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%a.cast = bitcast double %a to <2 x float>
%v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
; AVX1-LABEL: shuffle_v4i64_0451:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i64_4015:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX1-LABEL: shuffle_v4i64_0415:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test3:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test3:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test8:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test8:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
define <4 x float> @combine_test13(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test13:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test13:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test18:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test18:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; SSE-LABEL: combine_test21:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: movaps %xmm2, (%rdi)
; SSE-NEXT: retq
; AVX-LABEL: combine_test21:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: vmovaps %xmm2, (%rdi)
; AVX-NEXT: vzeroupper
define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test2:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test2:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test3:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test3:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test12:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test12:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
%2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test13:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test13:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 0, i32 5>
; SSE-NEXT: andl $1, %esi
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: var_shuffle_v2i64_v2i64_xx_i64:
; AVX-NEXT: andl $1, %esi
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%x0 = extractelement <2 x i64> %x, i32 %i0
%x1 = extractelement <2 x i64> %x, i32 %i1
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: var_shuffle_v4f32_v4f32_x0yx_i32:
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%x0 = extractelement <4 x float> %x, i32 %i0
%x1 = extractelement <4 x float> %x, i32 %i1
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovaps %xmm0, %xmm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
%x0 = extractelement <2 x i64> %x, i64 %i0
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
%p0 = getelementptr inbounds i64, i64* %i, i32 0
; NOTE: This operation is collapsed to a single truncate, so this test no longer covers
; what it originally intended to.
-; CHECK: MOVLHPSrr
+; CHECK: PUNPCKLQDQrr
; CHECK: PSHUFHWri
; CHECK: PACKUSWBrr
; CHECK: PACKUSWBrr
define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
; SSE-LABEL: select_of_shuffles_0:
; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: subps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_of_shuffles_0:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
;
; X64-LABEL: convert:
; X64: # BB#0: # %entry
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
entry:
; X32-LABEL: vpermil2pd_21:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X32-NEXT: retl
;
; X64-LABEL: vpermil2pd_21:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT: retq
%1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 10, i64 1>, i8 2)
ret <2 x double> %1