define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {
; CHECK-LABEL: test_2xi32_to_16xi32_mem:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
+; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; X86-LABEL: combine_permd_as_vpbroadcastd256:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vpbroadcastd %xmm0, %ymm0
; X86-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: combine_permd_as_vpbroadcastd256:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vpbroadcastd %xmm0, %ymm0
; X64-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; X86-LABEL: combine_permd_as_vpbroadcastq256:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vpbroadcastq %xmm0, %ymm0
; X86-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: combine_permd_as_vpbroadcastq256:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vpbroadcastq %xmm0, %ymm0
; X64-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
define <8 x float> @combine_permps_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permps_as_vpbroadcastss256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
define <4 x double> @combine_permps_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permps_as_vpbroadcastsd256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
define <8 x float> @combine_vpbroadcast_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastss256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> zeroinitializer
define <4 x double> @combine_vpbroadcast_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastsd256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> zeroinitializer
define <8 x i32> @combine_permd_insertion_as_broadcast_v4i64(i64 %a0) {
; X86-LABEL: combine_permd_insertion_as_broadcast_v4i64:
; X86: # %bb.0:
-; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vbroadcastsd %xmm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: combine_permd_insertion_as_broadcast_v4i64: