From: Chandler Carruth
Date: Fri, 3 Oct 2014 11:16:45 +0000 (+0000)
Subject: [x86] Add some important, missing test coverage for blending from one
X-Git-Tag: android-x86-7.1-r4~56880
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=ca77e5899373cb07a5aa01f19d3ad35ed14dba2c;p=android-x86%2Fexternal-llvm.git

[x86] Add some important, missing test coverage for blending from one
vector to a zero vector for the v2 cases, and fix the v4 integer cases
to actually blend from a vector. There are already separate tests for
the case of inserting from a scalar.

These cases cover a lot of the regressions I've seen in the regression
test suite for the new vector shuffle lowering, and specifically cover
the reported lack of using various zext-ing instruction patterns. My
next patch should fix a big chunk of this, but I wanted to get a nice
baseline for these patterns in the test cases first.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218976 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll
index c0bdc06138a..1fc1b287ef7 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -651,6 +651,227 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
   ret <2 x i64> %shuffle
 }
 
+define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
+; SSE2-LABEL: shuffle_v2i64_0z:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorpd %xmm1, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v2i64_0z:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorpd %xmm1, %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v2i64_0z:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorpd %xmm1, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v2i64_0z:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v2i64_0z:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v2i64_0z:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX2-NEXT: retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
+; SSE-LABEL: shuffle_v2i64_1z:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_1z:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT: retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
+; SSE-LABEL: shuffle_v2i64_z0:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_z0:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
+; SSE2-LABEL: shuffle_v2i64_z1:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorpd %xmm1, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v2i64_z1:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorpd %xmm1, %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v2i64_z1:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorpd %xmm1, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v2i64_z1:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v2i64_z1:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v2i64_z1:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
+; SSE2-LABEL: shuffle_v2f64_0z:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorpd %xmm1, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v2f64_0z:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorpd %xmm1, %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v2f64_0z:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorpd %xmm1, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v2f64_0z:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorpd %xmm1, %xmm1
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_0z:
+; AVX: # BB#0:
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
+; SSE-LABEL: shuffle_v2f64_1z:
+; SSE: # BB#0:
+; SSE-NEXT: xorpd %xmm1, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_1z:
+; AVX: # BB#0:
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT: retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
+; SSE-LABEL: shuffle_v2f64_z0:
+; SSE: # BB#0:
+; SSE-NEXT: xorpd %xmm1, %xmm1
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_z0:
+; AVX: # BB#0:
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
+; SSE2-LABEL: shuffle_v2f64_z1:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorpd %xmm1, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v2f64_z1:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorpd %xmm1, %xmm1
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v2f64_z1:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorpd %xmm1, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v2f64_z1:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorpd %xmm1, %xmm1
+; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_z1:
+; AVX: # BB#0:
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32>
+  ret <2 x double> %shuffle
+}
 
 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
 ; SSE-LABEL: insert_reg_and_zero_v2i64:
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll
index a359ce7b1b8..3e278d23f8a 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -659,85 +659,189 @@ define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
   ret <4 x float> %shuffle
 }
 
-define <4 x i32> @shuffle_v4i32_4zzz(i32 %i) {
-; SSE-LABEL: shuffle_v4i32_4zzz:
-; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: retq
+define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_4zzz:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
+; SSE2-NEXT: retq
 ;
-; AVX-LABEL: shuffle_v4i32_4zzz:
-; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: retq
-  %a = insertelement <4 x i32> undef, i32 %i, i32 0
+; SSE3-LABEL: shuffle_v4i32_4zzz:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_4zzz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_4zzz:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_4zzz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_4zzz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT: retq
   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32>
   ret <4 x i32> %shuffle
 }
 
-define <4 x i32> @shuffle_v4i32_z4zz(i32 %i) {
-; SSE-LABEL: shuffle_v4i32_z4zz:
-; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; SSE-NEXT: retq
+define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_z4zz:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4i32_z4zz:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_z4zz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_z4zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v4i32_z4zz:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
 ; AVX-NEXT: retq
-  %a = insertelement <4 x i32> undef, i32 %i, i32 0
   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32>
   ret <4 x i32> %shuffle
 }
 
-define <4 x i32> @shuffle_v4i32_zz4z(i32 %i) {
-; SSE-LABEL: shuffle_v4i32_zz4z:
-; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
-; SSE-NEXT: retq
+define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_zz4z:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4i32_zz4z:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_zz4z:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_zz4z:
+; SSE41: # BB#0:
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v4i32_zz4z:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
 ; AVX-NEXT: retq
-  %a = insertelement <4 x i32> undef, i32 %i, i32 0
   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32>
   ret <4 x i32> %shuffle
 }
 
-define <4 x i32> @shuffle_v4i32_zuu4(i32 %i) {
-; SSE-LABEL: shuffle_v4i32_zuu4:
-; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,0]
-; SSE-NEXT: retq
+define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_zuu4:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4i32_zuu4:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_zuu4:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_zuu4:
+; SSE41: # BB#0:
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v4i32_zuu4:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,0]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
 ; AVX-NEXT: retq
-  %a = insertelement <4 x i32> undef, i32 %i, i32 0
   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32>
   ret <4 x i32> %shuffle
 }
 
-define <4 x i32> @shuffle_v4i32_z6zz(i32 %i) {
-; SSE-LABEL: shuffle_v4i32_z6zz:
-; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; SSE-NEXT: retq
+define <4 x i32> @shuffle_v4i32_z6zz(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_z6zz:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4i32_z6zz:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_z6zz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_z6zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v4i32_z6zz:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
 ; AVX-NEXT: retq
-  %a = insertelement <4 x i32> undef, i32 %i, i32 2
   %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32>
   ret <4 x i32> %shuffle
 }
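
A minimal standalone sketch (not part of the patch) of the zero-blend pattern these tests exercise: the second shufflevector operand is zeroinitializer, so the mask keeps some lanes of %a and forces the rest to zero. Today that lowers to the pxor + pblendw / vpxor + vpblendd style sequences checked above; the zext-ing patterns mentioned in the commit message (for example a plain movq for the 0z case, which zeroes the upper lane) are the hoped-for improvement. The function name and mask below are illustrative assumptions, not taken from the test files.

define <2 x i64> @zero_blend_sketch(<2 x i64> %a) {
  ; Lane 0 comes from %a; lane 1 comes from the all-zero vector (mask index 3).
  %blend = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %blend
}

Feeding input like this through llc (for instance with -mtriple=x86_64-unknown-unknown -mattr=+sse4.1) reproduces the kind of baseline captured in the CHECK lines above.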