From: Simon Pilgrim Date: Sat, 1 Oct 2016 13:10:14 +0000 (+0000) Subject: [X86][SSE] Regenerate vselect tests and improve AVX1/AVX2 coverage X-Git-Tag: android-x86-7.1-r4~26380 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=f2993f0b162e300b3a75ca97f352a78e7ed8e33a;p=android-x86%2Fexternal-llvm.git [X86][SSE] Regenerate vselect tests and improve AVX1/AVX2 coverage git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283035 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/vselect-2.ll b/test/CodeGen/X86/vselect-2.ll index 8e0f4a4ef44..3ceff7ddd6e 100644 --- a/test/CodeGen/X86/vselect-2.ll +++ b/test/CodeGen/X86/vselect-2.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) { ; SSE2-LABEL: test1: @@ -13,6 +15,16 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) { ; SSE41: # BB#0: ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: retq +; +; AVX1-LABEL: test1: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: test1: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX2-NEXT: retq %select = select <4 x i1>, <4 x i32> %A, <4 x i32> %B ret <4 x i32> %select } @@ -27,6 +39,16 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) { ; SSE41: # BB#0: ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq +; +; AVX1-LABEL: test2: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: test2: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] +; AVX2-NEXT: retq %select = select <4 x i1>, <4 x i32> %A, <4 x i32> %B ret <4 x i32> %select } @@ -42,6 +64,11 @@ define <4 x float> @test3(<4 x float> %A, <4 x float> %B) { ; SSE41: # BB#0: ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] ; SSE41-NEXT: retq +; +; AVX-LABEL: test3: +; AVX: # BB#0: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: retq %select = select <4 x i1>, <4 x float> %A, <4 x float> %B ret <4 x float> %select } @@ -56,6 +83,11 @@ define <4 x float> @test4(<4 x float> %A, <4 x float> %B) { ; SSE41: # BB#0: ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE41-NEXT: retq +; +; AVX-LABEL: test4: +; AVX: # BB#0: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX-NEXT: retq %select = select <4 x i1>, <4 x float> %A, <4 x float> %B ret <4 x float> %select } diff --git a/test/CodeGen/X86/vselect-avx.ll b/test/CodeGen/X86/vselect-avx.ll index d9f783756d1..c453724b045 100644 --- a/test/CodeGen/X86/vselect-avx.ll +++ b/test/CodeGen/X86/vselect-avx.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 + target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; For this test we used to optimize the @@ -14,16 +16,16 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; define void @test(<4 x i16>* %a, <4 x i16>* %b) { -; CHECK-LABEL: test: -; CHECK: ## BB#0: ## %body -; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [65533,124,125,14807] -; CHECK-NEXT: vpshufb %xmm0, %xmm1, %xmm1 -; CHECK-NEXT: vmovq %xmm1, (%rdi) -; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,0,0,65535] -; CHECK-NEXT: vpshufb %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vmovq %xmm0, (%rsi) -; CHECK-NEXT: retq +; AVX-LABEL: test: +; AVX: ## BB#0: ## %body +; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [65533,124,125,14807] +; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vmovq %xmm1, (%rdi) +; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,0,0,65535] +; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmovq %xmm0, (%rsi) +; AVX-NEXT: retq body: %predphi = select <4 x i1> , <4 x i16> , <4 x i16> %predphi42 = select <4 x i1> , <4 x i16> , <4 x i16> zeroinitializer @@ -39,20 +41,32 @@ body: ; of the condition. define void @test2(double** %call1559, i64 %indvars.iv4198, <4 x i1> %tmp1895) { -; CHECK-LABEL: test2: -; CHECK: ## BB#0: ## %bb -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 -; CHECK-NEXT: vpsrad $31, %xmm0, %xmm0 -; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; CHECK-NEXT: movq (%rdi,%rsi,8), %rax -; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] -; CHECK-NEXT: vblendvpd %ymm0, {{.*}}(%rip), %ymm1, %ymm0 -; CHECK-NEXT: vmovupd %ymm0, (%rax) -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; AVX1-LABEL: test2: +; AVX1: ## BB#0: ## %bb +; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: movq (%rdi,%rsi,8), %rax +; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] +; AVX1-NEXT: vblendvpd %ymm0, {{.*}}(%rip), %ymm1, %ymm0 +; AVX1-NEXT: vmovupd %ymm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test2: +; AVX2: ## BB#0: ## %bb +; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; AVX2-NEXT: movq (%rdi,%rsi,8), %rax +; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2 +; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 +; AVX2-NEXT: vmovupd %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq bb: %arrayidx1928 = getelementptr inbounds double*, double** %call1559, i64 %indvars.iv4198 %tmp1888 = load double*, double** %arrayidx1928, align 8 @@ -72,28 +86,52 @@ bb: ; define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) { -; CHECK-LABEL: test3: -; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [1431655766,1431655766,1431655766,1431655766] -; CHECK-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] -; CHECK-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] -; CHECK-NEXT: vpmuldq %xmm4, %xmm5, %xmm4 -; CHECK-NEXT: vpmuldq %xmm3, %xmm0, %xmm3 -; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; CHECK-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] -; CHECK-NEXT: vpsrld $31, %xmm3, %xmm4 -; CHECK-NEXT: vpaddd %xmm4, %xmm3, %xmm3 -; CHECK-NEXT: vpmulld {{.*}}(%rip), %xmm3, %xmm3 -; CHECK-NEXT: vpsubd %xmm3, %xmm0, %xmm0 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; CHECK-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 -; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vmovq %xmm0, (%rdi) -; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm0 -; CHECK-NEXT: vmovq %xmm0, (%rsi) -; CHECK-NEXT: retq +; AVX1-LABEL: test3: +; AVX1: ## BB#0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1431655766,1431655766,1431655766,1431655766] +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] +; AVX1-NEXT: vpmuldq %xmm4, %xmm5, %xmm4 +; AVX1-NEXT: vpmuldq %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] +; AVX1-NEXT: vpsrld $31, %xmm3, %xmm4 +; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vmovq %xmm0, (%rdi) +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm0 +; AVX1-NEXT: vmovq %xmm0, (%rsi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: test3: +; AVX2: ## BB#0: +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm3 +; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; AVX2-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] +; AVX2-NEXT: vpmuldq %xmm4, %xmm5, %xmm4 +; AVX2-NEXT: vpmuldq %xmm3, %xmm0, %xmm3 +; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3] +; AVX2-NEXT: vpsrld $31, %xmm3, %xmm4 +; AVX2-NEXT: vpaddd %xmm4, %xmm3, %xmm3 +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm4 +; AVX2-NEXT: vpmulld %xmm4, %xmm3, %xmm3 +; AVX2-NEXT: vpsubd %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, (%rdi) +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm0 +; AVX2-NEXT: vmovq %xmm0, (%rsi) +; AVX2-NEXT: retq %tmp6 = srem <4 x i32> %induction30, %tmp7 = icmp eq <4 x i32> %tmp6, zeroinitializer %predphi = select <4 x i1> %tmp7, <4 x i16> %tmp3, <4 x i16> %tmp12 @@ -108,22 +146,30 @@ define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, ; vpblendvb in AVX1, only in AVX2. Instead, it should be expanded. define <32 x i8> @PR22706(<32 x i1> %x) { -; CHECK-LABEL: PR22706: -; CHECK: ## BB#0: -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; CHECK-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm1 -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 -; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 -; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; CHECK-NEXT: vandnps {{.*}}(%rip), %ymm0, %ymm1 -; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: retq +; AVX1-LABEL: PR22706: +; AVX1: ## BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vandnps {{.*}}(%rip), %ymm0, %ymm1 +; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR22706: +; AVX2: ## BB#0: +; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] +; AVX2-NEXT: vpblendvb %ymm0, {{.*}}(%rip), %ymm1, %ymm0 +; AVX2-NEXT: retq %tmp = select <32 x i1> %x, <32 x i8> , <32 x i8> ret <32 x i8> %tmp } diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll index 359ea7eb3ee..9d08822f50f 100644 --- a/test/CodeGen/X86/vselect.ll +++ b/test/CodeGen/X86/vselect.ll @@ -1,248 +1,478 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2,-sse4.1 < %s | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 ; Verify that we don't emit packed vector shifts instructions if the ; condition used by the vector select is a vector of constants. define <4 x float> @test1(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test1: -; CHECK: # BB#0: -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3] -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3] -; CHECK-NEXT: retq +; SSE2-LABEL: test1: +; SSE2: # BB#0: +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3] +; SSE2-NEXT: retq +; +; SSE41-LABEL: test1: +; SSE41: # BB#0: +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; SSE41-NEXT: retq +; +; AVX-LABEL: test1: +; AVX: # BB#0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX-NEXT: retq %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %1 } define <4 x float> @test2(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test2: -; CHECK: # BB#0: -; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE2-LABEL: test2: +; SSE2: # BB#0: +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test2: +; SSE41: # BB#0: +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: retq +; +; AVX-LABEL: test2: +; AVX: # BB#0: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: retq %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %1 } define <4 x float> @test3(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test3: -; CHECK: # BB#0: -; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] -; CHECK-NEXT: retq +; SSE2-LABEL: test3: +; SSE2: # BB#0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: retq +; +; SSE41-LABEL: test3: +; SSE41: # BB#0: +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: retq +; +; AVX-LABEL: test3: +; AVX: # BB#0: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX-NEXT: retq %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %1 } define <4 x float> @test4(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test4: -; CHECK: # BB#0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test4: +; SSE: # BB#0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test4: +; AVX: # BB#0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %1 } define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test5: -; CHECK: # BB#0: -; CHECK-NEXT: retq +; SSE-LABEL: test5: +; SSE: # BB#0: +; SSE-NEXT: retq +; +; AVX-LABEL: test5: +; AVX: # BB#0: +; AVX-NEXT: retq %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %1 } define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test6: -; CHECK: # BB#0: -; CHECK-NEXT: retq +; SSE-LABEL: test6: +; SSE: # BB#0: +; SSE-NEXT: retq +; +; AVX-LABEL: test6: +; AVX: # BB#0: +; AVX-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %a ret <8 x i16> %1 } define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test7: -; CHECK: # BB#0: -; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE2-LABEL: test7: +; SSE2: # BB#0: +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test7: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: test7: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: test7: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX2-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 } define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test8: -; CHECK: # BB#0: -; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] -; CHECK-NEXT: retq +; SSE2-LABEL: test8: +; SSE2: # BB#0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: retq +; +; SSE41-LABEL: test8: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: test8: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: test8: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] +; AVX2-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 } define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test9: -; CHECK: # BB#0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test9: +; SSE: # BB#0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test9: +; AVX: # BB#0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 } define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test10: -; CHECK: # BB#0: -; CHECK-NEXT: retq +; SSE-LABEL: test10: +; SSE: # BB#0: +; SSE-NEXT: retq +; +; AVX-LABEL: test10: +; AVX: # BB#0: +; AVX-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 } define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test11: -; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,65535,65535] -; CHECK-NEXT: andps %xmm2, %xmm0 -; CHECK-NEXT: andnps %xmm1, %xmm2 -; CHECK-NEXT: orps %xmm2, %xmm0 -; CHECK-NEXT: retq +; SSE2-LABEL: test11: +; SSE2: # BB#0: +; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,65535,65535] +; SSE2-NEXT: andps %xmm2, %xmm0 +; SSE2-NEXT: andnps %xmm1, %xmm2 +; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test11: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3],xmm0[4,5,6,7] +; SSE41-NEXT: retq +; +; AVX-LABEL: test11: +; AVX: # BB#0: +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3],xmm0[4,5,6,7] +; AVX-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 } define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test12: -; CHECK: # BB#0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test12: +; SSE: # BB#0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test12: +; AVX: # BB#0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 } define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test13: -; CHECK: # BB#0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test13: +; SSE: # BB#0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test13: +; AVX: # BB#0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 } ; Fold (vselect (build_vector AllOnes), N1, N2) -> N1 define <4 x float> @test14(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test14: -; CHECK: # BB#0: -; CHECK-NEXT: retq +; SSE-LABEL: test14: +; SSE: # BB#0: +; SSE-NEXT: retq +; +; AVX-LABEL: test14: +; AVX: # BB#0: +; AVX-NEXT: retq %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %1 } define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test15: -; CHECK: # BB#0: -; CHECK-NEXT: retq +; SSE-LABEL: test15: +; SSE: # BB#0: +; SSE-NEXT: retq +; +; AVX-LABEL: test15: +; AVX: # BB#0: +; AVX-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 } ; Fold (vselect (build_vector AllZeros), N1, N2) -> N2 define <4 x float> @test16(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test16: -; CHECK: # BB#0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test16: +; SSE: # BB#0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test16: +; AVX: # BB#0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %1 } define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test17: -; CHECK: # BB#0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test17: +; SSE: # BB#0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test17: +; AVX: # BB#0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 } define <4 x float> @test18(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test18: -; CHECK: # BB#0: -; CHECK-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] -; CHECK-NEXT: retq +; SSE2-LABEL: test18: +; SSE2: # BB#0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE2-NEXT: retq +; +; SSE41-LABEL: test18: +; SSE41: # BB#0: +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE41-NEXT: retq +; +; AVX-LABEL: test18: +; AVX: # BB#0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX-NEXT: retq %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %1 } define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test19: -; CHECK: # BB#0: -; CHECK-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] -; CHECK-NEXT: retq +; SSE2-LABEL: test19: +; SSE2: # BB#0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE2-NEXT: retq +; +; SSE41-LABEL: test19: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: test19: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: test19: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX2-NEXT: retq %1 = select <4 x i1> , <4 x i32> %a, <4 x i32> %b ret <4 x i32> %1 } define <2 x double> @test20(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: test20: -; CHECK: # BB#0: -; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] -; CHECK-NEXT: retq +; SSE2-LABEL: test20: +; SSE2: # BB#0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: retq +; +; SSE41-LABEL: test20: +; SSE41: # BB#0: +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: retq +; +; AVX-LABEL: test20: +; AVX: # BB#0: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX-NEXT: retq %1 = select <2 x i1> , <2 x double> %a, <2 x double> %b ret <2 x double> %1 } define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test21: -; CHECK: # BB#0: -; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] -; CHECK-NEXT: retq +; SSE2-LABEL: test21: +; SSE2: # BB#0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: retq +; +; SSE41-LABEL: test21: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: test21: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: test21: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] +; AVX2-NEXT: retq %1 = select <2 x i1> , <2 x i64> %a, <2 x i64> %b ret <2 x i64> %1 } define <4 x float> @test22(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: test22: -; CHECK: # BB#0: -; CHECK-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE2-LABEL: test22: +; SSE2: # BB#0: +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test22: +; SSE41: # BB#0: +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: retq +; +; AVX-LABEL: test22: +; AVX: # BB#0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX-NEXT: retq %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b ret <4 x float> %1 } define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test23: -; CHECK: # BB#0: -; CHECK-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE2-LABEL: test23: +; SSE2: # BB#0: +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test23: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: test23: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: test23: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX2-NEXT: retq %1 = select <4 x i1> , <4 x i32> %a, <4 x i32> %b ret <4 x i32> %1 } define <2 x double> @test24(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: test24: -; CHECK: # BB#0: -; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE2-LABEL: test24: +; SSE2: # BB#0: +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test24: +; SSE41: # BB#0: +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: retq +; +; AVX-LABEL: test24: +; AVX: # BB#0: +; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: retq %1 = select <2 x i1> , <2 x double> %a, <2 x double> %b ret <2 x double> %1 } define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test25: -; CHECK: # BB#0: -; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE2-LABEL: test25: +; SSE2: # BB#0: +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test25: +; SSE41: # BB#0: +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; SSE41-NEXT: retq +; +; AVX1-LABEL: test25: +; AVX1: # BB#0: +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-LABEL: test25: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX2-NEXT: retq %1 = select <2 x i1> , <2 x i64> %a, <2 x i64> %b ret <2 x i64> %1 } define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) { -; CHECK-LABEL: select_of_shuffles_0: -; CHECK: # BB#0: -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] -; CHECK-NEXT: subps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: select_of_shuffles_0: +; SSE: # BB#0: +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] +; SSE-NEXT: subps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: select_of_shuffles_0: +; AVX: # BB#0: +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] +; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> %2 = shufflevector <2 x float> %a1, <2 x float> undef, <4 x i32> %3 = select <4 x i1> , <4 x float> %2, <4 x float> %1 @@ -255,22 +485,28 @@ define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x ; PR20677 define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) { -; CHECK-LABEL: select_illegal: -; CHECK: # BB#0: -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm4 -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm5 -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm6 -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm7 -; CHECK-NEXT: movaps %xmm7, 112(%rdi) -; CHECK-NEXT: movaps %xmm6, 96(%rdi) -; CHECK-NEXT: movaps %xmm5, 80(%rdi) -; CHECK-NEXT: movaps %xmm4, 64(%rdi) -; CHECK-NEXT: movaps %xmm3, 48(%rdi) -; CHECK-NEXT: movaps %xmm2, 32(%rdi) -; CHECK-NEXT: movaps %xmm1, 16(%rdi) -; CHECK-NEXT: movaps %xmm0, (%rdi) -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: retq +; SSE-LABEL: select_illegal: +; SSE: # BB#0: +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm4 +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm5 +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm6 +; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm7 +; SSE-NEXT: movaps %xmm7, 112(%rdi) +; SSE-NEXT: movaps %xmm6, 96(%rdi) +; SSE-NEXT: movaps %xmm5, 80(%rdi) +; SSE-NEXT: movaps %xmm4, 64(%rdi) +; SSE-NEXT: movaps %xmm3, 48(%rdi) +; SSE-NEXT: movaps %xmm2, 32(%rdi) +; SSE-NEXT: movaps %xmm1, 16(%rdi) +; SSE-NEXT: movaps %xmm0, (%rdi) +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: retq +; +; AVX-LABEL: select_illegal: +; AVX: # BB#0: +; AVX-NEXT: vmovaps %ymm6, %ymm2 +; AVX-NEXT: vmovaps %ymm7, %ymm3 +; AVX-NEXT: retq %sel = select <16 x i1> , <16 x double> %a, <16 x double> %b ret <16 x double> %sel }