From b8e8b823ce1688ec5c3cb9bd792bf762903b9147 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Thu, 3 Jan 2019 22:11:14 +0000
Subject: [PATCH] [x86] add common FileCheck prefix to reduce assert duplication; NFC

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350356 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/haddsub-undef.ll | 276 +++++++++++++-------------------------
 1 file changed, 90 insertions(+), 186 deletions(-)

diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll
index 166528b08ef..83782b19126 100644
--- a/test/CodeGen/X86/haddsub-undef.ll
+++ b/test/CodeGen/X86/haddsub-undef.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE-FAST
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1,AVX1-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX1,AVX1-FAST
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1,AVX1-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1,AVX1-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX2,AVX2-FAST
 
 ; Verify that we correctly fold horizontal binop even in the presence of UNDEFs.
 
@@ -463,27 +463,16 @@ define <2 x double> @add_pd_003(<2 x double> %x) {
 ; SSE-FAST-NEXT: haddpd %xmm0, %xmm0
 ; SSE-FAST-NEXT: retq
 ;
-; AVX1-SLOW-LABEL: add_pd_003:
-; AVX1-SLOW: # %bb.0:
-; AVX1-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; AVX1-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
-; AVX1-SLOW-NEXT: retq
-;
-; AVX1-FAST-LABEL: add_pd_003:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: retq
+; AVX-SLOW-LABEL: add_pd_003:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
+; AVX-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX-SLOW-NEXT: retq
 ;
-; AVX2-SLOW-LABEL: add_pd_003:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; AVX2-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: add_pd_003:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: retq
+; AVX-FAST-LABEL: add_pd_003:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: retq
   %l = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32>
   %add = fadd <2 x double> %l, %x
   ret <2 x double> %add
@@ -505,27 +494,16 @@ define <2 x double> @add_pd_003_2(<2 x double> %x) {
 ; SSE-FAST-NEXT: haddpd %xmm0, %xmm0
 ; SSE-FAST-NEXT: retq
 ;
-; AVX1-SLOW-LABEL: add_pd_003_2:
-; AVX1-SLOW: # %bb.0:
-; AVX1-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX1-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
-; AVX1-SLOW-NEXT: retq
-;
-; AVX1-FAST-LABEL: add_pd_003_2:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: retq
-;
-; AVX2-SLOW-LABEL: add_pd_003_2:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX2-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT: retq
+; AVX-SLOW-LABEL: add_pd_003_2:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX-SLOW-NEXT: retq
 ;
-; AVX2-FAST-LABEL: add_pd_003_2:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: retq
+; AVX-FAST-LABEL: add_pd_003_2:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: retq
   %l = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32>
   %add = fadd <2 x double> %l, %x
   ret <2 x double> %add
@@ -545,31 +523,18 @@ define <2 x double> @add_pd_010(<2 x double> %x) {
 ; SSE-FAST-NEXT: haddpd %xmm0, %xmm0
 ; SSE-FAST-NEXT: retq
 ;
-; AVX1-SLOW-LABEL: add_pd_010:
-; AVX1-SLOW: # %bb.0:
-; AVX1-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; AVX1-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
-; AVX1-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-SLOW-NEXT: retq
-;
-; AVX1-FAST-LABEL: add_pd_010:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-FAST-NEXT: retq
-;
-; AVX2-SLOW-LABEL: add_pd_010:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; AVX2-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: add_pd_010:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-FAST-NEXT: retq
+; AVX-SLOW-LABEL: add_pd_010:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
+; AVX-SLOW-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-SLOW-NEXT: retq
+;
+; AVX-FAST-LABEL: add_pd_010:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-FAST-NEXT: retq
   %l = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32>
   %add = fadd <2 x double> %l, %x
   %shuffle2 = shufflevector <2 x double> %add, <2 x double> undef, <2 x i32>
@@ -590,29 +555,17 @@ define <4 x float> @add_ps_007(<4 x float> %x) {
 ; SSE-FAST-NEXT: haddps %xmm0, %xmm0
 ; SSE-FAST-NEXT: retq
 ;
-; AVX1-SLOW-LABEL: add_ps_007:
-; AVX1-SLOW: # %bb.0:
-; AVX1-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,2]
-; AVX1-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; AVX1-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX1-SLOW-NEXT: retq
-;
-; AVX1-FAST-LABEL: add_ps_007:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: retq
-;
-; AVX2-SLOW-LABEL: add_ps_007:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,2]
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; AVX2-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: add_ps_007:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: retq
+; AVX-SLOW-LABEL: add_ps_007:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,2]
+; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX-SLOW-NEXT: retq
+;
+; AVX-FAST-LABEL: add_ps_007:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: retq
   %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %add = fadd <4 x float> %l, %r
@@ -635,33 +588,19 @@ define <4 x float> @add_ps_030(<4 x float> %x) {
 ; SSE-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,2,3]
 ; SSE-FAST-NEXT: retq
 ;
-; AVX1-SLOW-LABEL: add_ps_030:
-; AVX1-SLOW: # %bb.0:
-; AVX1-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,2]
-; AVX1-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; AVX1-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX1-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,2,3]
-; AVX1-SLOW-NEXT: retq
-;
-; AVX1-FAST-LABEL: add_ps_030:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,2,3]
-; AVX1-FAST-NEXT: retq
-;
-; AVX2-SLOW-LABEL: add_ps_030:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,2]
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; AVX2-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,2,3]
-; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: add_ps_030:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,2,3]
-; AVX2-FAST-NEXT: retq
+; AVX-SLOW-LABEL: add_ps_030:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,2]
+; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,2,3]
+; AVX-SLOW-NEXT: retq
+;
+; AVX-FAST-LABEL: add_ps_030:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,2,3]
+; AVX-FAST-NEXT: retq
   %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %add = fadd <4 x float> %l, %r
@@ -689,10 +628,10 @@ define <4 x float> @add_ps_007_2(<4 x float> %x) {
 ; AVX1-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
 ; AVX1-SLOW-NEXT: retq
 ;
-; AVX1-FAST-LABEL: add_ps_007_2:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: retq
+; AVX-FAST-LABEL: add_ps_007_2:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: retq
 ;
 ; AVX2-SLOW-LABEL: add_ps_007_2:
 ; AVX2-SLOW: # %bb.0:
@@ -700,11 +639,6 @@ define <4 x float> @add_ps_007_2(<4 x float> %x) {
 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
 ; AVX2-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
 ; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: add_ps_007_2:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: retq
   %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %add = fadd <4 x float> %l, %r
@@ -723,27 +657,16 @@ define <4 x float> @add_ps_008(<4 x float> %x) {
 ; SSE-FAST-NEXT: haddps %xmm0, %xmm0
 ; SSE-FAST-NEXT: retq
 ;
-; AVX1-SLOW-LABEL: add_ps_008:
-; AVX1-SLOW: # %bb.0:
-; AVX1-SLOW-NEXT: vmovsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; AVX1-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX1-SLOW-NEXT: retq
+; AVX-SLOW-LABEL: add_ps_008:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vmovsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX-SLOW-NEXT: retq
 ;
-; AVX1-FAST-LABEL: add_ps_008:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: retq
-;
-; AVX2-SLOW-LABEL: add_ps_008:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vmovsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; AVX2-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: add_ps_008:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: retq
+; AVX-FAST-LABEL: add_ps_008:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: retq
   %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %add = fadd <4 x float> %l, %x
   ret <4 x float> %add
@@ -764,31 +687,18 @@ define <4 x float> @add_ps_017(<4 x float> %x) {
 ; SSE-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-FAST-NEXT: retq
 ;
-; AVX1-SLOW-LABEL: add_ps_017:
-; AVX1-SLOW: # %bb.0:
-; AVX1-SLOW-NEXT: vmovsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; AVX1-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX1-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX1-SLOW-NEXT: retq
-;
-; AVX1-FAST-LABEL: add_ps_017:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX1-FAST-NEXT: retq
-;
-; AVX2-SLOW-LABEL: add_ps_017:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vmovsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; AVX2-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: add_ps_017:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX2-FAST-NEXT: retq
+; AVX-SLOW-LABEL: add_ps_017:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vmovsldup {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX-SLOW-NEXT: retq
+;
+; AVX-FAST-LABEL: add_ps_017:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX-FAST-NEXT: retq
   %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %add = fadd <4 x float> %l, %x
   %shuffle2 = shufflevector <4 x float> %add, <4 x float> undef, <4 x i32>
@@ -818,11 +728,11 @@ define <4 x float> @add_ps_018(<4 x float> %x) {
 ; AVX1-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; AVX1-SLOW-NEXT: retq
 ;
-; AVX1-FAST-LABEL: add_ps_018:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX1-FAST-NEXT: retq
+; AVX-FAST-LABEL: add_ps_018:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-FAST-NEXT: retq
 ;
 ; AVX2-SLOW-LABEL: add_ps_018:
 ; AVX2-SLOW: # %bb.0:
@@ -831,12 +741,6 @@ define <4 x float> @add_ps_018(<4 x float> %x) {
 ; AVX2-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: add_ps_018:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX2-FAST-NEXT: retq
   %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
   %add = fadd <4 x float> %l, %r
-- 
2.11.0