From 598cdd31a0f19703ec4a0c8044ee1734d1ad9fb8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 4 Feb 2017 22:17:22 +0000 Subject: [PATCH] [X86][SSE] Add target shuffle combine buildvec style tests Extra tests for D29399 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294101 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/vector-shuffle-combining-avx2.ll | 29 ++++++++++++++++ test/CodeGen/X86/vector-shuffle-combining-ssse3.ll | 40 ++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index d34bbb60103..4b23ba4f69f 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -712,6 +712,35 @@ define <32 x i8> @combine_psrlq_pshufb(<4 x i64> %a0) { ret <32 x i8> %3 } +define <32 x i8> @combine_unpack_unpack_pshufb(<32 x i8> %a0) { +; X32-LABEL: combine_unpack_unpack_pshufb: +; X32: # BB#0: +; X32-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,0,0,0,4,4,4,4] +; X32-NEXT: vpshufd {{.*#+}} ymm2 = ymm0[1,1,2,3,5,5,6,7] +; X32-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7] +; X32-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] +; X32-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] +; X32-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23] +; X32-NEXT: retl +; +; X64-LABEL: combine_unpack_unpack_pshufb: +; X64: # BB#0: +; X64-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,0,0,0,4,4,4,4] +; X64-NEXT: vpshufd {{.*#+}} ymm2 = ymm0[1,1,2,3,5,5,6,7] +; X64-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7] +; X64-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] +; X64-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] +; X64-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23] +; X64-NEXT: retq + %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> + %2 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> + %3 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> + %4 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> + %5 = shufflevector <32 x i8> %1, <32 x i8> %3, <32 x i32> + %6 = shufflevector <32 x i8> %4, <32 x i8> %5, <32 x i32> + ret <32 x i8> %6 +} + define <8 x i32> @constant_fold_permd() { ; X32-LABEL: constant_fold_permd: ; X32: # BB#0: diff --git a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index 7e29a48d5cd..33b22b3fe86 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -552,6 +552,46 @@ define <16 x i8> @combine_unpckl_arg1_pshufb(<16 x i8> %a0, <16 x i8> %a1) { ret <16 x i8> %2 } +define <8 x i16> @shuffle_combine_unpack_insert(<8 x i16> %a0) { +; SSE-LABEL: shuffle_combine_unpack_insert: +; SSE: # BB#0: +; SSE-NEXT: pextrw $2, %xmm0, %eax +; SSE-NEXT: pextrw $4, %xmm0, %ecx +; SSE-NEXT: movdqa %xmm0, %xmm2 +; SSE-NEXT: pinsrw $4, %eax, %xmm2 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: pinsrw $2, %ecx, %xmm1 +; SSE-NEXT: movdqa %xmm2, %xmm3 +; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE-NEXT: movdqa %xmm3, %xmm1 +; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_combine_unpack_insert: +; AVX: # BB#0: +; AVX-NEXT: vpextrw $2, %xmm0, %eax +; AVX-NEXT: vpextrw $4, %xmm0, %ecx +; AVX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm1 +; AVX-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm2 +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; AVX-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX-NEXT: retq + %1 = extractelement <8 x i16> %a0, i32 2 + %2 = extractelement <8 x i16> %a0, i32 4 + %3 = insertelement <8 x i16> %a0, i16 %1, i32 4 + %4 = insertelement <8 x i16> %a0, i16 %2, i32 2 + %5 = shufflevector <8 x i16> %3, <8 x i16> %4, <8 x i32> + %6 = shufflevector <8 x i16> %5, <8 x i16> %3, <8 x i32> + %7 = shufflevector <8 x i16> %5, <8 x i16> %a0, <8 x i32> + %8 = shufflevector <8 x i16> %6, <8 x i16> %7, <8 x i32> + ret <8 x i16> %8 +} + define <16 x i8> @constant_fold_pshufb() { ; SSE-LABEL: constant_fold_pshufb: ; SSE: # BB#0: -- 2.11.0