From 3f1dbfd35ddc34f967b87cbc48ab69dcc929a2f3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 19 Nov 2015 22:13:56 +0000 Subject: [PATCH] [X86][SSE4A] Fix issue with EXTRQI shuffles not starting at the correct start index. Found during stress testing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253611 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 5 +++-- test/CodeGen/X86/vector-shuffle-sse4a.ll | 37 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bbd857c0811..219f1864229 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7351,8 +7351,9 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1, SDValue &V = (M < Size ? V1 : V2); M = M % Size; - // All mask elements must be in the lower half. - if (M >= HalfSize) + // The extracted elements must start at a valid index and all mask + // elements must be in the lower half. 
+ if (i > M || M >= HalfSize) return SDValue(); if (Idx < 0 || (Src == V && Idx == (M - i))) { diff --git a/test/CodeGen/X86/vector-shuffle-sse4a.ll b/test/CodeGen/X86/vector-shuffle-sse4a.ll index 2dd43e2852a..89b0d6543fb 100644 --- a/test/CodeGen/X86/vector-shuffle-sse4a.ll +++ b/test/CodeGen/X86/vector-shuffle-sse4a.ll @@ -319,5 +319,42 @@ define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) { ret <16 x i8> %1 } +define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) { +; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; BTVER1: # BB#0: +; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7] +; BTVER1-NEXT: retq +; +; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; BTVER2: # BB#0: +; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7] +; BTVER2-NEXT: retq + %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + ret <16 x i8> %1 +} + +define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) { +; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; BTVER1: # BB#0: +; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u] +; BTVER1-NEXT: retq +; +; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; BTVER2: # BB#0: +; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u] +; BTVER2-NEXT: retq + %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + ret <16 x i8> %1 +} + +define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) { +; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; ALL: # BB#0: +; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; ALL-NEXT: retq + %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + ret <16 x i8> %1 +} +
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind -- 2.11.0