From: Simon Pilgrim
Date: Wed, 9 Jan 2019 13:23:28 +0000 (+0000)
Subject: [X86] Enable combining shuffles to PACKSS/PACKUS for 256/512-bit vectors
X-Git-Tag: android-x86-9.0-r1~8703
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=9024892e845a56b32c290e0155d50f54dd67fef2;p=android-x86%2Fexternal-llvm.git

[X86] Enable combining shuffles to PACKSS/PACKUS for 256/512-bit vectors

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350716 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ae7e120cd6b..ca9332fd0bd 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -30500,9 +30500,10 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
     }
   }
 
-  // Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
-  // TODO add support for 256/512-bit types.
-  if ((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) {
+  // Attempt to match against either an unary or binary PACKSS/PACKUS shuffle.
+  if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
+      ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) ||
+      ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
     if (matchVectorShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
                                    Subtarget)) {
       DstVT = MaskVT;
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index dda9e140d45..bce3ac3c63b 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -616,9 +616,7 @@ define <32 x i8> @combine_pshufb_as_packsswb(<16 x i16> %a0, <16 x i16> %a1) nou
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpsraw $11, %ymm0, %ymm0
 ; CHECK-NEXT:    vpsraw $11, %ymm1, %ymm1
-; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,18,20,22,24,26,28,30]
-; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %1 = ashr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
   %2 = ashr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
@@ -635,9 +633,7 @@ define <32 x i8> @combine_pshufb_as_packuswb(<16 x i16> %a0, <16 x i16> %a1) nou
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpsrlw $11, %ymm0, %ymm0
 ; CHECK-NEXT:    vpsrlw $11, %ymm1, %ymm1
-; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,18,20,22,24,26,28,30]
-; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %1 = lshr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
   %2 = lshr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index 5c40520ac1c..90fa5332a4a 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -829,18 +829,14 @@ define <64 x i8> @combine_pshufb_as_packsswb(<32 x i16> %a0, <32 x i16> %a1) nou
 ; X32:       # %bb.0:
 ; X32-NEXT:    vpsraw $11, %zmm0, %zmm0
 ; X32-NEXT:    vpsraw $11, %zmm1, %zmm1
-; X32-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
-; X32-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
-; X32-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X32-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_pshufb_as_packsswb:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vpsraw $11, %zmm0, %zmm0
 ; X64-NEXT:    vpsraw $11, %zmm1, %zmm1
-; X64-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
-; X64-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0
 ; X64-NEXT:    retq
   %1 = ashr <32 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
   %2 = ashr <32 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
@@ -857,18 +853,14 @@ define <64 x i8> @combine_pshufb_as_packuswb(<32 x i16> %a0, <32 x i16> %a1) nou
 ; X32:       # %bb.0:
 ; X32-NEXT:    vpsrlw $11, %zmm0, %zmm0
 ; X32-NEXT:    vpsrlw $11, %zmm1, %zmm1
-; X32-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
-; X32-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
-; X32-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X32-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_pshufb_as_packuswb:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vpsrlw $11, %zmm0, %zmm0
 ; X64-NEXT:    vpsrlw $11, %zmm1, %zmm1
-; X64-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
-; X64-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
 ; X64-NEXT:    retq
   %1 = lshr <32 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
   %2 = lshr <32 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
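
For illustration, a minimal IR sketch (not part of the commit) of the 256-bit pattern the shuffle combiner can now turn into a single PACKSS. It is written with a plain shufflevector rather than the @llvm.x86.avx2.pshuf.b intrinsics the tests above use, and the function name is hypothetical; with this change, llc -mtriple=x86_64-- -mattr=+avx2 should be able to select the single vpacksswb seen in the updated CHECK lines instead of two vpshufb plus a vpor.

; Sketch only: a <32 x i8> shuffle with the exact per-lane layout of VPACKSSWB.
define <32 x i8> @packsswb_sketch(<16 x i16> %a0, <16 x i16> %a1) nounwind {
  ; ashr by 11 leaves at least 12 sign bits in every i16, so truncating each
  ; word to its low byte is equivalent to signed saturation - the precondition
  ; the PACKSS matcher establishes via ComputeNumSignBits.
  %s0 = ashr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
  %s1 = ashr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
  %b0 = bitcast <16 x i16> %s0 to <32 x i8>
  %b1 = bitcast <16 x i16> %s1 to <32 x i8>
  ; Take the even (low) byte of every word, interleaved per 128-bit lane the
  ; way VPACKSSWB lays them out: a0 lane 0, a1 lane 0, a0 lane 1, a1 lane 1.
  %r = shufflevector <32 x i8> %b0, <32 x i8> %b1,
         <32 x i32> <i32 0,  i32 2,  i32 4,  i32 6,  i32 8,  i32 10, i32 12, i32 14,
                     i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46,
                     i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30,
                     i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
  ret <32 x i8> %r
}

The PACKUS form is analogous: with lshr in place of ashr, the matcher instead proves the upper byte of each word is zero, and the same shuffle becomes a single vpackuswb.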