From: Simon Pilgrim
Date: Wed, 9 Jan 2019 12:34:10 +0000 (+0000)
Subject: [X86] Add extra test coverage for combining shuffles to PACKSS/PACKUS
X-Git-Tag: android-x86-9.0-r1~8709
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=18381bbdcec1dfee09f6b08740377627419c6344;p=android-x86%2Fexternal-llvm.git

[X86] Add extra test coverage for combining shuffles to PACKSS/PACKUS

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350707 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 2ea0f1ab3e7..dda9e140d45 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -611,6 +611,44 @@ define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1
 }
 declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
 
+define <32 x i8> @combine_pshufb_as_packsswb(<16 x i16> %a0, <16 x i16> %a1) nounwind {
+; CHECK-LABEL: combine_pshufb_as_packsswb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsraw $11, %ymm0, %ymm0
+; CHECK-NEXT:    vpsraw $11, %ymm1, %ymm1
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,18,20,22,24,26,28,30]
+; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %1 = ashr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %2 = ashr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %3 = bitcast <16 x i16> %1 to <32 x i8>
+  %4 = bitcast <16 x i16> %2 to <32 x i8>
+  %5 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %6 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %4, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>)
+  %7 = or <32 x i8> %5, %6
+  ret <32 x i8> %7
+}
+
+define <32 x i8> @combine_pshufb_as_packuswb(<16 x i16> %a0, <16 x i16> %a1) nounwind {
+; CHECK-LABEL: combine_pshufb_as_packuswb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsrlw $11, %ymm0, %ymm0
+; CHECK-NEXT:    vpsrlw $11, %ymm1, %ymm1
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,18,20,22,24,26,28,30]
+; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %1 = lshr <16 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %2 = lshr <16 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %3 = bitcast <16 x i16> %1 to <32 x i8>
+  %4 = bitcast <16 x i16> %2 to <32 x i8>
+  %5 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %6 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %4, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>)
+  %7 = or <32 x i8> %5, %6
+  ret <32 x i8> %7
+}
+
 define <16 x i8> @combine_pshufb_insertion_as_broadcast_v2i64(i64 %a0) {
 ; X86-LABEL: combine_pshufb_insertion_as_broadcast_v2i64:
 ; X86:       # %bb.0:
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index b8d3824f1ca..5c40520ac1c 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -824,6 +824,62 @@ define <32 x i16> @combine_pshufb_as_pshufhw(<32 x i16> %a0) {
   ret <32 x i16> %1
 }
 
+define <64 x i8> @combine_pshufb_as_packsswb(<32 x i16> %a0, <32 x i16> %a1) nounwind {
+; X32-LABEL: combine_pshufb_as_packsswb:
+; X32:       # %bb.0:
+; X32-NEXT:    vpsraw $11, %zmm0, %zmm0
+; X32-NEXT:    vpsraw $11, %zmm1, %zmm1
+; X32-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X32-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_pshufb_as_packsswb:
+; X64:       # %bb.0:
+; X64-NEXT:    vpsraw $11, %zmm0, %zmm0
+; X64-NEXT:    vpsraw $11, %zmm1, %zmm1
+; X64-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X64-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %1 = ashr <32 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %2 = ashr <32 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %3 = bitcast <32 x i16> %1 to <64 x i8>
+  %4 = bitcast <32 x i16> %2 to <64 x i8>
+  %5 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %3, <64 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <64 x i8> undef, i64 -1)
+  %6 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %4, <64 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>, <64 x i8> undef, i64 -1)
+  %7 = or <64 x i8> %5, %6
+  ret <64 x i8> %7
+}
+
+define <64 x i8> @combine_pshufb_as_packuswb(<32 x i16> %a0, <32 x i16> %a1) nounwind {
+; X32-LABEL: combine_pshufb_as_packuswb:
+; X32:       # %bb.0:
+; X32-NEXT:    vpsrlw $11, %zmm0, %zmm0
+; X32-NEXT:    vpsrlw $11, %zmm1, %zmm1
+; X32-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X32-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_pshufb_as_packuswb:
+; X64:       # %bb.0:
+; X64-NEXT:    vpsrlw $11, %zmm0, %zmm0
+; X64-NEXT:    vpsrlw $11, %zmm1, %zmm1
+; X64-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,50,52,54,56,58,60,62],zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    vpshufb {{.*#+}} zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm1[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[16,18,20,22,24,26,28,30],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[32,34,36,38,40,42,44,46],zero,zero,zero,zero,zero,zero,zero,zero,zmm1[48,50,52,54,56,58,60,62]
+; X64-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %1 = lshr <32 x i16> %a0, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %2 = lshr <32 x i16> %a1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %3 = bitcast <32 x i16> %1 to <64 x i8>
+  %4 = bitcast <32 x i16> %2 to <64 x i8>
+  %5 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %3, <64 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <64 x i8> undef, i64 -1)
+  %6 = tail call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %4, <64 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>, <64 x i8> undef, i64 -1)
+  %7 = or <64 x i8> %5, %6
+  ret <64 x i8> %7
+}
+
 define <32 x i16> @combine_vpermi2var_32i16_as_pshufb(<32 x i16> %a0) {
 ; X32-LABEL: combine_vpermi2var_32i16_as_pshufb:
 ; X32:       # %bb.0:
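
A hedged note on intent, not part of the committed patch: each new test first shifts every i16 lane right by 11 bits, so the ashr results always fit in a signed byte and the lshr results in an unsigned byte; the truncating pshufb+por interleave checked above is therefore equivalent to a single saturating pack. Once a PACKSS/PACKUS shuffle combine recognises this pattern, the AVX2 signed case would be expected to lower to roughly:

    vpsraw    $11, %ymm0, %ymm0
    vpsraw    $11, %ymm1, %ymm1
    vpacksswb %ymm1, %ymm0, %ymm0
    ret{{[l|q]}}

with vpackuswb taking the place of the shift/shuffle/or sequence in the lshr variants, and the same shape applying to the 512-bit zmm tests.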