From 3b19475ae2bc49fa3fe340706d23dd9a72de04f9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 20 Aug 2018 21:08:35 +0000 Subject: [PATCH] [X86] Prevent lowerVectorShuffleByMerging128BitLanes from creating cycles Due to some splat handling code in getVectorShuffle, it's possible for NewV1/NewV2 to have their mask modified from what is requested. This can lead to cycles being created in the DAG. This patch examines the returned mask and makes sure it's different. Long term we may need to look closer at that splat code in getVectorShuffle, or add more splat awareness to getVectorShuffle. Fixes PR38639 Differential Revision: https://reviews.llvm.org/D50981 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@340214 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 12 ++++++++++++ test/CodeGen/X86/pr38639.ll | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 test/CodeGen/X86/pr38639.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1e365e01072..c03d32cfbef 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13470,6 +13470,12 @@ static SDValue lowerVectorShuffleByMerging128BitLanes( } } SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); + // Ensure we didn't get back the shuffle we started with. + // FIXME: This is a hack to make up for some splat handling code in + // getVectorShuffle. + if (isa(NewV1) && + cast(NewV1)->getMask() == Mask) + return SDValue(); for (int Lane = 0; Lane != NumLanes; ++Lane) { int Src = LaneSrcs[Lane][1]; @@ -13481,6 +13487,12 @@ static SDValue lowerVectorShuffleByMerging128BitLanes( } } SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); + // Ensure we didn't get back the shuffle we started with. + // FIXME: This is a hack to make up for some splat handling code in + // getVectorShuffle. 
+ if (isa(NewV2) && + cast(NewV2)->getMask() == Mask) + return SDValue(); for (int i = 0; i != Size; ++i) { NewMask[i] = RepeatMask[i % LaneSize]; diff --git a/test/CodeGen/X86/pr38639.ll b/test/CodeGen/X86/pr38639.ll new file mode 100644 index 00000000000..c877568d634 --- /dev/null +++ b/test/CodeGen/X86/pr38639.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=btver2 | FileCheck %s + +define <8 x double> @test(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = +; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm4 +; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; CHECK-NEXT: vblendps {{.*#+}} ymm3 = ymm0[0,1],ymm2[2,3],ymm0[4,5,6,7] +; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm4[0,1],xmm2[2,3] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm3[1],ymm1[1],ymm3[3],ymm1[3] +; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; CHECK-NEXT: retq + %1 = shufflevector <4 x double> %a, <4 x double> , <8 x i32> + ret <8 x double> %1 +} + -- 2.11.0