From 806bdb8daa97d540ad07612ac7c42d960e0088cc Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Mon, 23 Oct 2017 20:22:23 +0000 Subject: [PATCH] Revert "[PowerPC] Try to simplify a Swap if it feeds a Splat" Revert commit r316366. Previous commit causes p8-scalar_vector_conversions.ll to fail. This reverts commit 990e764ad8a2eec206ce5dda6aefab059ccd4e92. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316371 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCMIPeephole.cpp | 47 ---------- test/CodeGen/PowerPC/ppc64-peephole-swap.ll | 134 ---------------------------- 2 files changed, 181 deletions(-) delete mode 100644 test/CodeGen/PowerPC/ppc64-peephole-swap.ll diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index beb40992900..80b7ac24345 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -375,53 +375,6 @@ bool PPCMIPeephole::simplifyCode(void) { MI.getOperand(2).setImm(NewElem); } } - - // Splat is fed by a SWAP which is a permute of this form - // XXPERMDI %VA, %VA, 2 - // Since the splat instruction can use any of the vector elements to do - // the splat we do not have to rearrange the elements in the vector - // with a swap before we do the splat. We can simply do the splat from - // a different index. - // If the swap has only one use (the splat) then we can completely - // remove the swap too. - if (DefOpcode == PPC::XXPERMDI && MI.getOperand(1).isImm()) { - unsigned SwapRes = DefMI->getOperand(0).getReg(); - unsigned SwapOp1 = DefMI->getOperand(1).getReg(); - unsigned SwapOp2 = DefMI->getOperand(2).getReg(); - unsigned SwapImm = DefMI->getOperand(3).getImm(); - unsigned SplatImm = MI.getOperand(1).getImm(); - - // Break if this permute is not a swap. - if (SwapOp1 != SwapOp2 || SwapImm != 2) - break; - - unsigned NewElem = 0; - // Compute the new index to use for the splat. - if (MI.getOpcode() == PPC::VSPLTB) - NewElem = (SplatImm + 8) & 0xF; - else if (MI.getOpcode() == PPC::VSPLTH) - NewElem = (SplatImm + 4) & 0x7; - else if (MI.getOpcode() == PPC::XXSPLTW) - NewElem = (SplatImm + 2) & 0x3; - else { - DEBUG(dbgs() << "Unknown splat opcode."); - DEBUG(MI.dump()); - break; - } - - if (MRI->hasOneNonDBGUse(SwapRes)) { - DEBUG(dbgs() << "Removing redundant swap: "); - DEBUG(DefMI->dump()); - ToErase = DefMI; - } - Simplified = true; - DEBUG(dbgs() << "Changing splat immediate from " << SplatImm << - " to " << NewElem << " in instruction: "); - DEBUG(MI.dump()); - MI.getOperand(1).setImm(NewElem); - MI.getOperand(2).setReg(SwapOp1); - } - break; } case PPC::XVCVDPSP: { diff --git a/test/CodeGen/PowerPC/ppc64-peephole-swap.ll b/test/CodeGen/PowerPC/ppc64-peephole-swap.ll deleted file mode 100644 index 004a856e055..00000000000 --- a/test/CodeGen/PowerPC/ppc64-peephole-swap.ll +++ /dev/null @@ -1,134 +0,0 @@ -; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR8 - -; The strightforward expansion of this code will result in a swap followed by a -; splat. However, the swap is not needed since in this case the splat is the -; only use. -; We want to check that we are not using the swap and that we have indexed the -; splat to the correct location. -; 8 Bit Signed Version of the test. -; Function Attrs: norecurse nounwind readnone -define <16 x i8> @splat_8_plus(<16 x i8> %v, i8 signext %c) local_unnamed_addr { -entry: - %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0 - %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer - %add = add <16 x i8> %splat.splat.i, %v - ret <16 x i8> %add -; CHECK-LABEL: splat_8_plus -; CHECK-NOT: xxswapd -; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7 -; CHECK: blr -; CHECK-PWR8-LABEL: splat_8_plus -; CHECK-PWR8-NOT: xxswapd -; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7 -; CHECK-PWR8: blr -} - -; 8 Bit Unsigned Version of the test. -; Function Attrs: norecurse nounwind readnone -define <16 x i8> @splat_u8_plus(<16 x i8> %v, i8 zeroext %c) local_unnamed_addr { -entry: - %splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0 - %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer - %add = add <16 x i8> %splat.splat.i, %v - ret <16 x i8> %add -; CHECK-LABEL: splat_u8_plus -; CHECK-NOT: xxswapd -; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7 -; CHECK: blr -; CHECK-PWR8-LABEL: splat_u8_plus -; CHECK-PWR8-NOT: xxswapd -; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7 -; CHECK-PWR8: blr -} - -; 16 Bit Signed Version of the test. -; Function Attrs: norecurse nounwind readnone -define <8 x i16> @splat_16_plus(<8 x i16> %v, i16 signext %c) local_unnamed_addr { -entry: - %0 = shl i16 %c, 8 - %conv.i = ashr exact i16 %0, 8 - %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0 - %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer - %add = add <8 x i16> %splat.splat.i, %v - ret <8 x i16> %add -; CHECK-LABEL: splat_16_plus -; CHECK-NOT: xxswapd -; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3 -; CHECK: blr -; CHECK-PWR8-LABEL: splat_16_plus -; CHECK-PWR8-NOT: xxswapd -; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3 -; CHECK-PWR8: blr -} - -; 16 Bit Unsigned Version of the test. -; Function Attrs: norecurse nounwind readnone -define <8 x i16> @splat_u16_plus(<8 x i16> %v, i16 zeroext %c) local_unnamed_addr { -entry: - %0 = shl i16 %c, 8 - %conv.i = ashr exact i16 %0, 8 - %splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0 - %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer - %add = add <8 x i16> %splat.splat.i, %v - ret <8 x i16> %add -; CHECK-LABEL: splat_u16_plus -; CHECK-NOT: xxswapd -; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3 -; CHECK: blr -; CHECK-PWR8-LABEL: splat_u16_plus -; CHECK-PWR8-NOT: xxswapd -; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3 -; CHECK-PWR8: blr -} - -; 32 Bit Signed Version of the test. -; The 32 bit examples work differently than the 8 and 16 bit versions of the -; test. On Power 9 we have the mtvsrws instruction that does both the move to -; register and the splat so it does not really test the newly implemented code. -; On Power 9 for the 32 bit case we don't need the new simplification. It is -; just here for completeness. -; Function Attrs: norecurse nounwind readnone -define <4 x i32> @splat_32_plus(<4 x i32> %v, i32 signext %c) local_unnamed_addr { -entry: - %sext = shl i32 %c, 24 - %conv.i = ashr exact i32 %sext, 24 - %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0 - %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer - %add = add <4 x i32> %splat.splat.i, %v - ret <4 x i32> %add -; CHECK-LABEL: splat_32_plus -; CHECK-NOT: xxswapd -; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}} -; CHECK: blr -; CHECK-PWR8-LABEL: splat_32_plus -; CHECK-PWR8-NOT: xxswapd -; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 -; CHECK-PWR8: blr -} - -; 32 Bit Unsigned Version of the test. -; The 32 bit examples work differently than the 8 and 16 bit versions of the -; test. On Power 9 we have the mtvsrws instruction that does both the move to -; register and the splat so it does not really test the newly implemented code. -; On Power 9 for the 32 bit case we don't need the new simplification. It is -; just here for completeness. -; Function Attrs: norecurse nounwind readnone -define <4 x i32> @splat_u32_plus(<4 x i32> %v, i32 zeroext %c) local_unnamed_addr { -entry: - %sext = shl i32 %c, 24 - %conv.i = ashr exact i32 %sext, 24 - %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0 - %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer - %add = add <4 x i32> %splat.splat.i, %v - ret <4 x i32> %add -; CHECK-LABEL: splat_u32_plus -; CHECK-NOT: xxswapd -; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}} -; CHECK: blr -; CHECK-PWR8-LABEL: splat_u32_plus -; CHECK-PWR8-NOT: xxswapd -; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1 -; CHECK-PWR8: blr -} - -- 2.11.0