From 979b87d6b778b695f8dafb81b3ce96224e13fb53 Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Wed, 19 Dec 2018 03:04:07 +0000 Subject: [PATCH] [PowerPC]Exploit P9 vabsdu for unsigned vselect patterns For type v4i32/v8i16/v16i8, do the following transforms: (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) -> (vabsd a, b) (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) -> (vabsd a, b) (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) -> (vabsd a, b) (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) -> (vabsd a, b) Differential Revision: https://reviews.llvm.org/D55812 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349599 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 65 ++++++++++++++++++ lib/Target/PowerPC/PPCISelLowering.h | 1 + test/CodeGen/PowerPC/ppc64-P9-vabsd.ll | 120 +++++++++++++++++++++++---------- 3 files changed, 150 insertions(+), 36 deletions(-) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 619b13fe86e..92af82dc4b9 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1085,6 +1085,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasP9Altivec()) { setTargetDAGCombine(ISD::ABS); + setTargetDAGCombine(ISD::VSELECT); } // Darwin long double math library functions have $LDBL128 appended. 
@@ -13267,6 +13268,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return DAGCombineBuildVector(N, DCI); case ISD::ABS: return combineABS(N, DCI); + case ISD::VSELECT: + return combineVSelect(N, DCI); } return SDValue(); @@ -14597,3 +14600,65 @@ SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const { return SDValue(); } +// For type v4i32/v8i16/v16i8, transform +// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b) +// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b) +// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b) +// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b) +SDValue PPCTargetLowering::combineVSelect(SDNode *N, + DAGCombinerInfo &DCI) const { + assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here"); + assert(Subtarget.hasP9Altivec() && + "Only combine this when P9 altivec supported!"); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + SDValue Cond = N->getOperand(0); + SDValue TrueOpnd = N->getOperand(1); + SDValue FalseOpnd = N->getOperand(2); + EVT VT = N->getOperand(1).getValueType(); + + if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB || + FalseOpnd.getOpcode() != ISD::SUB) + return SDValue(); + + // ABSD only available for type v4i32/v8i16/v16i8 + if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8) + return SDValue(); + + // At least to save one more dependent computation + if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse())) + return SDValue(); + + ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + + // Can only handle unsigned comparison here + switch (CC) { + default: + return SDValue(); + case ISD::SETUGT: + case ISD::SETUGE: + break; + case ISD::SETULT: + case ISD::SETULE: + std::swap(TrueOpnd, FalseOpnd); + break; + } + + SDValue CmpOpnd1 = Cond.getOperand(0); + SDValue CmpOpnd2 = Cond.getOperand(1); + + // SETCC CmpOpnd1 CmpOpnd2 
cond + // TrueOpnd = CmpOpnd1 - CmpOpnd2 + // FalseOpnd = CmpOpnd2 - CmpOpnd1 + if (TrueOpnd.getOperand(0) == CmpOpnd1 && + TrueOpnd.getOperand(1) == CmpOpnd2 && + FalseOpnd.getOperand(0) == CmpOpnd2 && + FalseOpnd.getOperand(1) == CmpOpnd1) { + return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(), + CmpOpnd1, CmpOpnd2, + DAG.getTargetConstant(0, dl, MVT::i32)); + } + + return SDValue(); +} diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index d9884dd24b7..09039cb7736 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -1118,6 +1118,7 @@ namespace llvm { SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it diff --git a/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll index faa731fd0fe..653b2121e40 100644 --- a/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll +++ b/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll @@ -526,9 +526,10 @@ define <4 x i32> @absd_int32_ugt(<4 x i32>, <4 x i32>) { %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5 ret <4 x i32> %6 ; CHECK-LABEL: absd_int32_ugt -; CHECK: vcmpgtuw -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtuw +; CHECK-NOT: xxsel +; CHECK: vabsduw v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int32_ugt ; CHECK-PWR8: vcmpgtuw ; CHECK-PWR8: xxsel @@ -542,9 +543,10 @@ define <4 x i32> @absd_int32_uge(<4 x i32>, <4 x i32>) { %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5 ret <4 x i32> %6 ; CHECK-LABEL: absd_int32_uge -; CHECK: vcmpgtuw -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtuw +; CHECK-NOT: xxsel +; CHECK: vabsduw v2, v2, v3 +; CHECK-NEXT: blr ; 
CHECK-PWR8-LABEL: absd_int32_uge ; CHECK-PWR8: vcmpgtuw ; CHECK-PWR8: xxsel @@ -558,9 +560,10 @@ define <4 x i32> @absd_int32_ult(<4 x i32>, <4 x i32>) { %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 ret <4 x i32> %6 ; CHECK-LABEL: absd_int32_ult -; CHECK: vcmpgtuw -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtuw +; CHECK-NOT: xxsel +; CHECK: vabsduw v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int32_ult ; CHECK-PWR8: vcmpgtuw ; CHECK-PWR8: xxsel @@ -574,9 +577,10 @@ define <4 x i32> @absd_int32_ule(<4 x i32>, <4 x i32>) { %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 ret <4 x i32> %6 ; CHECK-LABEL: absd_int32_ule -; CHECK: vcmpgtuw -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtuw +; CHECK-NOT: xxsel +; CHECK: vabsduw v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int32_ule ; CHECK-PWR8: vcmpgtuw ; CHECK-PWR8: xxsel @@ -590,9 +594,10 @@ define <8 x i16> @absd_int16_ugt(<8 x i16>, <8 x i16>) { %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5 ret <8 x i16> %6 ; CHECK-LABEL: absd_int16_ugt -; CHECK: vcmpgtuh -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtuh +; CHECK-NOT: xxsel +; CHECK: vabsduh v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int16_ugt ; CHECK-PWR8: vcmpgtuh ; CHECK-PWR8: xxsel @@ -606,9 +611,10 @@ define <8 x i16> @absd_int16_uge(<8 x i16>, <8 x i16>) { %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5 ret <8 x i16> %6 ; CHECK-LABEL: absd_int16_uge -; CHECK: vcmpgtuh -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtuh +; CHECK-NOT: xxsel +; CHECK: vabsduh v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int16_uge ; CHECK-PWR8: vcmpgtuh ; CHECK-PWR8: xxsel @@ -622,9 +628,10 @@ define <8 x i16> @absd_int16_ult(<8 x i16>, <8 x i16>) { %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4 ret <8 x i16> %6 ; CHECK-LABEL: absd_int16_ult -; CHECK: vcmpgtuh -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtuh +; CHECK-NOT: xxsel +; CHECK: vabsduh v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: 
absd_int16_ult ; CHECK-PWR8: vcmpgtuh ; CHECK-PWR8: xxsel @@ -638,9 +645,10 @@ define <8 x i16> @absd_int16_ule(<8 x i16>, <8 x i16>) { %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4 ret <8 x i16> %6 ; CHECK-LABEL: absd_int16_ule -; CHECK: vcmpgtuh -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtuh +; CHECK-NOT: xxsel +; CHECK: vabsduh v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int16_ule ; CHECK-PWR8: vcmpgtuh ; CHECK-PWR8: xxsel @@ -654,9 +662,10 @@ define <16 x i8> @absd_int8_ugt(<16 x i8>, <16 x i8>) { %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5 ret <16 x i8> %6 ; CHECK-LABEL: absd_int8_ugt -; CHECK: vcmpgtub -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtub +; CHECK-NOT: xxsel +; CHECK: vabsdub v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int8_ugt ; CHECK-PWR8: vcmpgtub ; CHECK-PWR8: xxsel @@ -670,9 +679,10 @@ define <16 x i8> @absd_int8_uge(<16 x i8>, <16 x i8>) { %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5 ret <16 x i8> %6 ; CHECK-LABEL: absd_int8_uge -; CHECK: vcmpgtub -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtub +; CHECK-NOT: xxsel +; CHECK: vabsdub v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int8_uge ; CHECK-PWR8: vcmpgtub ; CHECK-PWR8: xxsel @@ -686,9 +696,10 @@ define <16 x i8> @absd_int8_ult(<16 x i8>, <16 x i8>) { %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4 ret <16 x i8> %6 ; CHECK-LABEL: absd_int8_ult -; CHECK: vcmpgtub -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtub +; CHECK-NOT: xxsel +; CHECK: vabsdub v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int8_ult ; CHECK-PWR8: vcmpgtub ; CHECK-PWR8: xxsel @@ -702,15 +713,52 @@ define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) { %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4 ret <16 x i8> %6 ; CHECK-LABEL: absd_int8_ule -; CHECK: vcmpgtub -; CHECK: xxsel -; CHECK: blr +; CHECK-NOT: vcmpgtub +; CHECK-NOT: xxsel +; CHECK: vabsdub v2, v2, v3 +; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: absd_int8_ule ; CHECK-PWR8: 
vcmpgtub ; CHECK-PWR8: xxsel ; CHECK-PWR8: blr } +; Some cases that we are unable to optimize: +; check that the combine does not go beyond its scope. +define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) { + %3 = icmp ugt <4 x i32> %0, %1 + %4 = sub <4 x i32> %0, %1 + %5 = sub <4 x i32> %1, %0 + %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 + ret <4 x i32> %6 +; CHECK-LABEL: absd_int32_ugt_opp +; CHECK-NOT: vabsduw +; CHECK: vcmpgtuw +; CHECK: xxsel +; CHECK: blr +; CHECK-PWR8-LABEL: absd_int32_ugt_opp +; CHECK-PWR8: vcmpgtuw +; CHECK-PWR8: xxsel +; CHECK-PWR8: blr +} + +define <2 x i64> @absd_int64_ugt(<2 x i64>, <2 x i64>) { + %3 = icmp ugt <2 x i64> %0, %1 + %4 = sub <2 x i64> %0, %1 + %5 = sub <2 x i64> %1, %0 + %6 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %5 + ret <2 x i64> %6 +; CHECK-LABEL: absd_int64_ugt +; CHECK-NOT: vabsduw +; CHECK: vcmpgtud +; CHECK: xxsel +; CHECK: blr +; CHECK-PWR8-LABEL: absd_int64_ugt +; CHECK-PWR8: vcmpgtud +; CHECK-PWR8: xxsel +; CHECK-PWR8: blr +} + declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>) declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>) -- 2.11.0