if (Subtarget.hasP9Altivec()) {
setTargetDAGCombine(ISD::ABS);
+ setTargetDAGCombine(ISD::VSELECT);
}
// Darwin long double math library functions have $LDBL128 appended.
return DAGCombineBuildVector(N, DCI);
case ISD::ABS:
return combineABS(N, DCI);
+ case ISD::VSELECT:
+ return combineVSelect(N, DCI);
}
return SDValue();
return SDValue();
}
+// For type v4i32/v8i16/v16i8, transform
+// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
+// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
+//
+// In each form the select yields "larger minus smaller" under an unsigned
+// comparison, i.e. the unsigned absolute difference. When a == b both
+// subtractions are zero, so the strict and non-strict predicates are
+// interchangeable.
+//
+// Returns the replacement PPCISD::VABSD node, or an empty SDValue when the
+// node does not match one of the patterns above.
+SDValue PPCTargetLowering::combineVSelect(SDNode *N,
+                                          DAGCombinerInfo &DCI) const {
+  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
+  assert(Subtarget.hasP9Altivec() &&
+         "Only combine this when P9 altivec supported!");
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+  SDValue Cond = N->getOperand(0);
+  SDValue TrueOpnd = N->getOperand(1);
+  SDValue FalseOpnd = N->getOperand(2);
+  EVT VT = N->getOperand(1).getValueType();
+
+  // The pattern needs a compare feeding the select and a subtract on each arm.
+  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
+      FalseOpnd.getOpcode() != ISD::SUB)
+    return SDValue();
+
+  // ABSD only available for type v4i32/v8i16/v16i8
+  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+    return SDValue();
+
+  // Bail out unless at least one of the compare and the two subtracts has
+  // this select as its only user, so that the combine saves at least one
+  // dependent computation.
+  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
+    return SDValue();
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+  // Can only handle unsigned comparison here
+  switch (CC) {
+  default:
+    return SDValue();
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+    break;
+  case ISD::SETULT:
+  case ISD::SETULE:
+    // Normalize "less than" to the "greater than" shape by exchanging the
+    // select arms, so a single operand check below covers all four predicates.
+    std::swap(TrueOpnd, FalseOpnd);
+    break;
+  }
+
+  SDValue CmpOpnd1 = Cond.getOperand(0);
+  SDValue CmpOpnd2 = Cond.getOperand(1);
+
+  // SETCC CmpOpnd1 CmpOpnd2 cond
+  // TrueOpnd = CmpOpnd1 - CmpOpnd2
+  // FalseOpnd = CmpOpnd2 - CmpOpnd1
+  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
+      TrueOpnd.getOperand(1) == CmpOpnd2 &&
+      FalseOpnd.getOperand(0) == CmpOpnd2 &&
+      FalseOpnd.getOperand(1) == CmpOpnd1) {
+    // NOTE(review): the trailing constant 0 is presumably VABSD's flag
+    // operand -- confirm against the PPCISD::VABSD node definition.
+    return DAG.getNode(PPCISD::VABSD, dl, VT, CmpOpnd1, CmpOpnd2,
+                       DAG.getTargetConstant(0, dl, MVT::i32));
+  }
+
+  return SDValue();
+}
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
ret <4 x i32> %6
; CHECK-LABEL: absd_int32_ugt
-; CHECK: vcmpgtuw
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuw
+; CHECK-NOT: xxsel
+; CHECK: vabsduw v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int32_ugt
; CHECK-PWR8: vcmpgtuw
; CHECK-PWR8: xxsel
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
ret <4 x i32> %6
; CHECK-LABEL: absd_int32_uge
-; CHECK: vcmpgtuw
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuw
+; CHECK-NOT: xxsel
+; CHECK: vabsduw v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int32_uge
; CHECK-PWR8: vcmpgtuw
; CHECK-PWR8: xxsel
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
; CHECK-LABEL: absd_int32_ult
-; CHECK: vcmpgtuw
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuw
+; CHECK-NOT: xxsel
+; CHECK: vabsduw v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int32_ult
; CHECK-PWR8: vcmpgtuw
; CHECK-PWR8: xxsel
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
; CHECK-LABEL: absd_int32_ule
-; CHECK: vcmpgtuw
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuw
+; CHECK-NOT: xxsel
+; CHECK: vabsduw v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int32_ule
; CHECK-PWR8: vcmpgtuw
; CHECK-PWR8: xxsel
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
ret <8 x i16> %6
; CHECK-LABEL: absd_int16_ugt
-; CHECK: vcmpgtuh
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuh
+; CHECK-NOT: xxsel
+; CHECK: vabsduh v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int16_ugt
; CHECK-PWR8: vcmpgtuh
; CHECK-PWR8: xxsel
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
ret <8 x i16> %6
; CHECK-LABEL: absd_int16_uge
-; CHECK: vcmpgtuh
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuh
+; CHECK-NOT: xxsel
+; CHECK: vabsduh v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int16_uge
; CHECK-PWR8: vcmpgtuh
; CHECK-PWR8: xxsel
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
ret <8 x i16> %6
; CHECK-LABEL: absd_int16_ult
-; CHECK: vcmpgtuh
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuh
+; CHECK-NOT: xxsel
+; CHECK: vabsduh v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int16_ult
; CHECK-PWR8: vcmpgtuh
; CHECK-PWR8: xxsel
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
ret <8 x i16> %6
; CHECK-LABEL: absd_int16_ule
-; CHECK: vcmpgtuh
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuh
+; CHECK-NOT: xxsel
+; CHECK: vabsduh v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int16_ule
; CHECK-PWR8: vcmpgtuh
; CHECK-PWR8: xxsel
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
ret <16 x i8> %6
; CHECK-LABEL: absd_int8_ugt
-; CHECK: vcmpgtub
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtub
+; CHECK-NOT: xxsel
+; CHECK: vabsdub v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int8_ugt
; CHECK-PWR8: vcmpgtub
; CHECK-PWR8: xxsel
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
ret <16 x i8> %6
; CHECK-LABEL: absd_int8_uge
-; CHECK: vcmpgtub
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtub
+; CHECK-NOT: xxsel
+; CHECK: vabsdub v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int8_uge
; CHECK-PWR8: vcmpgtub
; CHECK-PWR8: xxsel
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
ret <16 x i8> %6
; CHECK-LABEL: absd_int8_ult
-; CHECK: vcmpgtub
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtub
+; CHECK-NOT: xxsel
+; CHECK: vabsdub v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int8_ult
; CHECK-PWR8: vcmpgtub
; CHECK-PWR8: xxsel
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
ret <16 x i8> %6
; CHECK-LABEL: absd_int8_ule
-; CHECK: vcmpgtub
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtub
+; CHECK-NOT: xxsel
+; CHECK: vabsdub v2, v2, v3
+; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int8_ule
; CHECK-PWR8: vcmpgtub
; CHECK-PWR8: xxsel
; CHECK-PWR8: blr
}
+; Some cases that we are unable to optimize;
+; check that the combine does not go beyond its intended scope.
+; Negative test: the select arms are swapped relative to absd_int32_ugt, so
+; the result is (%1 - %0) when %0 >u %1, which is not the absolute difference.
+; The expected output therefore keeps the compare/select sequence.
+define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
+ %3 = icmp ugt <4 x i32> %0, %1
+ %4 = sub <4 x i32> %0, %1
+ %5 = sub <4 x i32> %1, %0
+ %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
+ ret <4 x i32> %6
+; CHECK-LABEL: absd_int32_ugt_opp
+; CHECK-NOT: vabsduw
+; CHECK: vcmpgtuw
+; CHECK: xxsel
+; CHECK: blr
+; CHECK-PWR8-LABEL: absd_int32_ugt_opp
+; CHECK-PWR8: vcmpgtuw
+; CHECK-PWR8: xxsel
+; CHECK-PWR8: blr
+}
+
+; Negative test: same compare/sub/select shape as the matching patterns, but
+; on <2 x i64> elements; the expected output keeps the compare/select sequence.
+define <2 x i64> @absd_int64_ugt(<2 x i64>, <2 x i64>) {
+ %3 = icmp ugt <2 x i64> %0, %1
+ %4 = sub <2 x i64> %0, %1
+ %5 = sub <2 x i64> %1, %0
+ %6 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %5
+ ret <2 x i64> %6
+; CHECK-LABEL: absd_int64_ugt
+; CHECK-NOT: vabsduw
+; CHECK: vcmpgtud
+; CHECK: xxsel
+; CHECK: blr
+; CHECK-PWR8-LABEL: absd_int64_ugt
+; CHECK-PWR8: vcmpgtud
+; CHECK-PWR8: xxsel
+; CHECK-PWR8: blr
+}
+
declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>)