AArch64: Implement missed conditional compare sequences.

author Balaram Makam <bmakam@codeaurora.org>

Mon, 1 Feb 2016 19:13:07 +0000 (19:13 +0000)

committer Balaram Makam <bmakam@codeaurora.org>

Mon, 1 Feb 2016 19:13:07 +0000 (19:13 +0000)
author Balaram Makam <bmakam@codeaurora.org>
Mon, 1 Feb 2016 19:13:07 +0000 (19:13 +0000)
committer Balaram Makam <bmakam@codeaurora.org>
Mon, 1 Feb 2016 19:13:07 +0000 (19:13 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 6323a4b..be306c0 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1721,7 +1721,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
      return SDValue(N, 0);
  
    // fold (a+b) -> (a|b) iff a and b share no bits.
-  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::OR, VT)) &&
        VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
  
@@ -6363,7 +6363,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(1).getOpcode() == ISD::Constant &&
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
-      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+      (!LegalOperations && TLI.isOperationLegalOrCustom(N0.getOpcode(), VT))) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
      if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
        bool DoXform = true;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 644aa3d..2be5f3c 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -144,6 +144,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
    setOperationAction(ISD::XOR, MVT::i32, Custom);
    setOperationAction(ISD::XOR, MVT::i64, Custom);
  
+  // Custom lowering hooks are needed for OR
+  // to fold it into CCMP.
+  setOperationAction(ISD::OR, MVT::i32, Custom);
+  setOperationAction(ISD::OR, MVT::i64, Custom);
+
+  // Custom lowering hooks are needed for AND
+  // to fold it into CCMP.
+  setOperationAction(ISD::AND, MVT::i32, Custom);
+  setOperationAction(ISD::AND, MVT::i64, Custom);
+
    // Virtually no operation on f128 is legal, but LLVM can't expand them when
    // there's a valid register class, so we need custom operations in most cases.
    setOperationAction(ISD::FABS, MVT::f128, Expand);
@@ -1597,6 +1607,27 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
    return Cmp;
  }
  
+// Attempt to form conditional compare sequences for and/or trees
+// with setcc leafs.
+static SDValue tryLowerToAArch64Cmp(SDValue Op, SelectionDAG &DAG) {
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  if ((LHS.getOpcode() != ISD::SETCC) || (RHS.getOpcode() != ISD::SETCC))
+    return Op;
+
+  bool CanNegate;
+  if (!isConjunctionDisjunctionTree(Op, CanNegate))
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  SDValue TVal = DAG.getConstant(1, DL, VT);
+  SDValue FVal = DAG.getConstant(0, DL, VT);
+  SDValue CCVal;
+  SDValue Cmp = getAArch64Cmp(Op, FVal, ISD::SETEQ, CCVal, DAG, DL);
+  return DAG.getNode(AArch64ISD::CSEL, DL, VT, FVal, TVal, CCVal, Cmp);
+}
+
  static std::pair<SDValue, SDValue>
  getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
    assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
@@ -1718,6 +1749,18 @@ SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
    return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
  }
  
+SDValue AArch64TargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getValueType().isVector())
+    return LowerVectorAND(Op, DAG);
+  return tryLowerToAArch64Cmp(Op, DAG);
+}
+
+SDValue AArch64TargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getValueType().isVector())
+    return LowerVectorOR(Op, DAG);
+  return tryLowerToAArch64Cmp(Op, DAG);
+}
+
  static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
    SDValue Sel = Op.getOperand(0);
    SDValue Other = Op.getOperand(1);
@@ -2372,9 +2415,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
    case ISD::FCOPYSIGN:
      return LowerFCOPYSIGN(Op, DAG);
    case ISD::AND:
-    return LowerVectorAND(Op, DAG);
+    return LowerAND(Op, DAG);
    case ISD::OR:
-    return LowerVectorOR(Op, DAG);
+    return LowerOR(Op, DAG);
    case ISD::XOR:
      return LowerXOR(Op, DAG);
    case ISD::PREFETCH:
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h

index e99616c..20c468e 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -488,6 +488,8 @@ private:
    SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
                          RTLIB::Libcall Call) const;
+  SDValue LowerAND(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll

index 767df74..c6062d3 100644 (file)
--- a/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -371,21 +371,76 @@ define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) {
    ret i32 %sel
  }
  
-; CHECK-LABEL: select_noccmp1
-define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
-; CHECK: cmp x0, #0
-; CHECK-NEXT: cset [[REG0:w[0-9]+]], lt
-; CHECK-NEXT: cmp x0, #13
-; CHECK-NOT: ccmp
+; CHECK-LABEL: single_noselect
+define i32 @single_noselect(i32 %A, i32 %B) #0 {
+; CHECK: cmp w1, #1
+; CHECK-NEXT: ccmp w0, #1, #8, ge
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+  %notlhs = icmp slt i32 %A, 1
+  %notrhs = icmp slt i32 %B, 1
+  %lnot = or i1 %notlhs, %notrhs
+  %conv = zext i1 %lnot to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: single_and_ext
+define i32 @single_and_ext(i32 %A, i32 %B, i32 %C) #0 {
+; CHECK: cmp w1, #2
+; CHECK-NEXT: ccmp w0, #4, #0, lt
+; CHECK-NEXT: cinc w0, w2, lt
+; CHECK-NEXT: ret
+  %cmp = icmp slt i32 %A, 4
+  %cmp1 = icmp slt i32 %B, 2
+  %and1 = and i1 %cmp, %cmp1
+  %conv = zext i1 %and1 to i32
+  %add = add nsw i32 %conv, %C
+  ret i32 %add
+}
+
+; CHECK-LABEL: single_noselect_phi
+define i32 @single_noselect_phi(i32 %A, i32 %B, i32 %C) #0 {
+; CHECK: cmp w1, #0
+; CHECK-NEXT: ccmp w0, #0, #4, gt
  ; CHECK-NEXT: cset [[REG1:w[0-9]+]], gt
-; CHECK-NEXT: cmp x2, #2
+; CHECK-NEXT: cmp w1, #2
+; CHECK-NEXT: ccmp w0, #4, #8, ge
  ; CHECK-NEXT: cset [[REG2:w[0-9]+]], lt
+; CHECK-NEXT: cmp w2, #0
+; CHECK-NEXT: csel w0, [[REG1]], [[REG2]], eq
+; CHECK-NEXT: ret
+entry:
+  %tobool = icmp eq i32 %C, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %cmp = icmp slt i32 %A, 4
+  %cmp1 = icmp slt i32 %B, 2
+  %0 = or i1 %cmp, %cmp1
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %cmp2 = icmp sgt i32 %A, 0
+  %cmp3 = icmp sgt i32 %B, 0
+  %1 = and i1 %cmp2, %cmp3
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %b.0.in = phi i1 [ %0, %if.then ], [ %1, %if.else ]
+  %conv = zext i1 %b.0.in to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: select_noccmp1
+define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
+; CHECK: cmp x0, #13
+; CHECK-NEXT: ccmp x0, #0, #0, gt
+; CHECK-NEXT: cset [[REG1:w[0-9]+]], lt
  ; CHECK-NEXT: cmp x2, #4
-; CHECK-NEXT: cset [[REG3:w[0-9]+]], gt
-; CHECK-NEXT: and [[REG4:w[0-9]+]], [[REG0]], [[REG1]]
-; CHECK-NEXT: and [[REG5:w[0-9]+]], [[REG2]], [[REG3]]
-; CHECK-NEXT: orr [[REG6:w[0-9]+]], [[REG4]], [[REG5]]
-; CHECK-NEXT: cmp [[REG6]], #0
+; CHECK-NEXT: ccmp x2, #2, #0, gt
+; CHECK-NEXT: cset [[REG2:w[0-9]+]], lt
+; CHECK-NEXT: orr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: cmp [[REG3]], #0
  ; CHECK-NEXT: csel x0, xzr, x3, ne
  ; CHECK-NEXT: ret
    %c0 = icmp slt i64 %v1, 0
author	Balaram Makam <bmakam@codeaurora.org>
	Mon, 1 Feb 2016 19:13:07 +0000 (19:13 +0000)
committer	Balaram Makam <bmakam@codeaurora.org>
	Mon, 1 Feb 2016 19:13:07 +0000 (19:13 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.h		patch \| blob \| history
test/CodeGen/AArch64/arm64-ccmp.ll		patch \| blob \| history