[AArch64] Improve add/sub/cmp isel of uxtw forms.

author Geoff Berry <gberry@codeaurora.org>

Mon, 26 Sep 2016 15:34:47 +0000 (15:34 +0000)

committer Geoff Berry <gberry@codeaurora.org>

Mon, 26 Sep 2016 15:34:47 +0000 (15:34 +0000)
author Geoff Berry <gberry@codeaurora.org>
Mon, 26 Sep 2016 15:34:47 +0000 (15:34 +0000)
committer Geoff Berry <gberry@codeaurora.org>
Mon, 26 Sep 2016 15:34:47 +0000 (15:34 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

index 6b7d517..6fa0782 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -586,6 +586,11 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
        return false;
  
      Reg = N.getOperand(0);
+
+    // Don't match if free 32-bit -> 64-bit zext can be used instead.
+    if (Ext == AArch64_AM::UXTW &&
+        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
+      return false;
    }
  
    // AArch64 mandates that the RHS of the operation must use the smallest
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h

index 86f1d97..882ed19 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -215,6 +215,21 @@ enum NodeType : unsigned {
  
  } // end namespace AArch64ISD
  
+namespace { // NOTE(review): anonymous namespace inside a header file
+
+// Returns true unless N is a TRUNCATE, EXTRACT_SUBREG or CopyFromReg node; the
+// value type is not checked here. Any other instruction defining a 32-bit
+// result zeros the high half of the register. TRUNCATE can be lowered to
+// EXTRACT_SUBREG, and CopyFromReg may be copying from a truncate.
+// FIXME: X86 also checks for CMOV here. Do we need something similar?
+static inline bool isDef32(const SDNode &N) {
+  unsigned Opc = N.getOpcode();
+  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
+         Opc != ISD::CopyFromReg;
+}
+
+} // end anonymous namespace
+
  class AArch64Subtarget;
  class AArch64TargetMachine;
  
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td

index e58ad27..dad097e 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5272,15 +5272,8 @@ def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0
  //----------------------------------------------------------------------------
  // FIXME: Like for X86, these should go in their own separate .td file.
  
-// Any instruction that defines a 32-bit result leaves the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits.
-// FIXME: X86 also checks for CMOV here. Do we need something similar?
  def def32 : PatLeaf<(i32 GPR32:$src), [{
-  return N->getOpcode() != ISD::TRUNCATE &&
-         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
-         N->getOpcode() != ISD::CopyFromReg;
+  return isDef32(*N);
  }]>;
  
  // In the case of a 32-bit def that is known to implicitly zero-extend,
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll

index cfe0dfc..df1b9fe 100644 (file)
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -274,19 +274,20 @@ define void @sub_i16rhs() minsize {
  ; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
  ; example), but the remaining instructions are probably not idiomatic
  ; in the face of "add/sub (shifted register)" so I don't intend to.
-define void @addsub_i32rhs() minsize {
+define void @addsub_i32rhs(i32 %in32) minsize {
  ; CHECK-LABEL: addsub_i32rhs:
      %val32_tmp = load i32, i32* @var32
      %lhs64 = load i64, i64* @var64
  
      %val32 = add i32 %val32_tmp, 123
  
-    %rhs64_zext = zext i32 %val32 to i64
+    %rhs64_zext = zext i32 %in32 to i64
      %res64_zext = add i64 %lhs64, %rhs64_zext
      store volatile i64 %res64_zext, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
  
-    %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+    %rhs64_zext2 = zext i32 %val32 to i64
+    %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
      %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
      store volatile i64 %res64_zext_shift, i64* @var64
  ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
@@ -304,19 +305,20 @@ define void @addsub_i32rhs() minsize {
      ret void
  }
  
-define void @sub_i32rhs() minsize {
+define void @sub_i32rhs(i32 %in32) minsize {
  ; CHECK-LABEL: sub_i32rhs:
      %val32_tmp = load i32, i32* @var32
      %lhs64 = load i64, i64* @var64
  
      %val32 = add i32 %val32_tmp, 123
  
-    %rhs64_zext = zext i32 %val32 to i64
+    %rhs64_zext = zext i32 %in32 to i64
      %res64_zext = sub i64 %lhs64, %rhs64_zext
      store volatile i64 %res64_zext, i64* @var64
  ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
  
-    %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+    %rhs64_zext2 = zext i32 %val32 to i64
+    %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
      %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
      store volatile i64 %res64_zext_shift, i64* @var64
  ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
@@ -333,3 +335,98 @@ define void @sub_i32rhs() minsize {
  
      ret void
  }
+
+; Check that implicit zext from w reg write is used instead of uxtw form of add.
+define i64 @add_fold_uxtw(i32 %x, i64 %y) {
+; CHECK-LABEL: add_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: add x0, x1, x[[TMP]]
+  %ret = add i64 %y, %ext
+  ret i64 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of sub and that the zero operand (XZR) is folded to form a neg instruction.
+define i64 @sub_fold_uxtw_xzr(i32 %x)  {
+; CHECK-LABEL: sub_fold_uxtw_xzr:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: neg x0, x[[TMP]]
+  %ret = sub i64 0, %ext
+  ret i64 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw form of subs/cmp.
+define i1 @cmp_fold_uxtw(i32 %x, i64 %y) {
+; CHECK-LABEL: cmp_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: cmp x1, x[[TMP]]
+; CHECK-NEXT: cset
+  %ret = icmp eq i64 %y, %ext
+  ret i1 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of add, leading to madd selection.
+define i64 @madd_fold_uxtw(i32 %x, i64 %y) {
+; CHECK-LABEL: madd_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: madd x0, x1, x1, x[[TMP]]
+  %mul = mul i64 %y, %y
+  %ret = add i64 %mul, %ext
+  ret i64 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of sub, leading to sub/cmp folding: the sub and the compare
+; against zero are selected as a single cmp.
+define i1 @cmp_sub_fold_uxtw(i32 %x, i64 %y, i64 %z) {
+; CHECK-LABEL: cmp_sub_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: cmp x[[TMP2:[0-9]+]], x[[TMP]]
+; CHECK-NEXT: cset
+  %sub = sub i64 %z, %ext
+  %ret = icmp eq i64 %sub, 0
+  ret i1 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of add and add of -1 gets selected as sub.
+define i64 @add_imm_fold_uxtw(i32 %x) {
+; CHECK-LABEL: add_imm_fold_uxtw:
+entry:
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK-NEXT: sub x0, x[[TMP]], #1
+  %ret = add i64 %ext, -1
+  ret i64 %ret
+}
+
+; Check that implicit zext from w reg write is used instead of uxtw
+; form of add and add lsl form gets selected.
+define i64 @add_lsl_fold_uxtw(i32 %x, i64 %y) {
+; CHECK-LABEL: add_lsl_fold_uxtw:
+entry:
+; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3
+  %m = or i32 %x, 3
+  %ext = zext i32 %m to i64
+  %shift = shl i64 %y, 3
+; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3
+  %ret = add i64 %ext, %shift
+  ret i64 %ret
+}
author	Geoff Berry <gberry@codeaurora.org>
	Mon, 26 Sep 2016 15:34:47 +0000 (15:34 +0000)
committer	Geoff Berry <gberry@codeaurora.org>
	Mon, 26 Sep 2016 15:34:47 +0000 (15:34 +0000)
lib/Target/AArch64/AArch64ISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.h		patch \| blob \| history
lib/Target/AArch64/AArch64InstrInfo.td		patch \| blob \| history
test/CodeGen/AArch64/addsub_ext.ll		patch \| blob \| history