[AArch64] Improve v8.1-A code-gen for atomic load-subtract

author Oliver Stannard <oliver.stannard@arm.com>

Mon, 12 Feb 2018 14:22:03 +0000 (14:22 +0000)

committer Oliver Stannard <oliver.stannard@arm.com>

Mon, 12 Feb 2018 14:22:03 +0000 (14:22 +0000)
author Oliver Stannard <oliver.stannard@arm.com>
Mon, 12 Feb 2018 14:22:03 +0000 (14:22 +0000)
committer Oliver Stannard <oliver.stannard@arm.com>
Mon, 12 Feb 2018 14:22:03 +0000 (14:22 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 0734186..93ffe31 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -464,6 +464,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
    setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
  
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
  
    // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
    // This requires the Performance Monitors extension.
@@ -2679,6 +2681,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
    case ISD::VECREDUCE_FMAX:
    case ISD::VECREDUCE_FMIN:
      return LowerVECREDUCE(Op, DAG);
+  case ISD::ATOMIC_LOAD_SUB:
+    return LowerATOMIC_LOAD_SUB(Op, DAG);
    }
  }
  
@@ -7373,6 +7377,23 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
    }
  }
  
+SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+  if (!Subtarget.hasLSE())
+    return SDValue();
+
+  // LSE has an atomic load-add instruction, but not a load-sub.
+  SDLoc dl(Op);
+  MVT VT = Op.getSimpleValueType();
+  SDValue RHS = Op.getOperand(2);
+  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
+  RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
+  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
+                       Op.getOperand(0), Op.getOperand(1), RHS,
+                       AN->getMemOperand());
+}
+
  /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
  /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
  /// specified in the intrinsic calls.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h

index 483f2b4..9160678 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -596,6 +596,7 @@ private:
    SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  
    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          std::vector<SDNode *> *Created) const override;
diff --git a/test/CodeGen/AArch64/atomic-ops-lse.ll b/test/CodeGen/AArch64/atomic-ops-lse.ll

index 1ec9673..0b945fb 100644 (file)
--- a/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ b/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -814,6 +814,118 @@ define void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind {
    ret void
  }
  
+define i8 @test_atomic_load_sub_i8_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm:
+  %old = atomicrmw sub i8* @var8, i8 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm:
+  %old = atomicrmw sub i16* @var16, i16 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm:
+  %old = atomicrmw sub i32* @var32, i32 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_neg_imm() nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm:
+  %old = atomicrmw sub i64* @var64, i64 -1 seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
+; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
+define i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8_neg_arg:
+  %neg = sub i8 0, %offset
+  %old = atomicrmw sub i8* @var8, i8 %neg seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; CHECK: ldaddalb w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16_neg_arg:
+  %neg = sub i16 0, %offset
+  %old = atomicrmw sub i16* @var16, i16 %neg seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; CHECK: ldaddalh w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32_neg_arg:
+  %neg = sub i32 0, %offset
+  %old = atomicrmw sub i32* @var32, i32 %neg seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
+; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64_neg_arg:
+  %neg = sub i64 0, %offset
+  %old = atomicrmw sub i64* @var64, i64 %neg seq_cst
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
+; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+  ret i64 %old
+}
+
  define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
  ; CHECK-LABEL: test_atomic_load_and_i8:
    %old = atomicrmw and i8* @var8, i8 %offset seq_cst
author	Oliver Stannard <oliver.stannard@arm.com>
	Mon, 12 Feb 2018 14:22:03 +0000 (14:22 +0000)
committer	Oliver Stannard <oliver.stannard@arm.com>
	Mon, 12 Feb 2018 14:22:03 +0000 (14:22 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.h		patch \| blob \| history
test/CodeGen/AArch64/atomic-ops-lse.ll		patch \| blob \| history