[AArch64][FastISel] Select atomic stores into STLR.

author Ahmed Bougacha <ahmed.bougacha@gmail.com>

Wed, 20 Jul 2016 21:12:27 +0000 (21:12 +0000)

committer Ahmed Bougacha <ahmed.bougacha@gmail.com>

Wed, 20 Jul 2016 21:12:27 +0000 (21:12 +0000)
author Ahmed Bougacha <ahmed.bougacha@gmail.com>
Wed, 20 Jul 2016 21:12:27 +0000 (21:12 +0000)
committer Ahmed Bougacha <ahmed.bougacha@gmail.com>
Wed, 20 Jul 2016 21:12:27 +0000 (21:12 +0000)
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp

index e2ab7ab..ac98e66 100644 (file)
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -185,6 +185,8 @@ private:
                      MachineMemOperand *MMO = nullptr);
    bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                   MachineMemOperand *MMO = nullptr);
+  // Emit a store-release (STLRB/STLRH/STLRW/STLRX) of SrcReg to the address
+  // held in AddrReg. Returns false if no instruction was emitted, e.g. when VT
+  // is not one of i8/i16/i32/i64.
+  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
+                        MachineMemOperand *MMO = nullptr);
    unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
    unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
    unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
@@ -1997,6 +1999,28 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
    return true;
  }
  
+bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
+                                       unsigned AddrReg,
+                                       MachineMemOperand *MMO) {
+  unsigned Opc;
+  switch (VT.SimpleTy) {
+  default: return false;
+  case MVT::i8:  Opc = AArch64::STLRB; break;
+  case MVT::i16: Opc = AArch64::STLRH; break;
+  case MVT::i32: Opc = AArch64::STLRW; break;
+  case MVT::i64: Opc = AArch64::STLRX; break;
+  }
+
+  const MCInstrDesc &II = TII.get(Opc);
+  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
+  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+      .addReg(SrcReg)
+      .addReg(AddrReg)
+      .addMemOperand(MMO);
+  return true;
+}
+
  bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
                                  MachineMemOperand *MMO) {
    if (!TLI.allowsMisalignedMemoryAccesses(VT))
@@ -2071,8 +2095,7 @@ bool AArch64FastISel::selectStore(const Instruction *I) {
    // Verify we have a legal type before going any further.  Currently, we handle
    // simple types that will directly fit in a register (i32/f32/i64/f64) or
    // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
-  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
-      cast<StoreInst>(I)->isAtomic())
+  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
      return false;
  
    const Value *PtrV = I->getOperand(1);
@@ -2109,9 +2132,23 @@ bool AArch64FastISel::selectStore(const Instruction *I) {
    if (!SrcReg)
      return false;
  
+  auto *SI = cast<StoreInst>(I);
+
+  // Try to emit a STLR for seq_cst/release.
+  if (SI->isAtomic()) {
+    AtomicOrdering Ord = SI->getOrdering();
+    // The non-atomic instructions are sufficient for relaxed stores.
+    if (isReleaseOrStronger(Ord)) {
+      // The STLR addressing mode only supports a base reg; pass that directly.
+      unsigned AddrReg = getRegForValue(PtrV);
+      return emitStoreRelease(VT, SrcReg, AddrReg,
+                              createMachineMemOperandFor(I));
+    }
+  }
+
    // See if we can handle this address.
    Address Addr;
-  if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
+  if (!computeAddress(PtrV, Addr, Op0->getType()))
      return false;
  
    if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
diff --git a/test/CodeGen/AArch64/fast-isel-atomic.ll b/test/CodeGen/AArch64/fast-isel-atomic.ll

new file mode 100644 (file)

index 0000000..195b8be
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-atomic.ll
@@ -0,0 +1,244 @@
+; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-- -O0 -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s
+
+; Note that checking SelectionDAG output isn't strictly necessary, but FastISel
+; and SelectionDAG currently produce the same code, so we might as well check
+; both!  Feel free to remove the SDAG RUN line.
+
+; Monotonic i8 store to the base pointer: a plain strb is expected.
+; CHECK-LABEL: atomic_store_monotonic_8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  strb  w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_8(i8* %p, i8 %val) #0 {
+  store atomic i8 %val, i8* %p monotonic, align 1
+  ret void
+}
+
+; Monotonic i8 store one element past the base pointer: the offset is expected
+; to fold into the strb addressing mode.
+; CHECK-LABEL: atomic_store_monotonic_8_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  strb w1, [x0, #1]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_8_off(i8* %p, i8 %val) #0 {
+  %tmp0 = getelementptr i8, i8* %p, i32 1
+  store atomic i8 %val, i8* %tmp0 monotonic, align 1
+  ret void
+}
+
+; Monotonic i16 store to the base pointer: a plain strh is expected.
+; CHECK-LABEL: atomic_store_monotonic_16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  strh  w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_16(i16* %p, i16 %val) #0 {
+  store atomic i16 %val, i16* %p monotonic, align 2
+  ret void
+}
+
+; Monotonic i16 store one element past the base pointer: the offset is expected
+; to fold into the strh addressing mode.
+; CHECK-LABEL: atomic_store_monotonic_16_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  strh w1, [x0, #2]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_16_off(i16* %p, i16 %val) #0 {
+  %tmp0 = getelementptr i16, i16* %p, i32 1
+  store atomic i16 %val, i16* %tmp0 monotonic, align 2
+  ret void
+}
+
+; Monotonic i32 store to the base pointer: a plain str (w register) is expected.
+; CHECK-LABEL: atomic_store_monotonic_32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  str  w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_32(i32* %p, i32 %val) #0 {
+  store atomic i32 %val, i32* %p monotonic, align 4
+  ret void
+}
+
+; Monotonic i32 store one element past the base pointer: the offset is expected
+; to fold into the str addressing mode.
+; CHECK-LABEL: atomic_store_monotonic_32_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  str w1, [x0, #4]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_32_off(i32* %p, i32 %val) #0 {
+  %tmp0 = getelementptr i32, i32* %p, i32 1
+  store atomic i32 %val, i32* %tmp0 monotonic, align 4
+  ret void
+}
+
+; Monotonic i64 store to the base pointer: a plain str (x register) is expected.
+; CHECK-LABEL: atomic_store_monotonic_64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  str  x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_64(i64* %p, i64 %val) #0 {
+  store atomic i64 %val, i64* %p monotonic, align 8
+  ret void
+}
+
+; Monotonic i64 store one element past the base pointer: the offset is expected
+; to fold into the str addressing mode.
+; CHECK-LABEL: atomic_store_monotonic_64_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  str x1, [x0, #8]
+; CHECK-NEXT:  ret
+define void @atomic_store_monotonic_64_off(i64* %p, i64 %val) #0 {
+  %tmp0 = getelementptr i64, i64* %p, i32 1
+  store atomic i64 %val, i64* %tmp0 monotonic, align 8
+  ret void
+}
+
+; Release i8 store to the base pointer: stlrb is expected.
+; CHECK-LABEL: atomic_store_release_8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlrb w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_8(i8* %p, i8 %val) #0 {
+  store atomic i8 %val, i8* %p release, align 1
+  ret void
+}
+
+; Release i8 store one element past the base pointer: the address is expected
+; to be computed by a separate add, followed by stlrb on the base register.
+; CHECK-LABEL: atomic_store_release_8_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #1
+; CHECK-NEXT:  stlrb w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_8_off(i8* %p, i8 %val) #0 {
+  %tmp0 = getelementptr i8, i8* %p, i32 1
+  store atomic i8 %val, i8* %tmp0 release, align 1
+  ret void
+}
+
+; Release i16 store to the base pointer: stlrh is expected.
+; CHECK-LABEL: atomic_store_release_16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlrh w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_16(i16* %p, i16 %val) #0 {
+  store atomic i16 %val, i16* %p release, align 2
+  ret void
+}
+
+; Release i16 store one element past the base pointer: the address is expected
+; to be computed by a separate add, followed by stlrh on the base register.
+; CHECK-LABEL: atomic_store_release_16_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #2
+; CHECK-NEXT:  stlrh w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_16_off(i16* %p, i16 %val) #0 {
+  %tmp0 = getelementptr i16, i16* %p, i32 1
+  store atomic i16 %val, i16* %tmp0 release, align 2
+  ret void
+}
+
+; Release i32 store to the base pointer: stlr (w register) is expected.
+; CHECK-LABEL: atomic_store_release_32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlr w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_32(i32* %p, i32 %val) #0 {
+  store atomic i32 %val, i32* %p release, align 4
+  ret void
+}
+
+; Release i32 store one element past the base pointer: the address is expected
+; to be computed by a separate add, followed by stlr on the base register.
+; CHECK-LABEL: atomic_store_release_32_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #4
+; CHECK-NEXT:  stlr w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_32_off(i32* %p, i32 %val) #0 {
+  %tmp0 = getelementptr i32, i32* %p, i32 1
+  store atomic i32 %val, i32* %tmp0 release, align 4
+  ret void
+}
+
+; Release i64 store to the base pointer: stlr (x register) is expected.
+; CHECK-LABEL: atomic_store_release_64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlr x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_64(i64* %p, i64 %val) #0 {
+  store atomic i64 %val, i64* %p release, align 8
+  ret void
+}
+
+; Release i64 store one element past the base pointer: the address is expected
+; to be computed by a separate add, followed by stlr on the base register.
+; CHECK-LABEL: atomic_store_release_64_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #8
+; CHECK-NEXT:  stlr x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_release_64_off(i64* %p, i64 %val) #0 {
+  %tmp0 = getelementptr i64, i64* %p, i32 1
+  store atomic i64 %val, i64* %tmp0 release, align 8
+  ret void
+}
+
+
+; seq_cst i8 store to the base pointer: stlrb is expected, same as for release.
+; CHECK-LABEL: atomic_store_seq_cst_8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlrb w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_8(i8* %p, i8 %val) #0 {
+  store atomic i8 %val, i8* %p seq_cst, align 1
+  ret void
+}
+
+; seq_cst i8 store one element past the base pointer: the address is expected
+; to be computed by a separate add, followed by stlrb on the base register.
+; CHECK-LABEL: atomic_store_seq_cst_8_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #1
+; CHECK-NEXT:  stlrb w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_8_off(i8* %p, i8 %val) #0 {
+  %tmp0 = getelementptr i8, i8* %p, i32 1
+  store atomic i8 %val, i8* %tmp0 seq_cst, align 1
+  ret void
+}
+
+; seq_cst i16 store to the base pointer: stlrh is expected, same as for release.
+; CHECK-LABEL: atomic_store_seq_cst_16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlrh w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_16(i16* %p, i16 %val) #0 {
+  store atomic i16 %val, i16* %p seq_cst, align 2
+  ret void
+}
+
+; seq_cst i16 store one element past the base pointer: the address is expected
+; to be computed by a separate add, followed by stlrh on the base register.
+; CHECK-LABEL: atomic_store_seq_cst_16_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #2
+; CHECK-NEXT:  stlrh w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_16_off(i16* %p, i16 %val) #0 {
+  %tmp0 = getelementptr i16, i16* %p, i32 1
+  store atomic i16 %val, i16* %tmp0 seq_cst, align 2
+  ret void
+}
+
+; seq_cst i32 store to the base pointer: stlr (w register) is expected, same as
+; for release.
+; CHECK-LABEL: atomic_store_seq_cst_32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlr w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_32(i32* %p, i32 %val) #0 {
+  store atomic i32 %val, i32* %p seq_cst, align 4
+  ret void
+}
+
+; seq_cst i32 store one element past the base pointer: the address is expected
+; to be computed by a separate add, followed by stlr on the base register.
+; CHECK-LABEL: atomic_store_seq_cst_32_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #4
+; CHECK-NEXT:  stlr w1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_32_off(i32* %p, i32 %val) #0 {
+  %tmp0 = getelementptr i32, i32* %p, i32 1
+  store atomic i32 %val, i32* %tmp0 seq_cst, align 4
+  ret void
+}
+
+; seq_cst i64 store to the base pointer: stlr (x register) is expected, same as
+; for release.
+; CHECK-LABEL: atomic_store_seq_cst_64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  stlr x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_64(i64* %p, i64 %val) #0 {
+  store atomic i64 %val, i64* %p seq_cst, align 8
+  ret void
+}
+
+; seq_cst i64 store one element past the base pointer: the address is expected
+; to be computed by a separate add, followed by stlr on the base register.
+; CHECK-LABEL: atomic_store_seq_cst_64_off:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT:  add x0, x0, #8
+; CHECK-NEXT:  stlr x1, [x0]
+; CHECK-NEXT:  ret
+define void @atomic_store_seq_cst_64_off(i64* %p, i64 %val) #0 {
+  %tmp0 = getelementptr i64, i64* %p, i32 1
+  store atomic i64 %val, i64* %tmp0 seq_cst, align 8
+  ret void
+}
+
+attributes #0 = { nounwind }
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Wed, 20 Jul 2016 21:12:27 +0000 (21:12 +0000)
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Wed, 20 Jul 2016 21:12:27 +0000 (21:12 +0000)
lib/Target/AArch64/AArch64FastISel.cpp		patch \| blob \| history
test/CodeGen/AArch64/fast-isel-atomic.ll	[new file with mode: 0644]	patch \| blob