Reland r329956, "AArch64: Introduce a DAG combine for folding offsets into addresses...

author Peter Collingbourne <peter@pcc.me.uk>

Mon, 23 Apr 2018 19:09:34 +0000 (19:09 +0000)

committer Peter Collingbourne <peter@pcc.me.uk>

Mon, 23 Apr 2018 19:09:34 +0000 (19:09 +0000)
author Peter Collingbourne <peter@pcc.me.uk>
Mon, 23 Apr 2018 19:09:34 +0000 (19:09 +0000)
committer Peter Collingbourne <peter@pcc.me.uk>
Mon, 23 Apr 2018 19:09:34 +0000 (19:09 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

index eee59f1..d44eee0 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -743,14 +743,16 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
      if (!GAN)
        return true;
  
-    const GlobalValue *GV = GAN->getGlobal();
-    unsigned Alignment = GV->getAlignment();
-    Type *Ty = GV->getValueType();
-    if (Alignment == 0 && Ty->isSized())
-      Alignment = DL.getABITypeAlignment(Ty);
-
-    if (Alignment >= Size)
-      return true;
+    if (GAN->getOffset() % Size == 0) {
+      const GlobalValue *GV = GAN->getGlobal();
+      unsigned Alignment = GV->getAlignment();
+      Type *Ty = GV->getValueType();
+      if (Alignment == 0 && Ty->isSized())
+        Alignment = DL.getABITypeAlignment(Ty);
+
+      if (Alignment >= Size)
+        return true;
+    }
    }
  
    if (CurDAG->isBaseWithConstantOffset(N)) {
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 19573e1..e12aeb4 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -577,6 +577,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
  
+  setTargetDAGCombine(ISD::GlobalAddress);
+
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -3677,7 +3679,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
  SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
                                               SelectionDAG &DAG,
                                               unsigned Flag) const {
-  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
+  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
+                                    N->getOffset(), Flag);
  }
  
  SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
@@ -3752,8 +3755,9 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
    unsigned char OpFlags =
        Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
  
-  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
-         "unexpected offset in global node");
+  if (OpFlags != AArch64II::MO_NO_FLAG)
+    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+           "unexpected offset in global node");
  
    // This also catches the large code model case for Darwin.
    if ((OpFlags & AArch64II::MO_GOT) != 0) {
@@ -4991,10 +4995,8 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
  
  bool AArch64TargetLowering::isOffsetFoldingLegal(
      const GlobalAddressSDNode *GA) const {
-  DEBUG(dbgs() << "Skipping offset folding global address: ");
-  DEBUG(GA->dump());
-  DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
-        "addresses\n");
+  // Offsets are folded in the DAG combine rather than here so that we can
+  // intelligently choose an offset based on the uses.
    return false;
  }
  
@@ -10617,6 +10619,59 @@ static SDValue performNVCASTCombine(SDNode *N) {
    return SDValue();
  }
  
+// If all users of the globaladdr are of the form (globaladdr + constant), find
+// the smallest constant, fold it into the globaladdr's offset and rewrite the
+// globaladdr as (globaladdr + constant) - constant.
+static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
+                                           const AArch64Subtarget *Subtarget,
+                                           const TargetMachine &TM) {
+  auto *GN = dyn_cast<GlobalAddressSDNode>(N);
+  if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
+                 AArch64II::MO_NO_FLAG)
+    return SDValue();
+
+  uint64_t MinOffset = -1ull;
+  for (SDNode *N : GN->uses()) {
+    if (N->getOpcode() != ISD::ADD)
+      return SDValue();
+    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
+    if (!C)
+      C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (!C)
+      return SDValue();
+    MinOffset = std::min(MinOffset, C->getZExtValue());
+  }
+  uint64_t Offset = MinOffset + GN->getOffset();
+
+  // Require that the new offset is larger than the existing one. Otherwise, we
+  // can end up oscillating between two possible DAGs, for example,
+  // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
+  if (Offset <= uint64_t(GN->getOffset()))
+    return SDValue();
+
+  // Check whether folding this offset is legal. It must not go out of bounds of
+  // the referenced object to avoid violating the code model, and must be
+  // smaller than 2^21 because this is the largest offset expressible in all
+  // object formats.
+  //
+  // This check also prevents us from folding negative offsets, which will end
+  // up being treated in the same way as large positive ones. They could also
+  // cause code model violations, and aren't really common enough to matter.
+  if (Offset >= (1 << 21))
+    return SDValue();
+
+  const GlobalValue *GV = GN->getGlobal();
+  Type *T = GV->getValueType();
+  if (!T->isSized() ||
+      Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
+    return SDValue();
+
+  SDLoc DL(GN);
+  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
+  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
+                     DAG.getConstant(MinOffset, DL, MVT::i64));
+}
+
  SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
    SelectionDAG &DAG = DCI.DAG;
@@ -10704,6 +10759,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
      default:
        break;
      }
+  case ISD::GlobalAddress:
+    return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
    }
    return SDValue();
  }
diff --git a/test/CodeGen/AArch64/arm64-addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll

index 6da7679..16f8d01 100644 (file)
--- a/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -5,32 +5,31 @@
  
  ; base + offset (imm9)
  ; CHECK: @t1
-; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
+; CHECK: ldr xzr, [x0, #8]
  ; CHECK: ret
-define void @t1() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
+define void @t1(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1
    %tmp = load volatile i64, i64* %incdec.ptr, align 8
    ret void
  }
  
  ; base + offset (> imm9)
  ; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
+; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264
  ; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
  ; CHECK: ret
-define void @t2() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
+define void @t2(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33
    %tmp = load volatile i64, i64* %incdec.ptr, align 8
    ret void
  }
  
  ; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
  ; CHECK: @t3
-; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
+; CHECK: ldr xzr, [x0, #32760]
  ; CHECK: ret
-define void @t3() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
+define void @t3(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095
    %tmp = load volatile i64, i64* %incdec.ptr, align 8
    ret void
  }
@@ -38,10 +37,10 @@ define void @t3() {
  ; base + unsigned offset (> imm12 * size of type in bytes)
  ; CHECK: @t4
  ; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
-; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
+; CHECK: ldr xzr, [x0, x[[NUM]]]
  ; CHECK: ret
-define void @t4() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
+define void @t4(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096
    %tmp = load volatile i64, i64* %incdec.ptr, align 8
    ret void
  }
@@ -58,12 +57,12 @@ define void @t5(i64 %a) {
  
  ; base + reg + imm
  ; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
+; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3
  ; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
  ; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
  ; CHECK: ret
-define void @t6(i64 %a) {
-  %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
+define void @t6(i64 %a, i64* %object) {
+  %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a
    %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
    %tmp = load volatile i64, i64* %incdec.ptr, align 8
    ret void
diff --git a/test/CodeGen/AArch64/arm64-vector-ldst.ll b/test/CodeGen/AArch64/arm64-vector-ldst.ll

index 938b3d1..6e530cb 100644 (file)
--- a/test/CodeGen/AArch64/arm64-vector-ldst.ll
+++ b/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -264,149 +264,196 @@ entry:
  
  ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
  ; registers for unscaled vector accesses
-@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
  
-define <1 x i64> @fct0() nounwind readonly ssp {
+define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
  entry:
  ; CHECK-LABEL: fct0:
  ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <1 x i64>*
+  %0 = load <1 x i64>, <1 x i64>* %q, align 8
    ret <1 x i64> %0
  }
  
-define <2 x i32> @fct1() nounwind readonly ssp {
+define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
  entry:
  ; CHECK-LABEL: fct1:
  ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i32>*
+  %0 = load <2 x i32>, <2 x i32>* %q, align 8
    ret <2 x i32> %0
  }
  
-define <4 x i16> @fct2() nounwind readonly ssp {
+define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
  entry:
  ; CHECK-LABEL: fct2:
  ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i16>*
+  %0 = load <4 x i16>, <4 x i16>* %q, align 8
    ret <4 x i16> %0
  }
  
-define <8 x i8> @fct3() nounwind readonly ssp {
+define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
  entry:
  ; CHECK-LABEL: fct3:
  ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i8>*
+  %0 = load <8 x i8>, <8 x i8>* %q, align 8
    ret <8 x i8> %0
  }
  
-define <2 x i64> @fct4() nounwind readonly ssp {
+define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
  entry:
  ; CHECK-LABEL: fct4:
  ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i64>*
+  %0 = load <2 x i64>, <2 x i64>* %q, align 16
    ret <2 x i64> %0
  }
  
-define <4 x i32> @fct5() nounwind readonly ssp {
+define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
  entry:
  ; CHECK-LABEL: fct5:
  ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i32>*
+  %0 = load <4 x i32>, <4 x i32>* %q, align 16
    ret <4 x i32> %0
  }
  
-define <8 x i16> @fct6() nounwind readonly ssp {
+define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
  entry:
  ; CHECK-LABEL: fct6:
  ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i16>*
+  %0 = load <8 x i16>, <8 x i16>* %q, align 16
    ret <8 x i16> %0
  }
  
-define <16 x i8> @fct7() nounwind readonly ssp {
+define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
  entry:
  ; CHECK-LABEL: fct7:
  ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <16 x i8>*
+  %0 = load <16 x i8>, <16 x i8>* %q, align 16
    ret <16 x i8> %0
  }
  
-define void @fct8() nounwind ssp {
+define void @fct8(i8* %str) nounwind ssp {
  entry:
  ; CHECK-LABEL: fct8:
  ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
  ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
-  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <1 x i64>*
+  %0 = load <1 x i64>, <1 x i64>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <1 x i64>*
+  store <1 x i64> %0, <1 x i64>* %q2, align 8
    ret void
  }
  
-define void @fct9() nounwind ssp {
+define void @fct9(i8* %str) nounwind ssp {
  entry:
  ; CHECK-LABEL: fct9:
  ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
  ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
-  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i32>*
+  %0 = load <2 x i32>, <2 x i32>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <2 x i32>*
+  store <2 x i32> %0, <2 x i32>* %q2, align 8
    ret void
  }
  
-define void @fct10() nounwind ssp {
+define void @fct10(i8* %str) nounwind ssp {
  entry:
  ; CHECK-LABEL: fct10:
  ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
  ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
-  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i16>*
+  %0 = load <4 x i16>, <4 x i16>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <4 x i16>*
+  store <4 x i16> %0, <4 x i16>* %q2, align 8
    ret void
  }
  
-define void @fct11() nounwind ssp {
+define void @fct11(i8* %str) nounwind ssp {
  entry:
  ; CHECK-LABEL: fct11:
  ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
  ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
-  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i8>*
+  %0 = load <8 x i8>, <8 x i8>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <8 x i8>*
+  store <8 x i8> %0, <8 x i8>* %q2, align 8
    ret void
  }
  
-define void @fct12() nounwind ssp {
+define void @fct12(i8* %str) nounwind ssp {
  entry:
  ; CHECK-LABEL: fct12:
  ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
  ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
-  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i64>*
+  %0 = load <2 x i64>, <2 x i64>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <2 x i64>*
+  store <2 x i64> %0, <2 x i64>* %q2, align 16
    ret void
  }
  
-define void @fct13() nounwind ssp {
+define void @fct13(i8* %str) nounwind ssp {
  entry:
  ; CHECK-LABEL: fct13:
  ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
  ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
-  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i32>*
+  %0 = load <4 x i32>, <4 x i32>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <4 x i32>*
+  store <4 x i32> %0, <4 x i32>* %q2, align 16
    ret void
  }
  
-define void @fct14() nounwind ssp {
+define void @fct14(i8* %str) nounwind ssp {
  entry:
  ; CHECK-LABEL: fct14:
  ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
  ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
-  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i16>*
+  %0 = load <8 x i16>, <8 x i16>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <8 x i16>*
+  store <8 x i16> %0, <8 x i16>* %q2, align 16
    ret void
  }
  
-define void @fct15() nounwind ssp {
+define void @fct15(i8* %str) nounwind ssp {
  entry:
  ; CHECK-LABEL: fct15:
  ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
  ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
-  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <16 x i8>*
+  %0 = load <16 x i8>, <16 x i8>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <16 x i8>*
+  store <16 x i8> %0, <16 x i8>* %q2, align 16
    ret void
  }
  
diff --git a/test/CodeGen/AArch64/fold-global-offsets.ll b/test/CodeGen/AArch64/fold-global-offsets.ll

new file mode 100644 (file)

index 0000000..ffcdc2b
--- /dev/null
+++ b/test/CodeGen/AArch64/fold-global-offsets.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
+
+@x1 = external hidden global [2 x i64]
+@x2 = external hidden global [16777216 x i64]
+@x3 = external hidden global { [9 x i8*], [8 x i8*] }
+
+define i64 @f1() {
+  ; CHECK: f1:
+  ; CHECK: adrp x8, x1+16
+  ; CHECK: ldr x0, [x8, :lo12:x1+16]
+  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
+  ret i64 %l
+}
+
+define i64 @f2() {
+  ; CHECK: f2:
+  ; CHECK: adrp x8, x1
+  ; CHECK: add x8, x8, :lo12:x1
+  ; CHECK: ldr x0, [x8, #24]
+  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3)
+  ret i64 %l
+}
+
+define i64 @f3() {
+  ; CHECK: f3:
+  ; CHECK: adrp x8, x1+1
+  ; CHECK: add x8, x8, :lo12:x1+1
+  ; CHECK: ldr x0, [x8]
+  %l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*)
+  ret i64 %l
+}
+
+define [2 x i64] @f4() {
+  ; CHECK: f4:
+  ; CHECK: adrp x8, x2+8
+  ; CHECK: add x8, x8, :lo12:x2+8
+  ; CHECK: ldp x0, x1, [x8]
+  %l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
+  ret [2 x i64] %l
+}
+
+define i64 @f5() {
+  ; CHECK: f5:
+  ; CHECK: adrp x8, x2+2097144
+  ; CHECK: ldr x0, [x8, :lo12:x2+2097144]
+  ; CHECK: ret
+  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
+  ret i64 %l
+}
+
+define i64 @f6() {
+  ; CHECK: f6:
+  ; CHECK: adrp x8, x2
+  ; CHECK: add x8, x8, :lo12:x2
+  ; CHECK: orr w9, wzr, #0x200000
+  ; CHECK: ldr x0, [x8, x9]
+  ; CHECK: ret
+  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
+  ret i64 %l
+}
+
+define i32 @f7() {
+entry:
+  ; CHECK: f7
+  ; CHECK: adrp x8, x3+108
+  ; CHECK: ldr w0, [x8, :lo12:x3+108]
+  %l = load i32, i32* getelementptr (i32, i32* inttoptr (i64 trunc (i128 lshr (i128 bitcast (<2 x i64> <i64 undef, i64 ptrtoint (i8** getelementptr inbounds ({ [9 x i8*], [8 x i8*] }, { [9 x i8*], [8 x i8*] }* @x3, i64 0, inrange i32 1, i64 2) to i64)> to i128), i128 64) to i64) to i32*), i64 5)
+  ret i32 %l
+}
diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll

index 106d6da..4844d96 100644 (file)
--- a/test/CodeGen/AArch64/global-merge-3.ll
+++ b/test/CodeGen/AArch64/global-merge-3.ll
@@ -10,8 +10,8 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
  ;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
  ;CHECK-APPLE-IOS-NOT: adrp
  ;CHECK-APPLE-IOS: add  x8, x8, __MergedGlobals_x@PAGEOFF
-;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE
-;CHECK-APPLE-IOS: add  x9, x9, __MergedGlobals_y@PAGEOFF
+;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE+12
+;CHECK-APPLE-IOS: str  w1, [x9, __MergedGlobals_y@PAGEOFF+12]
    %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
    %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
    store i32 %a1, i32* %x3, align 4
diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll

index 1c1b4f6..8207f8c 100644 (file)
--- a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
+++ b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
@@ -44,9 +44,9 @@ define void @f2(i32 %a1, i32 %a2) nounwind {
  
  ; CHECK-LABEL: f3:
  define void @f3(i32 %a1, i32 %a2) minsize nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+8
+; CHECK-NEXT: stp w0, w1, [x8]
  ; CHECK-NEXT: ret
    store i32 %a1, i32* @m3, align 4
    store i32 %a2, i32* @n3, align 4
@@ -57,10 +57,9 @@ define void @f3(i32 %a1, i32 %a2) minsize nounwind {
  
  ; CHECK-LABEL: f4:
  define void @f4(i32 %a1, i32 %a2) nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
  ; CHECK-NEXT: adrp x9, _n4@PAGE
-; CHECK-NEXT: str w0, [x8, #8]
+; CHECK-NEXT: str w0, [x8, [[SET]]@PAGEOFF+8]
  ; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF]
  ; CHECK-NEXT: ret
    store i32 %a1, i32* @m3, align 4
diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll

index 97e283c..b3b8406 100644 (file)
--- a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
+++ b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
@@ -38,9 +38,9 @@ define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
  
  ; CHECK-LABEL: f3:
  define void @f3(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #12]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+12
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+12
+; CHECK-NEXT: stp w0, w1, [x8]
  ; CHECK-NEXT: ret
    store i32 %a1, i32* @m2, align 4
    store i32 %a2, i32* @n2, align 4
author	Peter Collingbourne <peter@pcc.me.uk>
	Mon, 23 Apr 2018 19:09:34 +0000 (19:09 +0000)
committer	Peter Collingbourne <peter@pcc.me.uk>
	Mon, 23 Apr 2018 19:09:34 +0000 (19:09 +0000)
lib/Target/AArch64/AArch64ISelDAGToDAG.cpp		patch | blob | history
lib/Target/AArch64/AArch64ISelLowering.cpp		patch | blob | history
test/CodeGen/AArch64/arm64-addrmode.ll		patch | blob | history
test/CodeGen/AArch64/arm64-vector-ldst.ll		patch | blob | history
test/CodeGen/AArch64/fold-global-offsets.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/AArch64/global-merge-3.ll		patch | blob | history
test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll		patch | blob | history
test/CodeGen/AArch64/global-merge-ignore-single-use.ll		patch | blob | history