From: Daniel Neilson Date: Wed, 21 Mar 2018 14:14:55 +0000 (+0000) Subject: [MemCpyOpt] Update to new API for memory intrinsic alignment X-Git-Tag: android-x86-7.1-r4~3471 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=8bf245f39a7fdc7858b113309b59ea62024b0d74;p=android-x86%2Fexternal-llvm.git [MemCpyOpt] Update to new API for memory intrinsic alignment Summary: This change is part of step five in the series of changes to remove alignment argument from memcpy/memmove/memset in favour of alignment attributes. In particular, this changes the MemCpyOpt pass to cease using: 1) The old getAlignment() API of MemoryIntrinsic in favour of getting source & dest specific alignments through the new API. 2) The old IRBuilder CreateMemCpy/CreateMemMove single-alignment APIs in favour of the new API that allows setting source and destination alignments independently. We also add a few tests to fill gaps in the testing of this pass. Steps: Step 1) Remove alignment parameter and create alignment parameter attributes for memcpy/memmove/memset. ( rL322965, rC322964, rL322963 ) Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing source and dest alignments. ( rL323597 ) Step 3) Update Clang to use the new IRBuilder API. ( rC323617 ) Step 4) Update Polly to use the new IRBuilder API. ( rL323618 ) Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API, and those that use MemIntrinsicInst::[get|set]Alignment() to use [get|set]DestAlignment() and [get|set]SourceAlignment() instead. ( rL323886, rL323891, rL324148, rL324273, rL324278, rL324384, rL324395, rL324402, rL324626, rL324642, rL324653, rL324654, rL324773, rL324774, rL324781, rL324784, rL324955, rL324960, rL325816, rL327398, rL327421 ) Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the MemIntrinsicInst::[get|set]Alignment() methods. 
Reference http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328097 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 9c870b42a74..e2de0ee6bcf 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -263,7 +263,7 @@ public: void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { int64_t Size = cast(MSI->getLength())->getZExtValue(); - addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); + addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI); } void addRange(int64_t Start, int64_t Size, Value *Ptr, @@ -498,16 +498,25 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, return AMemSet; } -static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, - const LoadInst *LI) { +static unsigned findStoreAlignment(const DataLayout &DL, const StoreInst *SI) { unsigned StoreAlign = SI->getAlignment(); if (!StoreAlign) StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); + return StoreAlign; +} + +static unsigned findLoadAlignment(const DataLayout &DL, const LoadInst *LI) { unsigned LoadAlign = LI->getAlignment(); if (!LoadAlign) LoadAlign = DL.getABITypeAlignment(LI->getType()); + return LoadAlign; +} - return std::min(StoreAlign, LoadAlign); +static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, + const LoadInst *LI) { + unsigned StoreAlign = findStoreAlignment(DL, SI); + unsigned LoadAlign = findLoadAlignment(DL, LI); + return MinAlign(StoreAlign, LoadAlign); } // This method try to lift a store instruction before position P. 
@@ -656,19 +665,20 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc)) UseMemMove = true; - unsigned Align = findCommonAlignment(DL, SI, LI); uint64_t Size = DL.getTypeStoreSize(T); IRBuilder<> Builder(P); Instruction *M; if (UseMemMove) - M = Builder.CreateMemMove(SI->getPointerOperand(), - LI->getPointerOperand(), Size, - Align, SI->isVolatile()); + M = Builder.CreateMemMove( + SI->getPointerOperand(), findStoreAlignment(DL, SI), + LI->getPointerOperand(), findLoadAlignment(DL, LI), Size, + SI->isVolatile()); else - M = Builder.CreateMemCpy(SI->getPointerOperand(), - LI->getPointerOperand(), Size, - Align, SI->isVolatile()); + M = Builder.CreateMemCpy( + SI->getPointerOperand(), findStoreAlignment(DL, SI), + LI->getPointerOperand(), findLoadAlignment(DL, LI), Size, + SI->isVolatile()); DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); @@ -1047,20 +1057,17 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // If all checks passed, then we can transform M. - // Make sure to use the lesser of the alignment of the source and the dest - // since we're changing where we're reading from, but don't want to increase - // the alignment past what can be read from or written to. // TODO: Is this worth it if we're creating a less aligned memcpy? For // example we could be moving from movaps -> movq on x86. 
- unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); - IRBuilder<> Builder(M); if (UseMemMove) - Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); + Builder.CreateMemMove(M->getRawDest(), M->getDestAlignment(), + MDep->getRawSource(), MDep->getSourceAlignment(), + M->getLength(), M->isVolatile()); else - Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); + Builder.CreateMemCpy(M->getRawDest(), M->getDestAlignment(), + MDep->getRawSource(), MDep->getSourceAlignment(), + M->getLength(), M->isVolatile()); // Remove the instruction we're replacing. MD->removeInstruction(M); @@ -1106,7 +1113,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, // If Dest is aligned, and SrcSize is constant, use the minimum alignment // of the sum. const unsigned DestAlign = - std::max(MemSet->getAlignment(), MemCpy->getAlignment()); + std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment()); if (DestAlign > 1) if (ConstantInt *SrcSizeC = dyn_cast(SrcSize)) Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); @@ -1166,7 +1173,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, IRBuilder<> Builder(MemCpy); Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), - CopySize, MemCpy->getAlignment()); + CopySize, MemCpy->getDestAlignment()); return true; } @@ -1192,7 +1199,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) { if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { IRBuilder<> Builder(M); Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), - M->getAlignment(), false); + M->getDestAlignment(), false); MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; @@ -1221,8 +1228,11 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) { // d) memcpy from a just-memset'd source can be turned into memset. 
if (DepInfo.isClobber()) { if (CallInst *C = dyn_cast(DepInfo.getInst())) { + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? + unsigned Align = MinAlign(M->getDestAlignment(), M->getSourceAlignment()); if (performCallSlotOptzn(M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), M->getAlignment(), + CopySize->getZExtValue(), Align, C)) { MD->removeInstruction(M); M->eraseFromParent(); @@ -1337,7 +1347,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // source of the memcpy to the alignment we need. If we fail, we bail out. AssumptionCache &AC = LookupAssumptionCache(); DominatorTree &DT = LookupDomTree(); - if (MDep->getAlignment() < ByValAlign && + if (MDep->getSourceAlignment() < ByValAlign && getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, CS.getInstruction(), &AC, &DT) < ByValAlign) return false; diff --git a/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll b/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll index f6b175e4171..9dbba093fe2 100644 --- a/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll +++ b/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll @@ -3,6 +3,34 @@ %T = type { i8, i32 } +; Ensure load-store forwarding of an aggregate is interpreted as +; a memmove when the source and dest may alias +define void @test_memmove(%T* align 8 %a, %T* align 16 %b) { +; CHECK-LABEL: @test_memmove( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %T* [[B:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %T* [[A:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 16 [[TMP1]], i8* align 8 [[TMP2]], i64 8, i1 false) +; CHECK-NEXT: ret void +; + %val = load %T, %T* %a, align 8 + store %T %val, %T* %b, align 16 + ret void +} + +; Ensure load-store forwarding of an aggregate is interpreted as +; a memcpy when the source and dest do not alias +define void @test_memcpy(%T* noalias align 8 %a, %T* noalias align 16 %b) { +; CHECK-LABEL: 
@test_memcpy( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %T* [[B:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %T* [[A:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP1]], i8* align 8 [[TMP2]], i64 8, i1 false) +; CHECK-NEXT: ret void +; + %val = load %T, %T* %a, align 8 + store %T %val, %T* %b, align 16 + ret void +} + ; memcpy(%d, %a) should not be generated since store2 may-aliases load %a. define void @f(%T* %a, %T* %b, %T* %c, %T* %d) { ; CHECK-LABEL: @f( diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll index 7e1e4d54a24..4c5f6cbeb1a 100644 --- a/test/Transforms/MemCpyOpt/memcpy.ll +++ b/test/Transforms/MemCpyOpt/memcpy.ll @@ -46,6 +46,20 @@ define void @test2(i8* %P, i8* %Q) nounwind { ; CHECK-NEXT: ret void } +; The intermediate alloca and one of the memcpy's should be eliminated, the +; other should be related with a memcpy. +define void @test2_memcpy(i8* noalias %P, i8* noalias %Q) nounwind { + %memtmp = alloca %0, align 16 + %R = bitcast %0* %memtmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) + ret void + +; CHECK-LABEL: @test2_memcpy( +; CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align 16 %Q, i8* align 16 %P +; CHECK-NEXT: ret void +} +