Fix a logic bug in inline expansion of memcpy / memset with an overlapping load / store pair.

author Evan Cheng <evan.cheng@apple.com>

Wed, 12 Dec 2012 20:43:23 +0000 (20:43 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Wed, 12 Dec 2012 20:43:23 +0000 (20:43 +0000)
author Evan Cheng <evan.cheng@apple.com>
Wed, 12 Dec 2012 20:43:23 +0000 (20:43 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Wed, 12 Dec 2012 20:43:23 +0000 (20:43 +0000)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index 269f221..2375182 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3469,9 +3469,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
  
    unsigned NumMemOps = 0;
    while (Size != 0) {
-    if (++NumMemOps > Limit)
-      return false;
-
      unsigned VTSize = VT.getSizeInBits() / 8;
      while (VTSize > Size) {
        // For now, only use non-vector load / store's for the left-over pieces.
@@ -3507,7 +3504,8 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
        // FIXME: Only does this for 64-bit or more since we don't have proper
        // cost model for unaligned load / store.
        bool Fast;
-      if (AllowOverlap && VTSize >= 8 && NewVTSize < Size &&
+      if (NumMemOps && AllowOverlap &&
+          VTSize >= 8 && NewVTSize < Size &&
            TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
          VTSize = Size;
        else {
@@ -3516,6 +3514,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
        }
      }
  
+    if (++NumMemOps > Limit)
+      return false;
+
      MemOps.push_back(VT);
      Size -= VTSize;
    }
diff --git a/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll

new file mode 100644 (file)

index 0000000..9d4daee
--- /dev/null
+++ b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 < %s
+
+@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1
+
+define void @t(i8* %ptr) {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %ptr, i8* getelementptr inbounds ([7 x i8]* @.str, i64 0, i64 0), i64 7, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
author	Evan Cheng <evan.cheng@apple.com>
	Wed, 12 Dec 2012 20:43:23 +0000 (20:43 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Wed, 12 Dec 2012 20:43:23 +0000 (20:43 +0000)
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch | blob | history
test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll	[new file with mode: 0644]	patch | blob