From dd34dc99fdf66edc52b5fc2ddf8132ebaa134aee Mon Sep 17 00:00:00 2001
From: Renato Golin <renato.golin@linaro.org>
Date: Wed, 14 Aug 2013 16:35:29 +0000
Subject: [PATCH] Let t2LDRBi8 and t2LDRBi12 have same Base Pointer

When determining if two different loads are from the same base address,
this patch allows one load to use a t2LDRBi8 address mode and another to
use a t2LDRBi12 address mode. The current implementation is very
conservative and this allows the case of differing Thumb2 byte loads to
be considered. Allowing these differing modes instead of forcing the exact
same opcode is useful for situations where one opcode loads from a base
address+1 and a second opcode loads from a base address-1.

Patch by Daniel Stewart.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188385 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMBaseInstrInfo.cpp                | 15 ++++-
 test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll | 64 ++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll

diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 977d936bffc..62e80632c74 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1404,9 +1404,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case ARM::VLDRD:
   case ARM::VLDRS:
   case ARM::t2LDRi8:
+  case ARM::t2LDRBi8:
   case ARM::t2LDRDi8:
   case ARM::t2LDRSHi8:
   case ARM::t2LDRi12:
+  case ARM::t2LDRBi12:
   case ARM::t2LDRSHi12:
     break;
   }
@@ -1423,8 +1425,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case ARM::VLDRD:
   case ARM::VLDRS:
   case ARM::t2LDRi8:
+  case ARM::t2LDRBi8:
   case ARM::t2LDRSHi8:
   case ARM::t2LDRi12:
+  case ARM::t2LDRBi12:
   case ARM::t2LDRSHi12:
     break;
   }
@@ -1471,7 +1475,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
   if ((Offset2 - Offset1) / 8 > 64)
     return false;
 
-  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+  // Check if the machine opcodes are different. If they are different
+  // then we consider them to not be of the same base address, EXCEPT in
+  // the case of Thumb2 byte loads where one is t2LDRBi8 and the other is
+  // t2LDRBi12. In this case, they are considered to be the same because
+  // they are different encoding forms of the same basic instruction.
+  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
+      !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
+         Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
+        (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
+         Load2->getMachineOpcode() == ARM::t2LDRBi8)))
     return false;  // FIXME: overly conservative?
 
   // Four loads in a row should be sufficient.
diff --git a/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
new file mode 100644
index 00000000000..defb9460114
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=thumb -mattr=+v7,+thumb2 | FileCheck %s
+
+define i8 @f1(i8* %call1, i8* %call3, i32 %h, i32 %w, i32 %Width) { ; loads nine bytes from %call1 around index Width*h+w, stores one byte to %call3
+; CHECK: f1:
+entry:
+        %mul17 = mul nsw i32 %Width, %h
+        %add = add nsw i32 %mul17, %w                ; %add = Width*h + w, the centre index
+        %sub19 = sub i32 %add, %Width                ; first group base: %add - %Width
+        %sub20 = add i32 %sub19, -1                  ; first group, offset -1
+        %arrayidx21 = getelementptr inbounds i8* %call1, i32 %sub20
+        %0 = load i8* %arrayidx21, align 1
+        %conv22 = zext i8 %0 to i32
+        %arrayidx25 = getelementptr inbounds i8* %call1, i32 %sub19
+        %1 = load i8* %arrayidx25, align 1
+        %conv26 = zext i8 %1 to i32
+        %mul23189 = add i32 %conv26, %conv22
+        %add30 = add i32 %sub19, 1                   ; first group, offset +1
+        %arrayidx31 = getelementptr inbounds i8* %call1, i32 %add30
+        %2 = load i8* %arrayidx31, align 1
+        %conv32 = zext i8 %2 to i32
+; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
+; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
+        %add28190 = add i32 %mul23189, %conv32
+        %sub35 = add i32 %add, -1                    ; second group (base %add), offset -1
+        %arrayidx36 = getelementptr inbounds i8* %call1, i32 %sub35
+        %3 = load i8* %arrayidx36, align 1
+        %conv37 = zext i8 %3 to i32
+        %add34191 = add i32 %add28190, %conv37
+        %arrayidx40 = getelementptr inbounds i8* %call1, i32 %add
+        %4 = load i8* %arrayidx40, align 1
+        %conv41 = zext i8 %4 to i32
+        %mul42 = mul nsw i32 %conv41, 255            ; centre byte (%4) scaled by 255
+        %add44 = add i32 %add, 1                     ; second group, offset +1
+        %arrayidx45 = getelementptr inbounds i8* %call1, i32 %add44
+        %5 = load i8* %arrayidx45, align 1
+        %conv46 = zext i8 %5 to i32
+; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
+; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
+        %add49 = add i32 %add, %Width                ; third group base: %add + %Width
+        %sub50 = add i32 %add49, -1                  ; third group, offset -1
+        %arrayidx51 = getelementptr inbounds i8* %call1, i32 %sub50
+        %6 = load i8* %arrayidx51, align 1
+        %conv52 = zext i8 %6 to i32
+        %arrayidx56 = getelementptr inbounds i8* %call1, i32 %add49
+        %7 = load i8* %arrayidx56, align 1
+        %conv57 = zext i8 %7 to i32
+        %add61 = add i32 %add49, 1                   ; third group, offset +1
+        %arrayidx62 = getelementptr inbounds i8* %call1, i32 %add61
+        %8 = load i8* %arrayidx62, align 1
+        %conv63 = zext i8 %8 to i32
+; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
+; CHECK-NEXT: ldrb{{[.w]*}} r{{[0-9]*}}, [r{{[0-9]*}}, #1]
+        %tmp = add i32 %add34191, %conv46            ; running sum of the eight non-centre bytes
+        %tmp193 = add i32 %tmp, %conv52
+        %tmp194 = add i32 %tmp193, %conv57
+        %tmp195 = add i32 %tmp194, %conv63
+        %tmp196 = mul i32 %tmp195, -28               ; non-centre sum weighted by -28
+        %add65 = add i32 %tmp196, %mul42             ; plus 255 * centre byte
+        %9 = lshr i32 %add65, 8
+        %conv68 = trunc i32 %9 to i8
+        %arrayidx69 = getelementptr inbounds i8* %call3, i32 %add
+        store i8 %conv68, i8* %arrayidx69, align 1   ; result stored at the centre index of %call3
+        ret i8 %conv68
+}
-- 
2.11.0