} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
- ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
- return false;
+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
+
+ // We couldn't find an inc/dec to merge. But if the base is dead, we
+ // can still change to a writeback form as that will save us 2 bytes
+ // of code size. It can create WAW hazards though, so only do it if
+ // we're minimizing code size.
+ if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill)
+ return false;
+
+ bool HighRegsUsed = false;
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).getReg() >= ARM::R8) {
+ HighRegsUsed = true;
+ break;
+ }
+
+ if (!HighRegsUsed)
+ MergeInstr = MBB.end();
+ else
+ return false;
+ }
}
- MBB.erase(MergeInstr);
+ if (MergeInstr != MBB.end())
+ MBB.erase(MergeInstr);
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
--- /dev/null
+; RUN: llc -O3 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "armv7--linux-gnu"
+
+@a = global i32 0, align 4
+@b = global i32 0, align 4
+@c = global i32 0, align 4
+
+; CHECK-LABEL: bar:
+; CHECK: ldm r{{[0-9]}}!, {r0, r{{[0-9]}}, r{{[0-9]}}}
+define void @bar(i32 %a1, i32 %b1, i32 %c1) minsize optsize {
+ %1 = load i32, i32* @a, align 4
+ %2 = load i32, i32* @b, align 4
+ %3 = load i32, i32* @c, align 4
+ %4 = tail call i32 @baz(i32 %1, i32 %3) minsize optsize
+ %5 = tail call i32 @baz(i32 %2, i32 %3) minsize optsize
+ ret void
+}
+
+declare i32 @baz(i32,i32) minsize optsize