From 2358986dd4ecf9dd9743b808d9a23749897353cd Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Tue, 10 Nov 2015 11:04:18 +0000 Subject: [PATCH] [AArch64] Fix halfword load merging for big-endian targets For big-endian targets, when we merge two halfword loads into a word load, the order of the halfwords in the loaded value is reversed compared to little-endian, so the load-store optimiser needs to swap the destination registers. This does not affect merging of two word loads, as we use ldp, which treats the memory as two separate 32-bit words. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252597 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 12 ++++++-- test/CodeGen/AArch64/arm64-ldr-merge.ll | 37 +++++++++++++++++------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 6ef4c269d8f..ffe6ab2cb53 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -84,6 +84,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { const AArch64InstrInfo *TII; const TargetRegisterInfo *TRI; + const AArch64Subtarget *Subtarget; // Scan the instructions looking for a load/store that can be combined // with the current instruction into a load/store pair. @@ -537,6 +538,10 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, if (!IsUnscaled) OffsetImm /= 2; MachineInstr *RtNewDest = MergeForward ? I : Paired; + // When merging small (< 32 bit) loads for big-endian targets, the order of + // the component parts gets swapped. + if (!Subtarget->isLittleEndian()) + std::swap(RtMI, Rt2MI); // Construct the new load instruction. // FIXME: currently we support only halfword unsigned load. We need to // handle byte type, signed, and store instructions as well. @@ -560,7 +565,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, DEBUG((NewMemMI)->print(dbgs())); MachineInstr *ExtDestMI = MergeForward ? Paired : I; - if (ExtDestMI == Rt2MI) { + if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) { // Create the bitfield extract for high half. BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::UBFMWri)) @@ -1388,8 +1393,9 @@ bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) { } bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast(Fn.getSubtarget().getInstrInfo()); - TRI = Fn.getSubtarget().getRegisterInfo(); + Subtarget = &static_cast(Fn.getSubtarget()); + TII = static_cast(Subtarget->getInstrInfo()); + TRI = Subtarget->getRegisterInfo(); bool Modified = false; bool enableNarrowLdOpt = enableNarrowLdMerge(Fn); diff --git a/test/CodeGen/AArch64/arm64-ldr-merge.ll b/test/CodeGen/AArch64/arm64-ldr-merge.ll index 4e40bac4e71..5d8cb8d745d 100644 --- a/test/CodeGen/AArch64/arm64-ldr-merge.ll +++ b/test/CodeGen/AArch64/arm64-ldr-merge.ll @@ -1,36 +1,51 @@ -; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE +; RUN: llc < %s -march=aarch64_be -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=BE ; CHECK-LABEL: Ldrh_merge ; CHECK-NOT: ldrh ; CHECK: ldr [[NEW_DEST:w[0-9]+]] -; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff -; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]] +; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] define i16 @Ldrh_merge(i16* nocapture readonly %p) { %1 = load i16, i16* %p, align 2 %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1 %2 = load i16, i16* %arrayidx2, align 2 - %add = add nuw nsw i16 %1, %2 + %add = sub nuw nsw i16 %1, %2 ret i16 %add } ; CHECK-LABEL: Ldurh_merge ; CHECK-NOT: ldurh ; CHECK: ldur [[NEW_DEST:w[0-9]+]] -; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff -; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]] +; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] define i16 @Ldurh_merge(i16* nocapture readonly %p) { entry: %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2 %0 = load i16, i16* %arrayidx %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1 %1 = load i16, i16* %arrayidx3 - %add = add nuw nsw i16 %0, %1 + %add = sub nuw nsw i16 %0, %1 ret i16 %add } ; CHECK-LABEL: Ldrh_4_merge ; CHECK-NOT: ldrh -; CHECK: ldp [[NEW_DEST:w[0-9]+]] +; CHECK: ldp [[WORD1:w[0-9]+]], [[WORD2:w[0-9]+]], [x0] +; CHECK-DAG: and [[WORD1LO:w[0-9]+]], [[WORD1]], #0xffff +; CHECK-DAG: lsr [[WORD1HI:w[0-9]+]], [[WORD1]], #16 +; CHECK-DAG: and [[WORD2LO:w[0-9]+]], [[WORD2]], #0xffff +; CHECK-DAG: lsr [[WORD2HI:w[0-9]+]], [[WORD2]], #16 +; LE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1HI]], [[WORD1LO]] +; BE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1LO]], [[WORD1HI]] +; LE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2LO]] +; BE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2HI]] +; LE: sub w0, [[TEMP2]], [[WORD2HI]] +; BE: sub w0, [[TEMP2]], [[WORD2LO]] define i16 @Ldrh_4_merge(i16* nocapture readonly %P) { %arrayidx = getelementptr inbounds i16, i16* %P, i64 0 %l0 = load i16, i16* %arrayidx @@ -40,8 +55,8 @@ define i16 @Ldrh_4_merge(i16* nocapture readonly %P) { %l2 = load i16, i16* %arrayidx7 %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3 %l3 = load i16, i16* %arrayidx12 - %add4 = add nuw nsw i16 %l1, %l0 - %add9 = add nuw nsw i16 %add4, %l2 - %add14 = add nuw nsw i16 %add9, %l3 + %add4 = sub nuw nsw i16 %l1, %l0 + %add9 = udiv i16 %add4, %l2 + %add14 = sub nuw nsw i16 %add9, %l3 ret i16 %add14 } -- 2.11.0