From: Justin Bogner Date: Thu, 14 Jun 2018 18:32:55 +0000 (+0000) Subject: [VirtRegRewriter] Avoid clobbering registers when expanding copy bundles X-Git-Tag: android-x86-8.1-r1~2106 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=a1b4efd4bd8b68cd6bf5df5424c7fc802a3eb34f;p=android-x86%2Fexternal-llvm.git [VirtRegRewriter] Avoid clobbering registers when expanding copy bundles If a copy bundle happens to involve overlapping registers, we can end up with emitting the copies in an order that ends up clobbering some of the subregisters. Since instructions in the copy bundle semantically happen at the same time, this is incorrect and we need to make sure we order the copies such that this doesn't happen. Differential Revision: https://reviews.llvm.org/D48154 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334750 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 3f9dcd4edce..2741dbe064e 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -406,6 +406,8 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { return; if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) { + SmallVector<MachineInstr *, 2> MIs({&MI}); + // Only do this when the complete bundle is made out of COPYs. 
MachineBasicBlock &MBB = *MI.getParent(); for (MachineBasicBlock::reverse_instr_iterator I = @@ -413,16 +415,53 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { I != E && I->isBundledWithSucc(); ++I) { if (!I->isCopy()) return; + MIs.push_back(&*I); + } + MachineInstr *FirstMI = MIs.back(); + + auto anyRegsAlias = [](const MachineInstr *Dst, + ArrayRef<MachineInstr *> Srcs, + const TargetRegisterInfo *TRI) { + for (const MachineInstr *Src : Srcs) + if (Src != Dst) + if (TRI->regsOverlap(Dst->getOperand(0).getReg(), + Src->getOperand(1).getReg())) + return true; + return false; + }; + + // If any of the destination registers in the bundle of copies alias any of + // the source registers, try to schedule the instructions to avoid any + // clobbering. + for (int E = MIs.size(), PrevE; E > 1; PrevE = E) { + for (int I = E; I--; ) + if (!anyRegsAlias(MIs[I], makeArrayRef(MIs).take_front(E), TRI)) { + if (I + 1 != E) + std::swap(MIs[I], MIs[E - 1]); + --E; + } + if (PrevE == E) { + MF->getFunction().getContext().emitError( + "register rewriting failed: cycle in copy bundle"); + break; + } } - for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator(); - I->isBundledWithPred(); ) { - MachineInstr &MI = *I; - ++I; + MachineInstr *BundleStart = FirstMI; + for (MachineInstr *BundledMI : llvm::reverse(MIs)) { + // If instruction is in the middle of the bundle, move it before the + // bundle starts, otherwise, just unbundle it. When we get to the last + // instruction, the bundle will have been completely undone. 
+ if (BundledMI != BundleStart) { + BundledMI->removeFromBundle(); + MBB.insert(FirstMI, BundledMI); + } else if (BundledMI->isBundledWithSucc()) { + BundledMI->unbundleFromSucc(); + BundleStart = &*std::next(BundledMI->getIterator()); + } - MI.unbundleFromPred(); - if (Indexes) - Indexes->insertMachineInstrInMaps(MI); + if (Indexes && BundledMI != FirstMI) + Indexes->insertMachineInstrInMaps(*BundledMI); } } } diff --git a/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir b/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir new file mode 100644 index 00000000000..093473ab472 --- /dev/null +++ b/test/CodeGen/AArch64/overlapping-copy-bundle-cycle.mir @@ -0,0 +1,16 @@ +# RUN: not llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o /dev/null 2>&1 | FileCheck %s + +# Check we don't infinitely loop on cycles in copy bundles. +# CHECK: error: register rewriting failed: cycle in copy bundle + +--- +name: func0 +body: | + bb.0: + $x0 = IMPLICIT_DEF + $q0_q1_q2_q3 = IMPLICIT_DEF + $q1_q2_q3 = COPY $q0_q1_q2 { + $q2_q3_q4 = COPY $q1_q2_q3 + } + ST4i64 $q1_q2_q3_q4, 0, $x0 +... diff --git a/test/CodeGen/AArch64/overlapping-copy-bundle.mir b/test/CodeGen/AArch64/overlapping-copy-bundle.mir new file mode 100644 index 00000000000..84f39ea9398 --- /dev/null +++ b/test/CodeGen/AArch64/overlapping-copy-bundle.mir @@ -0,0 +1,80 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-apple-ios -run-pass=greedy -run-pass=virtregrewriter %s -o - | FileCheck %s +--- +name: func0 +body: | + bb.0: + ; Make sure we don't clobber q3 when we expand the bundle + ; CHECK-LABEL: name: func0 + ; CHECK: $x0 = IMPLICIT_DEF + ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF + ; CHECK: $q4 = COPY $q3 + ; CHECK: $q1_q2_q3 = COPY $q0_q1_q2 + ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0 + $x0 = IMPLICIT_DEF + $q0_q1_q2_q3 = IMPLICIT_DEF + $q1_q2_q3 = COPY $q0_q1_q2 { + $q4 = COPY $q3 + } + ST4i64 $q1_q2_q3_q4, 0, $x0 + +... 
+--- +name: func1 +body: | + bb.0: + ; If it was already ordered, make sure we don't break it + ; CHECK-LABEL: name: func1 + ; CHECK: $x0 = IMPLICIT_DEF + ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF + ; CHECK: $q4 = COPY $q3 + ; CHECK: $q1_q2_q3 = COPY $q0_q1_q2 + ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0 + $x0 = IMPLICIT_DEF + $q0_q1_q2_q3 = IMPLICIT_DEF + $q4 = COPY $q3 { + $q1_q2_q3 = COPY $q0_q1_q2 + } + ST4i64 $q1_q2_q3_q4, 0, $x0 + +... +--- +name: func2 +body: | + bb.0: + ; A bit less realistic, but check that we handle multiple nodes + ; CHECK-LABEL: name: func2 + ; CHECK: $x0 = IMPLICIT_DEF + ; CHECK: $q0_q1_q2_q3 = IMPLICIT_DEF + ; CHECK: $q3 = COPY $q2 + ; CHECK: $q4 = COPY $q1 + ; CHECK: $q1_q2 = COPY $q0_q1 + ; CHECK: ST4i64 $q1_q2_q3_q4, 0, $x0 + $x0 = IMPLICIT_DEF + $q0_q1_q2_q3 = IMPLICIT_DEF + $q1_q2 = COPY $q0_q1 { + $q3 = COPY $q2 + $q4 = COPY $q1 + } + ST4i64 $q1_q2_q3_q4, 0, $x0 + +... +--- +name: func3 +body: | + bb.0: + ; If there was nothing wrong, don't change the order for no reason + ; CHECK-LABEL: name: func3 + ; CHECK: $x0 = IMPLICIT_DEF + ; CHECK: $q1_q2_q3_q4 = IMPLICIT_DEF + ; CHECK: $q0_q1 = COPY $q1_q2 + ; CHECK: $q2_q3 = COPY $q3_q4 + ; CHECK: ST4i64 $q0_q1_q2_q3, 0, $x0 + $x0 = IMPLICIT_DEF + $q1_q2_q3_q4 = IMPLICIT_DEF + $q0_q1 = COPY $q1_q2 { + $q2_q3 = COPY $q3_q4 + } + ST4i64 $q0_q1_q2_q3, 0, $x0 + +...