From 1753e553bfce14c0b68aace845c49d27dab9b9b4 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 17 Feb 2016 21:16:53 +0000 Subject: [PATCH] AArch64: improve redundant copy elimination. Mostly, this fixes the bug that if the CBZ guaranteed Xn but Wn was used, we didn't sort out the use-def chain properly. I've also made it check more than just the last instruction for a compatible CBZ (so it can cope without fallthroughs). I'd have liked to do that separately, but it's helps writing the test. Finally, I removed some custom loops in favour of MachineInstr helpers and refactored the control flow to flatten it and avoid possibly quadratic iterations in blocks with many copies. NFC for these, just a general tidy-up. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261154 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AArch64/AArch64RedundantCopyElimination.cpp | 86 ++++++++++++---------- test/CodeGen/AArch64/machine-copy-remove.ll | 27 ++++++- 2 files changed, 69 insertions(+), 44 deletions(-) diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp index 4a1e25254fc..b52c19a026d 100644 --- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -63,6 +63,20 @@ char AArch64RedundantCopyElimination::ID = 0; INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim", "AArch64 redundant copy elimination pass", false, false) +static bool guaranteesZeroRegInBlock(MachineInstr *MI, MachineBasicBlock *MBB) { + unsigned Opc = MI->getOpcode(); + // Check if the current basic block is the target block to which the + // CBZ/CBNZ instruction jumps when its Wt/Xt is zero. + if ((Opc == AArch64::CBZW || Opc == AArch64::CBZX) && + MBB == MI->getOperand(1).getMBB()) + return true; + else if ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) && + MBB != MI->getOperand(1).getMBB()) + return true; + + return false; +} + bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) { // Check if the current basic block has a single predecessor. if (MBB->pred_size() != 1) @@ -73,18 +87,16 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) { if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2) return false; - unsigned LastOpc = CompBr->getOpcode(); - // Check if the current basic block is the target block to which the cbz/cbnz - // instruction jumps when its Wt/Xt is zero. - if (LastOpc == AArch64::CBZW || LastOpc == AArch64::CBZX) { - if (MBB != CompBr->getOperand(1).getMBB()) - return false; - } else if (LastOpc == AArch64::CBNZW || LastOpc == AArch64::CBNZX) { - if (MBB == CompBr->getOperand(1).getMBB()) - return false; - } else { + ++CompBr; + do { + --CompBr; + if (guaranteesZeroRegInBlock(CompBr, MBB)) + break; + } while (CompBr != PredMBB->begin() && CompBr->isTerminator()); + + // We've not found a CBZ/CBNZ, time to bail out. + if (!guaranteesZeroRegInBlock(CompBr, MBB)) return false; - } unsigned TargetReg = CompBr->getOperand(0).getReg(); if (!TargetReg) @@ -98,6 +110,8 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) { TargetRegs.insert(*AI); bool Changed = false; + MachineBasicBlock::iterator LastChange = MBB->begin(); + unsigned SmallestDef = TargetReg; // Remove redundant Copy instructions unless TargetReg is modified. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { MachineInstr *MI = &*I; @@ -111,48 +125,40 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) { if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) && !MRI->isReserved(DefReg) && (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) { - - CompBr->clearRegisterKills(DefReg, TRI); - if (MBB->isLiveIn(DefReg)) - // Clear any kills of TargetReg between CompBr and MI. - for (MachineInstr &MMI : - make_range(MBB->begin()->getIterator(), MI->getIterator())) - MMI.clearRegisterKills(DefReg, TRI); - else - MBB->addLiveIn(DefReg); - DEBUG(dbgs() << "Remove redundant Copy : "); DEBUG((MI)->print(dbgs())); MI->eraseFromParent(); Changed = true; + LastChange = I; NumCopiesRemoved++; + SmallestDef = + TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef; continue; } } - for (const MachineOperand &MO : MI->operands()) { - // FIXME: It is possible to use the register mask to check if all - // registers in TargetRegs are not clobbered. For now, we treat it like - // a basic block boundary. - if (MO.isRegMask()) - return Changed; - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); + if (MI->modifiesRegister(TargetReg, TRI)) + break; + } - if (!Reg) - continue; + if (!Changed) + return false; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Expect physical register"); + // Otherwise, we have to fixup the use-def chain, starting with the + // CBZ/CBNZ. Conservatively mark as much as we can live. + CompBr->clearRegisterKills(SmallestDef, TRI); - // Stop if the TargetReg is modified. - if (MO.isDef() && TargetRegs.count(Reg)) - return Changed; - } - } - return Changed; + // Clear any kills of TargetReg between CompBr and MI. + if (std::any_of(TargetRegs.begin(), TargetRegs.end(), + [&](unsigned Reg) { return MBB->isLiveIn(Reg); })) { + for (MachineInstr &MMI : + make_range(MBB->begin()->getIterator(), LastChange->getIterator())) + MMI.clearRegisterKills(SmallestDef, TRI); + } else + MBB->addLiveIn(TargetReg); + + return true; } bool AArch64RedundantCopyElimination::runOnMachineFunction( diff --git a/test/CodeGen/AArch64/machine-copy-remove.ll b/test/CodeGen/AArch64/machine-copy-remove.ll index 7196d432c9e..6a97ead0de8 100644 --- a/test/CodeGen/AArch64/machine-copy-remove.ll +++ b/test/CodeGen/AArch64/machine-copy-remove.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s -; CHECK-LABEL: f_XX +; CHECK-LABEL: f_XX: ; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]] ; CHECK: [[BB]]: ; CHECK-NOT: mov x[[REG]], xzr @@ -18,7 +18,7 @@ if.end: ; preds = %entry, %if.then ret i64 %a.0 } -; CHECK-LABEL: f_WW +; CHECK-LABEL: f_WW: ; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]] ; CHECK: [[BB]]: ; CHECK-NOT: mov w[[REG]], wzr @@ -36,7 +36,7 @@ if.end: ; preds = %entry, %if.then ret i32 %a.0 } -; CHECK-LABEL: f_XW +; CHECK-LABEL: f_XW: ; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]] ; CHECK: [[BB]]: ; CHECK-NOT: mov w[[REG]], wzr @@ -54,7 +54,7 @@ if.end: ; preds = %entry, %if.then ret i32 %a.0 } -; CHECK-LABEL: f_WX +; CHECK-LABEL: f_WX: ; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]] ; CHECK: [[BB]]: ; CHECK: mov x[[REG]], xzr @@ -73,3 +73,22 @@ if.end: ; preds = %entry, %if.then %a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ] ret i64 %a.0 } + +; CHECK-LABEL: test_superreg: +; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]] +; CHECK: [[BB]]: +; CHECK: str x[[REG]], [x1] +; CHECK-NOT: mov w[[REG]], wzr +; Because we returned w0 but x0 was marked live-in to the block, we didn't +; remove the on the str leading to a verification failure. +define i32 @test_superreg(i64 %in, i64* %dest) { + %tst = icmp eq i64 %in, 0 + br i1 %tst, label %true, label %false + +false: + ret i32 42 + +true: + store volatile i64 %in, i64* %dest + ret i32 0 +} \ No newline at end of file -- 2.11.0