From 84581daf2058eb9d37e24a50ad3be198529bdf28 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Fri, 5 Apr 2013 16:39:53 +0000 Subject: [PATCH] Reverting 178851 as it broke buildbots git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178883 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 171 ++------------------- .../ARM/2013-04-05-overridden-loads-PR14824.ll | 110 ------------- 2 files changed, 10 insertions(+), 271 deletions(-) delete mode 100644 test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b7ac5d57c36..e4e683c2a02 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -87,53 +87,6 @@ namespace { MachineBasicBlock::iterator i) : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; - class UnitRegsMap { - public: - UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {} - const SmallVector& operator[](unsigned Reg) { - DenseMap >::iterator found = - Cache.find(Reg); - if (found != Cache.end()) - return found->second; - else - return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg))) - .first->second; - } - private: - SmallVector getUnitRegs(unsigned Reg) { - SmallVector Res; - - const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg); - if (TRC == &ARM::QPRRegClass) { - if (Reg > ARM::Q7) { - Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1)); - return Res; - } - - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3)); - - return Res; - } - - if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) { - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); - - return Res; - } - - Res.push_back(Reg); - - return Res; - - } - const TargetRegisterInfo* TRI; - DenseMap > Cache; - }; typedef SmallVector MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -175,11 +128,6 @@ namespace { MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I); - unsigned AddMemOp(MemOpQueue& MemOps, - const MemOpQueueEntry newEntry, - UnitRegsMap& UnitRegsInfo, - SmallSet& UsedUnitRegs, - unsigned At = -1U); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -1265,103 +1213,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; } -/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti. -/// It adds store mem ops with simple push_back/insert method, -/// without any additional logic. -/// For load operation it does the next: -/// 1. Adds new load operation into MemOp collection at "At" position. -/// 2. Removes any "load" operations from MemOps, that changes "Reg" register -/// contents, prior to "At". -/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector > -/// UsedUnitRegs - set of unit-registers currently in use. -/// At - position at which it would added, and prior which the clean-up -/// should be made (for load operation). -/// FIXME: The clean-up also should be made for store operations, -/// but the memory address should be analyzed instead of unit registers. -unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps, - const MemOpQueueEntry NewEntry, - UnitRegsMap& UnitRegsInfo, - SmallSet& UsedUnitRegs, - unsigned At) { - unsigned Cleaned = 0; - - if (At == -1U) { - At = MemOps.size(); - MemOps.push_back(NewEntry); - } else - MemOps.insert(&MemOps[At], NewEntry); - - // FIXME: - // If operation is not load, leave it as is by now, - // So 0 overridden ops would cleaned in this case. - if (!NewEntry.MBBI->mayLoad()) - return 0; - - const SmallVector& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg]; - - bool FoundOverriddenLoads = false; - - for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i) - if (UsedUnitRegs.count(NewEntryUnitRegs[i])) { - FoundOverriddenLoads = true; - break; - } - - // If we detect that this register is used by load operations that are - // predecessors for the new one, remove them from MemOps then. - if (FoundOverriddenLoads) { - MemOpQueue UpdatedMemOps; - - // Scan through MemOps entries. - for (unsigned i = 0; i != At; ++i) { - MemOpQueueEntry& MemOpEntry = MemOps[i]; - - // FIXME: Skip non-load operations by now. - if (!MemOpEntry.MBBI->mayLoad()) - continue; - - const SmallVector& MemOpUnitRegs = - UnitRegsInfo[MemOpEntry.Reg]; - - // Lookup entry that loads contents into register used by new entry. - bool ReleaseThisEntry = false; - for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) { - if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(), - MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) { - ReleaseThisEntry = true; - ++Cleaned; - break; - } - } - - if (ReleaseThisEntry) { - const SmallVector& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg]; - for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r) - UsedUnitRegs.erase(RelesedRegs[r]); - } else - UpdatedMemOps.push_back(MemOpEntry); - } - - // Keep anything without changes after At position. - for (unsigned i = At, e = MemOps.size(); i != e; ++i) - UpdatedMemOps.push_back(MemOps[i]); - - MemOps.swap(UpdatedMemOps); - } - - UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end()); - - return Cleaned; -} - /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned NumMerges = 0; unsigned NumMemOps = 0; MemOpQueue MemOps; - UnitRegsMap UnitRegsInfo(TRI); - SmallSet UsedRegUnits; unsigned CurrBase = 0; int CurrOpc = -1; unsigned CurrSize = 0; @@ -1408,11 +1265,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); ++NumMemOps; - const SmallVector& EntryUnitRegs = UnitRegsInfo[Reg]; - UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end()); Advance = true; } else { if (Clobber) { @@ -1424,24 +1278,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - unsigned OverridesCleaned = - AddMemOp(MemOps, - MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI), - UnitRegsInfo, UsedRegUnits) != 0; - NumMemOps += 1 - OverridesCleaned; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; } else { - for (unsigned I = 0; I != NumMemOps; ++I) { - if (Offset < MemOps[I].Offset) { - MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI); - unsigned OverridesCleaned = - AddMemOp(MemOps, entry, UnitRegsInfo, - UsedRegUnits, I) != 0; - NumMemOps += 1 - OverridesCleaned; - + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); + I != E; ++I) { + if (Offset < I->Offset) { + MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; break; - } else if (Offset == MemOps[I].Offset) { + } else if (Offset == I->Offset) { // Collision! This can't be merged! break; } @@ -1512,7 +1362,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrPredReg = 0; if (NumMemOps) { MemOps.clear(); - UsedRegUnits.clear(); NumMemOps = 0; } diff --git a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll b/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll deleted file mode 100644 index 2561686c1f8..00000000000 --- a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll +++ /dev/null @@ -1,110 +0,0 @@ -; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp | FileCheck %s -; The test is presented by Jiangning Liu. -;CHECK-NOT: vldmia - -define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i64> * %dest) nounwind { -entry: - %s0 = load <8 x i64> * %source, align 64 - %s1 = load <8 x i64> * %secondSource, align 64 - %s2 = bitcast <8 x i64> %s0 to i512 - %data.i.i.48.extract.shift = lshr i512 %s2, 384 - %data.i.i.48.extract.trunc = trunc i512 %data.i.i.48.extract.shift to i64 - %arrayidx64 = getelementptr inbounds <8 x i64> * %source, i32 6 - %s120 = load <8 x i64> * %arrayidx64, align 64 - %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6 - %s121 = load <8 x i64> * %arrayidx67, align 64 - %s122 = bitcast <8 x i64> %s120 to i512 - %data.i.i677.48.extract.shift = lshr i512 %s122, 384 - %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64 - %s123 = insertelement <8 x i64> undef, i64 %data.i.i677.48.extract.trunc, i32 0 - %data.i.i677.32.extract.shift = lshr i512 %s122, 256 - %data.i.i677.32.extract.trunc = trunc i512 %data.i.i677.32.extract.shift to i64 - %s124 = insertelement <8 x i64> %s123, i64 %data.i.i677.32.extract.trunc, i32 1 - %data.i.i677.16.extract.shift = lshr i512 %s122, 128 - %data.i.i677.16.extract.trunc = trunc i512 %data.i.i677.16.extract.shift to i64 - %s125 = insertelement <8 x i64> %s124, i64 %data.i.i677.16.extract.trunc, i32 2 - %data.i.i677.56.extract.shift = lshr i512 %s122, 448 - %data.i.i677.56.extract.trunc = trunc i512 %data.i.i677.56.extract.shift to i64 - %s126 = insertelement <8 x i64> %s125, i64 %data.i.i677.56.extract.trunc, i32 3 - %data.i.i677.24.extract.shift = lshr i512 %s122, 192 - %data.i.i677.24.extract.trunc = trunc i512 %data.i.i677.24.extract.shift to i64 - %s127 = insertelement <8 x i64> %s126, i64 %data.i.i677.24.extract.trunc, i32 4 - %s128 = insertelement <8 x i64> %s127, i64 %data.i.i677.32.extract.trunc, i32 5 - %s129 = insertelement <8 x i64> %s128, i64 %data.i.i677.16.extract.trunc, i32 6 - %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7 - %s131 = bitcast <8 x i64> %s121 to i512 - %data.i1.i676.48.extract.shift = lshr i512 %s131, 384 - %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64 - %s132 = insertelement <8 x i64> undef, i64 %data.i1.i676.48.extract.trunc, i32 0 - %data.i1.i676.32.extract.shift = lshr i512 %s131, 256 - %data.i1.i676.32.extract.trunc = trunc i512 %data.i1.i676.32.extract.shift to i64 - %s133 = insertelement <8 x i64> %s132, i64 %data.i1.i676.32.extract.trunc, i32 1 - %data.i1.i676.16.extract.shift = lshr i512 %s131, 128 - %data.i1.i676.16.extract.trunc = trunc i512 %data.i1.i676.16.extract.shift to i64 - %s134 = insertelement <8 x i64> %s133, i64 %data.i1.i676.16.extract.trunc, i32 2 - %data.i1.i676.56.extract.shift = lshr i512 %s131, 448 - %data.i1.i676.56.extract.trunc = trunc i512 %data.i1.i676.56.extract.shift to i64 - %s135 = insertelement <8 x i64> %s134, i64 %data.i1.i676.56.extract.trunc, i32 3 - %data.i1.i676.24.extract.shift = lshr i512 %s131, 192 - %data.i1.i676.24.extract.trunc = trunc i512 %data.i1.i676.24.extract.shift to i64 - %s136 = insertelement <8 x i64> %s135, i64 %data.i1.i676.24.extract.trunc, i32 4 - %s137 = insertelement <8 x i64> %s136, i64 %data.i1.i676.32.extract.trunc, i32 5 - %s138 = insertelement <8 x i64> %s137, i64 %data.i1.i676.16.extract.trunc, i32 6 - %s139 = insertelement <8 x i64> %s138, i64 %data.i1.i676.56.extract.trunc, i32 7 - %vecinit28.i.i699 = shufflevector <8 x i64> %s139, <8 x i64> %s130, <8 x i32> - %vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> - %vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> - %vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> - %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6 - store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64 - %arrayidx75 = getelementptr inbounds <8 x i64> * %source, i32 7 - %s140 = load <8 x i64> * %arrayidx75, align 64 - %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7 - %s141 = load <8 x i64> * %arrayidx78, align 64 - %s142 = bitcast <8 x i64> %s140 to i512 - %data.i.i650.32.extract.shift = lshr i512 %s142, 256 - %data.i.i650.32.extract.trunc = trunc i512 %data.i.i650.32.extract.shift to i64 - %s143 = insertelement <8 x i64> undef, i64 %data.i.i650.32.extract.trunc, i32 0 - %s144 = insertelement <8 x i64> %s143, i64 %data.i.i650.32.extract.trunc, i32 1 - %data.i.i650.16.extract.shift = lshr i512 %s142, 128 - %data.i.i650.16.extract.trunc = trunc i512 %data.i.i650.16.extract.shift to i64 - %s145 = insertelement <8 x i64> %s144, i64 %data.i.i650.16.extract.trunc, i32 2 - %data.i.i650.8.extract.shift = lshr i512 %s142, 64 - %data.i.i650.8.extract.trunc = trunc i512 %data.i.i650.8.extract.shift to i64 - %s146 = insertelement <8 x i64> %s145, i64 %data.i.i650.8.extract.trunc, i32 3 - %s147 = insertelement <8 x i64> %s146, i64 %data.i.i650.8.extract.trunc, i32 4 - %data.i.i650.48.extract.shift = lshr i512 %s142, 384 - %data.i.i650.48.extract.trunc = trunc i512 %data.i.i650.48.extract.shift to i64 - %s148 = insertelement <8 x i64> %s147, i64 %data.i.i650.48.extract.trunc, i32 5 - %s149 = insertelement <8 x i64> %s148, i64 %data.i.i650.16.extract.trunc, i32 6 - %data.i.i650.0.extract.trunc = trunc i512 %s142 to i64 - %s150 = insertelement <8 x i64> %s149, i64 %data.i.i650.0.extract.trunc, i32 7 - %s151 = bitcast <8 x i64> %s141 to i512 - %data.i1.i649.32.extract.shift = lshr i512 %s151, 256 - %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64 - %s152 = insertelement <8 x i64> undef, i64 %data.i1.i649.32.extract.trunc, i32 0 - %s153 = insertelement <8 x i64> %s152, i64 %data.i1.i649.32.extract.trunc, i32 1 - %data.i1.i649.16.extract.shift = lshr i512 %s151, 128 - %data.i1.i649.16.extract.trunc = trunc i512 %data.i1.i649.16.extract.shift to i64 - %s154 = insertelement <8 x i64> %s153, i64 %data.i1.i649.16.extract.trunc, i32 2 - %data.i1.i649.8.extract.shift = lshr i512 %s151, 64 - %data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64 - %s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3 - %s156 = insertelement <8 x i64> %s155, i64 %data.i1.i649.8.extract.trunc, i32 4 - %data.i1.i649.48.extract.shift = lshr i512 %s151, 384 - %data.i1.i649.48.extract.trunc = trunc i512 %data.i1.i649.48.extract.shift to i64 - %s157 = insertelement <8 x i64> %s156, i64 %data.i1.i649.48.extract.trunc, i32 5 - %s158 = insertelement <8 x i64> %s157, i64 %data.i1.i649.16.extract.trunc, i32 6 - %data.i1.i649.0.extract.trunc = trunc i512 %s151 to i64 - %s159 = insertelement <8 x i64> %s158, i64 %data.i1.i649.0.extract.trunc, i32 7 - %vecinit7.i.i669 = shufflevector <8 x i64> %s159, <8 x i64> %s150, <8 x i32> - %vecinit14.i.i670 = shufflevector <8 x i64> %vecinit7.i.i669, <8 x i64> %s150, <8 x i32> - %vecinit21.i.i671 = shufflevector <8 x i64> %vecinit14.i.i670, <8 x i64> %s150, <8 x i32> - %vecinit28.i.i672 = shufflevector <8 x i64> %vecinit21.i.i671, <8 x i64> %s150, <8 x i32> - %vecinit35.i.i673 = shufflevector <8 x i64> %vecinit28.i.i672, <8 x i64> %s159, <8 x i32> - %vecinit42.i.i674 = shufflevector <8 x i64> %vecinit35.i.i673, <8 x i64> %s159, <8 x i32> - %vecinit49.i.i675 = shufflevector <8 x i64> %vecinit42.i.i674, <8 x i64> %s159, <8 x i32> - %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7 - store <8 x i64> %vecinit49.i.i675, <8 x i64> * %arrayidx83, align 64 - ret void -} -- 2.11.0