From fb337ea53d1e6fe68b24217a9ea95e9f544ef697 Mon Sep 17 00:00:00 2001 From: Vladimir Marko Date: Wed, 25 Nov 2015 15:25:10 +0000 Subject: [PATCH] Move PC-relative addressing bases to a better position. Move the platform-specific HX86ComputeBaseMethodAddress and HArmDexCacheArraysBase to the latest dominator of their uses outside any loop. This brings the base closer to the first use (previously, it was in the entry block) and relieves some pressure on the register allocator while avoiding recalculation of the base in a loop. Change-Id: I231aa81eb5b4de9af2d0167054d06b65eb18a636 --- compiler/optimizing/dex_cache_array_fixups_arm.cc | 26 +++++++---- compiler/optimizing/nodes.cc | 53 +++++++++++++++++++++++ compiler/optimizing/nodes.h | 8 ++++ compiler/optimizing/pc_relative_fixups_x86.cc | 28 ++++++++---- 4 files changed, 98 insertions(+), 17 deletions(-) diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc index 7f5f0e5fd..65820630f 100644 --- a/compiler/optimizing/dex_cache_array_fixups_arm.cc +++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc @@ -33,6 +33,16 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { // Attribute memory use to code generator. graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} + void MoveBasesIfNeeded() { + for (const auto& entry : dex_cache_array_bases_) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + HArmDexCacheArraysBase* base = entry.second; + base->MoveBeforeFirstUserAndOutOfLoops(); + } + } + private: void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { // If this is an invoke with PC-relative access to the dex cache methods array, @@ -40,7 +50,7 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { if (invoke->HasPcRelativeDexCache()) { // Initialize base for target method dex file if needed. MethodReference target_method = invoke->GetTargetMethod(); - HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(invoke, *target_method.dex_file); + HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file); // Update the element offset in base. DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file); base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index)); @@ -50,8 +60,7 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { } } - HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(HInstruction* user, - const DexFile& dex_file) { + HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) { // Ensure we only initialize the pointer once for each dex file. auto lb = dex_cache_array_bases_.lower_bound(&dex_file); if (lb != dex_cache_array_bases_.end() && @@ -59,11 +68,11 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { return lb->second; } - HGraph* graph = GetGraph(); - HBasicBlock* entry = graph->GetEntryBlock(); - HArmDexCacheArraysBase* base = new (graph->GetArena()) HArmDexCacheArraysBase(dex_file); - HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction(); - entry->InsertInstructionBefore(base, insert_pos); + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); dex_cache_array_bases_.PutBefore(lb, &dex_file, base); return base; } @@ -76,6 +85,7 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { void DexCacheArrayFixups::Run() { DexCacheArrayFixupsVisitor visitor(graph_); visitor.VisitInsertionOrder(); + visitor.MoveBasesIfNeeded(); } } // namespace arm diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 890598d68..7986e9181 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1177,6 +1177,59 @@ void HInstruction::MoveBefore(HInstruction* cursor) { } } +void HInstruction::MoveBeforeFirstUserAndOutOfLoops() { + DCHECK(!CanThrow()); + DCHECK(!HasSideEffects()); + DCHECK(!HasEnvironmentUses()); + DCHECK(HasNonEnvironmentUses()); + DCHECK(!IsPhi()); // Makes no sense for Phi. + DCHECK_EQ(InputCount(), 0u); + + // Find the target block. + HUseIterator uses_it(GetUses()); + HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock(); + uses_it.Advance(); + while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) { + uses_it.Advance(); + } + if (!uses_it.Done()) { + // This instruction has uses in two or more blocks. Find the common dominator. + CommonDominator finder(target_block); + for (; !uses_it.Done(); uses_it.Advance()) { + finder.Update(uses_it.Current()->GetUser()->GetBlock()); + } + target_block = finder.Get(); + DCHECK(target_block != nullptr); + } + // Move to the first dominator not in a loop. + while (target_block->IsInLoop()) { + target_block = target_block->GetDominator(); + DCHECK(target_block != nullptr); + } + + // Find insertion position. + HInstruction* insert_pos = nullptr; + for (HUseIterator uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) { + if (uses_it2.Current()->GetUser()->GetBlock() == target_block && + (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) { + insert_pos = uses_it2.Current()->GetUser(); + } + } + if (insert_pos == nullptr) { + // No user in `target_block`, insert before the control flow instruction. + insert_pos = target_block->GetLastInstruction(); + DCHECK(insert_pos->IsControlFlow()); + // Avoid splitting HCondition from HIf to prevent unnecessary materialization. + if (insert_pos->IsIf()) { + HInstruction* if_input = insert_pos->AsIf()->InputAt(0); + if (if_input == insert_pos->GetPrevious()) { + insert_pos = if_input; + } + } + } + MoveBefore(insert_pos); +} + HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK_EQ(cursor->GetBlock(), this); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 0bf7734ea..f2622034f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1956,6 +1956,14 @@ class HInstruction : public ArenaObject { // Move `this` instruction before `cursor`. void MoveBefore(HInstruction* cursor); + // Move `this` before its first user and out of any loops. If there is no + // out-of-loop user that dominates all other users, move the instruction + // to the end of the out-of-loop common dominator of the user's blocks. + // + // This can be used only on non-throwing instructions with no side effects that + // have at least one use but no environment uses. + void MoveBeforeFirstUserAndOutOfLoops(); + #define INSTRUCTION_TYPE_CHECK(type, super) \ bool Is##type() const { return (As##type() != nullptr); } \ virtual const H##type* As##type() const { return nullptr; } \ diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 808a1dc6c..b383f1e1a 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -26,6 +26,15 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { public: explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {} + void MoveBaseIfNeeded() { + if (base_ != nullptr) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + base_->MoveBeforeFirstUserAndOutOfLoops(); + } + } + private: void VisitAdd(HAdd* add) OVERRIDE { BinaryFP(add); @@ -72,7 +81,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to // address the constant area. - InitializePCRelativeBasePointer(switch_insn); + InitializePCRelativeBasePointer(); HGraph* graph = GetGraph(); HBasicBlock* block = switch_insn->GetBlock(); HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch( @@ -84,22 +93,22 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch); } - void InitializePCRelativeBasePointer(HInstruction* user) { + void InitializePCRelativeBasePointer() { // Ensure we only initialize the pointer once. if (base_ != nullptr) { return; } - HGraph* graph = GetGraph(); - HBasicBlock* entry = graph->GetEntryBlock(); - base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress(); - HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction(); - entry->InsertInstructionBefore(base_, insert_pos); + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress(); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); DCHECK(base_ != nullptr); } void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) { - InitializePCRelativeBasePointer(insn); + InitializePCRelativeBasePointer(); HX86LoadFromConstantTable* load_constant = new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize); insn->GetBlock()->InsertInstructionBefore(load_constant, insn); @@ -111,7 +120,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // addressing, we need the PC-relative address base. HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) { - InitializePCRelativeBasePointer(invoke); + InitializePCRelativeBasePointer(); // Add the extra parameter base_. DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); invoke_static_or_direct->AddSpecialInput(base_); @@ -133,6 +142,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void PcRelativeFixups::Run() { PCRelativeHandlerVisitor visitor(graph_); visitor.VisitInsertionOrder(); + visitor.MoveBaseIfNeeded(); } } // namespace x86 -- 2.11.0