From 01b47b046b01ec68696f8ff61b5326cdd3af348e Mon Sep 17 00:00:00 2001 From: Mingyao Yang Date: Fri, 3 Feb 2017 12:09:57 -0800 Subject: [PATCH] Inlining a few small methods based on profiling dex2oat with perf. Test: m test-art-host Change-Id: I6313158e59592d8d132154523be9c82dda3c7eb8 --- compiler/optimizing/instruction_builder.cc | 54 +++++++++++++++++------------ compiler/optimizing/instruction_builder.h | 4 +++ compiler/optimizing/loop_optimization.cc | 3 +- compiler/optimizing/nodes.h | 12 +++---- compiler/optimizing/ssa_liveness_analysis.h | 2 +- runtime/base/mutex.h | 4 +-- runtime/scoped_thread_state_change.h | 2 ++ runtime/stack_map.h | 2 +- 8 files changed, 49 insertions(+), 34 deletions(-) diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index c60f6e539..7dc2fa834 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -37,37 +37,45 @@ HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const { return block_builder_->GetBlockAt(dex_pc); } -ArenaVector* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) { +inline ArenaVector* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) { ArenaVector* locals = &locals_for_[block->GetBlockId()]; const size_t vregs = graph_->GetNumberOfVRegs(); - if (locals->size() != vregs) { - locals->resize(vregs, nullptr); - - if (block->IsCatchBlock()) { - // We record incoming inputs of catch phis at throwing instructions and - // must therefore eagerly create the phis. Phis for undefined vregs will - // be deleted when the first throwing instruction with the vreg undefined - // is encountered. Unused phis will be removed by dead phi analysis. - for (size_t i = 0; i < vregs; ++i) { - // No point in creating the catch phi if it is already undefined at - // the first throwing instruction. - HInstruction* current_local_value = (*current_locals_)[i]; - if (current_local_value != nullptr) { - HPhi* phi = new (arena_) HPhi( - arena_, - i, - 0, - current_local_value->GetType()); - block->AddPhi(phi); - (*locals)[i] = phi; - } + if (locals->size() == vregs) { + return locals; + } + return GetLocalsForWithAllocation(block, locals, vregs); +} + +ArenaVector* HInstructionBuilder::GetLocalsForWithAllocation( + HBasicBlock* block, + ArenaVector* locals, + const size_t vregs) { + DCHECK_NE(locals->size(), vregs); + locals->resize(vregs, nullptr); + if (block->IsCatchBlock()) { + // We record incoming inputs of catch phis at throwing instructions and + // must therefore eagerly create the phis. Phis for undefined vregs will + // be deleted when the first throwing instruction with the vreg undefined + // is encountered. Unused phis will be removed by dead phi analysis. + for (size_t i = 0; i < vregs; ++i) { + // No point in creating the catch phi if it is already undefined at + // the first throwing instruction. + HInstruction* current_local_value = (*current_locals_)[i]; + if (current_local_value != nullptr) { + HPhi* phi = new (arena_) HPhi( + arena_, + i, + 0, + current_local_value->GetType()); + block->AddPhi(phi); + (*locals)[i] = phi; } } } return locals; } -HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) { +inline HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) { ArenaVector* locals = GetLocalsFor(block); return (*locals)[local]; } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index e735a0c46..7fdc1883c 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -93,6 +93,10 @@ class HInstructionBuilder : public ValueObject { HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const; ArenaVector* GetLocalsFor(HBasicBlock* block); + // Out of line version of GetLocalsFor(), which has a fast path that is + // beneficial to get inlined by callers. + ArenaVector* GetLocalsForWithAllocation( + HBasicBlock* block, ArenaVector* locals, const size_t vregs); HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local); HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const; HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 26c9ab83c..bffda93d8 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -69,7 +69,7 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, } void HLoopOptimization::Run() { - // Well-behaved loops only. + // Skip if there is no loop or the graph has try-catch/irreducible loops. // TODO: make this less of a sledgehammer. if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) { return; @@ -85,6 +85,7 @@ void HLoopOptimization::Run() { LocalRun(); if (top_loop_ == nullptr) { + // All loops have been eliminated. graph_->SetHasLoops(false); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index c39aed2c6..11a0d35bb 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1734,11 +1734,11 @@ class SideEffects : public ValueObject { // A HEnvironment object contains the values of virtual registers at a given location. class HEnvironment : public ArenaObject { public: - HEnvironment(ArenaAllocator* arena, - size_t number_of_vregs, - ArtMethod* method, - uint32_t dex_pc, - HInstruction* holder) + ALWAYS_INLINE HEnvironment(ArenaAllocator* arena, + size_t number_of_vregs, + ArtMethod* method, + uint32_t dex_pc, + HInstruction* holder) : vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)), locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)), parent_(nullptr), @@ -1747,7 +1747,7 @@ class HEnvironment : public ArenaObject { holder_(holder) { } - HEnvironment(ArenaAllocator* arena, const HEnvironment& to_copy, HInstruction* holder) + ALWAYS_INLINE HEnvironment(ArenaAllocator* arena, const HEnvironment& to_copy, HInstruction* holder) : HEnvironment(arena, to_copy.Size(), to_copy.GetMethod(), diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index b62bf4e5f..a239bd50c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -331,7 +331,7 @@ class LiveInterval : public ArenaObject { instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_); } - void AddRange(size_t start, size_t end) { + ALWAYS_INLINE void AddRange(size_t start, size_t end) { if (first_range_ == nullptr) { first_range_ = last_range_ = range_search_start_ = new (allocator_) LiveRange(start, end, first_range_); diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index c59664b9c..65d35eab3 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -516,12 +516,12 @@ class SCOPED_CAPABILITY MutexLock { // construction and releases it upon destruction. class SCOPED_CAPABILITY ReaderMutexLock { public: - ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu) : + ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu) ALWAYS_INLINE : self_(self), mu_(mu) { mu_.SharedLock(self_); } - ~ReaderMutexLock() RELEASE() { + ~ReaderMutexLock() RELEASE() ALWAYS_INLINE { mu_.SharedUnlock(self_); } diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h index a3286ac3d..5f03741d1 100644 --- a/runtime/scoped_thread_state_change.h +++ b/runtime/scoped_thread_state_change.h @@ -141,6 +141,8 @@ class ScopedObjectAccessUnchecked : public ScopedObjectAccessAlreadyRunnable { ALWAYS_INLINE explicit ScopedObjectAccessUnchecked(Thread* self) REQUIRES(!Locks::thread_suspend_count_lock_); + ALWAYS_INLINE ~ScopedObjectAccessUnchecked() REQUIRES(!Locks::thread_suspend_count_lock_) {} + // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't // change into Runnable or acquire a share on the mutator_lock_. explicit ScopedObjectAccessUnchecked(JavaVM* vm) ALWAYS_INLINE diff --git a/runtime/stack_map.h b/runtime/stack_map.h index 67f0b5715..d936ce938 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -571,7 +571,7 @@ class DexRegisterMap { } } - bool IsDexRegisterLive(uint16_t dex_register_number) const { + ALWAYS_INLINE bool IsDexRegisterLive(uint16_t dex_register_number) const { size_t live_bit_mask_offset_in_bits = GetLiveBitMaskOffset() * kBitsPerByte; return region_.LoadBit(live_bit_mask_offset_in_bits + dex_register_number); } -- 2.11.0