From: Mathieu Chartier
Date: Thu, 14 Jul 2016 20:30:03 +0000 (-0700)
Subject: Add a way to measure read barrier slow paths
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=56fe25895e91d34a0a017429468829a20bdd5ae4;p=android-x86%2Fart.git

Add a way to measure read barrier slow paths

If enabled, this option counts the number of read barrier slow paths taken,
measures the total slow path time per GC, and records the info in a histogram.
Also added support for systrace to see which threads are performing slow paths.

Added the runtime option -Xgc:measure to enable it. The info is dumped on
SIGQUIT.

Test: Volantis boot with CC, test-art-host with CC, run EEAC with CC and -Xgc:measure
Bug: 30162165

Change-Id: I3c2bdb4156065249c45695f13c77c0579bc8e57a
---

diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 9b4042c59..f05648c58 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -462,7 +462,7 @@ static gc::CollectorType ParseCollectorType(const std::string& option) {
 struct XGcOption {
   // These defaults are used when the command line arguments for -Xgc:
   // are either omitted completely or partially.
-  gc::CollectorType collector_type_ = kUseReadBarrier ?
+  gc::CollectorType collector_type_ = kUseReadBarrier ?
       // If RB is enabled (currently a build-time decision),
       // use CC as the default GC.
       gc::kCollectorTypeCC :
@@ -473,6 +473,7 @@ struct XGcOption {
   bool verify_pre_gc_rosalloc_ = kIsDebugBuild;
   bool verify_pre_sweeping_rosalloc_ = false;
   bool verify_post_gc_rosalloc_ = false;
+  bool measure_ = kIsDebugBuild;
   bool gcstress_ = false;
 };
 
@@ -515,6 +516,8 @@ struct CmdlineType<XGcOption> : CmdlineTypeParser<XGcOption> {
       xgc.gcstress_ = true;
     } else if (gc_option == "nogcstress") {
       xgc.gcstress_ = false;
+    } else if (gc_option == "measure") {
+      xgc.measure_ = true;
     } else if ((gc_option == "precise") ||
                (gc_option == "noprecise") ||
                (gc_option == "verifycardtable") ||
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 301111251..4019a5b53 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -153,6 +153,14 @@ inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
   }
 }
 
+inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
+  // TODO: Consider removing this check when we are done investigating slow paths.
b/30162165 + if (UNLIKELY(mark_from_read_barrier_measurements_)) { + return MarkFromReadBarrierWithMeasurements(from_ref); + } + return Mark(from_ref); +} + inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) { DCHECK(region_space_->IsInFromSpace(from_ref)); LockWord lw = from_ref->GetLockWord(false); diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index b7b5aa005..155e032ab 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -17,7 +17,9 @@ #include "concurrent_copying.h" #include "art_field-inl.h" +#include "base/histogram-inl.h" #include "base/stl_util.h" +#include "base/systrace.h" #include "debugger.h" #include "gc/accounting/heap_bitmap-inl.h" #include "gc/accounting/space_bitmap-inl.h" @@ -39,7 +41,9 @@ namespace collector { static constexpr size_t kDefaultGcMarkStackSize = 2 * MB; -ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix) +ConcurrentCopying::ConcurrentCopying(Heap* heap, + const std::string& name_prefix, + bool measure_read_barrier_slow_path) : GarbageCollector(heap, name_prefix + (name_prefix.empty() ? "" : " ") + "concurrent copying + mark sweep"), @@ -54,6 +58,14 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix) heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff), weak_ref_access_enabled_(true), skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock), + measure_read_barrier_slow_path_(measure_read_barrier_slow_path), + rb_slow_path_ns_(0), + rb_slow_path_count_(0), + rb_slow_path_count_gc_(0), + rb_slow_path_histogram_lock_("Read barrier histogram lock"), + rb_slow_path_time_histogram_("Mutator time in read barrier slow path", 500, 32), + rb_slow_path_count_total_(0), + rb_slow_path_count_gc_total_(0), rb_table_(heap_->GetReadBarrierTable()), force_evacuate_all_(false), immune_gray_stack_lock_("concurrent copying immune gray stack lock", @@ -162,6 +174,14 @@ void ConcurrentCopying::InitializePhase() { MutexLock mu(Thread::Current(), mark_stack_lock_); CHECK(false_gray_stack_.empty()); } + + mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_; + if (measure_read_barrier_slow_path_) { + rb_slow_path_ns_.StoreRelaxed(0); + rb_slow_path_count_.StoreRelaxed(0); + rb_slow_path_count_gc_.StoreRelaxed(0); + } + immune_spaces_.Reset(); bytes_moved_.StoreRelaxed(0); objects_moved_.StoreRelaxed(0); @@ -1996,9 +2016,17 @@ void ConcurrentCopying::FinishPhase() { MutexLock mu(Thread::Current(), skipped_blocks_lock_); skipped_blocks_map_.clear(); } - ReaderMutexLock mu(self, *Locks::mutator_lock_); - WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_); - heap_->ClearMarkedObjects(); + { + ReaderMutexLock mu(self, *Locks::mutator_lock_); + WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_); + heap_->ClearMarkedObjects(); + } + if (measure_read_barrier_slow_path_) { + MutexLock mu(self, rb_slow_path_histogram_lock_); + rb_slow_path_time_histogram_.AdjustAndAddValue(rb_slow_path_ns_.LoadRelaxed()); + rb_slow_path_count_total_ += rb_slow_path_count_.LoadRelaxed(); + rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.LoadRelaxed(); + } } bool ConcurrentCopying::IsMarkedHeapReference(mirror::HeapReference* field) { @@ -2036,6 +2064,37 @@ void ConcurrentCopying::RevokeAllThreadLocalBuffers() { region_space_->RevokeAllThreadLocalBuffers(); } +mirror::Object* 
ConcurrentCopying::MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) {
+  if (Thread::Current() != thread_running_gc_) {
+    rb_slow_path_count_.FetchAndAddRelaxed(1u);
+  } else {
+    rb_slow_path_count_gc_.FetchAndAddRelaxed(1u);
+  }
+  ScopedTrace tr(__FUNCTION__);
+  const uint64_t start_time = measure_read_barrier_slow_path_ ? NanoTime() : 0u;
+  mirror::Object* ret = Mark(from_ref);
+  if (measure_read_barrier_slow_path_) {
+    rb_slow_path_ns_.FetchAndAddRelaxed(NanoTime() - start_time);
+  }
+  return ret;
+}
+
+void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) {
+  GarbageCollector::DumpPerformanceInfo(os);
+  MutexLock mu(Thread::Current(), rb_slow_path_histogram_lock_);
+  if (rb_slow_path_time_histogram_.SampleSize() > 0) {
+    Histogram<uint64_t>::CumulativeData cumulative_data;
+    rb_slow_path_time_histogram_.CreateHistogram(&cumulative_data);
+    rb_slow_path_time_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
+  }
+  if (rb_slow_path_count_total_ > 0) {
+    os << "Slow path count " << rb_slow_path_count_total_ << "\n";
+  }
+  if (rb_slow_path_count_gc_total_ > 0) {
+    os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
+  }
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 166a1f0b2..6a8d052cb 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -58,17 +58,24 @@ class ConcurrentCopying : public GarbageCollector {
   // Enable verbose mode.
   static constexpr bool kVerboseMode = false;
 
-  ConcurrentCopying(Heap* heap, const std::string& name_prefix = "");
+  ConcurrentCopying(Heap* heap,
+                    const std::string& name_prefix = "",
+                    bool measure_read_barrier_slow_path = false);
   ~ConcurrentCopying();
 
   virtual void RunPhases() OVERRIDE
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+      REQUIRES(!immune_gray_stack_lock_,
+               !mark_stack_lock_,
+               !rb_slow_path_histogram_lock_,
+               !skipped_blocks_lock_);
   void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_);
   void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
-  void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  void FinishPhase() REQUIRES(!mark_stack_lock_,
+                              !rb_slow_path_histogram_lock_,
+                              !skipped_blocks_lock_);
 
   void BindBitmaps() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);
@@ -95,7 +102,11 @@ class ConcurrentCopying : public GarbageCollector {
     return IsMarked(ref) == ref;
   }
   template
-  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  ALWAYS_INLINE mirror::Object* MarkFromReadBarrier(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   bool IsMarking() const {
     return is_marking_;
@@ -203,6 +214,10 @@ class ConcurrentCopying : public GarbageCollector {
       REQUIRES(!mark_stack_lock_);
   void ScanImmuneObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
+  mirror::Object* MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_);
 
   space::RegionSpace* region_space_;  // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
@@ -251,6 +266,20 @@ class ConcurrentCopying : public GarbageCollector {
   Atomic<size_t> to_space_bytes_skipped_;
   Atomic<size_t> to_space_objects_skipped_;
 
+  // If measure_read_barrier_slow_path_ is true, we count the number of read barrier slow path
+  // calls and how much time mutators spend in MarkFromReadBarrier, and log the totals.
+  bool measure_read_barrier_slow_path_;
+  // mark_from_read_barrier_measurements_ is true while the measurements are active; it is set
+  // from measure_read_barrier_slow_path_ at the start of each GC.
+  bool mark_from_read_barrier_measurements_;
+  Atomic<uint64_t> rb_slow_path_ns_;
+  Atomic<uint64_t> rb_slow_path_count_;
+  Atomic<uint64_t> rb_slow_path_count_gc_;
+  mutable Mutex rb_slow_path_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  Histogram<uint64_t> rb_slow_path_time_histogram_ GUARDED_BY(rb_slow_path_histogram_lock_);
+  uint64_t rb_slow_path_count_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+  uint64_t rb_slow_path_count_gc_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+
   accounting::ReadBarrierTable* rb_table_;
   bool force_evacuate_all_;  // True if all regions are evacuated.
   Atomic<bool> updated_all_immune_objects_;
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 580486aa6..e0b71a7e2 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -181,7 +181,7 @@ class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public Mark
   void RecordFree(const ObjectBytePair& freed);
   // Record a free of large objects.
   void RecordFreeLOS(const ObjectBytePair& freed);
-  void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
+  virtual void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
 
   // Helper functions for querying if objects are marked. These are used for processing references,
   // and will be used for reading system weaks while the GC is running.
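The garbage_collector.h hunk above makes DumpPerformanceInfo() virtual so that ConcurrentCopying can append its read barrier statistics to the common GC performance dump. A minimal sketch of that override pattern follows; the class names here are illustrative only and are not part of ART.

#include <cstdint>
#include <ostream>

class Collector {
 public:
  virtual ~Collector() = default;
  // Base dump: statistics common to all collectors (pauses, freed bytes, ...).
  virtual void DumpPerformanceInfo(std::ostream& os) { os << "Total GC pauses: ...\n"; }
};

class CopyingCollector : public Collector {
 public:
  void DumpPerformanceInfo(std::ostream& os) override {
    Collector::DumpPerformanceInfo(os);                    // common stats first
    os << "Slow path count " << slow_path_count_ << "\n";  // collector-specific stats appended
  }

 private:
  uint64_t slow_path_count_ = 0;
};

Because the dump goes through this virtual entry point, the existing SIGQUIT performance dump picks up the new read barrier histogram without further changes.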
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index a6d62a934..6f4767e39 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -157,6 +157,7 @@ Heap::Heap(size_t initial_size, bool verify_pre_sweeping_rosalloc, bool verify_post_gc_rosalloc, bool gc_stress_mode, + bool measure_gc_performance, bool use_homogeneous_space_compaction_for_oom, uint64_t min_interval_homogeneous_space_compaction_by_oom) : non_moving_space_(nullptr), @@ -599,7 +600,9 @@ Heap::Heap(size_t initial_size, garbage_collectors_.push_back(semi_space_collector_); } if (MayUseCollector(kCollectorTypeCC)) { - concurrent_copying_collector_ = new collector::ConcurrentCopying(this); + concurrent_copying_collector_ = new collector::ConcurrentCopying(this, + "", + measure_gc_performance); garbage_collectors_.push_back(concurrent_copying_collector_); } if (MayUseCollector(kCollectorTypeMC)) { diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 6fb048a5d..bb0d11a1d 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -182,6 +182,7 @@ class Heap { bool verify_pre_sweeping_rosalloc, bool verify_post_gc_rosalloc, bool gc_stress_mode, + bool measure_gc_performance, bool use_homogeneous_space_compaction, uint64_t min_interval_homogeneous_space_compaction_by_oom); diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index 0c3eb3b62..92efa211c 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -220,7 +220,7 @@ inline void ReadBarrier::AssertToSpaceInvariant(GcRootSource* gc_root_source, } inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) { - return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj); + return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj); } inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj, diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 21cd2aa2c..079c07924 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -989,6 +989,7 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { xgc_option.verify_pre_sweeping_rosalloc_, xgc_option.verify_post_gc_rosalloc_, xgc_option.gcstress_, + xgc_option.measure_, runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM), runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs));
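For reference, here is a self-contained sketch of the measurement pattern the patch implements: mutator threads accumulate slow path counts and nanoseconds in relaxed atomics, and the collector folds the per-GC totals into a histogram under a lock at the end of each collection, mirroring rb_slow_path_ns_, rb_slow_path_count_, and FinishPhase() above. This is not ART code; the class and member names are illustrative assumptions.

#include <atomic>
#include <chrono>
#include <cstdint>
#include <map>
#include <mutex>

class ReadBarrierSlowPathStats {
 public:
  // Wraps one slow path invocation: bump the call counter and accumulate elapsed time.
  // Called concurrently by mutator threads, hence the relaxed atomics.
  template <typename Fn>
  auto TimedSlowPath(Fn&& fn) {
    count_.fetch_add(1, std::memory_order_relaxed);
    const auto start = std::chrono::steady_clock::now();
    auto result = fn();
    const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now() - start).count();
    total_ns_.fetch_add(static_cast<uint64_t>(ns), std::memory_order_relaxed);
    return result;
  }

  // Called once at the end of each GC: fold the per-GC totals into the histogram
  // and reset the per-GC counters.
  void FlushToHistogram() {
    std::lock_guard<std::mutex> lock(histogram_lock_);
    const uint64_t gc_ns = total_ns_.exchange(0, std::memory_order_relaxed);
    per_gc_time_histogram_[gc_ns / 1000000]++;  // 1 ms buckets
    total_count_ += count_.exchange(0, std::memory_order_relaxed);
  }

 private:
  std::atomic<uint64_t> total_ns_{0};   // per-GC slow path time
  std::atomic<uint64_t> count_{0};      // per-GC slow path calls
  std::mutex histogram_lock_;
  std::map<uint64_t, uint64_t> per_gc_time_histogram_;  // bucket (ms) -> number of GCs
  uint64_t total_count_ = 0;            // lifetime slow path calls
};

In the patch itself, the counters are ART's Atomic<uint64_t> fields, the histogram is Histogram<uint64_t>, the flush happens in ConcurrentCopying::FinishPhase(), and the results are printed from the DumpPerformanceInfo() override shown above.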