From 42c3c33df8b6eefc4ba532f1981282510f109928 Mon Sep 17 00:00:00 2001
From: Man Cao
Date: Tue, 23 Jun 2015 16:38:25 -0700
Subject: [PATCH] Make allocation tracker use less memory

The allocation tracker no longer keeps recently allocated objects live.
Instead it just keeps their class objects live as strong roots. This
fixes the gc-stress test failure for 098-ddmc.

Also fixes the issue in DisallowNewSystemWeaks() for the allocation
tracker, by making new allocations wait until the GC's sweeping
completes. I did not notice any significant slowdown from this wait.

Bug: 20037135
Change-Id: I6a98188832cf7ee478007e3788e742dc6e18f7b8
---
 runtime/debugger.cc | 6 ++--
 runtime/gc/allocation_record.cc | 69 ++++++++++++++++++++++++++++++++++-------
 runtime/gc/allocation_record.h | 42 ++++++++++++++++++++-----
 runtime/gc/heap-inl.h | 3 +-
 runtime/gc/heap.cc | 29 +++++++++++++++++
 runtime/gc/heap.h | 18 +++++++++--
 runtime/hprof/hprof.cc | 6 ++++
 runtime/runtime.cc | 8 ++---
 test/Android.run-test.mk | 5 +--
 9 files changed, 152 insertions(+), 34 deletions(-)

diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index f04f3563c..de46b3527 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4715,7 +4715,7 @@ void Dbg::DumpRecentAllocations() {
       const gc::AllocRecord* record = it->second;
       LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->GetTid(), record->ByteCount())
-                << PrettyClass(it->first.Read()->GetClass());
+                << PrettyClass(record->GetClass());
 
       for (size_t stack_frame = 0, depth = record->GetDepth(); stack_frame < depth; ++stack_frame) {
         const gc::AllocRecordStackTraceElement& stack_element = record->StackElement(stack_frame);
@@ -4850,7 +4850,7 @@ jbyteArray Dbg::GetRecentAllocations() {
          count > 0 && it != end; count--, it++) {
       const gc::AllocRecord* record = it->second;
       std::string temp;
-      class_names.Add(it->first.Read()->GetClass()->GetDescriptor(&temp));
+      class_names.Add(record->GetClass()->GetDescriptor(&temp));
       for (size_t i = 0, depth = record->GetDepth(); i < depth; i++) {
         ArtMethod* m = record->StackElement(i).GetMethod();
         class_names.Add(m->GetDeclaringClassDescriptor());
@@ -4902,7 +4902,7 @@ jbyteArray Dbg::GetRecentAllocations() {
       const gc::AllocRecord* record = it->second;
       size_t stack_depth = record->GetDepth();
       size_t allocated_object_class_name_index =
-          class_names.IndexOf(it->first.Read()->GetClass()->GetDescriptor(&temp));
+          class_names.IndexOf(record->GetClass()->GetDescriptor(&temp));
       JDWP::Append4BE(bytes, record->ByteCount());
       JDWP::Append2BE(bytes, static_cast<uint16_t>(record->GetTid()));
       JDWP::Append2BE(bytes, allocated_object_class_name_index);
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index ac7de631d..c5b9f65c2 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -94,33 +94,58 @@ void AllocRecordObjectMap::VisitRoots(RootVisitor* visitor) {
   CHECK_LE(recent_record_max_, alloc_record_max_);
   BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(visitor, RootInfo(kRootDebugger));
   size_t count = recent_record_max_;
-  // Only visit the last recent_record_max_ number of objects in entries_.
-  // They need to be retained for DDMS's recent allocation tracking.
-  // TODO: This will cause 098-ddmc test to run out of memory for GC stress test.
-  // There should be an option that do not keep these objects live if allocation tracking is only
-  // for the purpose of an HPROF dump. b/20037135
+  // Only visit the last recent_record_max_ number of allocation records in entries_ and mark the
+  // klass_ fields as strong roots.
   for (auto it = entries_.rbegin(), end = entries_.rend(); count > 0 && it != end; count--, ++it) {
-    buffered_visitor.VisitRoot(it->first);
+    buffered_visitor.VisitRoot(it->second->GetClassGcRoot());
+  }
+}
+
+static inline void SweepClassObject(AllocRecord* record, IsMarkedCallback* callback, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+    EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+  GcRoot<mirror::Class>& klass = record->GetClassGcRoot();
+  // This does not need a read barrier because this is called by GC.
+  mirror::Object* old_object = klass.Read<kWithoutReadBarrier>();
+  mirror::Object* new_object = callback(old_object, arg);
+  if (UNLIKELY(old_object != new_object)) {
+    mirror::Class* new_klass = (UNLIKELY(new_object == nullptr) ? nullptr : new_object->AsClass());
+    klass = GcRoot<mirror::Class>(new_klass);
   }
 }
 
 void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, void* arg) {
   VLOG(heap) << "Start SweepAllocationRecords()";
-  size_t count_deleted = 0, count_moved = 0;
+  size_t count_deleted = 0, count_moved = 0, count = 0;
+  // Only the first (size - recent_record_max_) number of records can be deleted.
+  size_t delete_bound;
+  if (entries_.size() <= recent_record_max_) {
+    delete_bound = 0;
+  } else {
+    delete_bound = entries_.size() - recent_record_max_;
+  }
   for (auto it = entries_.begin(), end = entries_.end(); it != end;) {
+    ++count;
     // This does not need a read barrier because this is called by GC.
     mirror::Object* old_object = it->first.Read<kWithoutReadBarrier>();
     AllocRecord* record = it->second;
     mirror::Object* new_object = callback(old_object, arg);
     if (new_object == nullptr) {
-      delete record;
-      it = entries_.erase(it);
-      ++count_deleted;
+      if (count > delete_bound) {
+        it->first = GcRoot<mirror::Object>(nullptr);
+        SweepClassObject(record, callback, arg);
+        ++it;
+      } else {
+        delete record;
+        it = entries_.erase(it);
+        ++count_deleted;
+      }
     } else {
       if (old_object != new_object) {
         it->first = GcRoot<mirror::Object>(new_object);
         ++count_moved;
       }
+      SweepClassObject(record, callback, arg);
       ++it;
     }
   }
@@ -128,6 +153,20 @@ void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, vo
   VLOG(heap) << "Updated " << count_moved << " allocation records";
 }
 
+void AllocRecordObjectMap::AllowNewAllocationRecords() {
+  allow_new_record_ = true;
+  new_record_condition_.Broadcast(Thread::Current());
+}
+
+void AllocRecordObjectMap::DisallowNewAllocationRecords() {
+  allow_new_record_ = false;
+}
+
+void AllocRecordObjectMap::EnsureNewAllocationRecordsDisallowed() {
+  CHECK(!allow_new_record_);
+}
+
+
 struct AllocRecordStackVisitor : public StackVisitor {
   AllocRecordStackVisitor(Thread* thread, AllocRecordStackTrace* trace_in, size_t max)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -201,7 +240,8 @@ void AllocRecordObjectMap::SetAllocTrackingEnabled(bool enable) {
   }
 }
 
-void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count) {
+void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, mirror::Class* klass,
+                                            size_t byte_count) {
   MutexLock mu(self, *Locks::alloc_tracker_lock_);
   Heap* heap = Runtime::Current()->GetHeap();
   if (!heap->IsAllocTrackingEnabled()) {
@@ -217,6 +257,11 @@ void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, s
     return;
   }
 
+  // Wait for GC's sweeping to complete and allow new records
+  while (UNLIKELY(!records->allow_new_record_)) {
+    records->new_record_condition_.WaitHoldingLocks(self);
+  }
+
   DCHECK_LE(records->Size(), records->alloc_record_max_);
 
   // Get stack trace.
@@ -229,7 +274,7 @@ void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, s
   AllocRecordStackTrace* trace = new AllocRecordStackTrace(records->scratch_trace_);
 
   // Fill in the basics.
-  AllocRecord* record = new AllocRecord(byte_count, trace);
+  AllocRecord* record = new AllocRecord(byte_count, klass, trace);
 
   records->Put(obj, record);
   DCHECK_LE(records->Size(), records->alloc_record_max_);
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index 5214b6b47..f5671537b 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -161,8 +161,8 @@ template <typename T> struct EqAllocRecordTypesPtr {
 class AllocRecord {
  public:
   // All instances of AllocRecord should be managed by an instance of AllocRecordObjectMap.
-  AllocRecord(size_t count, AllocRecordStackTrace* trace)
-      : byte_count_(count), trace_(trace) {}
+  AllocRecord(size_t count, mirror::Class* klass, AllocRecordStackTrace* trace)
+      : byte_count_(count), klass_(klass), trace_(trace) {}
 
   ~AllocRecord() {
     delete trace_;
@@ -184,12 +184,22 @@ class AllocRecord {
     return trace_->GetTid();
   }
 
+  mirror::Class* GetClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return klass_.Read();
+  }
+
+  GcRoot<mirror::Class>& GetClassGcRoot() {
+    return klass_;
+  }
+
   const AllocRecordStackTraceElement& StackElement(size_t index) const {
     return trace_->GetStackElement(index);
   }
 
 private:
   const size_t byte_count_;
+  // The klass_ could be a strong or weak root for GC
+  GcRoot<mirror::Class> klass_;
   // TODO: Currently trace_ is like a std::unique_ptr,
   // but in future with deduplication it could be a std::shared_ptr.
   const AllocRecordStackTrace* const trace_;
@@ -197,14 +207,17 @@ class AllocRecord {
 
 class AllocRecordObjectMap {
  public:
-  // Since the entries contain weak roots, they need a read barrier. Do not directly access
-  // the mirror::Object pointers in it. Use functions that contain read barriers.
-  // No need for "const AllocRecord*" in the list, because all fields of AllocRecord are const.
+  // GcRoot<mirror::Object> pointers in the list are weak roots, and the last recent_record_max_
+  // number of AllocRecord::klass_ pointers are strong roots (and the rest of klass_ pointers are
+  // weak roots). The last recent_record_max_ number of pairs in the list are always kept for DDMS's
+  // recent allocation tracking, but GcRoot<mirror::Object> pointers in these pairs can become null.
+  // Both types of pointers need read barriers, do not directly access them.
   typedef std::list<std::pair<GcRoot<mirror::Object>, AllocRecord*>> EntryList;
 
   // "static" because it is part of double-checked locking. It needs to check a bool first,
   // in order to make sure the AllocRecordObjectMap object is not null.
-  static void RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count)
+  static void RecordAllocation(Thread* self, mirror::Object* obj, mirror::Class* klass,
+                               size_t byte_count)
       LOCKS_EXCLUDED(Locks::alloc_tracker_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -215,7 +228,9 @@ class AllocRecordObjectMap {
         recent_record_max_(kDefaultNumRecentRecords),
         max_stack_depth_(kDefaultAllocStackDepth),
         scratch_trace_(kMaxSupportedStackDepth),
-        alloc_ddm_thread_id_(0) {}
+        alloc_ddm_thread_id_(0),
+        allow_new_record_(true),
+        new_record_condition_("New allocation record condition", *Locks::alloc_tracker_lock_) {}
 
   ~AllocRecordObjectMap();
 
@@ -247,6 +262,16 @@ class AllocRecordObjectMap {
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
+  void DisallowNewAllocationRecords()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
+  void AllowNewAllocationRecords()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
+  void EnsureNewAllocationRecordsDisallowed()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
+
   // TODO: Is there a better way to hide the entries_'s type?
   EntryList::iterator Begin()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -282,6 +307,9 @@ class AllocRecordObjectMap {
   size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_);
   AllocRecordStackTrace scratch_trace_ GUARDED_BY(Locks::alloc_tracker_lock_);
   pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  bool allow_new_record_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  ConditionVariable new_record_condition_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  // see the comment in typedef of EntryList
   EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_);
 
   void SetProperties() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 0ed3b6d9d..2e661601f 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -170,7 +170,8 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas
   }
   if (kInstrumented) {
     if (IsAllocTrackingEnabled()) {
-      AllocRecordObjectMap::RecordAllocation(self, obj, bytes_allocated);
+      // Use obj->GetClass() instead of klass, because PushOnAllocationStack() could move klass
+      AllocRecordObjectMap::RecordAllocation(self, obj, obj->GetClass(), bytes_allocated);
     }
   } else {
     DCHECK(!IsAllocTrackingEnabled());
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index f0ba0bd25..3dafb7dbd 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -3716,6 +3716,35 @@ void Heap::SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const {
   }
 }
 
+void Heap::AllowNewAllocationRecords() const {
+  if (IsAllocTrackingEnabled()) {
+    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+    if (IsAllocTrackingEnabled()) {
+      GetAllocationRecords()->AllowNewAllocationRecords();
+    }
+  }
+}
+
+void Heap::DisallowNewAllocationRecords() const {
+  if (IsAllocTrackingEnabled()) {
+    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+    if (IsAllocTrackingEnabled()) {
+      GetAllocationRecords()->DisallowNewAllocationRecords();
+    }
+  }
+}
+
+void Heap::EnsureNewAllocationRecordsDisallowed() const {
+  if (IsAllocTrackingEnabled()) {
+    // Lock and unlock once to ensure that no threads are still in the
+    // middle of adding new allocation records.
+    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+    if (IsAllocTrackingEnabled()) {
+      GetAllocationRecords()->EnsureNewAllocationRecordsDisallowed();
+    }
+  }
+}
+
 // Based on debug malloc logic from libc/bionic/debug_stacktrace.cpp.
 class StackCrawlState {
  public:
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 54a323588..1c75bd008 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -706,10 +706,24 @@ class Heap {
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
   void VisitAllocationRecords(RootVisitor* visitor) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
 
   void SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
+
+  void DisallowNewAllocationRecords() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
+
+  void AllowNewAllocationRecords() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
+
+  void EnsureNewAllocationRecordsDisallowed() const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
 
 private:
  class ConcurrentGCTask;
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index f32d5a1b8..3a22b162b 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -823,9 +823,14 @@ class Hprof : public SingleRootVisitor {
     CHECK(records != nullptr);
     HprofStackTraceSerialNumber next_trace_sn = kHprofNullStackTrace + 1;
     HprofStackFrameId next_frame_id = 0;
+    size_t count = 0;
 
     for (auto it = records->Begin(), end = records->End(); it != end; ++it) {
       const mirror::Object* obj = it->first.Read();
+      if (obj == nullptr) {
+        continue;
+      }
+      ++count;
       const gc::AllocRecordStackTrace* trace = it->second->GetStackTrace();
 
       // Copy the pair into a real hash map to speed up look up.
@@ -849,6 +854,7 @@ class Hprof : public SingleRootVisitor {
     }
     CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1);
     CHECK_EQ(frames_.size(), next_frame_id);
+    VLOG(heap) << "hprof: found " << count << " objects with allocation stack traces";
   }
 
   // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index d0f01b375..96c15ea0d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1483,17 +1483,14 @@ void Runtime::DisallowNewSystemWeaks() {
   monitor_list_->DisallowNewMonitors();
   intern_table_->DisallowNewInterns();
   java_vm_->DisallowNewWeakGlobals();
-  // TODO: add a similar call for heap.allocation_records_, otherwise some of the newly allocated
-  // objects that are not marked might be swept from the records, making the records incomplete.
-  // It is safe for now since the only effect is that those objects do not have allocation records.
-  // The number of such objects should be small, and current allocation tracker cannot collect
-  // allocation records for all objects anyway.
+  heap_->DisallowNewAllocationRecords();
 }
 
 void Runtime::AllowNewSystemWeaks() {
   monitor_list_->AllowNewMonitors();
   intern_table_->AllowNewInterns();
   java_vm_->AllowNewWeakGlobals();
+  heap_->AllowNewAllocationRecords();
 }
 
 void Runtime::EnsureNewSystemWeaksDisallowed() {
@@ -1502,6 +1499,7 @@ void Runtime::EnsureNewSystemWeaksDisallowed() {
   monitor_list_->EnsureNewMonitorsDisallowed();
   intern_table_->EnsureNewInternsDisallowed();
   java_vm_->EnsureNewWeakGlobalsDisallowed();
+  heap_->EnsureNewAllocationRecordsDisallowed();
 }
 
 void Runtime::SetInstructionSet(InstructionSet instruction_set) {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index ac9656b78..29c3ea901 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -230,10 +230,7 @@ endif
 TEST_ART_BROKEN_NO_RELOCATE_TESTS :=
 
 # Tests that are broken with GC stress.
-# 098-ddmc is broken until the allocation tracker does not mark recently allocated objects as roots.
-# Marking them roots is for consistent behavior with DDMS's getRecentAllocations(). b/20037135
-TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
-  098-ddmc
+TEST_ART_BROKEN_GCSTRESS_RUN_TESTS :=
 
 ifneq (,$(filter gcstress,$(GC_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-- 
2.11.0
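
For illustration, a minimal standalone sketch of the wait-for-sweep gate that RecordAllocation() relies on after this change. It uses std::mutex and std::condition_variable in place of ART's Locks::alloc_tracker_lock_ and ConditionVariable; the RecordGate class and its method names are illustrative stand-ins, not runtime APIs.

#include <condition_variable>
#include <mutex>

// Illustrative stand-in for AllocRecordObjectMap's allow_new_record_ flag and
// new_record_condition_ condition variable.
class RecordGate {
 public:
  // The GC calls this before sweeping system weaks (cf. DisallowNewAllocationRecords).
  void Disallow() {
    std::lock_guard<std::mutex> lock(mutex_);
    allow_new_record_ = false;
  }

  // The GC calls this once sweeping is done (cf. AllowNewAllocationRecords).
  void Allow() {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      allow_new_record_ = true;
    }
    new_record_condition_.notify_all();  // wake every thread blocked in Record()
  }

  // Allocation path (cf. RecordAllocation): block until new records are allowed
  // again, then run the insertion while still holding the lock.
  template <typename Fn>
  void Record(Fn&& insert_record) {
    std::unique_lock<std::mutex> lock(mutex_);
    new_record_condition_.wait(lock, [this] { return allow_new_record_; });
    insert_record();
  }

 private:
  std::mutex mutex_;
  std::condition_variable new_record_condition_;
  bool allow_new_record_ = true;
};

The GC brackets its sweep with Disallow()/Allow(); a thread that tries to record an allocation in between blocks on the condition variable instead of inserting a record the sweep could miss, which is the gap the removed TODO in Runtime::DisallowNewSystemWeaks() described.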
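
Also for illustration, a standalone sketch of the sweep policy SweepAllocationRecords() implements after this change: entries older than the most recent recent_record_max_ may be erased outright when their object dies, while recent entries keep their AllocRecord (the object pointer is nulled and the class pointer is swept as well). The Entry and Record types and the is_marked callback are simplified stand-ins for ART's EntryList, AllocRecord, and IsMarkedCallback.

#include <cstddef>
#include <functional>
#include <list>
#include <utility>

struct Record { void* klass; size_t byte_count; };  // stand-in for AllocRecord
using Entry = std::pair<void*, Record*>;            // (object, record), cf. EntryList

// is_marked returns the (possibly moved) address of a live object, or nullptr if it died.
void SweepRecords(std::list<Entry>& entries, size_t recent_record_max,
                  const std::function<void*(void*)>& is_marked) {
  // Only the first (size - recent_record_max) entries may be deleted.
  const size_t delete_bound =
      entries.size() <= recent_record_max ? 0 : entries.size() - recent_record_max;
  size_t count = 0;
  for (auto it = entries.begin(); it != entries.end();) {
    ++count;
    void* const new_object = is_marked(it->first);
    if (new_object == nullptr && count <= delete_bound) {
      // Old entry whose object died: drop the record entirely.
      delete it->second;
      it = entries.erase(it);
    } else {
      // Recent entry (kept for DDMS) or still-live object: update the object
      // pointer (possibly to nullptr) and sweep the class pointer too.
      it->first = new_object;
      it->second->klass = is_marked(it->second->klass);
      ++it;
    }
  }
}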