From: Man Cao
Date: Thu, 28 May 2015 00:25:30 +0000 (-0700)
Subject: Add allocation stack traces for HPROF dump.
X-Git-Tag: android-x86-7.1-r1~889^2~1044^2
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=8c2ff641294715864013737fdec57cdfd410270c;p=android-x86%2Fart.git

Add allocation stack traces for HPROF dump.

This feature is currently only enabled when DDMS's allocation tracking
is enabled. In the future there should be a way to enable this feature
before an application starts.

Also updates DDMS's recent allocation tracking to use a new backend
data structure that is shared with this feature.

The following system properties control customizable parameters:
dalvik.vm.allocTrackerMax: max number of objects that have allocation
records, default 512K;
dalvik.vm.recentAllocMax: max number of records that are sent to DDMS
when clicking "Get allocation" button, default 64K-1 (limit of the
protocol);
dalvik.vm.allocStackDepth: max number of stack frames in an allocation
record, default 4.

Bug: 20037135
Change-Id: I26ed378a5613678bd3c43e846025f90470a8e059
---

diff --git a/runtime/Android.mk b/runtime/Android.mk
index c1e6e0972..5ed695518 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -45,6 +45,7 @@ LIBART_COMMON_SRC_FILES := \
   dex_file_verifier.cc \
   dex_instruction.cc \
   elf_file.cc \
+  gc/allocation_record.cc \
   gc/allocator/dlmalloc.cc \
   gc/allocator/rosalloc.cc \
   gc/accounting/bitmap.cc \
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 24615e2a6..3c75daf7b 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -29,6 +29,7 @@
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc/allocation_record.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "handle_scope.h"
@@ -61,127 +62,30 @@ namespace art {
 // The key identifying the debugger to update instrumentation.
 static constexpr const char* kDbgInstrumentationKey = "Debugger";
 
-static const size_t kMaxAllocRecordStackDepth = 16;  // Max 255.
-static const size_t kDefaultNumAllocRecords = 64*1024;  // Must be a power of 2. 2BE can hold 64k-1.
-
-// Limit alloc_record_count to the 2BE value that is the limit of the current protocol.
+// Limit alloc_record_count to the 2BE value (64k-1) that is the limit of the current protocol.
 static uint16_t CappedAllocRecordCount(size_t alloc_record_count) {
-  if (alloc_record_count > 0xffff) {
-    return 0xffff;
-  }
-  return alloc_record_count;
-}
-
-class AllocRecordStackTraceElement {
- public:
-  AllocRecordStackTraceElement() : method_(nullptr), dex_pc_(0) {
-  }
-
-  int32_t LineNumber() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ArtMethod* method = Method();
-    DCHECK(method != nullptr);
-    return method->GetLineNumFromDexPC(DexPc());
-  }
-
-  ArtMethod* Method() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    return soa.DecodeMethod(method_);
-  }
-
-  void SetMethod(ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    method_ = soa.EncodeMethod(m);
-  }
-
-  uint32_t DexPc() const {
-    return dex_pc_;
-  }
-
-  void SetDexPc(uint32_t pc) {
-    dex_pc_ = pc;
-  }
-
- private:
-  jmethodID method_;
-  uint32_t dex_pc_;
-};
-
-jobject Dbg::TypeCache::Add(mirror::Class* t) {
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  JNIEnv* const env = soa.Env();
-  ScopedLocalRef<jobject> local_ref(soa.Env(), soa.AddLocalReference<jobject>(t));
-  const int32_t hash_code = soa.Decode<mirror::Class*>(local_ref.get())->IdentityHashCode();
-  auto range = objects_.equal_range(hash_code);
-  for (auto it = range.first; it != range.second; ++it) {
-    if (soa.Decode<mirror::Class*>(it->second) == soa.Decode<mirror::Class*>(local_ref.get())) {
-      // Found a matching weak global, return it.
-      return it->second;
+  size_t cap = 0xffff;
+#ifdef HAVE_ANDROID_OS
+  // Check whether there's a system property overriding the number of recent records.
+  const char* propertyName = "dalvik.vm.recentAllocMax";
+  char recentAllocMaxString[PROPERTY_VALUE_MAX];
+  if (property_get(propertyName, recentAllocMaxString, "") > 0) {
+    char* end;
+    size_t value = strtoul(recentAllocMaxString, &end, 10);
+    if (*end != '\0') {
+      LOG(ERROR) << "Ignoring " << propertyName << " '" << recentAllocMaxString
+                 << "' --- invalid";
+    } else {
+      cap = value;
     }
   }
-  const jobject weak_global = env->NewWeakGlobalRef(local_ref.get());
-  objects_.insert(std::make_pair(hash_code, weak_global));
-  return weak_global;
-}
-
-void Dbg::TypeCache::Clear() {
-  JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-  Thread* self = Thread::Current();
-  for (const auto& p : objects_) {
-    vm->DeleteWeakGlobalRef(self, p.second);
+#endif
+  if (alloc_record_count > cap) {
+    return cap;
   }
-  objects_.clear();
+  return alloc_record_count;
 }
 
-class AllocRecord {
- public:
-  AllocRecord() : type_(nullptr), byte_count_(0), thin_lock_id_(0) {}
-
-  mirror::Class* Type() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return down_cast<mirror::Class*>(Thread::Current()->DecodeJObject(type_));
-  }
-
-  void SetType(mirror::Class* t) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_,
-                                                       Locks::alloc_tracker_lock_) {
-    type_ = Dbg::type_cache_.Add(t);
-  }
-
-  size_t GetDepth() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    size_t depth = 0;
-    while (depth < kMaxAllocRecordStackDepth && stack_[depth].Method() != nullptr) {
-      ++depth;
-    }
-    return depth;
-  }
-
-  size_t ByteCount() const {
-    return byte_count_;
-  }
-
-  void SetByteCount(size_t count) {
-    byte_count_ = count;
-  }
-
-  uint16_t ThinLockId() const {
-    return thin_lock_id_;
-  }
-
-  void SetThinLockId(uint16_t id) {
-    thin_lock_id_ = id;
-  }
-
-  AllocRecordStackTraceElement* StackElement(size_t index) {
-    DCHECK_LT(index, kMaxAllocRecordStackDepth);
-    return &stack_[index];
-  }
-
- private:
-  jobject type_;  // This is a weak global.
- size_t byte_count_; - uint16_t thin_lock_id_; - // Unused entries have null method. - AllocRecordStackTraceElement stack_[kMaxAllocRecordStackDepth]; -}; - class Breakpoint { public: Breakpoint(ArtMethod* method, uint32_t dex_pc, @@ -382,13 +286,6 @@ bool Dbg::gDebuggerActive = false; bool Dbg::gDisposed = false; ObjectRegistry* Dbg::gRegistry = nullptr; -// Recent allocation tracking. -AllocRecord* Dbg::recent_allocation_records_ = nullptr; // TODO: CircularBuffer -size_t Dbg::alloc_record_max_ = 0; -size_t Dbg::alloc_record_head_ = 0; -size_t Dbg::alloc_record_count_ = 0; -Dbg::TypeCache Dbg::type_cache_; - // Deoptimization support. std::vector Dbg::deoptimization_requests_; size_t Dbg::full_deoptimization_event_count_ = 0; @@ -4665,177 +4562,41 @@ void Dbg::DdmSendHeapSegments(bool native) { Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHEN") : CHUNK_TYPE("HPEN"), sizeof(heap_id), heap_id); } -static size_t GetAllocTrackerMax() { -#ifdef HAVE_ANDROID_OS - // Check whether there's a system property overriding the number of records. - const char* propertyName = "dalvik.vm.allocTrackerMax"; - char allocRecordMaxString[PROPERTY_VALUE_MAX]; - if (property_get(propertyName, allocRecordMaxString, "") > 0) { - char* end; - size_t value = strtoul(allocRecordMaxString, &end, 10); - if (*end != '\0') { - LOG(ERROR) << "Ignoring " << propertyName << " '" << allocRecordMaxString - << "' --- invalid"; - return kDefaultNumAllocRecords; - } - if (!IsPowerOfTwo(value)) { - LOG(ERROR) << "Ignoring " << propertyName << " '" << allocRecordMaxString - << "' --- not power of two"; - return kDefaultNumAllocRecords; - } - return value; - } -#endif - return kDefaultNumAllocRecords; -} - void Dbg::SetAllocTrackingEnabled(bool enable) { - Thread* self = Thread::Current(); - if (enable) { - { - MutexLock mu(self, *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ != nullptr) { - return; // Already enabled, bail. - } - alloc_record_max_ = GetAllocTrackerMax(); - LOG(INFO) << "Enabling alloc tracker (" << alloc_record_max_ << " entries of " - << kMaxAllocRecordStackDepth << " frames, taking " - << PrettySize(sizeof(AllocRecord) * alloc_record_max_) << ")"; - DCHECK_EQ(alloc_record_head_, 0U); - DCHECK_EQ(alloc_record_count_, 0U); - recent_allocation_records_ = new AllocRecord[alloc_record_max_]; - CHECK(recent_allocation_records_ != nullptr); - } - Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(); - } else { - { - ScopedObjectAccess soa(self); // For type_cache_.Clear(); - MutexLock mu(self, *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ == nullptr) { - return; // Already disabled, bail. - } - LOG(INFO) << "Disabling alloc tracker"; - delete[] recent_allocation_records_; - recent_allocation_records_ = nullptr; - alloc_record_head_ = 0; - alloc_record_count_ = 0; - type_cache_.Clear(); - } - // If an allocation comes in before we uninstrument, we will safely drop it on the floor. - Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints(); - } -} - -struct AllocRecordStackVisitor : public StackVisitor { - AllocRecordStackVisitor(Thread* thread, AllocRecord* record_in) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), - record(record_in), - depth(0) {} - - // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses - // annotalysis. 
- bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS { - if (depth >= kMaxAllocRecordStackDepth) { - return false; - } - ArtMethod* m = GetMethod(); - if (!m->IsRuntimeMethod()) { - record->StackElement(depth)->SetMethod(m); - record->StackElement(depth)->SetDexPc(GetDexPc()); - ++depth; - } - return true; - } - - ~AllocRecordStackVisitor() { - // Clear out any unused stack trace elements. - for (; depth < kMaxAllocRecordStackDepth; ++depth) { - record->StackElement(depth)->SetMethod(nullptr); - record->StackElement(depth)->SetDexPc(0); - } - } - - AllocRecord* record; - size_t depth; -}; - -void Dbg::RecordAllocation(Thread* self, mirror::Class* type, size_t byte_count) { - MutexLock mu(self, *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ == nullptr) { - // In the process of shutting down recording, bail. - return; - } - - // Advance and clip. - if (++alloc_record_head_ == alloc_record_max_) { - alloc_record_head_ = 0; - } - - // Fill in the basics. - AllocRecord* record = &recent_allocation_records_[alloc_record_head_]; - record->SetType(type); - record->SetByteCount(byte_count); - record->SetThinLockId(self->GetThreadId()); - - // Fill in the stack trace. - AllocRecordStackVisitor visitor(self, record); - visitor.WalkStack(); - - if (alloc_record_count_ < alloc_record_max_) { - ++alloc_record_count_; - } -} - -// Returns the index of the head element. -// -// We point at the most-recently-written record, so if alloc_record_count_ is 1 -// we want to use the current element. Take "head+1" and subtract count -// from it. -// -// We need to handle underflow in our circular buffer, so we add -// alloc_record_max_ and then mask it back down. -size_t Dbg::HeadIndex() { - return (Dbg::alloc_record_head_ + 1 + Dbg::alloc_record_max_ - Dbg::alloc_record_count_) & - (Dbg::alloc_record_max_ - 1); + gc::AllocRecordObjectMap::SetAllocTrackingEnabled(enable); } void Dbg::DumpRecentAllocations() { ScopedObjectAccess soa(Thread::Current()); MutexLock mu(soa.Self(), *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ == nullptr) { + if (!Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) { LOG(INFO) << "Not recording tracked allocations"; return; } + gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); + CHECK(records != nullptr); - // "i" is the head of the list. We want to start at the end of the - // list and move forward to the tail. 
- size_t i = HeadIndex(); - const uint16_t capped_count = CappedAllocRecordCount(Dbg::alloc_record_count_); + const uint16_t capped_count = CappedAllocRecordCount(records->Size()); uint16_t count = capped_count; - LOG(INFO) << "Tracked allocations, (head=" << alloc_record_head_ << " count=" << count << ")"; - while (count--) { - AllocRecord* record = &recent_allocation_records_[i]; + LOG(INFO) << "Tracked allocations, (count=" << count << ")"; + for (auto it = records->RBegin(), end = records->REnd(); + count > 0 && it != end; count--, it++) { + const gc::AllocRecord* record = it->second; - LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->ThinLockId(), record->ByteCount()) - << PrettyClass(record->Type()); + LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->GetTid(), record->ByteCount()) + << PrettyClass(it->first.Read()->GetClass()); - for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) { - AllocRecordStackTraceElement* stack_element = record->StackElement(stack_frame); - ArtMethod* m = stack_element->Method(); - if (m == nullptr) { - break; - } - LOG(INFO) << " " << PrettyMethod(m) << " line " << stack_element->LineNumber(); + for (size_t stack_frame = 0, depth = record->GetDepth(); stack_frame < depth; ++stack_frame) { + const gc::AllocRecordStackTraceElement& stack_element = record->StackElement(stack_frame); + ArtMethod* m = stack_element.GetMethod(); + LOG(INFO) << " " << PrettyMethod(m) << " line " << stack_element.ComputeLineNumber(); } // pause periodically to help logcat catch up if ((count % 5) == 0) { usleep(40000); } - - i = (i + 1) & (alloc_record_max_ - 1); } } @@ -4937,6 +4698,15 @@ jbyteArray Dbg::GetRecentAllocations() { std::vector bytes; { MutexLock mu(self, *Locks::alloc_tracker_lock_); + gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); + // In case this method is called when allocation tracker is disabled, + // we should still send some data back. + gc::AllocRecordObjectMap dummy; + if (records == nullptr) { + CHECK(!Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()); + records = &dummy; + } + // // Part 1: generate string tables. 
// @@ -4944,26 +4714,23 @@ jbyteArray Dbg::GetRecentAllocations() { StringTable method_names; StringTable filenames; - const uint16_t capped_count = CappedAllocRecordCount(Dbg::alloc_record_count_); + const uint16_t capped_count = CappedAllocRecordCount(records->Size()); uint16_t count = capped_count; - size_t idx = HeadIndex(); - while (count--) { - AllocRecord* record = &recent_allocation_records_[idx]; + for (auto it = records->RBegin(), end = records->REnd(); + count > 0 && it != end; count--, it++) { + const gc::AllocRecord* record = it->second; std::string temp; - class_names.Add(record->Type()->GetDescriptor(&temp)); - for (size_t i = 0; i < kMaxAllocRecordStackDepth; i++) { - ArtMethod* m = record->StackElement(i)->Method(); - if (m != nullptr) { - class_names.Add(m->GetDeclaringClassDescriptor()); - method_names.Add(m->GetName()); - filenames.Add(GetMethodSourceFile(m)); - } + class_names.Add(it->first.Read()->GetClass()->GetDescriptor(&temp)); + for (size_t i = 0, depth = record->GetDepth(); i < depth; i++) { + ArtMethod* m = record->StackElement(i).GetMethod(); + class_names.Add(m->GetDeclaringClassDescriptor()); + method_names.Add(m->GetName()); + filenames.Add(GetMethodSourceFile(m)); } - - idx = (idx + 1) & (alloc_record_max_ - 1); } - LOG(INFO) << "allocation records: " << capped_count; + LOG(INFO) << "recent allocation records: " << capped_count; + LOG(INFO) << "allocation records all objects: " << records->Size(); // // Part 2: Generate the output and store it in the buffer. @@ -4991,20 +4758,23 @@ jbyteArray Dbg::GetRecentAllocations() { JDWP::Append2BE(bytes, method_names.Size()); JDWP::Append2BE(bytes, filenames.Size()); - idx = HeadIndex(); std::string temp; - for (count = capped_count; count != 0; --count) { + count = capped_count; + // The last "count" number of allocation records in "records" are the most recent "count" number + // of allocations. Reverse iterate to get them. The most recent allocation is sent first. 
+    for (auto it = records->RBegin(), end = records->REnd();
+         count > 0 && it != end; count--, it++) {
       // For each entry:
       // (4b) total allocation size
       // (2b) thread id
       // (2b) allocated object's class name index
       // (1b) stack depth
-      AllocRecord* record = &recent_allocation_records_[idx];
+      const gc::AllocRecord* record = it->second;
       size_t stack_depth = record->GetDepth();
       size_t allocated_object_class_name_index =
-          class_names.IndexOf(record->Type()->GetDescriptor(&temp));
+          class_names.IndexOf(it->first.Read()->GetClass()->GetDescriptor(&temp));
       JDWP::Append4BE(bytes, record->ByteCount());
-      JDWP::Append2BE(bytes, record->ThinLockId());
+      JDWP::Append2BE(bytes, static_cast<uint16_t>(record->GetTid()));
       JDWP::Append2BE(bytes, allocated_object_class_name_index);
       JDWP::Append1BE(bytes, stack_depth);
@@ -5014,16 +4784,15 @@ jbyteArray Dbg::GetRecentAllocations() {
         // (2b) method name
         // (2b) method source file
         // (2b) line number, clipped to 32767; -2 if native; -1 if no source
-        ArtMethod* m = record->StackElement(stack_frame)->Method();
+        ArtMethod* m = record->StackElement(stack_frame).GetMethod();
         size_t class_name_index = class_names.IndexOf(m->GetDeclaringClassDescriptor());
         size_t method_name_index = method_names.IndexOf(m->GetName());
         size_t file_name_index = filenames.IndexOf(GetMethodSourceFile(m));
         JDWP::Append2BE(bytes, class_name_index);
         JDWP::Append2BE(bytes, method_name_index);
         JDWP::Append2BE(bytes, file_name_index);
-        JDWP::Append2BE(bytes, record->StackElement(stack_frame)->LineNumber());
+        JDWP::Append2BE(bytes, record->StackElement(stack_frame).ComputeLineNumber());
       }
-      idx = (idx + 1) & (alloc_record_max_ - 1);
     }
 
     // (xb) class name strings
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 7c586a4ff..c23b1b88f 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -23,6 +23,7 @@
 #include
+#include
 #include
 #include
 #include
@@ -32,7 +33,6 @@
 #include "jdwp/jdwp.h"
 #include "jni.h"
 #include "jvalue.h"
-#include "object_callbacks.h"
 #include "thread_state.h"
 
 namespace art {
@@ -41,7 +41,6 @@ class Class;
 class Object;
 class Throwable;
 }  // namespace mirror
-class AllocRecord;
 class ArtField;
 class ArtMethod;
 class ObjectRegistry;
@@ -202,19 +201,6 @@ std::ostream& operator<<(std::ostream& os, const DeoptimizationRequest::Kind& rh
 class Dbg {
  public:
-  class TypeCache {
-   public:
-    // Returns a weak global for the input type. Deduplicates.
-    jobject Add(mirror::Class* t) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_,
-                                                        Locks::alloc_tracker_lock_);
-    // Clears the type cache and deletes all the weak global refs.
-    void Clear() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_,
-                                       Locks::alloc_tracker_lock_);
-
-   private:
-    std::multimap<int32_t, jobject> objects_;
-  };
-
   static void SetJdwpAllowed(bool allowed);
 
   static void StartJdwp();
@@ -655,19 +641,12 @@ class Dbg {
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
-   * Recent allocation tracking support.
+   * Allocation tracking support.
*/ - static void RecordAllocation(Thread* self, mirror::Class* type, size_t byte_count) - LOCKS_EXCLUDED(Locks::alloc_tracker_lock_) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static void SetAllocTrackingEnabled(bool enabled) LOCKS_EXCLUDED(Locks::alloc_tracker_lock_); - static bool IsAllocTrackingEnabled() { - return recent_allocation_records_ != nullptr; - } static jbyteArray GetRecentAllocations() LOCKS_EXCLUDED(Locks::alloc_tracker_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static size_t HeadIndex() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); static void DumpRecentAllocations() LOCKS_EXCLUDED(Locks::alloc_tracker_lock_); enum HpifWhen { @@ -755,11 +734,6 @@ class Dbg { static bool IsForcedInterpreterNeededForUpcallImpl(Thread* thread, ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(Locks::alloc_tracker_lock_); - static size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_); - static size_t alloc_record_head_ GUARDED_BY(Locks::alloc_tracker_lock_); - static size_t alloc_record_count_ GUARDED_BY(Locks::alloc_tracker_lock_); - // Indicates whether the debugger is making requests. static bool gDebuggerActive; @@ -784,9 +758,6 @@ class Dbg { static size_t* GetReferenceCounterForEvent(uint32_t instrumentation_event); - // Weak global type cache, TODO improve this. - static TypeCache type_cache_ GUARDED_BY(Locks::alloc_tracker_lock_); - // Instrumentation event reference counters. // TODO we could use an array instead of having all these dedicated counters. Instrumentation // events are bits of a mask so we could convert them to array index. @@ -798,7 +769,6 @@ class Dbg { static size_t exception_catch_event_ref_count_ GUARDED_BY(Locks::deoptimization_lock_); static uint32_t instrumentation_events_ GUARDED_BY(Locks::mutator_lock_); - friend class AllocRecord; // For type_cache_ with proper annotalysis. DISALLOW_COPY_AND_ASSIGN(Dbg); }; diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc new file mode 100644 index 000000000..a38536342 --- /dev/null +++ b/runtime/gc/allocation_record.cc @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "allocation_record.h" + +#include "art_method-inl.h" +#include "base/stl_util.h" +#include "stack.h" + +#ifdef HAVE_ANDROID_OS +#include "cutils/properties.h" +#endif + +namespace art { +namespace gc { + +int32_t AllocRecordStackTraceElement::ComputeLineNumber() const { + DCHECK(method_ != nullptr); + return method_->GetLineNumFromDexPC(dex_pc_); +} + +void AllocRecordObjectMap::SetProperties() { +#ifdef HAVE_ANDROID_OS + // Check whether there's a system property overriding the max number of records. 
+  const char* propertyName = "dalvik.vm.allocTrackerMax";
+  char allocMaxString[PROPERTY_VALUE_MAX];
+  if (property_get(propertyName, allocMaxString, "") > 0) {
+    char* end;
+    size_t value = strtoul(allocMaxString, &end, 10);
+    if (*end != '\0') {
+      LOG(ERROR) << "Ignoring " << propertyName << " '" << allocMaxString
+                 << "' --- invalid";
+    } else {
+      alloc_record_max_ = value;
+    }
+  }
+  // Check whether there's a system property overriding the max depth of stack trace.
+  propertyName = "dalvik.vm.allocStackDepth";
+  char stackDepthString[PROPERTY_VALUE_MAX];
+  if (property_get(propertyName, stackDepthString, "") > 0) {
+    char* end;
+    size_t value = strtoul(stackDepthString, &end, 10);
+    if (*end != '\0') {
+      LOG(ERROR) << "Ignoring " << propertyName << " '" << stackDepthString
+                 << "' --- invalid";
+    } else {
+      max_stack_depth_ = value;
+    }
+  }
+#endif
+}
+
+AllocRecordObjectMap::~AllocRecordObjectMap() {
+  STLDeleteValues(&entries_);
+}
+
+void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, void* arg) {
+  VLOG(heap) << "Start SweepAllocationRecords()";
+  size_t count_deleted = 0, count_moved = 0;
+  for (auto it = entries_.begin(), end = entries_.end(); it != end;) {
+    // This does not need a read barrier because this is called by GC.
+    mirror::Object* old_object = it->first.Read();
+    AllocRecord* record = it->second;
+    mirror::Object* new_object = callback(old_object, arg);
+    if (new_object == nullptr) {
+      delete record;
+      it = entries_.erase(it);
+      ++count_deleted;
+    } else {
+      if (old_object != new_object) {
+        it->first = GcRoot<mirror::Object>(new_object);
+        ++count_moved;
+      }
+      ++it;
+    }
+  }
+  VLOG(heap) << "Deleted " << count_deleted << " allocation records";
+  VLOG(heap) << "Updated " << count_moved << " allocation records";
+}
+
+struct AllocRecordStackVisitor : public StackVisitor {
+  AllocRecordStackVisitor(Thread* thread, AllocRecordStackTrace* trace_in, size_t max)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        trace(trace_in),
+        depth(0),
+        max_depth(max) {}
+
+  // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
+  // annotalysis.
+  bool VisitFrame() OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
+    if (depth >= max_depth) {
+      return false;
+    }
+    ArtMethod* m = GetMethod();
+    if (!m->IsRuntimeMethod()) {
+      trace->SetStackElementAt(depth, m, GetDexPc());
+      ++depth;
+    }
+    return true;
+  }
+
+  ~AllocRecordStackVisitor() {
+    trace->SetDepth(depth);
+  }
+
+  AllocRecordStackTrace* trace;
+  size_t depth;
+  const size_t max_depth;
+};
+
+void AllocRecordObjectMap::SetAllocTrackingEnabled(bool enable) {
+  Thread* self = Thread::Current();
+  Heap* heap = Runtime::Current()->GetHeap();
+  if (enable) {
+    {
+      MutexLock mu(self, *Locks::alloc_tracker_lock_);
+      if (heap->IsAllocTrackingEnabled()) {
+        return;  // Already enabled, bail.
+      }
+      AllocRecordObjectMap* records = new AllocRecordObjectMap();
+      CHECK(records != nullptr);
+      records->SetProperties();
+      std::string self_name;
+      self->GetThreadName(self_name);
+      if (self_name == "JDWP") {
+        records->alloc_ddm_thread_id_ = self->GetTid();
+      }
+      size_t sz = sizeof(AllocRecordStackTraceElement) * records->max_stack_depth_ +
+                  sizeof(AllocRecord) + sizeof(AllocRecordStackTrace);
+      LOG(INFO) << "Enabling alloc tracker (" << records->alloc_record_max_ << " entries of "
+                << records->max_stack_depth_ << " frames, taking up to "
+                << PrettySize(sz * records->alloc_record_max_) << ")";
+      heap->SetAllocationRecords(records);
+      heap->SetAllocTrackingEnabled(true);
+    }
+    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
+  } else {
+    {
+      MutexLock mu(self, *Locks::alloc_tracker_lock_);
+      if (!heap->IsAllocTrackingEnabled()) {
+        return;  // Already disabled, bail.
+      }
+      heap->SetAllocTrackingEnabled(false);
+      LOG(INFO) << "Disabling alloc tracker";
+      heap->SetAllocationRecords(nullptr);
+    }
+    // If an allocation comes in before we uninstrument, we will safely drop it on the floor.
+    Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints();
+  }
+}
+
+void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count) {
+  MutexLock mu(self, *Locks::alloc_tracker_lock_);
+  Heap* heap = Runtime::Current()->GetHeap();
+  if (!heap->IsAllocTrackingEnabled()) {
+    // In the process of shutting down recording, bail.
+    return;
+  }
+
+  AllocRecordObjectMap* records = heap->GetAllocationRecords();
+  DCHECK(records != nullptr);
+
+  // Do not record allocations from the DDM thread.
+  if (records->alloc_ddm_thread_id_ == self->GetTid()) {
+    return;
+  }
+
+  DCHECK_LE(records->Size(), records->alloc_record_max_);
+
+  // Remove oldest record.
+  if (records->Size() == records->alloc_record_max_) {
+    records->RemoveOldest();
+  }
+
+  // Get stack trace.
+  const size_t max_depth = records->max_stack_depth_;
+  AllocRecordStackTrace* trace = new AllocRecordStackTrace(self->GetTid(), max_depth);
+  // Add a scope so that "visitor" is destroyed promptly, since its destructor sets trace->depth_.
+  {
+    AllocRecordStackVisitor visitor(self, trace, max_depth);
+    visitor.WalkStack();
+  }
+
+  // Fill in the basics.
+  AllocRecord* record = new AllocRecord(byte_count, trace);
+
+  records->Put(obj, record);
+  DCHECK_LE(records->Size(), records->alloc_record_max_);
+}
+
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
new file mode 100644
index 000000000..45b3406ce
--- /dev/null
+++ b/runtime/gc/allocation_record.h
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ALLOCATION_RECORD_H_
+#define ART_RUNTIME_GC_ALLOCATION_RECORD_H_
+
+#include <list>
+
+#include "base/mutex.h"
+#include "object_callbacks.h"
+#include "gc_root.h"
+
+namespace art {
+
+class ArtMethod;
+class Thread;
+
+namespace mirror {
+  class Class;
+  class Object;
+}
+
+namespace gc {
+
+class AllocRecordStackTraceElement {
+ public:
+  AllocRecordStackTraceElement() : method_(nullptr), dex_pc_(0) {}
+
+  int32_t ComputeLineNumber() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ArtMethod* GetMethod() const {
+    return method_;
+  }
+
+  void SetMethod(ArtMethod* m) {
+    method_ = m;
+  }
+
+  uint32_t GetDexPc() const {
+    return dex_pc_;
+  }
+
+  void SetDexPc(uint32_t pc) {
+    dex_pc_ = pc;
+  }
+
+  bool operator==(const AllocRecordStackTraceElement& other) const {
+    if (this == &other) return true;
+    return method_ == other.method_ && dex_pc_ == other.dex_pc_;
+  }
+
+ private:
+  ArtMethod* method_;
+  uint32_t dex_pc_;
+};
+
+class AllocRecordStackTrace {
+ public:
+  static constexpr size_t kHashMultiplier = 17;
+
+  AllocRecordStackTrace(pid_t tid, size_t max_depth)
+      : tid_(tid), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {}
+
+  ~AllocRecordStackTrace() {
+    delete[] stack_;
+  }
+
+  pid_t GetTid() const {
+    return tid_;
+  }
+
+  size_t GetDepth() const {
+    return depth_;
+  }
+
+  void SetDepth(size_t depth) {
+    depth_ = depth;
+  }
+
+  const AllocRecordStackTraceElement& GetStackElement(size_t index) const {
+    DCHECK_LT(index, depth_);
+    return stack_[index];
+  }
+
+  void SetStackElementAt(size_t index, ArtMethod* m, uint32_t dex_pc) {
+    stack_[index].SetMethod(m);
+    stack_[index].SetDexPc(dex_pc);
+  }
+
+  bool operator==(const AllocRecordStackTrace& other) const {
+    if (this == &other) return true;
+    if (depth_ != other.depth_) return false;
+    for (size_t i = 0; i < depth_; ++i) {
+      if (!(stack_[i] == other.stack_[i])) return false;
+    }
+    return true;
+  }
+
+ private:
+  const pid_t tid_;
+  size_t depth_;
+  AllocRecordStackTraceElement* const stack_;
+};
+
+struct HashAllocRecordTypes {
+  size_t operator()(const AllocRecordStackTraceElement& r) const {
+    return std::hash<void*>()(reinterpret_cast<void*>(r.GetMethod())) *
+        AllocRecordStackTrace::kHashMultiplier + std::hash<uint32_t>()(r.GetDexPc());
+  }
+
+  size_t operator()(const AllocRecordStackTrace& r) const {
+    size_t depth = r.GetDepth();
+    size_t result = r.GetTid() * AllocRecordStackTrace::kHashMultiplier + depth;
+    for (size_t i = 0; i < depth; ++i) {
+      result = result * AllocRecordStackTrace::kHashMultiplier + (*this)(r.GetStackElement(i));
+    }
+    return result;
+  }
+};
+
+template <typename T> struct HashAllocRecordTypesPtr {
+  size_t operator()(const T* r) const {
+    if (r == nullptr) return 0;
+    return HashAllocRecordTypes()(*r);
+  }
+};
+
+template <typename T> struct EqAllocRecordTypesPtr {
+  bool operator()(const T* r1, const T* r2) const {
+    if (r1 == r2) return true;
+    if (r1 == nullptr || r2 == nullptr) return false;
+    return *r1 == *r2;
+  }
+};
+
+class AllocRecord {
+ public:
+  // All instances of AllocRecord should be managed by an instance of AllocRecordObjectMap.
+  AllocRecord(size_t count, AllocRecordStackTrace* trace)
+      : byte_count_(count), trace_(trace) {}
+
+  ~AllocRecord() {
+    delete trace_;
+  }
+
+  size_t GetDepth() const {
+    return trace_->GetDepth();
+  }
+
+  const AllocRecordStackTrace* GetStackTrace() const {
+    return trace_;
+  }
+
+  size_t ByteCount() const {
+    return byte_count_;
+  }
+
+  pid_t GetTid() const {
+    return trace_->GetTid();
+  }
+
+  const AllocRecordStackTraceElement& StackElement(size_t index) const {
+    return trace_->GetStackElement(index);
+  }
+
+ private:
+  const size_t byte_count_;
+  // TODO: Currently trace_ is like a std::unique_ptr,
+  // but in future with deduplication it could be a std::shared_ptr.
+  const AllocRecordStackTrace* const trace_;
+};
+
+class AllocRecordObjectMap {
+ public:
+  // Since the entries contain weak roots, they need a read barrier. Do not directly access
+  // the mirror::Object pointers in it. Use functions that contain read barriers.
+  // No need for "const AllocRecord*" in the list, because all fields of AllocRecord are const.
+  typedef std::list<std::pair<GcRoot<mirror::Object>, AllocRecord*>> EntryList;
+
+  // "static" because it is part of double-checked locking. It needs to check a bool first,
+  // in order to make sure the AllocRecordObjectMap object is not null.
+  static void RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count)
+      LOCKS_EXCLUDED(Locks::alloc_tracker_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static void SetAllocTrackingEnabled(bool enabled) LOCKS_EXCLUDED(Locks::alloc_tracker_lock_);
+
+  AllocRecordObjectMap() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_)
+      : alloc_record_max_(kDefaultNumAllocRecords),
+        max_stack_depth_(kDefaultAllocStackDepth),
+        alloc_ddm_thread_id_(0) {}
+
+  ~AllocRecordObjectMap();
+
+  void Put(mirror::Object* obj, AllocRecord* record)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    entries_.emplace_back(GcRoot<mirror::Object>(obj), record);
+  }
+
+  size_t Size() const SHARED_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    return entries_.size();
+  }
+
+  void SweepAllocationRecords(IsMarkedCallback* callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
+
+  void RemoveOldest()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    DCHECK(!entries_.empty());
+    delete entries_.front().second;
+    entries_.pop_front();
+  }
+
+  // TODO: Is there a better way to hide the entries_'s type?
+  EntryList::iterator Begin()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    return entries_.begin();
+  }
+
+  EntryList::iterator End()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    return entries_.end();
+  }
+
+  EntryList::reverse_iterator RBegin()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    return entries_.rbegin();
+  }
+
+  EntryList::reverse_iterator REnd()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    return entries_.rend();
+  }
+
+ private:
+  static constexpr size_t kDefaultNumAllocRecords = 512 * 1024;
+  static constexpr size_t kDefaultAllocStackDepth = 4;
+  size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  // The implementation always allocates max_stack_depth_ number of frames for each stack trace.
+  // As long as the max depth is not very large, this is not a waste of memory, since most stack
+  // traces will fill all max_stack_depth_ frames anyway.
+  size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_);
+
+  void SetProperties() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
+};
+
+}  // namespace gc
+}  // namespace art
+#endif  // ART_RUNTIME_GC_ALLOCATION_RECORD_H_
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 2d5433032..ee4568ece 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -22,6 +22,7 @@
 #include "base/time_utils.h"
 #include "debugger.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc/allocation_record.h"
 #include "gc/collector/semi_space.h"
 #include "gc/space/bump_pointer_space-inl.h"
 #include "gc/space/dlmalloc_space-inl.h"
@@ -168,11 +169,11 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas
     PushOnAllocationStack(self, &obj);
   }
   if (kInstrumented) {
-    if (Dbg::IsAllocTrackingEnabled()) {
-      Dbg::RecordAllocation(self, klass, bytes_allocated);
+    if (IsAllocTrackingEnabled()) {
+      AllocRecordObjectMap::RecordAllocation(self, obj, bytes_allocated);
     }
   } else {
-    DCHECK(!Dbg::IsAllocTrackingEnabled());
+    DCHECK(!IsAllocTrackingEnabled());
   }
   // IsConcurrentGc() isn't known at compile time so we can optimize by not checking it for
   // the BumpPointer or TLAB allocators. This is nice since it allows the entire if statement to be
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index aeab7d80b..14fa74072 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -209,7 +209,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max
       blocking_gc_count_last_window_(0U),
       gc_count_rate_histogram_("gc count rate histogram", 1U, kGcCountRateMaxBucketCount),
       blocking_gc_count_rate_histogram_("blocking gc count rate histogram", 1U,
-                                        kGcCountRateMaxBucketCount) {
+                                        kGcCountRateMaxBucketCount),
+      alloc_tracking_enabled_(false) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
@@ -1043,6 +1044,7 @@ Heap::~Heap() {
   STLDeleteElements(&garbage_collectors_);
   // If we don't reset then the mark stack complains in its destructor.
allocation_stack_->Reset(); + allocation_records_.reset(); live_stack_->Reset(); STLDeleteValues(&mod_union_tables_); STLDeleteValues(&remembered_sets_); @@ -3653,5 +3655,18 @@ void Heap::ClearMarkedObjects() { } } +void Heap::SetAllocationRecords(AllocRecordObjectMap* records) { + allocation_records_.reset(records); +} + +void Heap::SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const { + if (IsAllocTrackingEnabled()) { + MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_); + if (IsAllocTrackingEnabled()) { + GetAllocationRecords()->SweepAllocationRecords(visitor, arg); + } + } +} + } // namespace gc } // namespace art diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 81a97414b..09bd3701e 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -58,6 +58,7 @@ namespace mirror { namespace gc { +class AllocRecordObjectMap; class ReferenceProcessor; class TaskProcessor; @@ -683,6 +684,27 @@ class Heap { void DumpGcCountRateHistogram(std::ostream& os) const; void DumpBlockingGcCountRateHistogram(std::ostream& os) const; + // Allocation tracking support + // Callers to this function use double-checked locking to ensure safety on allocation_records_ + bool IsAllocTrackingEnabled() const { + return alloc_tracking_enabled_.LoadRelaxed(); + } + + void SetAllocTrackingEnabled(bool enabled) EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + alloc_tracking_enabled_.StoreRelaxed(enabled); + } + + AllocRecordObjectMap* GetAllocationRecords() const + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return allocation_records_.get(); + } + + void SetAllocationRecords(AllocRecordObjectMap* records) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); + + void SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + private: class ConcurrentGCTask; class CollectorTransitionTask; @@ -1191,6 +1213,11 @@ class Heap { // The histogram of the number of blocking GC invocations per window duration. 
   Histogram<uint64_t> blocking_gc_count_rate_histogram_ GUARDED_BY(gc_complete_lock_);
 
+  // Allocation tracking support
+  Atomic<bool> alloc_tracking_enabled_;
+  std::unique_ptr<AllocRecordObjectMap> allocation_records_
+      GUARDED_BY(Locks::alloc_tracker_lock_);
+
   friend class CollectorTransitionTask;
   friend class collector::GarbageCollector;
   friend class collector::MarkCompact;
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 6e0e56e82..85eefb095 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -48,6 +48,7 @@
 #include "dex_file-inl.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
+#include "gc/allocation_record.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
 #include "globals.h"
@@ -68,7 +69,6 @@ namespace hprof {
 static constexpr bool kDirectStream = true;
 
 static constexpr uint32_t kHprofTime = 0;
-static constexpr uint32_t kHprofNullStackTrace = 0;
 static constexpr uint32_t kHprofNullThread = 0;
 
 static constexpr size_t kMaxObjectsPerSegment = 128;
@@ -144,6 +144,10 @@ enum HprofBasicType {
 typedef uint32_t HprofStringId;
 typedef uint32_t HprofClassObjectId;
+typedef uint32_t HprofClassSerialNumber;
+typedef uint32_t HprofStackTraceSerialNumber;
+typedef uint32_t HprofStackFrameId;
+static constexpr HprofStackTraceSerialNumber kHprofNullStackTrace = 0;
 
 class EndianOutput {
  public:
@@ -194,6 +198,10 @@ class EndianOutput {
     AddU4(PointerToLowMemUInt32(value));
   }
 
+  void AddStackTraceSerialNumber(HprofStackTraceSerialNumber value) {
+    AddU4(value);
+  }
+
   // The ID for the synthetic object generated to account for class static overhead.
   void AddClassStaticsId(const mirror::Class* value) {
     AddU4(1 | PointerToLowMemUInt32(value));
   }
@@ -415,13 +423,21 @@
         start_ns_(NanoTime()),
         current_heap_(HPROF_HEAP_DEFAULT),
         objects_in_segment_(0),
-        next_string_id_(0x400000) {
+        next_string_id_(0x400000),
+        next_class_serial_number_(1) {
     LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting...";
   }
 
   void Dump()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) {
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_, Locks::alloc_tracker_lock_) {
+    {
+      MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+      if (Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) {
+        PopulateAllocationTrackingTraces();
+      }
+    }
+
     // First pass to measure the size of the dump.
     size_t overall_size;
     size_t max_length;
@@ -480,11 +496,11 @@
     objects_in_segment_ = 0;
 
     if (header_first) {
-      ProcessHeader();
+      ProcessHeader(true);
       ProcessBody();
     } else {
       ProcessBody();
-      ProcessHeader();
+      ProcessHeader(false);
     }
   }
 
@@ -501,21 +517,29 @@
     output_->EndRecord();
   }
 
-  void ProcessHeader() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void ProcessHeader(bool string_first) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Write the header.
     WriteFixedHeader();
     // Write the string and class tables, and any stack traces, to the header.
     // (jhat requires that these appear before any of the data in the body that refers to them.)
-    WriteStringTable();
+    // jhat also requires the string table appear before class table and stack traces.
+    // However, WriteStackTraces() can modify the string table, so it's necessary to call
+    // WriteStringTable() last in the first pass, to compute the correct length of the output.
+ if (string_first) { + WriteStringTable(); + } WriteClassTable(); WriteStackTraces(); + if (!string_first) { + WriteStringTable(); + } output_->EndRecord(); } void WriteClassTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - uint32_t nextSerialNumber = 1; - - for (mirror::Class* c : classes_) { + for (const auto& p : classes_) { + mirror::Class* c = p.first; + HprofClassSerialNumber sn = p.second; CHECK(c != nullptr); output_->StartNewRecord(HPROF_TAG_LOAD_CLASS, kHprofTime); // LOAD CLASS format: @@ -523,9 +547,9 @@ class Hprof : public SingleRootVisitor { // ID: class object ID. We use the address of the class object structure as its ID. // U4: stack trace serial number // ID: class name string ID - __ AddU4(nextSerialNumber++); + __ AddU4(sn); __ AddObjectId(c); - __ AddU4(kHprofNullStackTrace); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(c)); __ AddStringId(LookupClassNameId(c)); } } @@ -567,15 +591,31 @@ class Hprof : public SingleRootVisitor { HprofClassObjectId LookupClassId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { if (c != nullptr) { - auto result = classes_.insert(c); - const mirror::Class* present = *result.first; - CHECK_EQ(present, c); - // Make sure that we've assigned a string ID for this class' name - LookupClassNameId(c); + auto it = classes_.find(c); + if (it == classes_.end()) { + // first time to see this class + HprofClassSerialNumber sn = next_class_serial_number_++; + classes_.Put(c, sn); + // Make sure that we've assigned a string ID for this class' name + LookupClassNameId(c); + } } return PointerToLowMemUInt32(c); } + HprofStackTraceSerialNumber LookupStackTraceSerialNumber(const mirror::Object* obj) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + auto r = allocation_records_.find(obj); + if (r == allocation_records_.end()) { + return kHprofNullStackTrace; + } else { + const gc::AllocRecordStackTrace* trace = r->second; + auto result = traces_.find(trace); + CHECK(result != traces_.end()); + return result->second; + } + } + HprofStringId LookupStringId(mirror::String* string) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return LookupStringId(string->ToModifiedUtf8()); } @@ -622,12 +662,66 @@ class Hprof : public SingleRootVisitor { __ AddU4(static_cast(nowMs & 0xFFFFFFFF)); } - void WriteStackTraces() { + void WriteStackTraces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { // Write a dummy stack trace record so the analysis tools don't freak out. output_->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime); - __ AddU4(kHprofNullStackTrace); + __ AddStackTraceSerialNumber(kHprofNullStackTrace); __ AddU4(kHprofNullThread); __ AddU4(0); // no frames + + // TODO: jhat complains "WARNING: Stack trace not found for serial # -1", but no trace should + // have -1 as its serial number (as long as HprofStackTraceSerialNumber doesn't overflow). + for (const auto& it : traces_) { + const gc::AllocRecordStackTrace* trace = it.first; + HprofStackTraceSerialNumber trace_sn = it.second; + size_t depth = trace->GetDepth(); + + // First write stack frames of the trace + for (size_t i = 0; i < depth; ++i) { + const gc::AllocRecordStackTraceElement* frame = &trace->GetStackElement(i); + ArtMethod* method = frame->GetMethod(); + CHECK(method != nullptr); + output_->StartNewRecord(HPROF_TAG_STACK_FRAME, kHprofTime); + // STACK FRAME format: + // ID: stack frame ID. We use the address of the AllocRecordStackTraceElement object as its ID. 
+ // ID: method name string ID + // ID: method signature string ID + // ID: source file name string ID + // U4: class serial number + // U4: >0, line number; 0, no line information available; -1, unknown location + auto frame_result = frames_.find(frame); + CHECK(frame_result != frames_.end()); + __ AddU4(frame_result->second); + __ AddStringId(LookupStringId(method->GetName())); + __ AddStringId(LookupStringId(method->GetSignature().ToString())); + const char* source_file = method->GetDeclaringClassSourceFile(); + if (source_file == nullptr) { + source_file = ""; + } + __ AddStringId(LookupStringId(source_file)); + auto class_result = classes_.find(method->GetDeclaringClass()); + CHECK(class_result != classes_.end()); + __ AddU4(class_result->second); + __ AddU4(frame->ComputeLineNumber()); + } + + // Then write the trace itself + output_->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime); + // STACK TRACE format: + // U4: stack trace serial number. We use the address of the AllocRecordStackTrace object as its serial number. + // U4: thread serial number. We use Thread::GetTid(). + // U4: number of frames + // [ID]*: series of stack frame ID's + __ AddStackTraceSerialNumber(trace_sn); + __ AddU4(trace->GetTid()); + __ AddU4(depth); + for (size_t i = 0; i < depth; ++i) { + const gc::AllocRecordStackTraceElement* frame = &trace->GetStackElement(i); + auto frame_result = frames_.find(frame); + CHECK(frame_result != frames_.end()); + __ AddU4(frame_result->second); + } + } } bool DumpToDdmsBuffered(size_t overall_size ATTRIBUTE_UNUSED, size_t max_length ATTRIBUTE_UNUSED) @@ -723,6 +817,40 @@ class Hprof : public SingleRootVisitor { return true; } + void PopulateAllocationTrackingTraces() + EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::alloc_tracker_lock_) { + gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); + CHECK(records != nullptr); + HprofStackTraceSerialNumber next_trace_sn = kHprofNullStackTrace + 1; + HprofStackFrameId next_frame_id = 0; + + for (auto it = records->Begin(), end = records->End(); it != end; ++it) { + const mirror::Object* obj = it->first.Read(); + const gc::AllocRecordStackTrace* trace = it->second->GetStackTrace(); + + // Copy the pair into a real hash map to speed up look up. + auto records_result = allocation_records_.emplace(obj, trace); + // The insertion should always succeed, i.e. no duplicate object pointers in "records" + CHECK(records_result.second); + + // Generate serial numbers for traces, and IDs for frames. + auto traces_result = traces_.find(trace); + if (traces_result == traces_.end()) { + traces_.emplace(trace, next_trace_sn++); + // only check frames if the trace is newly discovered + for (size_t i = 0, depth = trace->GetDepth(); i < depth; ++i) { + const gc::AllocRecordStackTraceElement* frame = &trace->GetStackElement(i); + auto frames_result = frames_.find(frame); + if (frames_result == frames_.end()) { + frames_.emplace(frame, next_frame_id++); + } + } + } + } + CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1); + CHECK_EQ(frames_.size(), next_frame_id); + } + // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored. // Otherwise, "filename_" must be valid, though if "fd" >= 0 it will // only be used for debug messages. @@ -737,9 +865,18 @@ class Hprof : public SingleRootVisitor { HprofHeapId current_heap_; // Which heap we're currently dumping. 
   size_t objects_in_segment_;
 
-  std::set<mirror::Class*> classes_;
   HprofStringId next_string_id_;
   SafeMap<std::string, HprofStringId> strings_;
+  HprofClassSerialNumber next_class_serial_number_;
+  SafeMap<mirror::Class*, HprofClassSerialNumber> classes_;
+
+  std::unordered_map<const gc::AllocRecordStackTrace*, HprofStackTraceSerialNumber,
+                     gc::HashAllocRecordTypesPtr<gc::AllocRecordStackTrace>,
+                     gc::EqAllocRecordTypesPtr<gc::AllocRecordStackTrace>> traces_;
+  std::unordered_map<const gc::AllocRecordStackTraceElement*, HprofStackFrameId,
+                     gc::HashAllocRecordTypesPtr<gc::AllocRecordStackTraceElement>,
+                     gc::EqAllocRecordTypesPtr<gc::AllocRecordStackTraceElement>> frames_;
+  std::unordered_map<const mirror::Object*, const gc::AllocRecordStackTrace*> allocation_records_;
 
   DISALLOW_COPY_AND_ASSIGN(Hprof);
 };
@@ -881,10 +1018,6 @@ void Hprof::MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeap
   ++objects_in_segment_;
 }
 
-static int StackTraceSerialNumber(const mirror::Object* /*obj*/) {
-  return kHprofNullStackTrace;
-}
-
 void Hprof::DumpHeapObject(mirror::Object* obj) {
   // Ignore classes that are retired.
   if (obj->IsClass() && obj->AsClass()->IsRetired()) {
@@ -966,7 +1099,7 @@ void Hprof::DumpHeapClass(mirror::Class* klass) {
     // StaticField array at the end of this class.
     __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
     __ AddClassStaticsId(klass);
-    __ AddU4(StackTraceSerialNumber(klass));
+    __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(klass));
     __ AddU4(byteLength);
     __ AddU1(hprof_basic_byte);
     for (int i = 0; i < byteLength; ++i) {
@@ -976,7 +1109,7 @@
   __ AddU1(HPROF_CLASS_DUMP);
   __ AddClassId(LookupClassId(klass));
-  __ AddU4(StackTraceSerialNumber(klass));
+  __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(klass));
   __ AddClassId(LookupClassId(klass->GetSuperClass()));
   __ AddObjectId(klass->GetClassLoader());
   __ AddObjectId(nullptr);    // no signer
@@ -1072,7 +1205,7 @@ void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) {
 
     __ AddU1(HPROF_OBJECT_ARRAY_DUMP);
     __ AddObjectId(obj);
-    __ AddU4(StackTraceSerialNumber(obj));
+    __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj));
     __ AddU4(length);
     __ AddClassId(LookupClassId(klass));
 
@@ -1087,7 +1220,7 @@
     __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
     __ AddObjectId(obj);
-    __ AddU4(StackTraceSerialNumber(obj));
+    __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj));
     __ AddU4(length);
     __ AddU1(t);
 
@@ -1108,7 +1241,7 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) {
   // obj is an instance object.
__ AddU1(HPROF_INSTANCE_DUMP); __ AddObjectId(obj); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddClassId(LookupClassId(klass)); // Reserve some space for the length of the instance data, which we won't @@ -1170,7 +1303,7 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP); __ AddObjectId(value); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddU4(s->GetLength()); __ AddU1(hprof_basic_char); __ AddU2List(s->GetValue(), s->GetLength()); diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc index b96ddc810..9ce4a02f1 100644 --- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc +++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc @@ -38,7 +38,7 @@ static jbyteArray DdmVmInternal_getRecentAllocations(JNIEnv* env, jclass) { } static jboolean DdmVmInternal_getRecentAllocationStatus(JNIEnv*, jclass) { - return Dbg::IsAllocTrackingEnabled(); + return Runtime::Current()->GetHeap()->IsAllocTrackingEnabled(); } /* diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 9d651bf6a..cb7f5f37c 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -402,6 +402,7 @@ void Runtime::SweepSystemWeaks(IsMarkedCallback* visitor, void* arg) { GetInternTable()->SweepInternTableWeaks(visitor, arg); GetMonitorList()->SweepMonitorList(visitor, arg); GetJavaVM()->SweepJniWeakGlobals(visitor, arg); + GetHeap()->SweepAllocationRecords(visitor, arg); } bool Runtime::Create(const RuntimeOptions& options, bool ignore_unrecognized) { @@ -1471,6 +1472,11 @@ void Runtime::DisallowNewSystemWeaks() { monitor_list_->DisallowNewMonitors(); intern_table_->DisallowNewInterns(); java_vm_->DisallowNewWeakGlobals(); + // TODO: add a similar call for heap.allocation_records_, otherwise some of the newly allocated + // objects that are not marked might be swept from the records, making the records incomplete. + // It is safe for now since the only effect is that those objects do not have allocation records. + // The number of such objects should be small, and current allocation tracker cannot collect + // allocation records for all objects anyway. } void Runtime::AllowNewSystemWeaks() {
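---

Appendix: small standalone sketches of the patterns used above. These are simplified stand-ins written for illustration, not ART code.

The three system-property readers in this change (dalvik.vm.recentAllocMax, dalvik.vm.allocTrackerMax, dalvik.vm.allocStackDepth) share one validation pattern: strtoul plus an end-pointer check, so a value with trailing garbage is rejected outright instead of being silently truncated. A minimal sketch of that pattern, with a plain string standing in for property_get() so it compiles off-device:

#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Returns true and writes *out only if the whole string is a base-10 number.
static bool ParseSizeT(const char* s, size_t* out) {
  char* end;
  size_t value = std::strtoul(s, &end, 10);
  if (end == s || *end != '\0') {
    return false;  // Empty or trailing junk: "512k" is rejected, not read as 512.
  }
  *out = value;
  return true;
}

int main() {
  size_t v = 0;
  std::printf("%d\n", ParseSizeT("524288", &v));  // 1, v == 524288
  std::printf("%d\n", ParseSizeT("512k", &v));    // 0, v left untouched
  return 0;
}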
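The deleted Dbg::HeadIndex() depended on alloc_record_max_ being a power of two so that masking with (max - 1) could stand in for a modulo; that is why the removed GetAllocTrackerMax() rejected non-power-of-two property values. A small sketch of that index arithmetic, shown only to document what the list-based EntryList replaces (the names here are stand-ins, not ART APIs):

#include <cstddef>
#include <cstdio>

int main() {
  const size_t max = 8;  // Must be a power of two for the mask trick to work.
  size_t head = 5;       // Index of the most recently written record.
  size_t count = 7;      // Number of valid records in the buffer.
  // Oldest element: one past the head, minus count, wrapped. Adding max first
  // keeps the value non-negative before masking.
  size_t oldest = (head + 1 + max - count) & (max - 1);
  std::printf("oldest index = %zu\n", oldest);  // Prints 7.
  // Walking count steps forward from oldest visits records oldest -> newest.
  for (size_t i = oldest, n = count; n > 0; --n, i = (i + 1) & (max - 1)) {
    std::printf("%zu ", i);  // Prints 7 0 1 2 3 4 5, ending at head.
  }
  std::printf("\n");
  return 0;
}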
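With the ring buffer gone, "most recent N" retrieval becomes reverse iteration over the append-ordered EntryList, as in the rewritten Dbg::DumpRecentAllocations() and Dbg::GetRecentAllocations(). A simplified sketch with stand-in types:

#include <cstddef>
#include <cstdio>
#include <list>
#include <utility>

struct FakeRecord { size_t byte_count; };  // Stand-in for gc::AllocRecord.

int main() {
  // Ordered oldest -> newest, like AllocRecordObjectMap::EntryList.
  std::list<std::pair<int, FakeRecord>> entries;
  for (int i = 0; i < 10; ++i) {
    entries.emplace_back(i, FakeRecord{static_cast<size_t>(i) * 16});
  }
  // Cap like CappedAllocRecordCount(); pretend the 2BE cap is 3 for the demo.
  size_t count = entries.size() > 0xffff ? 0xffff : entries.size();
  count = count > 3 ? 3 : count;
  // The last "count" entries are the most recent allocations; reverse
  // iteration therefore emits the newest record first.
  for (auto it = entries.rbegin(), end = entries.rend(); count > 0 && it != end; --count, ++it) {
    std::printf("object %d: %zu bytes\n", it->first, it->second.byte_count);
  }
  return 0;
}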
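Heap::SweepAllocationRecords() is the double-checked pattern that the "static because it is part of double-checked locking" comment in allocation_record.h refers to: test the atomic flag without the lock so GC pays almost nothing while tracking is off, then re-test under the lock before touching the map, since tracking can be disabled (and the map deleted) in between. A sketch with std::atomic and std::mutex standing in for Atomic<bool> and Locks::alloc_tracker_lock_:

#include <atomic>
#include <cstdio>
#include <mutex>

static std::atomic<bool> alloc_tracking_enabled{false};
static std::mutex alloc_tracker_lock;

void SweepAllocationRecords() {
  if (!alloc_tracking_enabled.load(std::memory_order_relaxed)) {
    return;  // Fast path: tracking off, skip the lock entirely.
  }
  std::lock_guard<std::mutex> mu(alloc_tracker_lock);
  // Re-check under the lock: the flag may have flipped since the first load.
  if (alloc_tracking_enabled.load(std::memory_order_relaxed)) {
    std::printf("sweeping allocation records\n");
  }
}

int main() {
  SweepAllocationRecords();  // No-op while disabled.
  {
    std::lock_guard<std::mutex> mu(alloc_tracker_lock);
    alloc_tracking_enabled.store(true, std::memory_order_relaxed);
  }
  SweepAllocationRecords();  // Now does the guarded work.
  return 0;
}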
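HashAllocRecordTypesPtr and EqAllocRecordTypesPtr exist so the hprof writer can key hash maps by pointer while hashing and comparing through the pointee; structurally identical stack traces then collapse onto a single serial number even though they are distinct heap objects. A simplified standalone version of the idea (Frame stands in for AllocRecordStackTraceElement):

#include <cstdint>
#include <cstdio>
#include <functional>
#include <unordered_map>

struct Frame {
  void* method;
  uint32_t dex_pc;
  bool operator==(const Frame& o) const { return method == o.method && dex_pc == o.dex_pc; }
};

struct HashFramePtr {
  size_t operator()(const Frame* f) const {
    if (f == nullptr) return 0;
    return std::hash<void*>()(f->method) * 17 + std::hash<uint32_t>()(f->dex_pc);
  }
};

struct EqFramePtr {
  bool operator()(const Frame* a, const Frame* b) const {
    if (a == b) return true;
    if (a == nullptr || b == nullptr) return false;
    return *a == *b;  // Compare by value, not by address.
  }
};

int main() {
  Frame f1{nullptr, 42};
  Frame f2{nullptr, 42};  // Different object, same value.
  std::unordered_map<const Frame*, uint32_t, HashFramePtr, EqFramePtr> ids;
  ids.emplace(&f1, 1u);
  // Lookup through a different pointer still hits, which is what lets the
  // writer hand out one frame ID per distinct (method, dex_pc) pair.
  std::printf("found: %d\n", ids.find(&f2) != ids.end());  // Prints 1.
  return 0;
}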
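The HPROF_TAG_STACK_TRACE body written by WriteStackTraces() is plain big-endian fields: trace serial number, thread serial number, frame count, then one 4-byte frame ID per frame. A sketch of that layout with a byte vector standing in for EndianOutput (the IDs are made-up small integers here; the real writer uses object addresses):

#include <cstdint>
#include <cstdio>
#include <vector>

// Big-endian append, like EndianOutput::AddU4.
static void AddU4(std::vector<uint8_t>* out, uint32_t v) {
  out->push_back((v >> 24) & 0xff);
  out->push_back((v >> 16) & 0xff);
  out->push_back((v >> 8) & 0xff);
  out->push_back(v & 0xff);
}

int main() {
  std::vector<uint8_t> body;
  const uint32_t trace_serial = 1;  // kHprofNullStackTrace (0) is reserved.
  const uint32_t thread_id = 1234;  // AllocRecordStackTrace::GetTid().
  const std::vector<uint32_t> frame_ids = {10, 11, 12};
  AddU4(&body, trace_serial);
  AddU4(&body, thread_id);
  AddU4(&body, static_cast<uint32_t>(frame_ids.size()));
  for (uint32_t id : frame_ids) {
    AddU4(&body, id);  // Innermost frame first, matching the record order above.
  }
  std::printf("record body is %zu bytes\n", body.size());  // 12 + 4 * depth = 24.
  return 0;
}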