From 6bfc37d875b2556b3f1d95b00a785a09dc94ccb0 Mon Sep 17 00:00:00 2001
From: Richard Uhler
Date: Thu, 24 Nov 2016 11:21:41 +0000
Subject: [PATCH] Redesign implementation of RegisterNativeAllocation.

Improve performance and eliminate the issue of GCing apps to death
when they reach the growth_limit_.

This implements the REDESIGN approach described in detail in the
document at go/understanding-register-native-allocation.

Bug: 29156652
Bug: 32576211
Test: m test-art-host
Test: vogar libcore/luni/src/test/java/libcore/util/NativeAllocationRegistryTest.java
Test: Device boots.
Test: adb bugreport and look for "Registered native bytes allocated"

Change-Id: I09176b2b0e4f0c401fe1947830fa5271060f4e61
---
 runtime/atomic.h                         |   9 ++
 runtime/gc/heap.cc                       | 151 +++++++++++--------
 runtime/gc/heap.h                        |  67 +++++++++-----
 test/004-NativeAllocations/src/Main.java |  12 ++-
 4 files changed, 119 insertions(+), 120 deletions(-)

diff --git a/runtime/atomic.h b/runtime/atomic.h
index e2a725978..45c3165b1 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -235,6 +235,11 @@ class PACKED(sizeof(T)) Atomic : public std::atomic<T> {
     this->store(desired, std::memory_order_seq_cst);
   }
 
+  // Atomically replace the value with desired value.
+  T ExchangeRelaxed(T desired_value) {
+    return this->exchange(desired_value, std::memory_order_relaxed);
+  }
+
   // Atomically replace the value with desired value if it matches the expected value.
   // Participates in total ordering of atomic operations.
   bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
@@ -283,6 +288,10 @@ class PACKED(sizeof(T)) Atomic : public std::atomic<T> {
     return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
   }
 
+  T FetchAndSubRelaxed(const T value) {
+    return this->fetch_sub(value, std::memory_order_relaxed);  // Return old value.
+  }
+
   T FetchAndOrSequentiallyConsistent(const T value) {
     return this->fetch_or(value, std::memory_order_seq_cst);  // Return old_value.
   }
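The two helpers added to Atomic above are thin wrappers around std::atomic's relaxed read-modify-write operations, and both return the value that was stored before the operation, which is what lets heap.cc drain one counter into another without a lock. A minimal standalone sketch of that behaviour, assuming nothing beyond the C++ standard library (the counter names below are illustrative, not the patch's fields):

    // Standalone illustration of the relaxed exchange / fetch_add / fetch_sub
    // semantics that ExchangeRelaxed and FetchAndSubRelaxed expose.
    #include <atomic>
    #include <cassert>
    #include <cstddef>

    int main() {
      std::atomic<size_t> new_bytes{300};
      std::atomic<size_t> old_bytes{100};

      // exchange(): store 0 and get the previous value back in one atomic step.
      size_t drained = new_bytes.exchange(0, std::memory_order_relaxed);
      assert(drained == 300 && new_bytes.load() == 0);

      // fetch_add()/fetch_sub(): adjust the counter, returning the old value.
      old_bytes.fetch_add(drained, std::memory_order_relaxed);  // old_bytes == 400
      old_bytes.fetch_sub(50, std::memory_order_relaxed);       // old_bytes == 350
      assert(old_bytes.load(std::memory_order_relaxed) == 350);
      return 0;
    }

Relaxed ordering appears to be sufficient for this use because the counters only feed a GC-trigger heuristic and a debug dump; they are not used to publish other memory.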
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 34d82845d..522e0bfcb 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -127,8 +127,6 @@ static constexpr uint32_t kAllocSpaceBeginForDeterministicAoT = 0x40000000;
 // Dump the rosalloc stats on SIGQUIT.
 static constexpr bool kDumpRosAllocStatsOnSigQuit = false;
 
-static constexpr size_t kNativeAllocationHistogramBuckets = 16;
-
 // Extra added to the heap growth multiplier. Used to adjust the GC ergonomics for the read barrier
 // config.
 static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 1.0 : 0.0;
@@ -194,18 +192,12 @@ Heap::Heap(size_t initial_size,
       capacity_(capacity),
       growth_limit_(growth_limit),
       max_allowed_footprint_(initial_size),
-      native_footprint_gc_watermark_(initial_size),
-      native_need_to_run_finalization_(false),
       concurrent_start_bytes_(std::numeric_limits<size_t>::max()),
       total_bytes_freed_ever_(0),
       total_objects_freed_ever_(0),
       num_bytes_allocated_(0),
-      native_bytes_allocated_(0),
-      native_histogram_lock_("Native allocation lock"),
-      native_allocation_histogram_("Native allocation sizes",
-                                   1U,
-                                   kNativeAllocationHistogramBuckets),
-      native_free_histogram_("Native free sizes", 1U, kNativeAllocationHistogramBuckets),
+      new_native_bytes_allocated_(0),
+      old_native_bytes_allocated_(0),
       num_bytes_freed_revoke_(0),
       verify_missing_card_marks_(false),
       verify_system_weaks_(false),
@@ -1101,19 +1093,9 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) {
     rosalloc_space_->DumpStats(os);
   }
 
-  {
-    MutexLock mu(Thread::Current(), native_histogram_lock_);
-    if (native_allocation_histogram_.SampleSize() > 0u) {
-      os << "Histogram of native allocation ";
-      native_allocation_histogram_.DumpBins(os);
-      os << " bucket size " << native_allocation_histogram_.BucketWidth() << "\n";
-    }
-    if (native_free_histogram_.SampleSize() > 0u) {
-      os << "Histogram of native free ";
-      native_free_histogram_.DumpBins(os);
-      os << " bucket size " << native_free_histogram_.BucketWidth() << "\n";
-    }
-  }
+  os << "Registered native bytes allocated: "
+     << old_native_bytes_allocated_.LoadRelaxed() + new_native_bytes_allocated_.LoadRelaxed()
+     << "\n";
 
   BaseMutex::DumpAll(os);
 }
@@ -2645,6 +2627,13 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type,
   // Approximate heap size.
   ATRACE_INT("Heap size (KB)", bytes_allocated_before_gc / KB);
 
+  if (gc_type == NonStickyGcType()) {
+    // Move all bytes from new_native_bytes_allocated_ to
+    // old_native_bytes_allocated_ now that GC has been triggered, resetting
+    // new_native_bytes_allocated_ to zero in the process.
+    old_native_bytes_allocated_.FetchAndAddRelaxed(new_native_bytes_allocated_.ExchangeRelaxed(0));
+  }
+
   DCHECK_LT(gc_type, collector::kGcTypeMax);
   DCHECK_NE(gc_type, collector::kGcTypeNone);
@@ -3504,18 +3493,6 @@ bool Heap::IsMovableObject(ObjPtr<mirror::Object> obj) const {
   return false;
 }
 
-void Heap::UpdateMaxNativeFootprint() {
-  size_t native_size = native_bytes_allocated_.LoadRelaxed();
-  // TODO: Tune the native heap utilization to be a value other than the java heap utilization.
-  size_t target_size = native_size / GetTargetHeapUtilization();
-  if (target_size > native_size + max_free_) {
-    target_size = native_size + max_free_;
-  } else if (target_size < native_size + min_free_) {
-    target_size = native_size + min_free_;
-  }
-  native_footprint_gc_watermark_ = std::min(growth_limit_, target_size);
-}
-
 collector::GarbageCollector* Heap::FindCollectorByGcType(collector::GcType gc_type) {
   for (const auto& collector : garbage_collectors_) {
     if (collector->GetCollectorType() == collector_type_ &&
@@ -3552,11 +3529,9 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran,
     target_size = bytes_allocated + delta * multiplier;
     target_size = std::min(target_size, bytes_allocated + adjusted_max_free);
     target_size = std::max(target_size, bytes_allocated + adjusted_min_free);
-    native_need_to_run_finalization_ = true;
     next_gc_type_ = collector::kGcTypeSticky;
   } else {
-    collector::GcType non_sticky_gc_type =
-        HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull;
+    collector::GcType non_sticky_gc_type = NonStickyGcType();
     // Find what the next non sticky collector will be.
     collector::GarbageCollector* non_sticky_collector = FindCollectorByGcType(non_sticky_gc_type);
     // If the throughput of the current sticky GC >= throughput of the non sticky collector, then
@@ -3707,7 +3682,7 @@ void Heap::ConcurrentGC(Thread* self, bool force_full) {
     collector::GcType next_gc_type = next_gc_type_;
     // If forcing full and next gc type is sticky, override with a non-sticky type.
     if (force_full && next_gc_type == collector::kGcTypeSticky) {
-      next_gc_type = HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull;
+      next_gc_type = NonStickyGcType();
     }
     if (CollectGarbageInternal(next_gc_type, kGcCauseBackground, false) == collector::kGcTypeNone) {
@@ -3863,71 +3838,51 @@ void Heap::RunFinalization(JNIEnv* env, uint64_t timeout) {
                             static_cast<jlong>(timeout));
 }
 
-void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) {
-  Thread* self = ThreadForEnv(env);
-  {
-    MutexLock mu(self, native_histogram_lock_);
-    native_allocation_histogram_.AddValue(bytes);
-  }
-  if (native_need_to_run_finalization_) {
-    RunFinalization(env, kNativeAllocationFinalizeTimeout);
-    UpdateMaxNativeFootprint();
-    native_need_to_run_finalization_ = false;
-  }
-  // Total number of native bytes allocated.
-  size_t new_native_bytes_allocated = native_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes);
-  new_native_bytes_allocated += bytes;
-  if (new_native_bytes_allocated > native_footprint_gc_watermark_) {
-    collector::GcType gc_type = HasZygoteSpace() ? collector::kGcTypePartial :
-        collector::kGcTypeFull;
-
-    // The second watermark is higher than the gc watermark. If you hit this it means you are
-    // allocating native objects faster than the GC can keep up with.
-    if (new_native_bytes_allocated > growth_limit_) {
-      if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) != collector::kGcTypeNone) {
-        // Just finished a GC, attempt to run finalizers.
-        RunFinalization(env, kNativeAllocationFinalizeTimeout);
-        CHECK(!env->ExceptionCheck());
-        // Native bytes allocated may be updated by finalization, refresh it.
-        new_native_bytes_allocated = native_bytes_allocated_.LoadRelaxed();
-      }
-      // If we still are over the watermark, attempt a GC for alloc and run finalizers.
-      if (new_native_bytes_allocated > growth_limit_) {
-        CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
-        RunFinalization(env, kNativeAllocationFinalizeTimeout);
-        native_need_to_run_finalization_ = false;
-        CHECK(!env->ExceptionCheck());
-      }
-      // We have just run finalizers, update the native watermark since it is very likely that
-      // finalizers released native managed allocations.
-      UpdateMaxNativeFootprint();
-    } else if (!IsGCRequestPending()) {
-      if (IsGcConcurrent()) {
-        RequestConcurrentGC(self, true);  // Request non-sticky type.
-      } else {
-        CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
-      }
+void Heap::TriggerGcForNativeAlloc(Thread* self) {
+  if (!IsGCRequestPending()) {
+    if (IsGcConcurrent()) {
+      RequestConcurrentGC(self, /*force_full*/true);
+    } else {
+      CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false);
     }
   }
 }
 
-void Heap::RegisterNativeFree(JNIEnv* env, size_t bytes) {
-  size_t expected_size;
-  {
-    MutexLock mu(Thread::Current(), native_histogram_lock_);
-    native_free_histogram_.AddValue(bytes);
+void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) {
+  // See the REDESIGN section of go/understanding-register-native-allocation
+  // for an explanation of how RegisterNativeAllocation works.
+  size_t new_value = bytes + new_native_bytes_allocated_.FetchAndAddRelaxed(bytes);
+  if (new_value > NativeAllocationBlockingGcWatermark()) {
+    // Wait for a new GC to finish and finalizers to run, because the
+    // allocation rate is too high.
+    Thread* self = ThreadForEnv(env);
+    WaitForGcToComplete(kGcCauseForNativeAlloc, self);
+    TriggerGcForNativeAlloc(self);
+    WaitForGcToComplete(kGcCauseForNativeAlloc, self);
+    RunFinalization(env, kNativeAllocationFinalizeTimeout);
+    CHECK(!env->ExceptionCheck());
+  } else if (new_value > NativeAllocationGcWatermark() && !IsGCRequestPending()) {
+    // Trigger another GC because there have been enough native bytes
+    // allocated since the last GC.
+    TriggerGcForNativeAlloc(ThreadForEnv(env));
   }
+}
+
+void Heap::RegisterNativeFree(JNIEnv*, size_t bytes) {
+  // Take the bytes freed out of new_native_bytes_allocated_ first. If
+  // new_native_bytes_allocated_ reaches zero, take the remaining bytes freed
+  // out of old_native_bytes_allocated_ to ensure all freed bytes are
+  // accounted for.
+  size_t allocated;
+  size_t new_freed_bytes;
   do {
-    expected_size = native_bytes_allocated_.LoadRelaxed();
-    if (UNLIKELY(bytes > expected_size)) {
-      ScopedObjectAccess soa(env);
-      env->ThrowNew(WellKnownClasses::java_lang_RuntimeException,
-                    StringPrintf("Attempted to free %zd native bytes with only %zd native bytes "
-                                 "registered as allocated", bytes, expected_size).c_str());
-      break;
-    }
-  } while (!native_bytes_allocated_.CompareExchangeWeakRelaxed(expected_size,
-                                                               expected_size - bytes));
+    allocated = new_native_bytes_allocated_.LoadRelaxed();
+    new_freed_bytes = std::min(allocated, bytes);
+  } while (!new_native_bytes_allocated_.CompareExchangeWeakRelaxed(allocated,
+                                                                   allocated - new_freed_bytes));
+  if (new_freed_bytes < bytes) {
+    old_native_bytes_allocated_.FetchAndSubRelaxed(bytes - new_freed_bytes);
+  }
 }
 
 size_t Heap::GetTotalMemory() const {
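Taken together, the rewritten heap.cc logic is a small accounting scheme: RegisterNativeAllocation adds to new_native_bytes_allocated_ and compares the running total against two watermarks, RegisterNativeFree drains the "new" counter first and charges any remainder to old_native_bytes_allocated_, and every non-sticky GC folds "new" into "old" (the CollectGarbageInternal hunk above). The sketch below models that scheme outside of ART; the GC and finalization calls are stubs and the watermark constants are arbitrary placeholders, not the runtime's real values.

    // Simplified model of the new accounting scheme (not ART code).
    #include <algorithm>
    #include <atomic>
    #include <cstddef>

    static std::atomic<size_t> new_native_bytes_allocated{0};
    static std::atomic<size_t> old_native_bytes_allocated{0};

    // Placeholder thresholds standing in for max_free_ and growth_limit_ / 2.
    static constexpr size_t kGcWatermark = 8u * 1024 * 1024;
    static constexpr size_t kBlockingGcWatermark = 128u * 1024 * 1024;

    static void TriggerGc() { /* request a non-sticky GC (stub) */ }
    static void WaitForGcAndFinalizers() { /* block until GC and finalizers finish (stub) */ }

    void RegisterNativeAllocation(size_t bytes) {
      size_t new_value = bytes + new_native_bytes_allocated.fetch_add(bytes);
      if (new_value > kBlockingGcWatermark) {
        // Registration is outpacing collection: block the caller until a GC
        // and the finalizers it queues have run.
        WaitForGcAndFinalizers();
      } else if (new_value > kGcWatermark) {
        // Enough native bytes since the last GC: request one without blocking.
        TriggerGc();
      }
    }

    void RegisterNativeFree(size_t bytes) {
      // Drain the "new" counter first; any remainder comes out of the "old"
      // counter, so every freed byte is accounted against one of the two.
      size_t allocated;
      size_t new_freed_bytes;
      do {
        allocated = new_native_bytes_allocated.load();
        new_freed_bytes = std::min(allocated, bytes);
      } while (!new_native_bytes_allocated.compare_exchange_weak(
                   allocated, allocated - new_freed_bytes));
      if (new_freed_bytes < bytes) {
        old_native_bytes_allocated.fetch_sub(bytes - new_freed_bytes);
      }
    }

    void OnNonStickyGc() {
      // At each non-sticky GC, fold the per-GC counter into the running total
      // in one lock-free step; their sum is what the GC performance dump reports.
      old_native_bytes_allocated.fetch_add(new_native_bytes_allocated.exchange(0));
    }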
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 3a8e29b08..b2a414293 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -260,9 +260,9 @@ class Heap {
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void RegisterNativeAllocation(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
   void RegisterNativeFree(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
 
   // Change the allocator, updates entrypoints.
   void ChangeAllocator(AllocatorType allocator)
@@ -562,7 +562,7 @@ class Heap {
   space::Space* FindSpaceFromAddress(const void* ptr) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
+  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_);
 
   // Do a pending collector transition.
   void DoPendingCollectorTransition() REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
@@ -679,7 +679,7 @@ class Heap {
   // GC performance measuring
   void DumpGcPerformanceInfo(std::ostream& os)
-      REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
+      REQUIRES(!*gc_complete_lock_);
   void ResetGcPerformanceInfo() REQUIRES(!*gc_complete_lock_);
 
   // Thread pool.
@@ -979,10 +979,6 @@ class Heap {
   void PostGcVerificationPaused(collector::GarbageCollector* gc)
       REQUIRES(Locks::mutator_lock_, !*gc_complete_lock_);
 
-  // Update the watermark for the native allocated bytes based on the current number of native
-  // bytes allocated and the target utilization ratio.
-  void UpdateMaxNativeFootprint();
-
   // Find a collector based on GC type.
   collector::GarbageCollector* FindCollectorByGcType(collector::GcType gc_type);
 
@@ -1066,6 +1062,36 @@ class Heap {
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_);
 
+  collector::GcType NonStickyGcType() const {
+    return HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull;
+  }
+
+  // Helper function for RegisterNativeAllocation. Request a non-sticky GC for
+  // NativeAlloc if one isn't currently running. The GC is performed in place
+  // if the garbage collector is not concurrent.
+  void TriggerGcForNativeAlloc(Thread* self) REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+
+  // How large new_native_bytes_allocated_ can grow before we trigger a new
+  // GC.
+  ALWAYS_INLINE size_t NativeAllocationGcWatermark() const {
+    // Reuse max_free_ for the native allocation gc watermark, so that the
+    // native heap is treated in the same way as the Java heap in the case
+    // where the gc watermark update would exceed max_free_. Using max_free_
+    // instead of the target utilization means the watermark doesn't depend on
+    // the current number of registered native allocations.
+    return max_free_;
+  }
+
+  // How large new_native_bytes_allocated_ can grow while GC is in progress
+  // before we block the allocating thread to allow GC to catch up.
+  ALWAYS_INLINE size_t NativeAllocationBlockingGcWatermark() const {
+    // Historically the native allocations were bounded by growth_limit_. This
+    // uses that same value, dividing growth_limit_ by 2 to account for
+    // the fact that now the bound is relative to the number of retained
+    // registered native allocations rather than absolute.
+    return growth_limit_ / 2;
+  }
+
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_ GUARDED_BY(Locks::mutator_lock_);
 
@@ -1184,12 +1210,6 @@ class Heap {
   // a GC should be triggered.
   size_t max_allowed_footprint_;
 
-  // The watermark at which a concurrent GC is requested by registerNativeAllocation.
-  size_t native_footprint_gc_watermark_;
-
-  // Whether or not we need to run finalizers in the next native allocation.
-  bool native_need_to_run_finalization_;
-
   // When num_bytes_allocated_ exceeds this amount then a concurrent GC should be requested so that
   // it completes ahead of an allocation failing.
   size_t concurrent_start_bytes_;
@@ -1203,13 +1223,18 @@ class Heap {
   // Number of bytes allocated. Adjusted after each allocation and free.
   Atomic<size_t> num_bytes_allocated_;
 
-  // Bytes which are allocated and managed by native code but still need to be accounted for.
-  Atomic<size_t> native_bytes_allocated_;
-
-  // Native allocation stats.
-  Mutex native_histogram_lock_;
-  Histogram<uint64_t> native_allocation_histogram_;
-  Histogram<uint64_t> native_free_histogram_;
+  // Number of registered native bytes allocated since the last time GC was
+  // triggered. Adjusted after each RegisterNativeAllocation and
+  // RegisterNativeFree. Used to determine when to trigger GC for native
+  // allocations.
+  // See the REDESIGN section of go/understanding-register-native-allocation.
+  Atomic<size_t> new_native_bytes_allocated_;
+
+  // Number of registered native bytes allocated prior to the last time GC was
+  // triggered, for debugging purposes. The current number of registered
+  // native bytes is determined by taking the sum of
+  // old_native_bytes_allocated_ and new_native_bytes_allocated_.
+  Atomic<size_t> old_native_bytes_allocated_;
 
   // Number of bytes freed by thread local buffer revokes. This will
   // cancel out the ahead-of-time bulk counting of bytes allocated in
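NativeAllocationGcWatermark() and NativeAllocationBlockingGcWatermark() above are the whole trigger policy: a non-sticky GC is requested once new_native_bytes_allocated_ exceeds max_free_, and the registering thread only blocks and runs finalization once it exceeds growth_limit_ / 2. A worked example with purely hypothetical numbers (real values depend on the device's heap configuration, e.g. the dalvik.vm.heapmaxfree and dalvik.vm.heapgrowthlimit properties):

    // Hypothetical configuration, for illustration only.
    #include <cstddef>
    #include <iostream>

    int main() {
      size_t max_free = 8u * 1024 * 1024;        // stands in for max_free_
      size_t growth_limit = 256u * 1024 * 1024;  // stands in for growth_limit_

      size_t gc_watermark = max_free;                // NativeAllocationGcWatermark()
      size_t blocking_watermark = growth_limit / 2;  // NativeAllocationBlockingGcWatermark()

      // With these numbers, a burst of 10 MiB of registered native allocations
      // requests a background GC (10 MiB > 8 MiB); RegisterNativeAllocation only
      // blocks the caller if 128 MiB accumulate before a GC retires them.
      std::cout << "request GC above " << gc_watermark / (1024 * 1024) << " MiB, "
                << "block above " << blocking_watermark / (1024 * 1024) << " MiB\n";
      return 0;
    }

Unlike the removed UpdateMaxNativeFootprint() path, neither threshold depends on the current number of registered native bytes or on the target heap utilization, which is the "GCing apps to death near growth_limit_" behaviour the commit message says this redesign removes.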
diff --git a/test/004-NativeAllocations/src/Main.java b/test/004-NativeAllocations/src/Main.java
index 92f4e21f4..871275512 100644
--- a/test/004-NativeAllocations/src/Main.java
+++ b/test/004-NativeAllocations/src/Main.java
@@ -16,6 +16,7 @@
 
 import java.lang.reflect.*;
 import java.lang.Runtime;
+import dalvik.system.VMRuntime;
 
 public class Main {
     static Object nativeLock = new Object();
@@ -33,10 +34,19 @@ public class Main {
     NativeAllocation(int bytes, boolean testingDeadlock) throws Exception {
         this.bytes = bytes;
         register_native_allocation.invoke(runtime, bytes);
+
+        // Register native allocation can only provide guarantees bounding
+        // the maximum outstanding allocations if finalizers don't time
+        // out. In case finalizers have timed out, wait longer for them
+        // now to complete so we can test the guarantees.
+        if (!testingDeadlock) {
+            VMRuntime.runFinalization(0);
+        }
+
         synchronized (nativeLock) {
             if (!testingDeadlock) {
                 nativeBytes += bytes;
-                if (nativeBytes > maxMem) {
+                if (nativeBytes > 2 * maxMem) {
                     throw new OutOfMemoryError();
                 }
             }
-- 
2.11.0