From 36bdbd2bf2ee36662f700b63474b546a7abecfa3 Mon Sep 17 00:00:00 2001
From: Richard Uhler
Date: Tue, 24 Jan 2017 14:17:05 +0000
Subject: [PATCH] Revert "Redesign implementation of RegisterNativeAllocation."

This reverts commit 6bfc37d875b2556b3f1d95b00a785a09dc94ccb0.

Test failures in 004-NativeAllocations.

Bug: 29156652
Bug: 32576211
Change-Id: Ic54c67caf913024485a5c0621857d68a6fb710fa
---
 runtime/atomic.h                         |   9 --
 runtime/gc/heap.cc                       | 151 ++++++++++++++++++++-----------
 runtime/gc/heap.h                        |  67 +++++---------
 test/004-NativeAllocations/src/Main.java |  12 +--
 4 files changed, 120 insertions(+), 119 deletions(-)

diff --git a/runtime/atomic.h b/runtime/atomic.h
index 45c3165b1..e2a725978 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -235,11 +235,6 @@ class PACKED(sizeof(T)) Atomic : public std::atomic<T> {
     this->store(desired, std::memory_order_seq_cst);
   }
 
-  // Atomically replace the value with desired value.
-  T ExchangeRelaxed(T desired_value) {
-    return this->exchange(desired_value, std::memory_order_relaxed);
-  }
-
   // Atomically replace the value with desired value if it matches the expected value.
   // Participates in total ordering of atomic operations.
   bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
@@ -288,10 +283,6 @@ class PACKED(sizeof(T)) Atomic : public std::atomic<T> {
     return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
   }
 
-  T FetchAndSubRelaxed(const T value) {
-    return this->fetch_sub(value, std::memory_order_relaxed);  // Return old value.
-  }
-
   T FetchAndOrSequentiallyConsistent(const T value) {
     return this->fetch_or(value, std::memory_order_seq_cst);  // Return old_value.
   }
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 522e0bfcb..34d82845d 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -127,6 +127,8 @@ static constexpr uint32_t kAllocSpaceBeginForDeterministicAoT = 0x40000000;
 // Dump the rosalloc stats on SIGQUIT.
 static constexpr bool kDumpRosAllocStatsOnSigQuit = false;
 
+static constexpr size_t kNativeAllocationHistogramBuckets = 16;
+
 // Extra added to the heap growth multiplier. Used to adjust the GC ergonomics for the read barrier
 // config.
 static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 1.0 : 0.0;
@@ -192,12 +194,18 @@ Heap::Heap(size_t initial_size,
       capacity_(capacity),
       growth_limit_(growth_limit),
       max_allowed_footprint_(initial_size),
+      native_footprint_gc_watermark_(initial_size),
+      native_need_to_run_finalization_(false),
       concurrent_start_bytes_(std::numeric_limits<size_t>::max()),
       total_bytes_freed_ever_(0),
       total_objects_freed_ever_(0),
       num_bytes_allocated_(0),
-      new_native_bytes_allocated_(0),
-      old_native_bytes_allocated_(0),
+      native_bytes_allocated_(0),
+      native_histogram_lock_("Native allocation lock"),
+      native_allocation_histogram_("Native allocation sizes",
+                                   1U,
+                                   kNativeAllocationHistogramBuckets),
+      native_free_histogram_("Native free sizes", 1U, kNativeAllocationHistogramBuckets),
       num_bytes_freed_revoke_(0),
       verify_missing_card_marks_(false),
       verify_system_weaks_(false),
@@ -1093,9 +1101,19 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) {
     rosalloc_space_->DumpStats(os);
   }
 
-  os << "Registered native bytes allocated: "
-     << old_native_bytes_allocated_.LoadRelaxed() + new_native_bytes_allocated_.LoadRelaxed()
-     << "\n";
+  {
+    MutexLock mu(Thread::Current(), native_histogram_lock_);
+    if (native_allocation_histogram_.SampleSize() > 0u) {
+      os << "Histogram of native allocation ";
+      native_allocation_histogram_.DumpBins(os);
+      os << " bucket size " << native_allocation_histogram_.BucketWidth() << "\n";
+    }
+    if (native_free_histogram_.SampleSize() > 0u) {
+      os << "Histogram of native free ";
+      native_free_histogram_.DumpBins(os);
+      os << " bucket size " << native_free_histogram_.BucketWidth() << "\n";
+    }
+  }
 
   BaseMutex::DumpAll(os);
 }
@@ -2627,13 +2645,6 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type,
   // Approximate heap size.
   ATRACE_INT("Heap size (KB)", bytes_allocated_before_gc / KB);
 
-  if (gc_type == NonStickyGcType()) {
-    // Move all bytes from new_native_bytes_allocated_ to
-    // old_native_bytes_allocated_ now that GC has been triggered, resetting
-    // new_native_bytes_allocated_ to zero in the process.
-    old_native_bytes_allocated_.FetchAndAddRelaxed(new_native_bytes_allocated_.ExchangeRelaxed(0));
-  }
-
   DCHECK_LT(gc_type, collector::kGcTypeMax);
   DCHECK_NE(gc_type, collector::kGcTypeNone);
 
@@ -3493,6 +3504,18 @@ bool Heap::IsMovableObject(ObjPtr<mirror::Object> obj) const {
   return false;
 }
 
+void Heap::UpdateMaxNativeFootprint() {
+  size_t native_size = native_bytes_allocated_.LoadRelaxed();
+  // TODO: Tune the native heap utilization to be a value other than the java heap utilization.
+  size_t target_size = native_size / GetTargetHeapUtilization();
+  if (target_size > native_size + max_free_) {
+    target_size = native_size + max_free_;
+  } else if (target_size < native_size + min_free_) {
+    target_size = native_size + min_free_;
+  }
+  native_footprint_gc_watermark_ = std::min(growth_limit_, target_size);
+}
+
 collector::GarbageCollector* Heap::FindCollectorByGcType(collector::GcType gc_type) {
   for (const auto& collector : garbage_collectors_) {
     if (collector->GetCollectorType() == collector_type_ &&
@@ -3529,9 +3552,11 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran,
     target_size = bytes_allocated + delta * multiplier;
     target_size = std::min(target_size, bytes_allocated + adjusted_max_free);
     target_size = std::max(target_size, bytes_allocated + adjusted_min_free);
+    native_need_to_run_finalization_ = true;
     next_gc_type_ = collector::kGcTypeSticky;
   } else {
-    collector::GcType non_sticky_gc_type = NonStickyGcType();
+    collector::GcType non_sticky_gc_type =
+        HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull;
     // Find what the next non sticky collector will be.
     collector::GarbageCollector* non_sticky_collector = FindCollectorByGcType(non_sticky_gc_type);
     // If the throughput of the current sticky GC >= throughput of the non sticky collector, then
@@ -3682,7 +3707,7 @@ void Heap::ConcurrentGC(Thread* self, bool force_full) {
       collector::GcType next_gc_type = next_gc_type_;
       // If forcing full and next gc type is sticky, override with a non-sticky type.
       if (force_full && next_gc_type == collector::kGcTypeSticky) {
-        next_gc_type = NonStickyGcType();
+        next_gc_type = HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull;
       }
       if (CollectGarbageInternal(next_gc_type, kGcCauseBackground, false) ==
           collector::kGcTypeNone) {
@@ -3838,51 +3863,71 @@ void Heap::RunFinalization(JNIEnv* env, uint64_t timeout) {
                             static_cast<jlong>(timeout));
 }
 
-void Heap::TriggerGcForNativeAlloc(Thread* self) {
-  if (!IsGCRequestPending()) {
-    if (IsGcConcurrent()) {
-      RequestConcurrentGC(self, /*force_full*/true);
-    } else {
-      CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false);
-    }
-  }
-}
-
 void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) {
-  // See the REDESIGN section of go/understanding-register-native-allocation
-  // for an explanation of how RegisterNativeAllocation works.
-  size_t new_value = bytes + new_native_bytes_allocated_.FetchAndAddRelaxed(bytes);
-  if (new_value > NativeAllocationBlockingGcWatermark()) {
-    // Wait for a new GC to finish and finalizers to run, because the
-    // allocation rate is too high.
-    Thread* self = ThreadForEnv(env);
-    WaitForGcToComplete(kGcCauseForNativeAlloc, self);
-    TriggerGcForNativeAlloc(self);
-    WaitForGcToComplete(kGcCauseForNativeAlloc, self);
+  Thread* self = ThreadForEnv(env);
+  {
+    MutexLock mu(self, native_histogram_lock_);
+    native_allocation_histogram_.AddValue(bytes);
+  }
+  if (native_need_to_run_finalization_) {
     RunFinalization(env, kNativeAllocationFinalizeTimeout);
-    CHECK(!env->ExceptionCheck());
-  } else if (new_value > NativeAllocationGcWatermark() && !IsGCRequestPending()) {
-    // Trigger another GC because there have been enough native bytes
-    // allocated since the last GC.
-    TriggerGcForNativeAlloc(ThreadForEnv(env));
+    UpdateMaxNativeFootprint();
+    native_need_to_run_finalization_ = false;
+  }
+  // Total number of native bytes allocated.
+  size_t new_native_bytes_allocated = native_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes);
+  new_native_bytes_allocated += bytes;
+  if (new_native_bytes_allocated > native_footprint_gc_watermark_) {
+    collector::GcType gc_type = HasZygoteSpace() ? collector::kGcTypePartial :
+        collector::kGcTypeFull;
+
+    // The second watermark is higher than the gc watermark. If you hit this it means you are
+    // allocating native objects faster than the GC can keep up with.
+    if (new_native_bytes_allocated > growth_limit_) {
+      if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) != collector::kGcTypeNone) {
+        // Just finished a GC, attempt to run finalizers.
+        RunFinalization(env, kNativeAllocationFinalizeTimeout);
+        CHECK(!env->ExceptionCheck());
+        // Native bytes allocated may be updated by finalization, refresh it.
+        new_native_bytes_allocated = native_bytes_allocated_.LoadRelaxed();
+      }
+      // If we still are over the watermark, attempt a GC for alloc and run finalizers.
+      if (new_native_bytes_allocated > growth_limit_) {
+        CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
+        RunFinalization(env, kNativeAllocationFinalizeTimeout);
+        native_need_to_run_finalization_ = false;
+        CHECK(!env->ExceptionCheck());
+      }
+      // We have just run finalizers, update the native watermark since it is very likely that
+      // finalizers released native managed allocations.
+      UpdateMaxNativeFootprint();
+    } else if (!IsGCRequestPending()) {
+      if (IsGcConcurrent()) {
+        RequestConcurrentGC(self, true);  // Request non-sticky type.
+      } else {
+        CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
+      }
+    }
   }
 }
 
-void Heap::RegisterNativeFree(JNIEnv*, size_t bytes) {
-  // Take the bytes freed out of new_native_bytes_allocated_ first. If
-  // new_native_bytes_allocated_ reaches zero, take the remaining bytes freed
-  // out of old_native_bytes_allocated_ to ensure all freed bytes are
-  // accounted for.
-  size_t allocated;
-  size_t new_freed_bytes;
-  do {
-    allocated = new_native_bytes_allocated_.LoadRelaxed();
-    new_freed_bytes = std::min(allocated, bytes);
-  } while (!new_native_bytes_allocated_.CompareExchangeWeakRelaxed(allocated,
-                                                                   allocated - new_freed_bytes));
-  if (new_freed_bytes < bytes) {
-    old_native_bytes_allocated_.FetchAndSubRelaxed(bytes - new_freed_bytes);
+void Heap::RegisterNativeFree(JNIEnv* env, size_t bytes) {
+  size_t expected_size;
+  {
+    MutexLock mu(Thread::Current(), native_histogram_lock_);
+    native_free_histogram_.AddValue(bytes);
   }
+  do {
+    expected_size = native_bytes_allocated_.LoadRelaxed();
+    if (UNLIKELY(bytes > expected_size)) {
+      ScopedObjectAccess soa(env);
+      env->ThrowNew(WellKnownClasses::java_lang_RuntimeException,
+                    StringPrintf("Attempted to free %zd native bytes with only %zd native bytes "
+                                 "registered as allocated", bytes, expected_size).c_str());
+      break;
+    }
+  } while (!native_bytes_allocated_.CompareExchangeWeakRelaxed(expected_size,
+                                                               expected_size - bytes));
 }
 
 size_t Heap::GetTotalMemory() const {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index b2a414293..3a8e29b08 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -260,9 +260,9 @@ class Heap {
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void RegisterNativeAllocation(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
   void RegisterNativeFree(JNIEnv* env, size_t bytes)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
+      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !native_histogram_lock_);
 
   // Change the allocator, updates entrypoints.
   void ChangeAllocator(AllocatorType allocator)
@@ -562,7 +562,7 @@ class Heap {
   space::Space* FindSpaceFromAddress(const void* ptr) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_);
+  void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
 
   // Do a pending collector transition.
   void DoPendingCollectorTransition() REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
@@ -679,7 +679,7 @@ class Heap {
 
   // GC performance measuring
   void DumpGcPerformanceInfo(std::ostream& os)
-      REQUIRES(!*gc_complete_lock_);
+      REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
   void ResetGcPerformanceInfo() REQUIRES(!*gc_complete_lock_);
 
   // Thread pool.
@@ -979,6 +979,10 @@ class Heap {
   void PostGcVerificationPaused(collector::GarbageCollector* gc)
      REQUIRES(Locks::mutator_lock_, !*gc_complete_lock_);
 
+  // Update the watermark for the native allocated bytes based on the current number of native
+  // bytes allocated and the target utilization ratio.
+  void UpdateMaxNativeFootprint();
+
   // Find a collector based on GC type.
   collector::GarbageCollector* FindCollectorByGcType(collector::GcType gc_type);
 
@@ -1062,36 +1066,6 @@ class Heap {
       REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_);
 
-  collector::GcType NonStickyGcType() const {
-    return HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull;
-  }
-
-  // Helper function for RegisterNativeAllocation. Request a non-sticky GC for
-  // NativeAlloc if one isn't currently running. The GC is performed in place
-  // if the garbage collector is not concurrent.
-  void TriggerGcForNativeAlloc(Thread* self) REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
-
-  // How large new_native_bytes_allocated_ can grow before we trigger a new
-  // GC.
-  ALWAYS_INLINE size_t NativeAllocationGcWatermark() const {
-    // Reuse max_free_ for the native allocation gc watermark, so that the
-    // native heap is treated in the same way as the Java heap in the case
-    // where the gc watermark update would exceed max_free_. Using max_free_
-    // instead of the target utilization means the watermark doesn't depend on
-    // the current number of registered native allocations.
-    return max_free_;
-  }
-
-  // How large new_native_bytes_allocated_ can grow while GC is in progress
-  // before we block the allocating thread to allow GC to catch up.
-  ALWAYS_INLINE size_t NativeAllocationBlockingGcWatermark() const {
-    // Historically the native allocations were bounded by growth_limit_. This
-    // uses that same value, dividing growth_limit_ by 2 to account for
-    // the fact that now the bound is relative to the number of retained
-    // registered native allocations rather than absolute.
-    return growth_limit_ / 2;
-  }
-
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_ GUARDED_BY(Locks::mutator_lock_);
 
@@ -1210,6 +1184,12 @@ class Heap {
   // a GC should be triggered.
   size_t max_allowed_footprint_;
 
+  // The watermark at which a concurrent GC is requested by registerNativeAllocation.
+  size_t native_footprint_gc_watermark_;
+
+  // Whether or not we need to run finalizers in the next native allocation.
+  bool native_need_to_run_finalization_;
+
   // When num_bytes_allocated_ exceeds this amount then a concurrent GC should be requested so that
   // it completes ahead of an allocation failing.
   size_t concurrent_start_bytes_;
@@ -1223,18 +1203,13 @@ class Heap {
   // Number of bytes allocated. Adjusted after each allocation and free.
   Atomic<size_t> num_bytes_allocated_;
 
-  // Number of registered native bytes allocated since the last time GC was
-  // triggered. Adjusted after each RegisterNativeAllocation and
-  // RegisterNativeFree. Used to determine when to trigger GC for native
-  // allocations.
-  // See the REDESIGN section of go/understanding-register-native-allocation.
-  Atomic<size_t> new_native_bytes_allocated_;
-
-  // Number of registered native bytes allocated prior to the last time GC was
-  // triggered, for debugging purposes. The current number of registered
-  // native bytes is determined by taking the sum of
-  // old_native_bytes_allocated_ and new_native_bytes_allocated_.
-  Atomic<size_t> old_native_bytes_allocated_;
+  // Bytes which are allocated and managed by native code but still need to be accounted for.
+  Atomic<size_t> native_bytes_allocated_;
+
+  // Native allocation stats.
+  Mutex native_histogram_lock_;
+  Histogram<uint64_t> native_allocation_histogram_;
+  Histogram<uint64_t> native_free_histogram_;
 
   // Number of bytes freed by thread local buffer revokes. This will
   // cancel out the ahead-of-time bulk counting of bytes allocated in
diff --git a/test/004-NativeAllocations/src/Main.java b/test/004-NativeAllocations/src/Main.java
index 871275512..92f4e21f4 100644
--- a/test/004-NativeAllocations/src/Main.java
+++ b/test/004-NativeAllocations/src/Main.java
@@ -16,7 +16,6 @@
 
 import java.lang.reflect.*;
 import java.lang.Runtime;
-import dalvik.system.VMRuntime;
 
 public class Main {
     static Object nativeLock = new Object();
@@ -34,19 +33,10 @@ public class Main {
     NativeAllocation(int bytes, boolean testingDeadlock) throws Exception {
         this.bytes = bytes;
         register_native_allocation.invoke(runtime, bytes);
-
-        // Register native allocation can only provide guarantees bounding
-        // the maximum outstanding allocations if finalizers don't time
-        // out. In case finalizers have timed out, wait longer for them
-        // now to complete so we can test the guarantees.
-        if (!testingDeadlock) {
-            VMRuntime.runFinalization(0);
-        }
-
         synchronized (nativeLock) {
             if (!testingDeadlock) {
                 nativeBytes += bytes;
-                if (nativeBytes > 2 * maxMem) {
+                if (nativeBytes > maxMem) {
                     throw new OutOfMemoryError();
                 }
             }
-- 
2.11.0