From 21328a15e15005815efc843e774ac6974e94d4d8 Mon Sep 17 00:00:00 2001
From: Mathieu Chartier
Date: Fri, 22 Jul 2016 10:47:45 -0700
Subject: [PATCH] Improve CC handling for immune objects

Currently we reduce RAM for immune objects by racing against the
mutators to try to finish processing them before the mutators change
many objects to gray. However, there is still a window of time where
a mutator can dirty immune pages by changing lock words to gray.
These pages remain dirty for the lifetime of the app.

This CL uses the FlipCallback pause to gray all of the immune objects
that have a dirty card. Once these objects are all gray, we don't need
to gray any more objects in the immune spaces, since these objects are
the only ones that may reference non-immune objects.

Also, only scan objects that are gray when scanning immune spaces to
reduce scanning time.

System-wide PSS after boot on N9, before:
  61668 kB: .art mmap
  11249 kB: .Zygote

After:
  36013 kB: .art mmap
  12251 kB: .Zygote

The results are better than shown since more apps were running when
the "after" measurements were taken.

Maps PSS / Private Dirty, before:
  .art mmap  3703  3116
  .Zygote     577   480
After:
  .art mmap  1655  1092
  .Zygote     476   392

System server, before:
  .art mmap  4453  3956
  .Zygote     849   780
After:
  .art mmap  2326  1748
  .Zygote     640   564

EAAC:
Before: ScanImmuneSpaces takes 669.434ms GC time
  Scores: 718, 761, 753, average 744
  GC time: 4.2s, 4.35s, 4.3s, average 4.28s
After: ScanImmuneSpaces takes 138.328ms GC time
  Scores: 731, 730, 704, average 722
  GC time: 3.92s, 3.83s, 3.85s, average 3.87s

Additional GC pause time is 285us on Maps on N9.
TODO: Reduce this pause time.

Test: N9 booting, test-art-host, EAAC, all run with CC
Bug: 29516968
Bug: 12687968

Change-Id: I584b10d017547b321f33eb23fb5d64372af6f69c
---
 runtime/gc/accounting/mod_union_table.cc   |  53 +++++++++
 runtime/gc/accounting/mod_union_table.h    |  14 +++
 runtime/gc/collector/concurrent_copying.cc | 176 +++++++++++++++++++++++++++--
 runtime/gc/collector/concurrent_copying.h  |   8 ++
 4 files changed, 241 insertions(+), 10 deletions(-)

diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 4e40aea58..4e83913ad 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -318,6 +318,18 @@ class ModUnionCheckReferences {
   const std::set<mirror::HeapReference<mirror::Object>*>& references_;
 };
 
+class EmptyMarkObjectVisitor : public MarkObjectVisitor {
+ public:
+  mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE {return obj;}
+  void MarkHeapReference(mirror::HeapReference<mirror::Object>*) OVERRIDE {}
+};
+
+void ModUnionTable::FilterCards() {
+  EmptyMarkObjectVisitor visitor;
+  // Use empty visitor since filtering is automatically done by UpdateAndMarkReferences.
+  UpdateAndMarkReferences(&visitor);
+}
+
 void ModUnionTableReferenceCache::Verify() {
   // Start by checking that everything in the mod union table is marked.
   for (const auto& ref_pair : references_) {
@@ -364,6 +376,31 @@ void ModUnionTableReferenceCache::Dump(std::ostream& os) {
   }
 }
 
+void ModUnionTableReferenceCache::VisitObjects(ObjectCallback* callback, void* arg) {
+  CardTable* const card_table = heap_->GetCardTable();
+  ContinuousSpaceBitmap* live_bitmap = space_->GetLiveBitmap();
+  for (uint8_t* card : cleared_cards_) {
+    uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
+    uintptr_t end = start + CardTable::kCardSize;
+    live_bitmap->VisitMarkedRange(start,
+                                  end,
+                                  [this, callback, arg](mirror::Object* obj) {
+      callback(obj, arg);
+    });
+  }
+  // This may visit the same card twice, TODO: avoid this.
+  for (const auto& pair : references_) {
+    const uint8_t* card = pair.first;
+    uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
+    uintptr_t end = start + CardTable::kCardSize;
+    live_bitmap->VisitMarkedRange(start,
+                                  end,
+                                  [this, callback, arg](mirror::Object* obj) {
+      callback(obj, arg);
+    });
+  }
+}
+
 void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
   CardTable* const card_table = heap_->GetCardTable();
   std::vector<mirror::HeapReference<mirror::Object>*> cards_references;
@@ -502,6 +539,22 @@ void ModUnionTableCardCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor)
       0, RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize, bit_visitor);
 }
 
+void ModUnionTableCardCache::VisitObjects(ObjectCallback* callback, void* arg) {
+  card_bitmap_->VisitSetBits(
+      0,
+      RoundUp(space_->Size(), CardTable::kCardSize) / CardTable::kCardSize,
+      [this, callback, arg](size_t bit_index) {
+        const uintptr_t start = card_bitmap_->AddrFromBitIndex(bit_index);
+        DCHECK(space_->HasAddress(reinterpret_cast<mirror::Object*>(start)))
+            << start << " " << *space_;
+        space_->GetLiveBitmap()->VisitMarkedRange(start,
+                                                  start + CardTable::kCardSize,
+                                                  [this, callback, arg](mirror::Object* obj) {
+          callback(obj, arg);
+        });
+      });
+}
+
 void ModUnionTableCardCache::Dump(std::ostream& os) {
   os << "ModUnionTable dirty cards: [";
   // TODO: Find cleaner way of doing this.
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index a7a4246fc..bf665c50d 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -68,6 +68,9 @@ class ModUnionTable {
   // spaces which are stored in the mod-union table.
   virtual void UpdateAndMarkReferences(MarkObjectVisitor* visitor) = 0;
 
+  // Visit all of the objects that may contain references to other spaces.
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) = 0;
+
   // Verification, sanity checks that we don't have clean cards which conflict with out cached data
   // for said cards. Exclusive lock is required since verify sometimes uses
   // SpaceBitmap::VisitMarkedRange and VisitMarkedRange can't know if the callback will modify the
@@ -78,6 +81,9 @@ class ModUnionTable {
   // doesn't need to be aligned.
   virtual bool ContainsCardFor(uintptr_t addr) = 0;
 
+  // Filter out cards that don't need to be marked. Automatically done with UpdateAndMarkReferences.
+  void FilterCards();
+
   virtual void Dump(std::ostream& os) = 0;
 
   space::ContinuousSpace* GetSpace() {
@@ -115,6 +121,10 @@ class ModUnionTableReferenceCache : public ModUnionTable {
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(Locks::heap_bitmap_lock_);
 
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Exclusive lock is required since verify uses SpaceBitmap::VisitMarkedRange and
   // VisitMarkedRange can't know if the callback will modify the bitmap or not.
   void Verify() OVERRIDE
@@ -156,6 +166,10 @@ class ModUnionTableCardCache : public ModUnionTable {
       REQUIRES(Locks::heap_bitmap_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  virtual void VisitObjects(ObjectCallback* callback, void* arg) OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Nothing to verify.
   virtual void Verify() OVERRIDE {}
 
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d2d2f234a..d413a5053 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -22,6 +22,7 @@
 #include "base/systrace.h"
 #include "debugger.h"
 #include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/mod_union_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/reference_processor.h"
 #include "gc/space/image_space.h"
@@ -40,6 +41,12 @@ namespace gc {
 namespace collector {
 
 static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
+// If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
+// pages.
+static constexpr bool kGrayDirtyImmuneObjects = true;
+// If kFilterModUnionCards is true then we attempt to filter out cards that don't need to be dirty
+// in the mod union table. Disabled since it does not seem to help the pause much.
+static constexpr bool kFilterModUnionCards = kIsDebugBuild;
 
 ConcurrentCopying::ConcurrentCopying(Heap* heap,
                                      const std::string& name_prefix,
@@ -315,12 +322,87 @@ class ConcurrentCopying::FlipCallback : public Closure {
       TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
       Runtime::Current()->VisitTransactionRoots(cc);
     }
+    if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      cc->GrayAllDirtyImmuneObjects();
+      if (kIsDebugBuild) {
+        // Check that all non-gray immune objects only reference immune objects.
+        cc->VerifyGrayImmuneObjects();
+      }
+    }
   }
 
  private:
   ConcurrentCopying* const concurrent_copying_;
 };
 
+class ConcurrentCopying::VerifyGrayImmuneObjectsVisitor {
+ public:
+  explicit VerifyGrayImmuneObjectsVisitor(ConcurrentCopying* collector)
+      : collector_(collector) {}
+
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */)
+      const ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_)
+      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
+    CheckReference(obj->GetFieldObject<mirror::Object>(offset),
+                   obj, offset);
+  }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_REQUIRES(Locks::mutator_lock_) ALWAYS_INLINE {
+    CHECK(klass->IsTypeOfReferenceClass());
+    CheckReference(ref->GetReferent(),
+                   ref,
+                   mirror::Reference::ReferentOffset());
+  }
+
+  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+      ALWAYS_INLINE
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    CheckReference(root->AsMirrorPtr(), nullptr, MemberOffset(0));
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+
+  void CheckReference(mirror::Object* ref, mirror::Object* holder, MemberOffset offset) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (ref != nullptr) {
+      CHECK(collector_->immune_spaces_.ContainsObject(ref))
+          << "Non gray object references non immune object " << ref << " " << PrettyTypeOf(ref)
+          << " in holder " << holder << " " << PrettyTypeOf(holder) << " offset="
+          << offset.Uint32Value();
+    }
+  }
+};
+
+void ConcurrentCopying::VerifyGrayImmuneObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  for (auto& space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    VerifyGrayImmuneObjectsVisitor visitor(this);
+    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                  reinterpret_cast<uintptr_t>(space->Limit()),
+                                  [&visitor](mirror::Object* obj)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
+      // If an object is not gray, it should only have references to things in the immune spaces.
+      if (obj->GetReadBarrierPointer() != ReadBarrier::GrayPtr()) {
+        obj->VisitReferences(visitor, visitor);
+      }
+    });
+  }
+}
+
 // Switch threads that from from-space to to-space refs. Forward/mark the thread roots.
 void ConcurrentCopying::FlipThreadRoots() {
   TimingLogger::ScopedTiming split("FlipThreadRoots", GetTimings());
@@ -350,6 +432,52 @@ void ConcurrentCopying::FlipThreadRoots() {
   }
 }
 
+class ConcurrentCopying::GrayImmuneObjectVisitor {
+ public:
+  explicit GrayImmuneObjectVisitor() {}
+
+  ALWAYS_INLINE void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (kUseBakerReadBarrier) {
+      if (kIsDebugBuild) {
+        Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+      }
+      obj->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+    }
+  }
+
+  static void Callback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
+    reinterpret_cast<GrayImmuneObjectVisitor*>(arg)->operator()(obj);
+  }
+};
+
+void ConcurrentCopying::GrayAllDirtyImmuneObjects() {
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  accounting::CardTable* const card_table = heap->GetCardTable();
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    GrayImmuneObjectVisitor visitor;
+    accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+    // Mark all the objects on dirty cards since these may point to objects in other spaces.
+    // Once these are marked, the GC will eventually clear them later.
+    // The table is non-null for boot image and zygote spaces. It is only null for application
+    // image spaces.
+    if (table != nullptr) {
+      // TODO: Add preclean outside the pause.
+      table->ClearCards();
+      table->VisitObjects(GrayImmuneObjectVisitor::Callback, &visitor);
+    } else {
+      // TODO: Consider having a mark bitmap for app image spaces and avoid scanning during the
+      // pause because app image spaces are all dirty pages anyway.
+      card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor);
+    }
+  }
+  // Since all of the objects that may point to other spaces are marked, we can avoid all the read
+  // barriers in the immune spaces.
+  updated_all_immune_objects_.StoreRelaxed(true);
+}
+
 void ConcurrentCopying::SwapStacks() {
   heap_->SwapStacks();
 }
@@ -393,7 +521,17 @@ class ConcurrentCopying::ImmuneSpaceScanObjVisitor {
       : collector_(cc) {}
 
   void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    collector_->ScanImmuneObject(obj);
+    if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
+      if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+        collector_->ScanImmuneObject(obj);
+        // Done scanning the object, go back to white.
+        bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                        ReadBarrier::WhitePtr());
+        CHECK(success);
+      }
+    } else {
+      collector_->ScanImmuneObject(obj);
+    }
   }
 
  private:
@@ -415,13 +553,16 @@ void ConcurrentCopying::MarkingPhase() {
   if (kUseBakerReadBarrier) {
     gc_grays_immune_objects_ = false;
   }
-  for (auto& space : immune_spaces_.GetSpaces()) {
-    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-    ImmuneSpaceScanObjVisitor visitor(this);
-    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                  reinterpret_cast<uintptr_t>(space->Limit()),
-                                  visitor);
+  {
+    TimingLogger::ScopedTiming split2("ScanImmuneSpaces", GetTimings());
+    for (auto& space : immune_spaces_.GetSpaces()) {
+      DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      ImmuneSpaceScanObjVisitor visitor(this);
+      live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                    reinterpret_cast<uintptr_t>(space->Limit()),
+                                    visitor);
+    }
   }
   if (kUseBakerReadBarrier) {
     // This release fence makes the field updates in the above loop visible before allowing mutator
@@ -2052,8 +2193,23 @@ void ConcurrentCopying::FinishPhase() {
   }
   {
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
-    WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
-    heap_->ClearMarkedObjects();
+    {
+      WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+      heap_->ClearMarkedObjects();
+    }
+    if (kUseBakerReadBarrier && kFilterModUnionCards) {
+      TimingLogger::ScopedTiming split("FilterModUnionCards", GetTimings());
+      ReaderMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+      gc::Heap* const heap = Runtime::Current()->GetHeap();
+      for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+        DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+        accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+        // Filter out cards that don't need to be set.
+        if (table != nullptr) {
+          table->FilterCards();
+        }
+      }
+    }
   }
   if (measure_read_barrier_slow_path_) {
     MutexLock mu(self, rb_slow_path_histogram_lock_);
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 6a8d052cb..32b05fa1f 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -152,6 +152,12 @@ class ConcurrentCopying : public GarbageCollector {
   bool ProcessMarkStackOnce() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void ProcessMarkStackRef(mirror::Object* to_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
+  void GrayAllDirtyImmuneObjects()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
+  void VerifyGrayImmuneObjects()
+      REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_);
   size_t ProcessThreadLocalMarkStacks(bool disable_weak_ref_access)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void RevokeThreadLocalMarkStacks(bool disable_weak_ref_access)
@@ -294,12 +300,14 @@ class ConcurrentCopying : public GarbageCollector {
   class ComputeUnevacFromSpaceLiveRatioVisitor;
   class DisableMarkingCheckpoint;
   class FlipCallback;
+  class GrayImmuneObjectVisitor;
   class ImmuneSpaceScanObjVisitor;
   class LostCopyVisitor;
   class RefFieldsVisitor;
   class RevokeThreadLocalMarkStackCheckpoint;
   class ScopedGcGraysImmuneObjects;
   class ThreadFlipVisitor;
+  class VerifyGrayImmuneObjectsVisitor;
   class VerifyNoFromSpaceRefsFieldVisitor;
   class VerifyNoFromSpaceRefsObjectVisitor;
   class VerifyNoFromSpaceRefsVisitor;
-- 
2.11.0
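
For readers who want the idea behind GrayAllDirtyImmuneObjects() and ImmuneSpaceScanObjVisitor in isolation, here is a minimal, self-contained C++ sketch. It is not ART code and not part of the patch: ToySpace, ToyObject, the byte-per-card array, and kCardSize are invented stand-ins, whereas the real collector works on object lock words, accounting::CardTable, and mod-union tables. The sketch only models the flow described in the commit message: a mutator write dirties a card; during the pause, only objects on dirty cards are turned gray; the concurrent phase then scans gray objects and atomically flips them back to white, leaving clean immune pages untouched.

// Illustrative-only sketch (hypothetical types, not ART APIs).
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

constexpr size_t kCardSize = 128;  // Bytes of heap covered by one card (toy value).

struct ToyObject {
  std::atomic<bool> gray{false};   // Stands in for the Baker read-barrier state in the lock word.
  std::vector<size_t> references;  // Indices of referenced objects (toy representation).
};

class ToySpace {
 public:
  explicit ToySpace(size_t num_objects)
      : objects_(num_objects),
        cards_((num_objects + kObjectsPerCard - 1) / kObjectsPerCard, 0) {}

  // A mutator write dirties the card covering the written object.
  void WriteReference(size_t holder, size_t target) {
    objects_[holder].references.push_back(target);
    cards_[holder / kObjectsPerCard] = 1;  // Dirty the card.
  }

  // Pause work: gray every object that lives on a dirty card, then clear the card.
  // Afterwards only gray objects can reference anything outside the space, so the
  // concurrent phase scans gray objects only and never touches clean pages.
  size_t GrayAllDirtyObjectsInPause() {
    size_t grayed = 0;
    for (size_t card = 0; card < cards_.size(); ++card) {
      if (cards_[card] == 0) {
        continue;  // Clean card: its objects cannot point outside the space.
      }
      const size_t begin = card * kObjectsPerCard;
      const size_t end = std::min(begin + kObjectsPerCard, objects_.size());
      for (size_t i = begin; i < end; ++i) {
        objects_[i].gray.store(true, std::memory_order_relaxed);
        ++grayed;
      }
      cards_[card] = 0;
    }
    return grayed;
  }

  // Concurrent scan: visit gray objects only, then flip them back to white with an
  // atomic exchange, loosely mirroring the checked gray-to-white transition that the
  // patch adds to ImmuneSpaceScanObjVisitor.
  size_t ScanGrayObjects() {
    size_t scanned = 0;
    for (ToyObject& obj : objects_) {
      bool expected = true;
      if (obj.gray.compare_exchange_strong(expected, false)) {
        ++scanned;  // A real collector would trace obj.references here.
      }
    }
    return scanned;
  }

 private:
  static constexpr size_t kObjectsPerCard = kCardSize / sizeof(void*);
  std::vector<ToyObject> objects_;
  std::vector<uint8_t> cards_;  // 1 = dirty, 0 = clean.
};

int main() {
  ToySpace space(1024);
  space.WriteReference(/*holder=*/3, /*target=*/900);   // Dirties one card.
  space.WriteReference(/*holder=*/700, /*target=*/10);  // Dirties another card.
  std::cout << "grayed in pause: " << space.GrayAllDirtyObjectsInPause() << "\n";
  std::cout << "scanned concurrently: " << space.ScanGrayObjects() << "\n";
  return 0;
}

The design point the sketch tries to make is the one the numbers in the commit message support: the pause only pays for objects on dirty cards, and everything on a clean card stays white and is skipped by the concurrent scan, which is why both the dirty .art mmap pages and the ScanImmuneSpaces time drop.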