Implement TLAB fast paths in artAllocObjectFromCode.
author     Hiroshi Yamauchi <yamauchi@google.com>
           Wed, 6 Aug 2014 19:41:15 +0000 (12:41 -0700)
committer  Hiroshi Yamauchi <yamauchi@google.com>
           Wed, 6 Aug 2014 19:41:15 +0000 (12:41 -0700)
GSS/TLAB GC speedup on N4 (ms):

MemAllocTest 2963 -> 2792
BinaryTrees  2205 -> 2113

Also, measured with -XX:IgnoreMaxFootprint to invoke GC less often
(only when the bump pointer space is filled rather than based on the
target utilization):

MemAllocTest 2707 -> 2590
BinaryTrees  2023 -> 1906

TODO: implement fast paths for array allocations.

Bug: 9986565
Change-Id: I73ff6327b229704f8ae5924ae9b747443c229841

runtime/entrypoints/entrypoint_utils-inl.h
runtime/entrypoints/quick/quick_alloc_entrypoints.cc
runtime/gc/heap.h

diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index cb0be04..7c73c79 100644
@@ -37,6 +37,7 @@ namespace art {
 
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <const bool kAccessCheck>
+ALWAYS_INLINE
 static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
                                               mirror::ArtMethod* method,
                                               Thread* self, bool* slow_path) {
@@ -86,6 +87,7 @@ static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
 }
 
 // TODO: Fix no thread safety analysis when annotalysis is smarter.
+ALWAYS_INLINE
 static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
                                                                  Thread* self,
                                                                  bool* slow_path) {
@@ -116,6 +118,7 @@ static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class*
 // check.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
                                                   mirror::ArtMethod* method,
                                                   Thread* self,
@@ -135,6 +138,7 @@ static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
 // Given the context of a calling Method and a resolved class, create an instance.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
                                                           mirror::ArtMethod* method,
                                                           Thread* self,
@@ -157,6 +161,7 @@ static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
 // Given the context of a calling Method and an initialized class, create an instance.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
                                                              mirror::ArtMethod* method,
                                                              Thread* self,
@@ -169,6 +174,7 @@ static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klas
 
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck>
+ALWAYS_INLINE
 static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
                                              mirror::ArtMethod* method,
                                              int32_t component_count,
@@ -205,6 +211,7 @@ static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
 // check.
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
                                                 mirror::ArtMethod* method,
                                                 int32_t component_count,
@@ -227,6 +234,7 @@ static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
 }
 
 template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
                                                         mirror::ArtMethod* method,
                                                         int32_t component_count,
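
(For context on the annotation added throughout this header: ALWAYS_INLINE in ART is a thin macro over the GCC/Clang attribute, roughly as below; the exact definition lives in runtime/base/macros.h.)

#define ALWAYS_INLINE __attribute__ ((always_inline))

Forcing the CheckObjectAlloc/AllocObjectFromCode helper chain inline keeps the macro-generated entry points from paying extra call overhead on the allocation path, which the compiler does not reliably do on its own.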
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 1f2713a..7d4da18 100644
 
 namespace art {
 
+static constexpr bool kUseTlabFastPath = true;
+
 #define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \
 extern "C" mirror::Object* artAllocObjectFromCode ##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+    mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx); \
+    if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
+      size_t byte_count = klass->GetObjectSize(); \
+      byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+      mirror::Object* obj; \
+      if (LIKELY(byte_count < self->TlabSize())) { \
+        obj = self->AllocTlab(byte_count); \
+        DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+        obj->SetClass(klass); \
+        if (kUseBakerOrBrooksReadBarrier) { \
+          if (kUseBrooksReadBarrier) { \
+            obj->SetReadBarrierPointer(obj); \
+          } \
+          obj->AssertReadBarrierPointer(); \
+        } \
+        QuasiAtomic::ThreadFenceForConstructor(); \
+        return obj; \
+      } \
+    } \
+  } \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
@@ -37,6 +60,26 @@ extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \
     mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+    if (LIKELY(klass->IsInitialized())) { \
+      size_t byte_count = klass->GetObjectSize(); \
+      byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+      mirror::Object* obj; \
+      if (LIKELY(byte_count < self->TlabSize())) { \
+        obj = self->AllocTlab(byte_count); \
+        DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+        obj->SetClass(klass); \
+        if (kUseBakerOrBrooksReadBarrier) { \
+          if (kUseBrooksReadBarrier) { \
+            obj->SetReadBarrierPointer(obj); \
+          } \
+          obj->AssertReadBarrierPointer(); \
+        } \
+        QuasiAtomic::ThreadFenceForConstructor(); \
+        return obj; \
+      } \
+    } \
+  } \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeResolved<instrumented_bool>(klass, method, self, allocator_type); \
 } \
@@ -44,6 +87,24 @@ extern "C" mirror::Object* artAllocObjectFromCodeInitialized##suffix##suffix2( \
     mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+    size_t byte_count = klass->GetObjectSize(); \
+    byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+    mirror::Object* obj; \
+    if (LIKELY(byte_count < self->TlabSize())) { \
+      obj = self->AllocTlab(byte_count); \
+      DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+      obj->SetClass(klass); \
+      if (kUseBakerOrBrooksReadBarrier) { \
+        if (kUseBrooksReadBarrier) { \
+          obj->SetReadBarrierPointer(obj); \
+        } \
+        obj->AssertReadBarrierPointer(); \
+      } \
+      QuasiAtomic::ThreadFenceForConstructor(); \
+      return obj; \
+    } \
+  } \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeInitialized<instrumented_bool>(klass, method, self, allocator_type); \
 } \
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 8ffadd5..a82392a 100644
@@ -662,7 +662,7 @@ class Heap {
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template <bool kGrow>
-  bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
+  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
 
   // Returns true if the address passed in is within the address range of a continuous space.
   bool IsValidContinuousSpaceObjectAddress(const mirror::Object* obj) const
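
For reference, the fast path leans on Thread::AllocTlab being a raw bump-pointer allocation out of thread-local state. Conceptually it behaves as in the sketch below; the field names tlab_pos_ and tlab_end_ are illustrative, not the actual members in thread.h:

// Conceptual sketch of the thread-local allocation the fast path relies on.
inline mirror::Object* Thread::AllocTlab(size_t bytes) {
  DCHECK_LE(bytes, TlabSize());  // Callers check capacity first, so this cannot fail.
  mirror::Object* obj = reinterpret_cast<mirror::Object*>(tlab_pos_);
  tlab_pos_ += bytes;            // Bump the thread-local cursor: no lock, no CAS.
  return obj;
}

inline size_t Thread::TlabSize() const {
  return tlab_end_ - tlab_pos_;  // Bytes remaining in this thread's buffer.
}

Because both calls touch only thread-local state, the fast path skips the allocator selection, the footprint check (IsOutOfMemoryOnAllocation above), and the instrumentation hooks of the generic path, which is where the measured speedup comes from.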