From 8261d02f9523b95013108f271b82bb157ef6f71d Mon Sep 17 00:00:00 2001
From: Mathieu Chartier
Date: Mon, 8 Aug 2016 09:41:04 -0700
Subject: [PATCH] Revert "Revert "ARM64 asm for region space array allocation""

Also added missing large object check. No regression from the check:
N6P CC EAAC time at 1313 for 10 samples vs 1314 before reverts.

Bug: 30162165
Bug: 12687968
Test: test-art-target with CC + heap poisoning

This reverts commit 6ae7f3a4541e70f04243a6fe469aa3bd51e16d79.

Change-Id: Ie28f652f619898d7d37eeebf3f31a88af8fac949
---
 runtime/arch/arm64/quick_entrypoints_arm64.S | 189 ++++++++++++++++++++++++---
 runtime/arch/quick_alloc_entrypoints.S       |  35 +++--
 runtime/asm_support.h                        |   8 ++
 runtime/gc/heap.cc                           |   1 +
 runtime/gc/heap.h                            |   3 +-
 runtime/generated/asm_support_gen.h          |   2 +
 runtime/mirror/class-inl.h                   |  10 +-
 runtime/mirror/class.h                       |  12 +-
 tools/cpp-define-generator/constant_heap.def |  25 ++++
 tools/cpp-define-generator/offsets_all.def   |   1 +
 10 files changed, 247 insertions(+), 39 deletions(-)
 create mode 100644 tools/cpp-define-generator/constant_heap.def

diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 415bb71ba..439f8d46b 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1788,7 +1788,20 @@ END art_quick_set64_static
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER

 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+// Comment out allocators that have arm64 specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) implemented in asm
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
@@ -1895,6 +1908,71 @@ ENTRY art_quick_alloc_object_rosalloc
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc

+
+// The common fast path code for art_quick_alloc_array_region_tlab.
+.macro ALLOC_ARRAY_TLAB_FAST_PATH slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
+    // Check null class
+    cbz    \wClass, \slowPathLabel
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED \slowPathLabel, \xClass, \wClass, \xCount, \wCount, \xTemp0, \wTemp0, \xTemp1, \wTemp1, \xTemp2, \wTemp2
+.endm
+
+// The common fast path code for art_quick_alloc_array_region_tlab.
+.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
+    // Array classes are never finalizable or uninitialized, no need to check.
+    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]        // Load component type
+    UNPOISON_HEAP_REF \wTemp0
+    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT             // Component size shift is in high 16
+                                                                          // bits.
+                                                                          // xCount is holding a 32 bit value,
+                                                                          // it can not overflow.
+    lsl    \xTemp1, \xCount, \xTemp0                                      // Calculate data size
+    // Add array data offset and alignment.
+    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
+#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
+#error Long array data offset must be 4 greater than int array data offset.
+#endif
+
+    add    \xTemp0, \xTemp0, #1                                           // Add 4 to the length only if the
+                                                                          // component size shift is 3
+                                                                          // (for 64 bit alignment).
+    and    \xTemp0, \xTemp0, #4
+    add    \xTemp1, \xTemp1, \xTemp0
+    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED               // Round up the object size by the
+                                                                          // object alignment. (addr + 7) & ~7.
+                                                                          // Add by 7 is done above.
+
+    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD                           // Possibly a large object, go slow
+    bhs    \slowPathLabel                                                 // path.
+
+    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]                     // Check tlab for space, note that
+                                                                          // we use (end - begin) to handle
+                                                                          // negative size arrays. It is
+                                                                          // assumed that a negative size will
+                                                                          // always be greater unsigned than
+                                                                          // region size.
+    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
+    sub    \xTemp2, \xTemp2, \xTemp0
+    cmp    \xTemp1, \xTemp2
+    bhi    \slowPathLabel
+
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+                                                                          // Move old thread_local_pos to x0
+                                                                          // for the return value.
+    mov    x0, \xTemp0
+    add    \xTemp0, \xTemp0, \xTemp1
+    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]                     // Store new thread_local_pos.
+    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]                 // Increment thread_local_objects.
+    add    \xTemp0, \xTemp0, #1
+    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
+    POISON_HEAP_REF \wClass
+    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]                     // Store the class pointer.
+    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]                     // Store the array length.
+    // Fence.
+    dmb    ishst
+    ret
+.endm
+
 // The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
 //
 // x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
@@ -1902,8 +1980,11 @@ END art_quick_alloc_object_rosalloc
 // Need to preserve x0 and x1 to the slow path.
 .macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
     cbz    x2, \slowPathLabel                                 // Check null class
-                                                              // Check class status.
-    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
+    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
+.endm
+
+.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
+    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]              // Check class status.
     cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
     bne    \slowPathLabel
                                                               // Add a fake dependence from the
                                                               // following access flag and size
                                                               // loads to the status load.
                                                               // This is to prevent those loads
                                                               // from being reordered above the
                                                               // status load and reading wrong
                                                               // values (an alternative is to use
                                                               // a load-acquire for the status).
     eor    x3, x3, x3
     add    x2, x2, x3
+    ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED \slowPathLabel
+.endm
+
+.macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
                                                               // Check access flags has
                                                               // kAccClassIsFinalizable.
     ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
@@ -1977,32 +2062,37 @@ ENTRY art_quick_alloc_object_tlab
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_tlab

-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-ENTRY art_quick_alloc_object_region_tlab
+// The common code for art_quick_alloc_object_*region_tlab
+.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved
+ENTRY \name
     // Fast path region tlab allocation.
-    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index.
     // x2-x7: free.
 #if !defined(USE_READ_BARRIER)
     mvn    x0, xzr                                            // Read barrier must be enabled here.
     ret                                                       // Return -1.
 #endif
+.if \is_resolved
+    mov    x2, x0                                             // class is actually stored in x0 already
+.else
     ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
                                                               // Load the class (x2)
     ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
-
+.endif
     // Most common case: GC is not marking.
     ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x3, .Lart_quick_alloc_object_region_tlab_marking
-.Lart_quick_alloc_object_region_tlab_do_allocation:
-    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
-.Lart_quick_alloc_object_region_tlab_marking:
+    cbnz   x3, .Lmarking\name
+.Ldo_allocation\name:
+    \fast_path .Lslow_path\name
+.Lmarking\name:
     // GC is marking, check the lock word of the class for the mark bit.
     // If the class is null, go slow path. The check is required to read the lock word.
-    cbz    w2, .Lart_quick_alloc_object_region_tlab_slow_path
+    cbz    w2, .Lslow_path\name
     // Class is not null, check mark bit in lock word.
     ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     // If the bit is not zero, do the allocation.
-    tbnz   w3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_alloc_object_region_tlab_do_allocation
+    tbnz   w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
                                                               // The read barrier slow path. Mark
                                                               // the class.
     stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, lr).
@@ -2013,14 +2103,79 @@ ENTRY art_quick_alloc_object_region_tlab
     ldp    x0, x1, [sp, #0]                                   // Restore registers.
     ldr    xLR, [sp, #16]
     add    sp, sp, #32
-    b      .Lart_quick_alloc_object_region_tlab_do_allocation
-.Lart_quick_alloc_object_region_tlab_slow_path:
+    b      .Ldo_allocation\name
+.Lslow_path\name:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                         // Save callee saves in case of GC.
     mov    x2, xSELF                                          // Pass Thread::Current.
-    bl     artAllocObjectFromCodeRegionTLAB                   // (uint32_t type_idx, Method* method, Thread*)
+    bl     \entrypoint                                        // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_region_tlab
+END \name
+.endm
+
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH, 0
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1
+
+// The common code for art_quick_alloc_array_*region_tlab
+.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path, is_resolved
+ENTRY \name
+    // Fast path array allocation for region tlab allocation.
+    // x0: uint32_t type_idx
+    // x1: int32_t component_count
+    // x2: ArtMethod* method
+    // x3-x7: free.
+#if !defined(USE_READ_BARRIER)
+    mvn    x0, xzr                                            // Read barrier must be enabled here.
+    ret                                                       // Return -1.
+#endif
+.if \is_resolved
+    mov    x3, x0
+    // If already resolved, class is stored in x0
+.else
+    ldr    x3, [x2, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x3)
+    ldr    w3, [x3, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+.endif
+    // Most common case: GC is not marking.
+    ldr    w4, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x4, .Lmarking\name
+.Ldo_allocation\name:
+    \fast_path .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
+.Lmarking\name:
+    // GC is marking, check the lock word of the class for the mark bit.
+    // If the class is null, go slow path. The check is required to read the lock word.
+    cbz    w3, .Lslow_path\name
+    // Class is not null, check mark bit in lock word.
+    ldr    w4, [x3, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    // If the bit is not zero, do the allocation.
+    tbnz   w4, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, x2, lr).
+    stp    x2, xLR, [sp, #16]
+    mov    x0, x3                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    x3, x0                                             // Get the (marked) class back.
+    ldp    x2, xLR, [sp, #16]
+    ldp    x0, x1, [sp], #32                                  // Restore registers.
+    b      .Ldo_allocation\name
+.Lslow_path\name:
+    // x0: uint32_t type_idx / mirror::Class* klass (if resolved)
+    // x1: int32_t component_count
+    // x2: ArtMethod* method
+    // x3: Thread* self
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                         // save callee saves in case of GC
+    mov    x3, xSELF                                          // pass Thread::Current
+    bl     \entrypoint
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END \name
+.endm
+
+GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_region_tlab, artAllocArrayFromCodeRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH, 0
+// TODO: art_quick_alloc_array_resolved_region_tlab seems to not get called. Investigate compiler.
+GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, 1

 /*
  * Called by managed code when the thread has been asked to suspend.
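[Editor's note, not part of the patch: the arithmetic in ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED above is easier to audit against a scalar sketch. The C++ below is an illustration only, not ART code; the alignment and threshold values are taken from the constants this patch defines (OBJECT_ALIGNMENT, MIN_LARGE_OBJECT_THRESHOLD), while the value used for MIRROR_INT_ARRAY_DATA_OFFSET is an assumption for the example.]

#include <cstddef>
#include <cstdint>

// Constants mirroring the assembly; kIntArrayDataOffset is an assumed example value.
constexpr size_t kObjectAlignment = 8;
constexpr size_t kMinLargeObjectThreshold = 3 * 4096;  // 0x3000, MIN_LARGE_OBJECT_THRESHOLD.
constexpr size_t kIntArrayDataOffset = 12;             // MIRROR_INT_ARRAY_DATA_OFFSET (assumed).

// The component size shift is packed into the upper 16 bits of primitive_type.
constexpr size_t ComponentSizeShift(uint32_t primitive_type) {
  return primitive_type >> 16;  // PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT
}

// Returns the rounded-up allocation size, or 0 when the fast path must bail out
// because the array would be a large object.
constexpr size_t ArrayAllocSize(uint32_t component_count, uint32_t primitive_type) {
  size_t shift = ComponentSizeShift(primitive_type);
  size_t size = (static_cast<size_t>(component_count) << shift)  // data bytes
      + kIntArrayDataOffset                                      // header + length field
      + ((shift == 3) ? 4 : 0)                                   // 64-bit elements start 4 bytes later
      + (kObjectAlignment - 1);                                  // pre-add for rounding
  size &= ~(kObjectAlignment - 1);                               // (addr + 7) & ~7
  return size < kMinLargeObjectThreshold ? size : 0;
}

[With the size in hand, the TLAB step in the macro is a plain bump: allocate at thread_local_pos when (thread_local_end - thread_local_pos) >= size, using an unsigned comparison so negative component counts fall through to the slow path.]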
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 290769b36..fa86bf408 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -87,6 +87,27 @@ GENERATE_ALLOC_ENTRYPOINTS _region_tlab_instrumented, RegionTLABInstrumented
 ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER

 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
@@ -219,20 +240,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionI
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)

-// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 0619af813..d4cee4445 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -20,6 +20,7 @@
 #if defined(__cplusplus)
 #include "art_method.h"
 #include "gc/allocator/rosalloc.h"
+#include "gc/heap.h"
 #include "jit/jit.h"
 #include "lock_word.h"
 #include "mirror/class.h"
@@ -174,10 +175,17 @@ ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
 #define MIRROR_CLASS_OBJECT_SIZE_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
+#define MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET (104 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET,
+            art::mirror::Class::PrimitiveTypeOffset().Int32Value())
 #define MIRROR_CLASS_STATUS_OFFSET (112 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
+#define PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT 16
+ADD_TEST_EQ(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT,
+            static_cast<uint32_t>(art::mirror::Class::kPrimitiveTypeSizeShiftShift))
+
 // Array offsets.
 #define MIRROR_ARRAY_LENGTH_OFFSET      MIRROR_OBJECT_HEADER_SIZE
 ADD_TEST_EQ(MIRROR_ARRAY_LENGTH_OFFSET, art::mirror::Array::LengthOffset().Int32Value())
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 5485cd233..88fbf781b 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -257,6 +257,7 @@ Heap::Heap(size_t initial_size,
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
+  CHECK_GE(large_object_threshold, kMinLargeObjectThreshold);
   ScopedTrace trace(__FUNCTION__);
   Runtime* const runtime = Runtime::Current();
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index bb0d11a1d..be8ed40e8 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -132,7 +132,8 @@ class Heap {
   static constexpr double kDefaultTargetUtilization = 0.5;
   static constexpr double kDefaultHeapGrowthMultiplier = 2.0;
   // Primitive arrays larger than this size are put in the large object space.
-  static constexpr size_t kDefaultLargeObjectThreshold = 3 * kPageSize;
+  static constexpr size_t kMinLargeObjectThreshold = 3 * kPageSize;
+  static constexpr size_t kDefaultLargeObjectThreshold = kMinLargeObjectThreshold;
   // Whether or not parallel GC is enabled. If not, then we never create the thread pool.
   static constexpr bool kDefaultEnableParallelGC = false;
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index c66029d32..3d3cc4e04 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -70,6 +70,8 @@ DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_JNI_OFFSET_64), (static_cast<int32_t>(art::ArtMethod::
 DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k32).Int32Value())))
 #define ART_METHOD_QUICK_CODE_OFFSET_64 48
 DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).Int32Value())))
+#define MIN_LARGE_OBJECT_THRESHOLD 0x3000
+DEFINE_CHECK_EQ(static_cast<size_t>(MIN_LARGE_OBJECT_THRESHOLD), (static_cast<size_t>(art::gc::Heap::kMinLargeObjectThreshold)))
 #define LOCK_WORD_STATE_SHIFT 30
 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kStateShift)))
 #define LOCK_WORD_STATE_MASK 0xc0000000
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 8f5419cd4..8ad47eb79 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -636,8 +636,9 @@ inline Primitive::Type Class::GetPrimitiveType() {
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  Primitive::Type type = static_cast<Primitive::Type>(v32 & 0xFFFF);
-  DCHECK_EQ(static_cast<size_t>(v32 >> 16), Primitive::ComponentSizeShift(type));
+  Primitive::Type type = static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask);
+  DCHECK_EQ(static_cast<size_t>(v32 >> kPrimitiveTypeSizeShiftShift),
+            Primitive::ComponentSizeShift(type));
   return type;
 }

@@ -646,8 +647,9 @@ inline size_t Class::GetPrimitiveTypeSizeShift() {
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  size_t size_shift = static_cast<size_t>(v32 >> 16);
-  DCHECK_EQ(size_shift, Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & 0xFFFF)));
+  size_t size_shift = static_cast<size_t>(v32 >> kPrimitiveTypeSizeShiftShift);
+  DCHECK_EQ(size_shift,
+            Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask)));
   return size_shift;
 }

diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 5c490dec3..8f6ce44ff 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -64,6 +64,12 @@ class MANAGED Class FINAL : public Object {
   // 2 ref instance fields.]
   static constexpr uint32_t kClassWalkSuper = 0xC0000000;

+  // Shift primitive type by kPrimitiveTypeSizeShiftShift to get the component type size shift
+  // Used for computing array size as follows:
+  // array_bytes = header_size + (elements << (primitive_type >> kPrimitiveTypeSizeShiftShift))
+  static constexpr uint32_t kPrimitiveTypeSizeShiftShift = 16;
+  static constexpr uint32_t kPrimitiveTypeMask = (1u << kPrimitiveTypeSizeShiftShift) - 1;
+
   // Class Status
   //
   // kStatusRetired: Class that's temporarily used till class linking time
@@ -371,10 +377,10 @@ class MANAGED Class FINAL : public Object {

   void SetPrimitiveType(Primitive::Type new_type) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
-    int32_t v32 = static_cast<int32_t>(new_type);
-    DCHECK_EQ(v32 & 0xFFFF, v32) << "upper 16 bits aren't zero";
+    uint32_t v32 = static_cast<uint32_t>(new_type);
+    DCHECK_EQ(v32 & kPrimitiveTypeMask, v32) << "upper 16 bits aren't zero";
     // Store the component size shift in the upper 16 bits.
-    v32 |= Primitive::ComponentSizeShift(new_type) << 16;
+    v32 |= Primitive::ComponentSizeShift(new_type) << kPrimitiveTypeSizeShiftShift;
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32);
   }

diff --git a/tools/cpp-define-generator/constant_heap.def b/tools/cpp-define-generator/constant_heap.def
new file mode 100644
index 000000000..dc7673650
--- /dev/null
+++ b/tools/cpp-define-generator/constant_heap.def
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export heap values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "gc/heap.h"
+#endif
+
+// Size of references to the heap on the stack.
+DEFINE_EXPR(MIN_LARGE_OBJECT_THRESHOLD, size_t, art::gc::Heap::kMinLargeObjectThreshold)
+
diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def
index 01e4d5b0c..d2d877710 100644
--- a/tools/cpp-define-generator/offsets_all.def
+++ b/tools/cpp-define-generator/offsets_all.def
@@ -48,6 +48,7 @@
 // TODO: MIRROR_*_ARRAY offsets (depends on header size)
 // TODO: MIRROR_STRING offsets (depends on header size)
 #include "offset_dexcache.def"
+#include "constant_heap.def"
 #include "constant_lockword.def"
 #include "constant_globals.def"
 #include "constant_rosalloc.def"
-- 
2.11.0
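
[Editor's note, appended after the patch: the mirror/class.h change above replaces hard-coded 0xFFFF / >> 16 with kPrimitiveTypeMask and kPrimitiveTypeSizeShiftShift. The following standalone C++ is an illustration of that 16-bit packing only, not ART code; the concrete numbers in main() are example values, not taken from Primitive::Type.]

#include <cassert>
#include <cstdint>

constexpr uint32_t kPrimitiveTypeSizeShiftShift = 16;
constexpr uint32_t kPrimitiveTypeMask = (1u << kPrimitiveTypeSizeShiftShift) - 1;

// SetPrimitiveType packs the enum value (low 16 bits) with the component size
// shift (high 16 bits); the getters below undo it, as in class-inl.h above.
constexpr uint32_t Pack(uint32_t type, uint32_t component_size_shift) {
  return (component_size_shift << kPrimitiveTypeSizeShiftShift) | (type & kPrimitiveTypeMask);
}
constexpr uint32_t UnpackType(uint32_t v32) { return v32 & kPrimitiveTypeMask; }
constexpr uint32_t UnpackSizeShift(uint32_t v32) { return v32 >> kPrimitiveTypeSizeShiftShift; }

int main() {
  // An example type tag of 5 with a 4-byte component (size shift 2).
  uint32_t v32 = Pack(/*type=*/5, /*component_size_shift=*/2);
  assert(UnpackType(v32) == 5);
  assert(UnpackSizeShift(v32) == 2);
  return 0;
}

[Packing the size shift next to the type is what lets the ARM64 array fast path recover the element size with a single lsr by PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT instead of a table lookup or a call back into C++.]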