From 45aa598cd1773f5eb1705dec13bea059238e054d Mon Sep 17 00:00:00 2001 From: David Srbecky Date: Fri, 18 Mar 2016 02:15:09 +0000 Subject: [PATCH] Deduplicate stack masks The stack masks repeat often enough so that it is worth deduplicating them. Oat size for a large app: 98143600 -> 96722288 (-1.44%) Bug: 34621054 Test: test-art-host Change-Id: If73d51e46066357049d5be2e406ae9a32b7ff1f4 --- compiler/optimizing/stack_map_stream.cc | 81 ++++++++++++++++++++-------- compiler/optimizing/stack_map_stream.h | 6 +++ compiler/optimizing/stack_map_test.cc | 65 ++++++++++++++--------- oatdump/oatdump.cc | 29 ++++------ runtime/check_reference_map_visitor.h | 4 +- runtime/memory_region.cc | 3 +- runtime/memory_region.h | 23 ++++++-- runtime/oat.h | 2 +- runtime/quick_exception_handler.cc | 4 +- runtime/stack_map.cc | 10 ++-- runtime/stack_map.h | 93 ++++++++++++++++++++------------- runtime/thread.cc | 7 +-- 12 files changed, 205 insertions(+), 122 deletions(-) diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 1b9bd7eb3..46497e37c 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -16,6 +16,9 @@ #include "stack_map_stream.h" +#include <unordered_map> + +#include "base/stl_util.h" #include "art_method.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" @@ -40,6 +43,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, current_entry_.inline_infos_start_index = inline_infos_.size(); current_entry_.dex_register_map_hash = 0; current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound; + current_entry_.stack_mask_index = 0; if (num_dex_registers != 0) { current_entry_.live_dex_registers_mask = ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream); @@ -153,32 +157,37 @@ CodeOffset StackMapStream::ComputeMaxNativePcCodeOffset() const { } size_t StackMapStream::PrepareForFillIn() { - int stack_mask_number_of_bits = stack_mask_max_ + 1; // 
Need room for max element too. + size_t stack_mask_size_in_bits = stack_mask_max_ + 1; // Need room for max element too. + size_t number_of_stack_masks = PrepareStackMasks(stack_mask_size_in_bits); dex_register_maps_size_ = ComputeDexRegisterMapsSize(); ComputeInlineInfoEncoding(); // needs dex_register_maps_size_. inline_info_size_ = inline_infos_.size() * inline_info_encoding_.GetEntrySize(); CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset(); - // The stack map contains compressed native offsets. + // The stack map contains compressed native PC offsets. size_t stack_map_size = stack_map_encoding_.SetFromSizes(max_native_pc_offset.CompressedValue(), dex_pc_max_, dex_register_maps_size_, inline_info_size_, register_mask_max_, - stack_mask_number_of_bits); + number_of_stack_masks); stack_maps_size_ = RoundUp(stack_maps_.size() * stack_map_size, kBitsPerByte) / kBitsPerByte; dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); + size_t stack_masks_bytes = + RoundUp(number_of_stack_masks * stack_mask_size_in_bits, kBitsPerByte) / kBitsPerByte; size_t non_header_size = stack_maps_size_ + dex_register_location_catalog_size_ + dex_register_maps_size_ + - inline_info_size_; + inline_info_size_ + + stack_masks_bytes; // Prepare the CodeInfo variable-sized encoding. 
CodeInfoEncoding code_info_encoding; code_info_encoding.non_header_size = non_header_size; code_info_encoding.number_of_stack_maps = stack_maps_.size(); - code_info_encoding.stack_map_size_in_bits = stack_map_size; + code_info_encoding.number_of_stack_masks = number_of_stack_masks; + code_info_encoding.stack_mask_size_in_bits = stack_mask_size_in_bits; code_info_encoding.stack_map_encoding = stack_map_encoding_; code_info_encoding.inline_info_encoding = inline_info_encoding_; code_info_encoding.number_of_location_catalog_entries = location_catalog_entries_.size(); @@ -322,17 +331,7 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc); stack_map.SetNativePcCodeOffset(stack_map_encoding_, entry.native_pc_code_offset); stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask); - size_t number_of_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding); - if (entry.sp_mask != nullptr) { - for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) { - stack_map.SetStackMaskBit(stack_map_encoding_, bit, entry.sp_mask->IsBitSet(bit)); - } - } else { - // The MemoryRegion does not have to be zeroed, so make sure we clear the bits. - for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) { - stack_map.SetStackMaskBit(stack_map_encoding_, bit, false); - } - } + stack_map.SetStackMaskIndex(stack_map_encoding_, entry.stack_mask_index); if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) { // No dex map available. @@ -353,7 +352,7 @@ void StackMapStream::FillIn(MemoryRegion region) { next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); stack_map.SetDexRegisterMapOffset( - stack_map_encoding_, register_region.start() - dex_register_locations_region.start()); + stack_map_encoding_, register_region.begin() - dex_register_locations_region.begin()); // Set the dex register location. 
FillInDexRegisterMap(dex_register_map, @@ -373,7 +372,7 @@ void StackMapStream::FillIn(MemoryRegion region) { // Currently relative to the dex register map. stack_map.SetInlineDescriptorOffset( - stack_map_encoding_, inline_region.start() - dex_register_locations_region.start()); + stack_map_encoding_, inline_region.begin() - dex_register_locations_region.begin()); inline_info.SetDepth(inline_info_encoding_, entry.inlining_depth); DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size()); @@ -408,7 +407,7 @@ void StackMapStream::FillIn(MemoryRegion region) { DexRegisterMap dex_register_map(register_region); inline_info.SetDexRegisterMapOffsetAtDepth( inline_info_encoding_, - depth, register_region.start() - dex_register_locations_region.start()); + depth, register_region.begin() - dex_register_locations_region.begin()); FillInDexRegisterMap(dex_register_map, inline_entry.num_dex_registers, @@ -423,6 +422,19 @@ void StackMapStream::FillIn(MemoryRegion region) { } } + // Write stack masks at the end. + size_t stack_mask_bits = encoding.stack_mask_size_in_bits; + if (stack_mask_bits > 0) { + size_t stack_mask_bytes = RoundUp(stack_mask_bits, kBitsPerByte) / kBitsPerByte; + for (size_t i = 0; i < encoding.number_of_stack_masks; ++i) { + MemoryRegion source(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes); + BitMemoryRegion stack_mask = code_info.GetStackMask(encoding, i); + for (size_t bit_index = 0; bit_index < encoding.stack_mask_size_in_bits; ++bit_index) { + stack_mask.StoreBit(bit_index, source.LoadBit(bit_index)); + } + } + } + // Verify all written data in debug build. if (kIsDebugBuild) { CheckCodeInfo(region); @@ -536,6 +548,27 @@ void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info, } } +size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) { + // Preallocate memory since we do not want it to move (the dedup map will point into it). 
+ const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte; + stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u); + // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later + // when copying out from stack_masks_. + std::unordered_map<MemoryRegion, + size_t, + FNVHash<MemoryRegion>, + MemoryRegion::ContentEquals> dedup(stack_maps_.size()); + for (StackMapEntry& stack_map : stack_maps_) { + size_t index = dedup.size(); + MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size); + for (size_t i = 0; i < entry_size_in_bits; i++) { + stack_mask.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i)); + } + stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second; + } + return dedup.size(); +} + // Check that all StackMapStream inputs are correctly encoded by trying to read them back. void StackMapStream::CheckCodeInfo(MemoryRegion region) const { CodeInfo code_info(region); @@ -551,15 +584,17 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const { entry.native_pc_code_offset.Uint32Value(instruction_set_)); DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc); DCHECK_EQ(stack_map.GetRegisterMask(stack_map_encoding), entry.register_mask); - size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding); + const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding); + DCHECK_EQ(stack_map.GetStackMaskIndex(stack_map_encoding), entry.stack_mask_index); + BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map); if (entry.sp_mask != nullptr) { - DCHECK_GE(num_stack_mask_bits, entry.sp_mask->GetNumberOfBits()); + DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits()); for (size_t b = 0; b < num_stack_mask_bits; b++) { - DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), entry.sp_mask->IsBitSet(b)); + DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)); } } else { for 
(size_t b = 0; b < num_stack_mask_bits; b++) { - DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), 0u); + DCHECK_EQ(stack_mask.LoadBit(b), 0u); } } diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 8fec47243..e2e16e820 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -68,6 +68,7 @@ class StackMapStream : public ValueObject { location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)), dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)), inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), + stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)), stack_mask_max_(-1), dex_pc_max_(0), register_mask_max_(0), @@ -107,6 +108,7 @@ class StackMapStream : public ValueObject { BitVector* live_dex_registers_mask; uint32_t dex_register_map_hash; size_t same_dex_register_map_as_; + uint32_t stack_mask_index; }; struct InlineInfoEntry { @@ -160,6 +162,9 @@ class StackMapStream : public ValueObject { CodeOffset ComputeMaxNativePcCodeOffset() const; + // Returns the number of unique stack masks. + size_t PrepareStackMasks(size_t entry_size_in_bits); + // Returns the index of an entry with the same dex register map as the current_entry, // or kNoSameDexMapFound if no such entry exists. size_t FindEntryWithTheSameDexMap(); @@ -193,6 +198,7 @@ class StackMapStream : public ValueObject { // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. 
ArenaVector dex_register_locations_; ArenaVector inline_infos_; + ArenaVector stack_masks_; int stack_mask_max_; uint32_t dex_pc_max_; uint32_t register_mask_max_; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index da4597e38..da68b6081 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -27,15 +27,16 @@ namespace art { // Check that the stack mask of given stack map is identical // to the given bit vector. Returns true if they are same. static bool CheckStackMask( - int number_of_bits, + const CodeInfo& code_info, + const CodeInfoEncoding& encoding, const StackMap& stack_map, - StackMapEncoding& encoding, const BitVector& bit_vector) { - if (bit_vector.GetHighestBitSet() >= number_of_bits) { + BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map); + if (bit_vector.GetNumberOfBits() > encoding.stack_mask_size_in_bits) { return false; } - for (int i = 0; i < number_of_bits; ++i) { - if (stack_map.GetStackMaskBit(encoding, i) != bit_vector.IsBitSet(i)) { + for (size_t i = 0; i < encoding.stack_mask_size_in_bits; ++i) { + if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) { return false; } } @@ -81,10 +82,7 @@ TEST(StackMapTest, Test1) { ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA)); ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding)); - ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding), - stack_map, - encoding.stack_map_encoding, - sp_mask)); + ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask)); ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding)); DexRegisterMap dex_register_map = @@ -199,10 +197,7 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA)); ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding)); - 
ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding), - stack_map, - encoding.stack_map_encoding, - sp_mask1)); + ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1)); ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding)); DexRegisterMap dex_register_map = @@ -261,10 +256,7 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA)); ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding.stack_map_encoding)); - ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding), - stack_map, - encoding.stack_map_encoding, - sp_mask2)); + ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask2)); ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding)); DexRegisterMap dex_register_map = @@ -318,10 +310,7 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA)); ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding.stack_map_encoding)); - ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding), - stack_map, - encoding.stack_map_encoding, - sp_mask3)); + ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask3)); ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding)); DexRegisterMap dex_register_map = @@ -375,10 +364,7 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA)); ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding.stack_map_encoding)); - ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding), - stack_map, - encoding.stack_map_encoding, - sp_mask4)); + ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask4)); ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding)); DexRegisterMap dex_register_map = @@ -854,4 +840,33 @@ TEST(StackMapTest, CodeOffsetTest) { EXPECT_EQ(offset_mips64.Uint32Value(kMips64), 
kMips64InstructionAlignment); } + +TEST(StackMapTest, TestDeduplicateStackMask) { + ArenaPool pool; + ArenaAllocator arena(&pool); + StackMapStream stream(&arena, kRuntimeISA); + + ArenaBitVector sp_mask(&arena, 0, true); + sp_mask.SetBit(1); + sp_mask.SetBit(4); + stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0); + stream.EndStackMapEntry(); + stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0); + stream.EndStackMapEntry(); + + size_t size = stream.PrepareForFillIn(); + void* memory = arena.Alloc(size, kArenaAllocMisc); + MemoryRegion region(memory, size); + stream.FillIn(region); + + CodeInfo code_info(region); + CodeInfoEncoding encoding = code_info.ExtractEncoding(); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding)); + + StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4, encoding); + StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8, encoding); + EXPECT_EQ(stack_map1.GetStackMaskIndex(encoding.stack_map_encoding), + stack_map2.GetStackMaskIndex(encoding.stack_map_encoding)); +} + } // namespace art diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 9b4d3e115..c95cc748b 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -589,16 +589,16 @@ class OatDumper { kByteKindCodeInfoInlineInfo, kByteKindCodeInfoEncoding, kByteKindCodeInfoOther, + kByteKindCodeInfoStackMasks, kByteKindStackMapNativePc, kByteKindStackMapDexPc, kByteKindStackMapDexRegisterMap, kByteKindStackMapInlineInfo, kByteKindStackMapRegisterMask, - kByteKindStackMapMask, - kByteKindStackMapOther, + kByteKindStackMapStackMaskIndex, kByteKindCount, kByteKindStackMapFirst = kByteKindCodeInfoOther, - kByteKindStackMapLast = kByteKindStackMapOther, + kByteKindStackMapLast = kByteKindStackMapStackMaskIndex, }; int64_t bits[kByteKindCount] = {}; // Since code has deduplication, seen tracks already seen pointers to avoid double counting @@ -632,6 +632,7 @@ class OatDumper { Dump(os, "CodeInfoLocationCatalog ", bits[kByteKindCodeInfoLocationCatalog], sum); 
Dump(os, "CodeInfoDexRegisterMap ", bits[kByteKindCodeInfoDexRegisterMap], sum); Dump(os, "CodeInfoInlineInfo ", bits[kByteKindCodeInfoInlineInfo], sum); + Dump(os, "CodeInfoStackMasks ", bits[kByteKindCodeInfoStackMasks], sum); Dump(os, "CodeInfoStackMap ", stack_map_bits, sum); { ScopedIndentation indent1(&os); @@ -661,13 +662,8 @@ class OatDumper { stack_map_bits, "stack map"); Dump(os, - "StackMapMask ", - bits[kByteKindStackMapMask], - stack_map_bits, - "stack map"); - Dump(os, - "StackMapOther ", - bits[kByteKindStackMapOther], + "StackMapStackMaskIndex ", + bits[kByteKindStackMapStackMaskIndex], stack_map_bits, "stack map"); } @@ -1575,16 +1571,13 @@ class OatDumper { stats_.AddBits( Stats::kByteKindStackMapRegisterMask, stack_map_encoding.GetRegisterMaskEncoding().BitSize() * num_stack_maps); - const size_t stack_mask_bits = encoding.stack_map_size_in_bits - - stack_map_encoding.GetStackMaskBitOffset(); stats_.AddBits( - Stats::kByteKindStackMapMask, - stack_mask_bits * num_stack_maps); - const size_t stack_map_bits = - stack_map_encoding.GetStackMaskBitOffset() + stack_mask_bits; + Stats::kByteKindStackMapStackMaskIndex, + stack_map_encoding.GetStackMaskIndexEncoding().BitSize() * num_stack_maps); stats_.AddBits( - Stats::kByteKindStackMapOther, - (encoding.stack_map_size_in_bits - stack_map_bits) * num_stack_maps); + Stats::kByteKindCodeInfoStackMasks, + helper.GetCodeInfo().GetNumberOfStackMaskBits(encoding) * + encoding.number_of_stack_masks); const size_t stack_map_bytes = helper.GetCodeInfo().GetStackMapsSize(encoding); const size_t location_catalog_bytes = helper.GetCodeInfo().GetDexRegisterLocationCatalogSize(encoding); diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h index 93fdaa616..2252fe7c2 100644 --- a/runtime/check_reference_map_visitor.h +++ b/runtime/check_reference_map_visitor.h @@ -68,6 +68,7 @@ class CheckReferenceMapVisitor : public StackVisitor { DexRegisterMap dex_register_map = 
code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding); + BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map); for (int i = 0; i < number_of_references; ++i) { int reg = registers[i]; CHECK(reg < m->GetCodeItem()->registers_size_); @@ -80,8 +81,7 @@ class CheckReferenceMapVisitor : public StackVisitor { break; case DexRegisterLocation::Kind::kInStack: DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0); - CHECK(stack_map.GetStackMaskBit(encoding.stack_map_encoding, - location.GetValue() / kFrameSlotSize)); + CHECK(stack_mask.LoadBit(location.GetValue() / kFrameSlotSize)); break; case DexRegisterLocation::Kind::kInRegister: case DexRegisterLocation::Kind::kInRegisterHigh: diff --git a/runtime/memory_region.cc b/runtime/memory_region.cc index b0ecab40c..13cc5c99b 100644 --- a/runtime/memory_region.cc +++ b/runtime/memory_region.cc @@ -29,8 +29,7 @@ void MemoryRegion::CopyFrom(size_t offset, const MemoryRegion& from) const { CHECK_GT(from.size(), 0U); CHECK_GE(this->size(), from.size()); CHECK_LE(offset, this->size() - from.size()); - memmove(reinterpret_cast(start() + offset), - from.pointer(), from.size()); + memmove(reinterpret_cast(begin() + offset), from.pointer(), from.size()); } void MemoryRegion::StoreBits(uintptr_t bit_offset, uint32_t value, size_t length) { diff --git a/runtime/memory_region.h b/runtime/memory_region.h index f55dff7a5..7cf5d49d7 100644 --- a/runtime/memory_region.h +++ b/runtime/memory_region.h @@ -35,6 +35,12 @@ namespace art { // of the region. 
class MemoryRegion FINAL : public ValueObject { public: + struct ContentEquals { + constexpr bool operator()(const MemoryRegion& lhs, const MemoryRegion& rhs) const { + return lhs.size() == rhs.size() && memcmp(lhs.begin(), rhs.begin(), lhs.size()) == 0; + } + }; + MemoryRegion() : pointer_(nullptr), size_(0) {} MemoryRegion(void* pointer_in, uintptr_t size_in) : pointer_(pointer_in), size_(size_in) {} @@ -46,8 +52,8 @@ class MemoryRegion FINAL : public ValueObject { return OFFSETOF_MEMBER(MemoryRegion, pointer_); } - uint8_t* start() const { return reinterpret_cast<uint8_t*>(pointer_); } - uint8_t* end() const { return start() + size_; } + uint8_t* begin() const { return reinterpret_cast<uint8_t*>(pointer_); } + uint8_t* end() const { return begin() + size_; } // Load value of type `T` at `offset`. The memory address corresponding // to `offset` should be word-aligned (on ARM, this is a requirement). @@ -131,7 +137,7 @@ class MemoryRegion FINAL : public ValueObject { // Do not touch any memory if the range is empty. return 0; } - const uint8_t* address = start() + bit_offset / kBitsPerByte; + const uint8_t* address = begin() + bit_offset / kBitsPerByte; const uint32_t shift = bit_offset & (kBitsPerByte - 1); // Load the value (reading only the strictly needed bytes). const uint32_t load_bit_count = shift + length; @@ -165,11 +171,18 @@ class MemoryRegion FINAL : public ValueObject { void CopyFrom(size_t offset, const MemoryRegion& from) const; + template<class Vector> + void CopyFromVector(size_t offset, Vector& vector) const { + if (!vector.empty()) { + CopyFrom(offset, MemoryRegion(vector.data(), vector.size())); + } + } + // Compute a sub memory region based on an existing one. 
ALWAYS_INLINE MemoryRegion Subregion(uintptr_t offset, uintptr_t size_in) const { CHECK_GE(this->size(), size_in); CHECK_LE(offset, this->size() - size_in); - return MemoryRegion(reinterpret_cast(start() + offset), size_in); + return MemoryRegion(reinterpret_cast(begin() + offset), size_in); } // Compute an extended memory region based on an existing one. @@ -183,7 +196,7 @@ class MemoryRegion FINAL : public ValueObject { ALWAYS_INLINE T* ComputeInternalPointer(size_t offset) const { CHECK_GE(size(), sizeof(T)); CHECK_LE(offset, size() - sizeof(T)); - return reinterpret_cast(start() + offset); + return reinterpret_cast(begin() + offset); } // Locate the bit with the given offset. Returns a pointer to the byte diff --git a/runtime/oat.h b/runtime/oat.h index 62f010ba9..403371619 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '1', '0', '6', '\0' }; // hash-based DexCache types + static constexpr uint8_t kOatVersion[] = { '1', '0', '7', '\0' }; // Stack map stack mask change. static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 4e7695118..3ba30112f 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -408,6 +408,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding); const size_t number_of_vregs = m->GetCodeItem()->registers_size_; uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding); + BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map); DexRegisterMap vreg_map = IsInInlinedFrame() ? 
code_info.GetDexRegisterMapAtDepth(GetCurrentInliningDepth() - 1, code_info.GetInlineInfoOf(stack_map, encoding), @@ -440,8 +441,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { const uint8_t* addr = reinterpret_cast(GetCurrentQuickFrame()) + offset; value = *reinterpret_cast(addr); uint32_t bit = (offset >> 2); - if (code_info.GetNumberOfStackMaskBits(encoding) > bit && - stack_map.GetStackMaskBit(encoding.stack_map_encoding, bit)) { + if (bit < encoding.stack_mask_size_in_bits && stack_mask.LoadBit(bit)) { is_reference = true; } break; diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc index e093293e7..f470ae964 100644 --- a/runtime/stack_map.cc +++ b/runtime/stack_map.cc @@ -98,7 +98,8 @@ void StackMapEncoding::Dump(VariableIndentationOutputStream* vios) const { << ", dex_register_map_bit_offset=" << static_cast(dex_register_map_bit_offset_) << ", inline_info_bit_offset=" << static_cast(inline_info_bit_offset_) << ", register_mask_bit_offset=" << static_cast(register_mask_bit_offset_) - << ", stack_mask_bit_offset=" << static_cast(stack_mask_bit_offset_) + << ", stack_mask_index_bit_offset=" << static_cast(stack_mask_index_bit_offset_) + << ", total_bit_size=" << static_cast(total_bit_size_) << ")\n"; } @@ -198,7 +199,7 @@ void StackMap::Dump(VariableIndentationOutputStream* vios, << "StackMap" << header_suffix << std::hex << " [native_pc=0x" << code_offset + pc_offset << "]" - << " [entry_size=0x" << encoding.stack_map_size_in_bits << " bits]" + << " [entry_size=0x" << encoding.stack_map_encoding.BitSize() << " bits]" << " (dex_pc=0x" << GetDexPc(stack_map_encoding) << ", native_pc_offset=0x" << pc_offset << ", dex_register_map_offset=0x" << GetDexRegisterMapOffset(stack_map_encoding) @@ -206,8 +207,9 @@ void StackMap::Dump(VariableIndentationOutputStream* vios, << ", register_mask=0x" << GetRegisterMask(stack_map_encoding) << std::dec << ", stack_mask=0b"; - for (size_t i = 0, e = code_info.GetNumberOfStackMaskBits(encoding); i < e; ++i) { 
- vios->Stream() << GetStackMaskBit(stack_map_encoding, e - i - 1); + BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, *this); + for (size_t i = 0, e = encoding.stack_mask_size_in_bits; i < e; ++i) { + vios->Stream() << stack_mask.LoadBit(e - i - 1); } vios->Stream() << ")\n"; if (HasDexRegisterMap(stack_map_encoding)) { diff --git a/runtime/stack_map.h b/runtime/stack_map.h index 679218d5b..83ba45794 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -695,34 +695,34 @@ class StackMapEncoding { size_t dex_register_map_size, size_t inline_info_size, size_t register_mask_max, - size_t stack_mask_bit_size) { - size_t bit_offset = 0; - DCHECK_EQ(kNativePcBitOffset, bit_offset); - bit_offset += MinimumBitsToStore(native_pc_max); + size_t number_of_stack_masks) { + total_bit_size_ = 0; + DCHECK_EQ(kNativePcBitOffset, total_bit_size_); + total_bit_size_ += MinimumBitsToStore(native_pc_max); - dex_pc_bit_offset_ = dchecked_integral_cast(bit_offset); - bit_offset += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max); + dex_pc_bit_offset_ = total_bit_size_; + total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max); // We also need +1 for kNoDexRegisterMap, but since the size is strictly // greater than any offset we might try to encode, we already implicitly have it. - dex_register_map_bit_offset_ = dchecked_integral_cast(bit_offset); - bit_offset += MinimumBitsToStore(dex_register_map_size); + dex_register_map_bit_offset_ = total_bit_size_; + total_bit_size_ += MinimumBitsToStore(dex_register_map_size); // We also need +1 for kNoInlineInfo, but since the inline_info_size is strictly // greater than the offset we might try to encode, we already implicitly have it. // If inline_info_size is zero, we can encode only kNoInlineInfo (in zero bits). 
- inline_info_bit_offset_ = dchecked_integral_cast(bit_offset); + inline_info_bit_offset_ = total_bit_size_; if (inline_info_size != 0) { - bit_offset += MinimumBitsToStore(dex_register_map_size + inline_info_size); + total_bit_size_ += MinimumBitsToStore(dex_register_map_size + inline_info_size); } - register_mask_bit_offset_ = dchecked_integral_cast(bit_offset); - bit_offset += MinimumBitsToStore(register_mask_max); + register_mask_bit_offset_ = total_bit_size_; + total_bit_size_ += MinimumBitsToStore(register_mask_max); - stack_mask_bit_offset_ = dchecked_integral_cast(bit_offset); - bit_offset += stack_mask_bit_size; + stack_mask_index_bit_offset_ = total_bit_size_; + total_bit_size_ += MinimumBitsToStore(number_of_stack_masks); - return bit_offset; + return total_bit_size_; } ALWAYS_INLINE FieldEncoding GetNativePcEncoding() const { @@ -738,15 +738,13 @@ class StackMapEncoding { return FieldEncoding(inline_info_bit_offset_, register_mask_bit_offset_, -1 /* min_value */); } ALWAYS_INLINE FieldEncoding GetRegisterMaskEncoding() const { - return FieldEncoding(register_mask_bit_offset_, stack_mask_bit_offset_); + return FieldEncoding(register_mask_bit_offset_, stack_mask_index_bit_offset_); } - ALWAYS_INLINE size_t GetStackMaskBitOffset() const { - // The end offset is not encoded. It is implicitly the end of stack map entry. - return stack_mask_bit_offset_; + ALWAYS_INLINE FieldEncoding GetStackMaskIndexEncoding() const { + return FieldEncoding(stack_mask_index_bit_offset_, total_bit_size_); } - ALWAYS_INLINE size_t GetNumberOfStackMaskBits(size_t stack_map_bits) const { - // Note that the stack mask bits are last. 
- return stack_map_bits - GetStackMaskBitOffset(); + ALWAYS_INLINE size_t BitSize() const { + return total_bit_size_; } void Dump(VariableIndentationOutputStream* vios) const; @@ -757,7 +755,8 @@ class StackMapEncoding { uint8_t dex_register_map_bit_offset_; uint8_t inline_info_bit_offset_; uint8_t register_mask_bit_offset_; - uint8_t stack_mask_bit_offset_; + uint8_t stack_mask_index_bit_offset_; + uint8_t total_bit_size_; }; /** @@ -771,7 +770,7 @@ class StackMapEncoding { * The information is of the form: * * [native_pc_offset, dex_pc, dex_register_map_offset, inlining_info_offset, register_mask, - * stack_mask]. + * stack_mask_index]. */ class StackMap { public: @@ -824,12 +823,12 @@ class StackMap { encoding.GetRegisterMaskEncoding().Store(region_, mask); } - ALWAYS_INLINE bool GetStackMaskBit(const StackMapEncoding& encoding, size_t index) const { - return region_.LoadBit(encoding.GetStackMaskBitOffset() + index); + ALWAYS_INLINE uint32_t GetStackMaskIndex(const StackMapEncoding& encoding) const { + return encoding.GetStackMaskIndexEncoding().Load(region_); } - ALWAYS_INLINE void SetStackMaskBit(const StackMapEncoding& encoding, size_t index, bool value) { - region_.StoreBit(encoding.GetStackMaskBitOffset() + index, value); + ALWAYS_INLINE void SetStackMaskIndex(const StackMapEncoding& encoding, uint32_t mask) { + encoding.GetStackMaskIndexEncoding().Store(region_, mask); } ALWAYS_INLINE bool HasDexRegisterMap(const StackMapEncoding& encoding) const { @@ -1031,7 +1030,8 @@ class InlineInfo { struct CodeInfoEncoding { uint32_t non_header_size; uint32_t number_of_stack_maps; - uint32_t stack_map_size_in_bits; + uint32_t number_of_stack_masks; + uint32_t stack_mask_size_in_bits; uint32_t number_of_location_catalog_entries; StackMapEncoding stack_map_encoding; InlineInfoEncoding inline_info_encoding; @@ -1043,7 +1043,8 @@ struct CodeInfoEncoding { const uint8_t* ptr = reinterpret_cast(data); non_header_size = DecodeUnsignedLeb128(&ptr); number_of_stack_maps = 
DecodeUnsignedLeb128(&ptr); - stack_map_size_in_bits = DecodeUnsignedLeb128(&ptr); + number_of_stack_masks = DecodeUnsignedLeb128(&ptr); + stack_mask_size_in_bits = DecodeUnsignedLeb128(&ptr); number_of_location_catalog_entries = DecodeUnsignedLeb128(&ptr); static_assert(alignof(StackMapEncoding) == 1, "StackMapEncoding should not require alignment"); @@ -1064,7 +1065,8 @@ struct CodeInfoEncoding { void Compress(Vector* dest) const { EncodeUnsignedLeb128(dest, non_header_size); EncodeUnsignedLeb128(dest, number_of_stack_maps); - EncodeUnsignedLeb128(dest, stack_map_size_in_bits); + EncodeUnsignedLeb128(dest, number_of_stack_masks); + EncodeUnsignedLeb128(dest, stack_mask_size_in_bits); EncodeUnsignedLeb128(dest, number_of_location_catalog_entries); const uint8_t* stack_map_ptr = reinterpret_cast(&stack_map_encoding); dest->insert(dest->end(), stack_map_ptr, stack_map_ptr + sizeof(StackMapEncoding)); @@ -1098,7 +1100,7 @@ class CodeInfo { } CodeInfoEncoding ExtractEncoding() const { - CodeInfoEncoding encoding(region_.start()); + CodeInfoEncoding encoding(region_.begin()); AssertValidStackMap(encoding); return encoding; } @@ -1114,14 +1116,27 @@ class CodeInfo { } ALWAYS_INLINE size_t GetNumberOfStackMaskBits(const CodeInfoEncoding& encoding) const { - return encoding.stack_map_encoding.GetNumberOfStackMaskBits(encoding.stack_map_size_in_bits); + return encoding.stack_mask_size_in_bits; } ALWAYS_INLINE StackMap GetStackMapAt(size_t i, const CodeInfoEncoding& encoding) const { - const size_t map_size = encoding.stack_map_size_in_bits; + const size_t map_size = encoding.stack_map_encoding.BitSize(); return StackMap(BitMemoryRegion(GetStackMaps(encoding), i * map_size, map_size)); } + BitMemoryRegion GetStackMask(const CodeInfoEncoding& encoding, size_t stack_mask_index) const { + // All stack mask data is stored at the very end. 
+ const size_t entry_size = GetNumberOfStackMaskBits(encoding); + return BitMemoryRegion(region_, + region_.size_in_bits() - entry_size * (stack_mask_index + 1), + entry_size); + } + + BitMemoryRegion GetStackMaskOf(const CodeInfoEncoding& encoding, + const StackMap& stack_map) const { + return GetStackMask(encoding, stack_map.GetStackMaskIndex(encoding.stack_map_encoding)); + } + uint32_t GetNumberOfLocationCatalogEntries(const CodeInfoEncoding& encoding) const { return encoding.number_of_location_catalog_entries; } @@ -1135,10 +1150,14 @@ class CodeInfo { return encoding.number_of_stack_maps; } + // Get the size of all the stack maps of this CodeInfo object, in bits. Not byte aligned. + ALWAYS_INLINE size_t GetStackMapsSizeInBits(const CodeInfoEncoding& encoding) const { + return encoding.stack_map_encoding.BitSize() * GetNumberOfStackMaps(encoding); + } + // Get the size of all the stack maps of this CodeInfo object, in bytes. size_t GetStackMapsSize(const CodeInfoEncoding& encoding) const { - return RoundUp(encoding.stack_map_size_in_bits * GetNumberOfStackMaps(encoding), kBitsPerByte) / - kBitsPerByte; + return RoundUp(GetStackMapsSizeInBits(encoding), kBitsPerByte) / kBitsPerByte; } uint32_t GetDexRegisterLocationCatalogOffset(const CodeInfoEncoding& encoding) const { @@ -1288,7 +1307,7 @@ class CodeInfo { << encoding.non_header_size << "\n" << encoding.number_of_location_catalog_entries << "\n" << encoding.number_of_stack_maps << "\n" - << encoding.stack_map_size_in_bits; + << encoding.stack_map_encoding.BitSize(); } } diff --git a/runtime/thread.cc b/runtime/thread.cc index 3c7a71aba..8002f32da 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -3038,9 +3038,10 @@ class ReferenceMapVisitor : public StackVisitor { T vreg_info(m, code_info, encoding, map, visitor_); // Visit stack entries that hold pointers. 
- size_t number_of_bits = code_info.GetNumberOfStackMaskBits(encoding); + const size_t number_of_bits = code_info.GetNumberOfStackMaskBits(encoding); + BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, map); for (size_t i = 0; i < number_of_bits; ++i) { - if (map.GetStackMaskBit(encoding.stack_map_encoding, i)) { + if (stack_mask.LoadBit(i)) { auto* ref_addr = vreg_base + i; mirror::Object* ref = ref_addr->AsMirrorPtr(); if (ref != nullptr) { @@ -3048,7 +3049,7 @@ class ReferenceMapVisitor : public StackVisitor { vreg_info.VisitStack(&new_ref, i, this); if (ref != new_ref) { ref_addr->Assign(new_ref); - } + } } } } -- 2.11.0