From dec8f63fdf50815f24efe1c03af64208da15f339 Mon Sep 17 00:00:00 2001 From: Roland Levillain Date: Fri, 22 Jul 2016 17:10:06 +0100 Subject: [PATCH] Do not emit stack maps for runtime calls to ReadBarrierMarkRegX. * Boot image code size variation on Nexus 5X (aosp_bullhead-userdebug build): - total ARM64 framework Oat files size change: 115584120 bytes -> 109124728 bytes (-5.59%) - total ARM framework Oat files size change: 97387728 bytes -> 92517584 (-5.00%) Test: ART host and target (ARM, ARM64) tests. Bug: 29506760 Bug: 12687968 Change-Id: I979d9fb2b4e09f4c0c7bf33af2cd91750a67f989 --- compiler/optimizing/code_generator.cc | 33 +++++++++++++++++++++++----- compiler/optimizing/code_generator.h | 6 +++++ compiler/optimizing/code_generator_arm.cc | 22 ++++++++++--------- compiler/optimizing/code_generator_arm.h | 6 +++++ compiler/optimizing/code_generator_arm64.cc | 23 ++++++++++--------- compiler/optimizing/code_generator_arm64.h | 6 +++++ compiler/optimizing/code_generator_x86.cc | 21 +++++++++--------- compiler/optimizing/code_generator_x86.h | 6 +++++ compiler/optimizing/code_generator_x86_64.cc | 21 +++++++++--------- compiler/optimizing/code_generator_x86_64.h | 6 +++++ 10 files changed, 104 insertions(+), 46 deletions(-) diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 9c6dcaa7b..3269dc660 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1178,19 +1178,19 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod << "instruction->DebugName()=" << instruction->DebugName() << " slow_path->GetDescription()=" << slow_path->GetDescription(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || - // When read barriers are enabled, some instructions use a - // slow path to emit a read barrier, which does not trigger - // GC, is not fatal, nor is emitted by HDeoptimize - // instructions. 
+ // When (non-Baker) read barriers are enabled, some instructions + // use a slow path to emit a read barrier, which does not trigger + // GC. (kEmitCompilerReadBarrier && + !kUseBakerReadBarrier && (instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet() || - instruction->IsArraySet() || instruction->IsArrayGet() || instruction->IsLoadClass() || instruction->IsLoadString() || instruction->IsInstanceOf() || - instruction->IsCheckCast()))) + instruction->IsCheckCast() || + (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified())))) << "instruction->DebugName()=" << instruction->DebugName() << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString() << " slow_path->GetDescription()=" << slow_path->GetDescription(); @@ -1204,6 +1204,27 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod << instruction->DebugName() << ((slow_path != nullptr) ? slow_path->GetDescription() : ""); } +void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction, + SlowPathCode* slow_path) { + DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath()) + << "instruction->DebugName()=" << instruction->DebugName() + << " slow_path->GetDescription()=" << slow_path->GetDescription(); + // Only the Baker read barrier marking slow path used by certain + // instructions is expected to invoke the runtime without recording + // PC-related information. 
+ DCHECK(kUseBakerReadBarrier); + DCHECK(instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArrayGet() || + instruction->IsLoadClass() || + instruction->IsLoadString() || + instruction->IsInstanceOf() || + instruction->IsCheckCast() || + (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified())) + << "instruction->DebugName()=" << instruction->DebugName() + << " slow_path->GetDescription()=" << slow_path->GetDescription(); +} + void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 5e6e175c6..2042adef1 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -379,8 +379,14 @@ class CodeGenerator : public DeletableArenaObject { return type == Primitive::kPrimNot && !value->IsNullConstant(); } + + // Performs checks pertaining to an InvokeRuntime call. void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path); + // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call. 
+ static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction, + SlowPathCode* slow_path); + void AddAllocatedRegister(Location location) { allocated_registers_.Add(location); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 236ed20fc..124a61fe6 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -431,8 +431,7 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && - instruction_->GetLocations()->Intrinsified())) + (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -440,7 +439,6 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. - InvokeRuntimeCallingConvention calling_convention; CodeGeneratorARM* arm_codegen = down_cast(codegen); DCHECK_NE(reg, SP); DCHECK_NE(reg, LR); @@ -462,11 +460,8 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { // int32_t entry_point_offset = CodeGenerator::GetReadBarrierMarkEntryPointsOffset(reg); - // TODO: Do not emit a stack map for this runtime call. - arm_codegen->InvokeRuntime(entry_point_offset, - instruction_, - instruction_->GetDexPc(), - this); + // This runtime call does not require a stack map. 
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ b(GetExitLabel()); } @@ -516,8 +511,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && - instruction_->GetLocations()->Intrinsified())) + (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -1240,6 +1234,14 @@ void CodeGeneratorARM::InvokeRuntime(int32_t entry_point_offset, RecordPcInfo(instruction, dex_pc, slow_path); } +void CodeGeneratorARM::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset); + __ blx(LR); +} + void InstructionCodeGeneratorARM::HandleGoto(HInstruction* got, HBasicBlock* successor) { DCHECK(!successor->IsExitBlock()); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index ef7913b6f..05cb8d194 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -394,6 +394,12 @@ class CodeGeneratorARM : public CodeGenerator { uint32_t dex_pc, SlowPathCode* slow_path); + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. + void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); + // Emit a write barrier. 
void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 76b07979f..efeef7bb6 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -597,8 +597,7 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && - instruction_->GetLocations()->Intrinsified())) + (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -606,7 +605,6 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. - InvokeRuntimeCallingConvention calling_convention; CodeGeneratorARM64* arm64_codegen = down_cast(codegen); DCHECK_NE(obj_.reg(), LR); DCHECK_NE(obj_.reg(), WSP); @@ -628,11 +626,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { // int32_t entry_point_offset = CodeGenerator::GetReadBarrierMarkEntryPointsOffset(obj_.reg()); - // TODO: Do not emit a stack map for this runtime call. - arm64_codegen->InvokeRuntime(entry_point_offset, - instruction_, - instruction_->GetDexPc(), - this); + // This runtime call does not require a stack map. 
+ arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ B(GetExitLabel()); } @@ -682,8 +677,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && - instruction_->GetLocations()->Intrinsified())) + (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. @@ -1502,6 +1496,15 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, RecordPcInfo(instruction, dex_pc, slow_path); } +void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + BlockPoolsScope block_pools(GetVIXLAssembler()); + __ Ldr(lr, MemOperand(tr, entry_point_offset)); + __ Blr(lr); +} + void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 7201e5906..88e8cead3 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -499,6 +499,12 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t dex_pc, SlowPathCode* slow_path); + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. 
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); + ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; } bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 82baaa044..528e94f76 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -466,8 +466,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && - instruction_->GetLocations()->Intrinsified())) + (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -475,7 +474,6 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. - InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86* x86_codegen = down_cast(codegen); DCHECK_NE(reg, ESP); DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; @@ -495,11 +493,8 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { // int32_t entry_point_offset = CodeGenerator::GetReadBarrierMarkEntryPointsOffset(reg); - // TODO: Do not emit a stack map for this runtime call. - x86_codegen->InvokeRuntime(entry_point_offset, - instruction_, - instruction_->GetDexPc(), - this); + // This runtime call does not require a stack map. 
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } @@ -549,8 +544,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && - instruction_->GetLocations()->Intrinsified())) + (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -824,6 +818,13 @@ void CodeGeneratorX86::InvokeRuntime(int32_t entry_point_offset, RecordPcInfo(instruction, dex_pc, slow_path); } +void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + __ fs()->call(Address::Absolute(entry_point_offset)); +} + CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const X86InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 2a9fb8099..12901724e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -336,6 +336,12 @@ class CodeGeneratorX86 : public CodeGenerator { uint32_t dex_pc, SlowPathCode* slow_path); + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. 
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); + size_t GetWordSize() const OVERRIDE { return kX86WordSize; } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index b6ba30e15..0f0129b42 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -487,8 +487,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && - instruction_->GetLocations()->Intrinsified())) + (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -496,7 +495,6 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. - InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); DCHECK_NE(reg, RSP); DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; @@ -516,11 +514,8 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { // int32_t entry_point_offset = CodeGenerator::GetReadBarrierMarkEntryPointsOffset(reg); - // TODO: Do not emit a stack map for this runtime call. - x86_64_codegen->InvokeRuntime(entry_point_offset, - instruction_, - instruction_->GetDexPc(), - this); + // This runtime call does not require a stack map. 
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } @@ -570,8 +565,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && - instruction_->GetLocations()->Intrinsified())) + (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -1052,6 +1046,13 @@ void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset, RecordPcInfo(instruction, dex_pc, slow_path); } +void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true)); +} + static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index d7cfd37c3..cf92d68c6 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -318,6 +318,12 @@ class CodeGeneratorX86_64 : public CodeGenerator { uint32_t dex_pc, SlowPathCode* slow_path); + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. + void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); + size_t GetWordSize() const OVERRIDE { return kX86_64WordSize; } -- 2.11.0