From a1aa3b1f40e496d6f8b3b305a4f956ddf2e425fc Mon Sep 17 00:00:00 2001 From: Roland Levillain Date: Wed, 26 Oct 2016 13:03:38 +0100 Subject: [PATCH] Add support for Baker read barriers in UnsafeCASObject intrinsics. Prior to doing the compare-and-swap operation, ensure the expected reference stored in the holding object's field is in the to-space by loading it, emitting a read barrier and updating that field with a strong compare-and-set operation with relaxed memory synchronization ordering (if needed). Test: ART host and target tests and Nexus 5X boot test with Baker read barriers. Bug: 29516905 Bug: 12687968 Change-Id: I480f6a9b59547f11d0a04777406b9bfeb905bfd2 --- compiler/optimizing/code_generator_arm.cc | 240 +++++++++++++++++++++++--- compiler/optimizing/code_generator_arm.h | 16 +- compiler/optimizing/code_generator_arm64.cc | 243 ++++++++++++++++++++++++--- compiler/optimizing/code_generator_arm64.h | 10 +- compiler/optimizing/code_generator_x86.cc | 228 ++++++++++++++++++++++--- compiler/optimizing/code_generator_x86.h | 17 +- compiler/optimizing/code_generator_x86_64.cc | 243 ++++++++++++++++++++++++--- compiler/optimizing/code_generator_x86_64.h | 18 +- compiler/optimizing/intrinsics_arm.cc | 130 +++++++------- compiler/optimizing/intrinsics_arm64.cc | 97 ++++++----- compiler/optimizing/intrinsics_x86.cc | 93 +++++----- compiler/optimizing/intrinsics_x86_64.cc | 91 +++++----- runtime/interpreter/unstarted_runtime.cc | 2 +- runtime/native/sun_misc_Unsafe.cc | 2 +- runtime/read_barrier-inl.h | 2 +- 15 files changed, 1151 insertions(+), 281 deletions(-) diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 9f92b2092..959a0f199 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -601,11 +601,21 @@ class ArraySetSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { public: - ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj) - : SlowPathCodeARM(instruction), obj_(obj) { + ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref) + : SlowPathCodeARM(instruction), ref_(ref) { DCHECK(kEmitCompilerReadBarrier); } @@ -613,9 +623,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Register reg = obj_.AsRegister(); + Register ref_reg = ref_.AsRegister(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -634,40 +644,208 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorARM* arm_codegen = down_cast(codegen); - DCHECK_NE(reg, SP); - DCHECK_NE(reg, LR); - DCHECK_NE(reg, PC); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); // IP is used internally by the ReadBarrierMarkRegX entry point // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(reg, IP); - DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg; + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in R0): // - // R0 <- obj + // R0 <- ref // R0 <- ReadBarrierMark(R0) - // obj <- R0 + // ref <- R0 // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset(ref_reg); // This runtime call does not require a stack map. arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ b(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
+class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { + public: + ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp1, + Register temp2) + : SlowPathCodeARM(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp1_(temp1), + temp2_(temp2) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegisterPair()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use IP to save the old reference, as IP is + // used internally by the ReadBarrierMarkRegX entry point, and we + // need the old reference after the call to that entry point. + DCHECK_NE(temp1_, IP); + __ Mov(temp1_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM* arm_codegen = down_cast(codegen); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // R0 <- ref + // R0 <- ReadBarrierMark(R0) + // ref <- R0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset(ref_reg); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set + // (CAS) operation below would abort the CAS, leaving the field + // as-is. + Label done; + __ cmp(temp1_, ShifterOperand(ref_reg)); + __ b(&done, EQ); + + // Update the holder's field atomically. This may fail if + // the mutator updates the field before us, but it's OK.
This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + // Convenience aliases. + Register base = obj_; + // The UnsafeCASObject intrinsic uses a register pair as field + // offset ("long offset"), of which only the low part contains + // data. + Register offset = field_offset_.AsRegisterPairLow(); + Register expected = temp1_; + Register value = ref_reg; + Register tmp_ptr = IP; // Pointer to actual memory. + Register tmp = temp2_; // Value in memory. + + __ add(tmp_ptr, base, ShifterOperand(offset)); + + if (kPoisonHeapReferences) { + __ PoisonHeapReference(expected); + if (value == expected) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + __ PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + + Label loop_head; + __ Bind(&loop_head); + + __ ldrex(tmp, tmp_ptr); + + __ subs(tmp, tmp, ShifterOperand(expected)); + + __ it(EQ, ItState::kItT); + __ strex(tmp, value, tmp_ptr, EQ); + __ cmp(tmp, ShifterOperand(1), EQ); + + __ b(&loop_head, EQ); + + if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(expected); + if (value == expected) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + __ UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ b(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp1_; + const Register temp2_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM { public: @@ -6644,7 +6822,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + Register* temp2) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6689,8 +6869,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The actual reference load. if (index.IsValid()) { - // Load types involving an "index": ArrayGet and - // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. // /* HeapReference */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { size_t computed_offset = @@ -6698,9 +6879,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); } else { // Handle the special case of the - // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use - // a register pair as index ("long offset"), of which only the low - // part contains data. + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics, which use a register pair as index ("long + // offset"), of which only the low part contains data. 
Register index_reg = index.IsRegisterPair() ? index.AsRegisterPairLow() : index.AsRegister(); @@ -6716,8 +6897,21 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i __ MaybeUnpoisonHeapReference(ref_reg); // Slow path marking the object `ref` when it is gray. - SlowPathCodeARM* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + SlowPathCodeARM* slow_path; + if (always_update_field) { + DCHECK(temp2 != nullptr); + // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` and `scale_factor` above are expected + // to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM( + instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + } AddSlowPath(slow_path); // if (rb_state == ReadBarrier::gray_ptr_) diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 4d59b4786..c676d753c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -508,6 +508,18 @@ class CodeGeneratorARM : public CodeGenerator { bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. + + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires an extra temporary register, which must be provided as a + // non-null pointer (`temp2`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -515,7 +527,9 @@ class CodeGeneratorARM : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + Register* temp2 = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 9e59d8cc3..60d7faf73 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -589,11 +589,21 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that). 
+// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { public: - ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj) - : SlowPathCodeARM64(instruction), obj_(obj) { + ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location ref) + : SlowPathCodeARM64(instruction), ref_(ref) { DCHECK(kEmitCompilerReadBarrier); } @@ -602,7 +612,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg())); + DCHECK(ref_.IsRegister()) << ref_; + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -621,40 +632,204 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorARM64* arm64_codegen = down_cast(codegen); - DCHECK_NE(obj_.reg(), LR); - DCHECK_NE(obj_.reg(), WSP); - DCHECK_NE(obj_.reg(), WZR); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); // IP0 is used internally by the ReadBarrierMarkRegX entry point // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(obj_.reg(), IP0); - DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg(); + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in W0): // - // W0 <- obj + // W0 <- ref // W0 <- ReadBarrierMark(W0) - // obj <- W0 + // ref <- W0 // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset(obj_.reg()); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset(ref_.reg()); // This runtime call does not require a stack map. arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ B(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM64 above, which never tries to update +// `obj.field`). 
+// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). +class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp) + : SlowPathCodeARM64(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp_(temp) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathARM64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = WRegisterFrom(ref_); + DCHECK(locations->CanCall()); + DCHECK(ref_.IsRegister()) << ref_; + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegister()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use IP to save the old reference, as IP is + // used internally by the ReadBarrierMarkRegX entry point, and we + // need the old reference after the call to that entry point. + DCHECK_NE(LocationFrom(temp_).reg(), IP0); + __ Mov(temp_.W(), ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM64* arm64_codegen = down_cast(codegen); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in W0): + // + // W0 <- ref + // W0 <- ReadBarrierMark(W0) + // ref <- W0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset(ref_.reg()); + // This runtime call does not require a stack map. + arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LDXR/CMP/BNE sequence of instructions in the compare-and-set + // (CAS) operation below would abort the CAS, leaving the field + // as-is. 
+ vixl::aarch64::Label done; + __ Cmp(temp_.W(), ref_reg); + __ B(eq, &done); + + // Update the holder's field atomically. This may fail if + // the mutator updates the field before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + MacroAssembler* masm = arm64_codegen->GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + + // Convenience aliases. + Register base = obj_.W(); + Register offset = XRegisterFrom(field_offset_); + Register expected = temp_.W(); + Register value = ref_reg; + Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. + Register tmp_value = temps.AcquireW(); // Value in memory. + + __ Add(tmp_ptr, base.X(), Operand(offset)); + + if (kPoisonHeapReferences) { + arm64_codegen->GetAssembler()->PoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + arm64_codegen->GetAssembler()->PoisonHeapReference(value); + } + } + + // do { + // tmp_value = [tmp_ptr] - expected; + // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); + + vixl::aarch64::Label loop_head, exit_loop; + __ Bind(&loop_head); + __ Ldxr(tmp_value, MemOperand(tmp_ptr)); + __ Cmp(tmp_value, expected); + __ B(&exit_loop, ne); + __ Stxr(tmp_value, value, MemOperand(tmp_ptr)); + __ Cbnz(tmp_value, &loop_head); + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + arm64_codegen->GetAssembler()->UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ B(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64); +}; + // Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -768,7 +943,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) << instruction_->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset_, 0U); + DCHECK_EQ(offset_, 0u); DCHECK(index_.IsRegister()); } } @@ -5174,7 +5349,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // /* HeapReference */ ref = *(obj + offset) Location no_index = Location::NoLocation(); - size_t no_scale_factor = 0U; + size_t no_scale_factor = 0u; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -5225,7 +5400,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* size_t scale_factor, Register temp, bool needs_null_check, - bool use_load_acquire) { + bool use_load_acquire, + bool always_update_field) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // If we are emitting an array load, we should not be using a @@ -5278,7 +5454,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // The actual reference load. if (index.IsValid()) { - // Load types involving an "index". + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. if (use_load_acquire) { // UnsafeGetObjectVolatile intrinsic case. // Register `index` is not an index in an object array, but an @@ -5287,9 +5465,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* DCHECK(instruction->GetLocations()->Intrinsified()); DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) << instruction->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset, 0U); - DCHECK_EQ(scale_factor, 0U); - DCHECK_EQ(needs_null_check, 0U); + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, 0u); + DCHECK_EQ(needs_null_check, 0u); // /* HeapReference */ ref = *(obj + index) MemOperand field = HeapOperand(obj, XRegisterFrom(index)); LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); @@ -5300,10 +5478,10 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); Load(type, ref_reg, HeapOperand(obj, computed_offset)); } else { - Register temp2 = temps.AcquireW(); - __ Add(temp2, obj, offset); - Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor)); - temps.Release(temp2); + Register temp3 = temps.AcquireW(); + __ Add(temp3, obj, offset); + Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor)); + temps.Release(temp3); } } } else { @@ -5320,8 +5498,19 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); // Slow path marking the object `ref` when it is gray. - SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); + SlowPathCodeARM64* slow_path; + if (always_update_field) { + // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports + // address of the form `obj + field_offset`, where `obj` is a + // register and `field_offset` is a register. Thus `offset` and + // `scale_factor` above are expected to be null in this code path. 
+ DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, 0u); /* "times 1" */ + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64( + instruction, ref, obj, /* field_offset */ index, temp); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); + } AddSlowPath(slow_path); // if (rb_state == ReadBarrier::gray_ptr_) diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index eb28ecb42..3de46277c 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -594,6 +594,13 @@ class CodeGeneratorARM64 : public CodeGenerator { bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. + // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch64::Register obj, @@ -602,7 +609,8 @@ class CodeGeneratorARM64 : public CodeGenerator { size_t scale_factor, vixl::aarch64::Register temp, bool needs_null_check, - bool use_load_acquire); + bool use_load_acquire, + bool always_update_field = false); // Generate a read barrier for a heap reference within `instruction` // using a slow path. diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 02c1c3b69..015333dbe 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -426,11 +426,25 @@ class ArraySetSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathX86 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison) - : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { + ReadBarrierMarkSlowPathX86(HInstruction* instruction, + Location ref, + bool unpoison_ref_before_marking) + : SlowPathCode(instruction), + ref_(ref), + unpoison_ref_before_marking_(unpoison_ref_before_marking) { DCHECK(kEmitCompilerReadBarrier); } @@ -438,9 +452,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Register reg = obj_.AsRegister(); + Register ref_reg = ref_.AsRegister(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -455,44 +469,211 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - if (unpoison_) { + if (unpoison_ref_before_marking_) { // Object* ref = ref_addr->AsMirrorPtr() - __ MaybeUnpoisonHeapReference(reg); + __ MaybeUnpoisonHeapReference(ref_reg); } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorX86* x86_codegen = down_cast(codegen); - DCHECK_NE(reg, ESP); - DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; + DCHECK_NE(ref_reg, ESP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in EAX): // - // EAX <- obj + // EAX <- ref // EAX <- ReadBarrierMark(EAX) - // obj <- EAX + // ref <- EAX // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset(ref_reg); // This runtime call does not require a stack map. x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } private: - const Location obj_; - const bool unpoison_; + // The location (register) of the marked object reference. + const Location ref_; + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathX86 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
+class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { + public: + ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction, + Location ref, + Register obj, + const Address& field_addr, + bool unpoison_ref_before_marking, + Register temp) + : SlowPathCode(instruction), + ref_(ref), + obj_(obj), + field_addr_(field_addr), + unpoison_ref_before_marking_(unpoison_ref_before_marking), + temp_(temp) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + + __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + } + + // Save the old (unpoisoned) reference. + __ movl(temp_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorX86* x86_codegen = down_cast(codegen); + DCHECK_NE(ref_reg, ESP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in EAX): + // + // EAX <- ref + // EAX <- ReadBarrierMark(EAX) + // ref <- EAX + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset(ref_reg); + // This runtime call does not require a stack map. + x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*field_addr`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LOCK CMPXCHGL instruction in the compare-and-set (CAS) + // operation below would abort the CAS, leaving the field as-is. + NearLabel done; + __ cmpl(temp_, ref_reg); + __ j(kEqual, &done); + + // Update the holder's field atomically. This may fail if + // the mutator updates the field before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + // This operation is implemented with a 32-bit LOCK CMPXCHGL + // instruction, which requires the expected value (the old + // reference) to be in EAX. Save EAX beforehand, and move the + // expected value (stored in `temp_`) into EAX. + __ pushl(EAX); + __ movl(EAX, temp_); + + // Convenience aliases.
+ Register base = obj_; + Register expected = EAX; + Register value = ref_reg; + + bool base_equals_value = (base == value); + if (kPoisonHeapReferences) { + if (base_equals_value) { + // If `base` and `value` are the same register location, move + // `value` to a temporary register. This way, poisoning + // `value` won't invalidate `base`. + value = temp_; + __ movl(value, base); + } + + // Check that the register allocator did not assign the location + // of `expected` (EAX) to `value` nor to `base`, so that heap + // poisoning (when enabled) works as intended below. + // - If `value` were equal to `expected`, both references would + // be poisoned twice, meaning they would not be poisoned at + // all, as heap poisoning uses address negation. + // - If `base` were equal to `expected`, poisoning `expected` + // would invalidate `base`. + DCHECK_NE(value, expected); + DCHECK_NE(base, expected); + + __ PoisonHeapReference(expected); + __ PoisonHeapReference(value); + } + + __ LockCmpxchgl(field_addr_, value); + + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. + if (kPoisonHeapReferences) { + if (base_equals_value) { + // `value` has been moved to a temporary register, no need + // to unpoison it. + } else { + __ UnpoisonHeapReference(value); + } + // No need to unpoison `expected` (EAX), as it will be overwritten below. + } + + // Restore EAX. + __ popl(EAX); + + __ Bind(&done); + __ jmp(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The address of the marked reference field. The base of this address must be `obj_`. + const Address field_addr_; + + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; + + const Register temp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { public: @@ -6831,7 +7012,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( - instruction, root, /* unpoison */ false); + instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset().Int32Value()), @@ -6896,7 +7077,9 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location ref, Register obj, const Address& src, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + Register* temp) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6953,8 +7136,15 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. // Slow path marking the object `ref` when it is gray.
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( - instruction, ref, /* unpoison */ true); + SlowPathCode* slow_path; + if (always_update_field) { + DCHECK(temp != nullptr); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86( + instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + instruction, ref, /* unpoison_ref_before_marking */ true); + } AddSlowPath(slow_path); // We have done the "if" of the gray bit check above, now branch based on the flags. diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index e7d9a43f5..167017e0d 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -499,13 +499,24 @@ class CodeGeneratorX86 : public CodeGenerator { uint32_t data_offset, Location index, bool needs_null_check); - // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at address `src`, held by + // object `obj`, into `ref`, and mark it if needed. The base of + // address `src` must be `obj`. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires a temporary register, which must be provided as a + // non-null pointer (`temp`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, const Address& src, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + Register* temp = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 4b64c1b6f..22325047e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -445,11 +445,25 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison) - : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { + ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, + Location ref, + bool unpoison_ref_before_marking) + : SlowPathCode(instruction), + ref_(ref), + unpoison_ref_before_marking_(unpoison_ref_before_marking) { DCHECK(kEmitCompilerReadBarrier); } @@ -457,10 +471,10 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - CpuRegister cpu_reg = obj_.AsRegister(); - Register reg = cpu_reg.AsRegister(); + CpuRegister ref_cpu_reg = ref_.AsRegister(); + Register ref_reg = ref_cpu_reg.AsRegister(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -475,44 +489,218 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - if (unpoison_) { + if (unpoison_ref_before_marking_) { // Object* ref = ref_addr->AsMirrorPtr() - __ MaybeUnpoisonHeapReference(cpu_reg); + __ MaybeUnpoisonHeapReference(ref_cpu_reg); } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); - DCHECK_NE(reg, RSP); - DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; + DCHECK_NE(ref_reg, RSP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in R0): // - // RDI <- obj + // RDI <- ref // RAX <- ReadBarrierMark(RDI) - // obj <- RAX + // ref <- RAX // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset(ref_reg); // This runtime call does not require a stack map. x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } private: - const Location obj_; - const bool unpoison_; + // The location (register) of the marked object reference. + const Location ref_; + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathX86_64 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
+class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& field_addr, + bool unpoison_ref_before_marking, + CpuRegister temp1, + CpuRegister temp2) + : SlowPathCode(instruction), + ref_(ref), + obj_(obj), + field_addr_(field_addr), + unpoison_ref_before_marking_(unpoison_ref_before_marking), + temp1_(temp1), + temp2_(temp2) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + CpuRegister ref_cpu_reg = ref_.AsRegister(); + Register ref_reg = ref_cpu_reg.AsRegister(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + + __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_cpu_reg); + } + + // Save the old (unpoisoned) reference. + __ movl(temp1_, ref_cpu_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); + DCHECK_NE(ref_reg, RSP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // RDI <- ref + // RAX <- ReadBarrierMark(RDI) + // ref <- RAX + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset(ref_reg); + // This runtime call does not require a stack map. + x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*field_addr`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LOCK CMPXCHGL instruction in the compare-and-set (CAS) + // operation below would abort the CAS, leaving the field as-is. + NearLabel done; + __ cmpl(temp1_, ref_cpu_reg); + __ j(kEqual, &done); + + // Update the holder's field atomically. This may fail if + // the mutator updates the field before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + // This operation is implemented with a 32-bit LOCK CMPXCHGL + // instruction, which requires the expected value (the old + // reference) to be in EAX.
Save RAX beforehand, and move the + expected value (stored in `temp1_`) into EAX. + __ movq(temp2_, CpuRegister(RAX)); + __ movl(CpuRegister(RAX), temp1_); + + // Convenience aliases. + CpuRegister base = obj_; + CpuRegister expected = CpuRegister(RAX); + CpuRegister value = ref_cpu_reg; + + bool base_equals_value = (base.AsRegister() == value.AsRegister()); + Register value_reg = ref_reg; + if (kPoisonHeapReferences) { + if (base_equals_value) { + // If `base` and `value` are the same register location, move + // `value_reg` to a temporary register. This way, poisoning + // `value_reg` won't invalidate `base`. + value_reg = temp1_.AsRegister(); + __ movl(CpuRegister(value_reg), base); + } + + // Check that the register allocator did not assign the location + // of `expected` (RAX) to `value` nor to `base`, so that heap + // poisoning (when enabled) works as intended below. + // - If `value` were equal to `expected`, both references would + // be poisoned twice, meaning they would not be poisoned at + // all, as heap poisoning uses address negation. + // - If `base` were equal to `expected`, poisoning `expected` + // would invalidate `base`. + DCHECK_NE(value_reg, expected.AsRegister()); + DCHECK_NE(base.AsRegister(), expected.AsRegister()); + + __ PoisonHeapReference(expected); + __ PoisonHeapReference(CpuRegister(value_reg)); + } + + __ LockCmpxchgl(field_addr_, CpuRegister(value_reg)); + + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. + if (kPoisonHeapReferences) { + if (base_equals_value) { + // `value_reg` has been moved to a temporary register, no need + // to unpoison it. + } else { + __ UnpoisonHeapReference(CpuRegister(value_reg)); + } + // No need to unpoison `expected` (RAX), as it will be overwritten below. + } + + // Restore RAX. + __ movq(CpuRegister(RAX), temp2_); + + __ Bind(&done); + __ jmp(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const CpuRegister obj_; + // The address of the marked reference field. The base of this address must be `obj_`. + const Address field_addr_; + + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; + + const CpuRegister temp1_; + const CpuRegister temp2_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { public: @@ -4122,7 +4310,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, // /* HeapReference */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this - // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. + // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( instruction, out, base, offset, /* needs_null_check */ true); if (is_volatile) { @@ -4569,7 +4757,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // *(obj + data_offset + index * sizeof(HeapReference)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this - // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+ // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); } else { @@ -6264,7 +6452,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( - instruction, root, /* unpoison */ false); + instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset().Int32Value(), @@ -6330,7 +6518,10 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction Location ref, CpuRegister obj, const Address& src, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + CpuRegister* temp1, + CpuRegister* temp2) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6387,8 +6578,16 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. // Slow path marking the object `ref` when it is gray. - SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( - instruction, ref, /* unpoison */ true); + SlowPathCode* slow_path; + if (always_update_field) { + DCHECK(temp1 != nullptr); + DCHECK(temp2 != nullptr); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( + instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + instruction, ref, /* unpoison_ref_before_marking */ true); + } AddSlowPath(slow_path); // We have done the "if" of the gray bit check above, now branch based on the flags. diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 57ef83f62..70e22f96f 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -434,13 +434,25 @@ class CodeGeneratorX86_64 : public CodeGenerator { uint32_t data_offset, Location index, bool needs_null_check); - // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at address `src`, held by + // object `obj`, into `ref`, and mark it if needed. The base of + // address `src` must be `obj`. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires two temporary registers, which must be provided as + // non-null pointers (`temp1` and `temp2`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, CpuRegister obj, const Address& src, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + CpuRegister* temp1 = nullptr, + CpuRegister* temp2 = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. 
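The new `always_update_field` mode documented above is what the UnsafeCASObject intrinsics rely on: the reference is loaded, marked if the GC is currently marking, and the marked reference is CASed back into the holder's field so that the intrinsic's own compare-and-set then sees a to-space value. As a rough, illustrative sketch only (the actual call sites are in the intrinsics_* files further down in this patch), an x86-64 caller could look like the following, where `invoke`, `out_loc`, `base` and `offset` stand for whatever the intrinsic's LocationSummary provides:

    // Sketch, not part of the patch: load the reference at `base + offset`,
    // mark it if needed, and let the slow path write the marked reference
    // back into the field before the intrinsic's own compare-and-set.
    CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
    Address field_addr(base, offset, TIMES_1, 0);
    codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
                                                       out_loc,  // Used only as a temporary here.
                                                       base,
                                                       field_addr,
                                                       /* needs_null_check */ false,
                                                       /* always_update_field */ true,
                                                       &temp1,
                                                       &temp2);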
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 96a6ecbee..8790c1e4f 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -652,9 +652,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -663,7 +663,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier. @@ -891,8 +891,13 @@ void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke, Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -901,36 +906,65 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, locations->SetInAt(4, Location::RequiresRegister()); // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. - Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. + Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) ? Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); + // Temporary registers used in CAS. In the object case + // (UnsafeCASObject intrinsic), these are also used for + // card-marking, and possibly for (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Pointer. locations->AddTemp(Location::RequiresRegister()); // Temp 1. } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) { +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* codegen) { DCHECK_NE(type, Primitive::kPrimLong); ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); - Register out = locations->Out().AsRegister(); // Boolean result. + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister(); // Boolean result. 
- Register base = locations->InAt(1).AsRegister(); // Object pointer. - Register offset = locations->InAt(2).AsRegisterPairLow(); // Offset (discard high 4B). - Register expected_lo = locations->InAt(3).AsRegister(); // Expected. - Register value_lo = locations->InAt(4).AsRegister(); // Value. + Register base = locations->InAt(1).AsRegister(); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow(); // Offset (discard high 4B). + Register expected = locations->InAt(3).AsRegister(); // Expected. + Register value = locations->InAt(4).AsRegister(); // Value. - Register tmp_ptr = locations->GetTemp(0).AsRegister(); // Pointer to actual memory. - Register tmp_lo = locations->GetTemp(1).AsRegister(); // Value in memory. + Location tmp_ptr_loc = locations->GetTemp(0); + Register tmp_ptr = tmp_ptr_loc.AsRegister(); // Pointer to actual memory. + Register tmp = locations->GetTemp(1).AsRegister(); // Value in memory. if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged // object and scan the receiver at the next GC for nothing. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo, value_can_be_null); + codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + ScaleFactor::TIMES_1, + tmp_ptr_loc, + /* needs_null_check */ false, + /* always_update_field */ true, + &tmp); + } } // Prevent reordering with prior memory operations. @@ -942,12 +976,12 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ add(tmp_ptr, base, ShifterOperand(offset)); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->PoisonHeapReference(expected_lo); - if (value_lo == expected_lo) { - // Do not poison `value_lo`, as it is the same register as - // `expected_lo`, which has just been poisoned. + __ PoisonHeapReference(expected); + if (value == expected) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. } else { - codegen->GetAssembler()->PoisonHeapReference(value_lo); + __ PoisonHeapReference(value); } } @@ -959,37 +993,29 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat Label loop_head; __ Bind(&loop_head); - // TODO: When `type == Primitive::kPrimNot`, add a read barrier for - // the reference stored in the object before attempting the CAS, - // similar to the one in the art::Unsafe_compareAndSwapObject JNI - // implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject). 
- DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); - __ ldrex(tmp_lo, tmp_ptr); + __ ldrex(tmp, tmp_ptr); - __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); + __ subs(tmp, tmp, ShifterOperand(expected)); __ it(EQ, ItState::kItT); - __ strex(tmp_lo, value_lo, tmp_ptr, EQ); - __ cmp(tmp_lo, ShifterOperand(1), EQ); + __ strex(tmp, value, tmp_ptr, EQ); + __ cmp(tmp, ShifterOperand(1), EQ); __ b(&loop_head, EQ); __ dmb(ISH); - __ rsbs(out, tmp_lo, ShifterOperand(1)); + __ rsbs(out, tmp, ShifterOperand(1)); __ it(CC); __ mov(out, ShifterOperand(0), CC); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->UnpoisonHeapReference(expected_lo); - if (value_lo == expected_lo) { - // Do not unpoison `value_lo`, as it is the same register as - // `expected_lo`, which has just been unpoisoned. + __ UnpoisonHeapReference(expected); + if (value == expected) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. } else { - codegen->GetAssembler()->UnpoisonHeapReference(value_lo); + __ UnpoisonHeapReference(value); } } } @@ -998,33 +1024,23 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + GenCas(invoke, Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index e2c1802fd..db1c02286 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -863,9 +863,9 @@ static void GenUnsafeGet(HInvoke* invoke, codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, trg_loc, base, - /* offset */ 0U, + /* offset */ 0u, /* index */ offset_loc, - /* scale_factor */ 0U, + /* scale_factor */ 0u, temp, /* needs_null_check */ false, is_volatile); @@ -880,7 +880,7 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == Primitive::kPrimNot) { DCHECK(trg.IsW()); - codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc); } } } @@ -890,9 +890,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -901,7 +901,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) { @@ -1086,8 +1086,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke, Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1096,20 +1101,29 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, locations->SetInAt(4, Location::RequiresRegister()); // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. - Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. + Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) ? 
Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register for (Baker) read barrier. + locations->AddTemp(Location::RequiresRegister()); + } } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) { +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) { MacroAssembler* masm = codegen->GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); - Register out = WRegisterFrom(locations->Out()); // Boolean result. + Location out_loc = locations->Out(); + Register out = WRegisterFrom(out_loc); // Boolean result. Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Location offset_loc = locations->InAt(2); + Register offset = XRegisterFrom(offset_loc); // Long offset. Register expected = RegisterFrom(locations->InAt(3), type); // Expected. Register value = RegisterFrom(locations->InAt(4), type); // Value. @@ -1118,6 +1132,27 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); + + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Register temp = WRegisterFrom(locations->GetTemp(0)); + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + /* scale_factor */ 0u, + temp, + /* needs_null_check */ false, + /* use_load_acquire */ false, + /* always_update_field */ true); + } } UseScratchRegisterScope temps(masm); @@ -1145,14 +1180,6 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat vixl::aarch64::Label loop_head, exit_loop; __ Bind(&loop_head); - // TODO: When `type == Primitive::kPrimNot`, add a read barrier for - // the reference stored in the object before attempting the CAS, - // similar to the one in the art::Unsafe_compareAndSwapObject JNI - // implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). - DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); @@ -1179,14 +1206,9 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. 
- if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1194,22 +1216,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_); + GenCas(invoke, Primitive::kPrimLong, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + GenCas(invoke, Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index f41e4d95b..aae389984 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2056,9 +2056,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -2076,7 +2076,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, } } else { locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } } @@ -2255,10 +2255,16 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, + Primitive::Type type, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); @@ -2278,7 +2284,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty // Force a byte register for the output. locations->SetOut(Location::RegisterLocation(EAX)); if (type == Primitive::kPrimNot) { - // Need temp registers for card-marking. + // Need temporary registers for card-marking, and possibly for + // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. // Need a byte register for marking. locations->AddTemp(Location::RegisterLocation(ECX)); @@ -2294,14 +2301,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2317,7 +2319,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code Location out = locations->Out(); DCHECK_EQ(out.AsRegister(), EAX); + // The address of the field within the holding object. + Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); + if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister(); + Register temp2 = locations->GetTemp(1).AsRegister(); + Register expected = locations->InAt(3).AsRegister(); // Ensure `expected` is in EAX (required by the CMPXCHG instruction). DCHECK_EQ(expected, EAX); @@ -2325,11 +2338,20 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(locations->GetTemp(0).AsRegister(), - locations->GetTemp(1).AsRegister(), - base, - value, - value_can_be_null); + codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + temp1_loc, // Unused, used only as a "temporary" within the read barrier. + base, + field_addr, + /* needs_null_check */ false, + /* always_update_field */ true, + &temp2); + } bool base_equals_value = (base == value); if (kPoisonHeapReferences) { @@ -2337,7 +2359,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // If `base` and `value` are the same register location, move // `value` to a temporary register. This way, poisoning // `value` won't invalidate `base`. 
- value = locations->GetTemp(0).AsRegister(); + value = temp1; __ movl(value, base); } @@ -2356,19 +2378,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ PoisonHeapReference(value); } - // TODO: Add a read barrier for the reference stored in the object - // before attempting the CAS, similar to the one in the - // art::Unsafe_compareAndSwapObject JNI implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject). - DCHECK(!kEmitCompilerReadBarrier); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + __ LockCmpxchgl(field_addr, value); // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setb(kZero, out.AsRegister()); __ movzxb(out.AsRegister(), out.AsRegister()); @@ -2392,8 +2407,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // Ensure the expected value is in EAX (required by the CMPXCHG // instruction). DCHECK_EQ(locations->InAt(3).AsRegister(), EAX); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), - locations->InAt(4).AsRegister()); + __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister()); } else if (type == Primitive::kPrimLong) { // Ensure the expected value is in EAX:EDX and that the new // value is in EBX:ECX (required by the CMPXCHG8B instruction). @@ -2401,7 +2415,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh(), EDX); DCHECK_EQ(locations->InAt(4).AsRegisterPairLow(), EBX); DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh(), ECX); - __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + __ LockCmpxchg8b(field_addr); } else { LOG(FATAL) << "Unexpected CAS type " << type; } @@ -2409,7 +2423,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we // don't need scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setb(kZero, out.AsRegister()); __ movzxb(out.AsRegister(), out.AsRegister()); } @@ -2424,14 +2438,9 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); GenCAS(Primitive::kPrimNot, invoke, codegen_); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 4b0afca12..cdef22f6d 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2172,9 +2172,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -2183,7 +2183,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { @@ -2333,10 +2333,16 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, + Primitive::Type type, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -2347,7 +2353,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty locations->SetOut(Location::RequiresRegister()); if (type == Primitive::kPrimNot) { - // Need temp registers for card-marking. + // Need temporary registers for card-marking, and possibly for + // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. locations->AddTemp(Location::RequiresRegister()); } @@ -2362,14 +2369,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. 
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2386,16 +2388,37 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // Ensure `expected` is in RAX (required by the CMPXCHG instruction). DCHECK_EQ(expected.AsRegister(), RAX); CpuRegister value = locations->InAt(4).AsRegister(); - CpuRegister out = locations->Out().AsRegister(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister(); if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + CpuRegister temp1 = locations->GetTemp(0).AsRegister(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister(); + // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(locations->GetTemp(0).AsRegister(), - locations->GetTemp(1).AsRegister(), - base, - value, - value_can_be_null); + codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); + + // The address of the field within the holding object. + Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + field_addr, + /* needs_null_check */ false, + /* always_update_field */ true, + &temp1, + &temp2); + } bool base_equals_value = (base.AsRegister() == value.AsRegister()); Register value_reg = value.AsRegister(); @@ -2404,7 +2427,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // If `base` and `value` are the same register location, move // `value_reg` to a temporary register. This way, poisoning // `value_reg` won't invalidate `base`. - value_reg = locations->GetTemp(0).AsRegister().AsRegister(); + value_reg = temp1.AsRegister(); __ movl(CpuRegister(value_reg), base); } @@ -2423,19 +2446,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ PoisonHeapReference(CpuRegister(value_reg)); } - // TODO: Add a read barrier for the reference stored in the object - // before attempting the CAS, similar to the one in the - // art::Unsafe_compareAndSwapObject JNI implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). - DCHECK(!kEmitCompilerReadBarrier); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); + __ LockCmpxchgl(field_addr, CpuRegister(value_reg)); // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setcc(kZero, out); __ movzxb(out, out); @@ -2468,7 +2484,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. 
__ setcc(kZero, out); __ movzxb(out, out); } @@ -2483,14 +2499,9 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); GenCAS(Primitive::kPrimNot, invoke, codegen_); } diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc index 5a62bd77d..cc88d7edd 100644 --- a/runtime/interpreter/unstarted_runtime.cc +++ b/runtime/interpreter/unstarted_runtime.cc @@ -1271,7 +1271,7 @@ void UnstartedRuntime::UnstartedUnsafeCompareAndSwapObject( mirror::HeapReference* field_addr = reinterpret_cast*>( reinterpret_cast(obj) + static_cast(offset)); - ReadBarrier::Barrier( + ReadBarrier::Barrier( obj, MemberOffset(offset), field_addr); diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc index cdf4b14db..644df070b 100644 --- a/runtime/native/sun_misc_Unsafe.cc +++ b/runtime/native/sun_misc_Unsafe.cc @@ -65,7 +65,7 @@ static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaOb mirror::HeapReference* field_addr = reinterpret_cast*>( reinterpret_cast(obj.Ptr()) + static_cast(offset)); - ReadBarrier::Barrier( + ReadBarrier::Barrier( obj.Ptr(), MemberOffset(offset), field_addr); diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index 67e5a81b2..be9560052 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -54,7 +54,7 @@ inline MirrorType* ReadBarrier::Barrier( // Slow-path. ref = reinterpret_cast(Mark(ref)); // If kAlwaysUpdateField is true, update the field atomically. This may fail if mutator - // updates before us, but it's ok. + // updates before us, but it's OK. if (kAlwaysUpdateField && ref != old_ref) { obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier( offset, old_ref, ref); -- 2.11.0
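
A compact model of why the intrinsics need this barrier before the CAS (again illustrative C++ only; `ForwardStoredReference` and `CompareAndSwapObject` are placeholders, not runtime APIs): without first forwarding the reference stored in the field, `expected` (already a to-space reference in the caller) could be compared against a stale from-space copy of the same object still sitting in the field, and the CAS would fail even though both denote the same object.

#include <atomic>
#include <cstdint>

using ObjRef = uintptr_t;

// Placeholder for the Baker mark-and-update step added by this patch;
// in the runtime it would return (and install) the to-space copy of
// the reference currently stored in `field`. Here it is a no-op so the
// sketch compiles.
ObjRef ForwardStoredReference(std::atomic<ObjRef>* field) {
  return field->load(std::memory_order_relaxed);
}

// Models Unsafe.compareAndSwapObject with the read barrier applied to
// the reference stored in the field before the user-visible CAS.
bool CompareAndSwapObject(std::atomic<ObjRef>* field, ObjRef expected, ObjRef value) {
  ForwardStoredReference(field);  // Ensure the stored reference is to-space.
  return field->compare_exchange_strong(expected, value);
}
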