From: Vladimir Marko
Date: Fri, 12 Aug 2016 12:37:55 +0000 (+0100)
Subject: x86/x86-64: Avoid temporary for read barrier field load.
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=ccf06d8f19a37432de4a3b768747090adfbd18ec;p=android-x86%2Fart.git

x86/x86-64: Avoid temporary for read barrier field load.

Add TEST instructions for memory and immediate. Use the byte
version to avoid a temporary in read barrier field load.

Test: Tested with ART_USE_READ_BARRIER=true on host.
Test: Tested with ART_USE_READ_BARRIER=true ART_HEAP_POISONING=true on host.
Bug: 29966877
Bug: 12687968
Change-Id: Ia415d3c2e1ae1ff6dff11d72bbb7d96d5deed6ee
---

diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 404f044ce..6d9c55cd7 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -6377,7 +6377,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path used to mark the GC root `root`.
+      // Slow path marking the GC root `root`.
       SlowPathCode* slow_path =
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
       codegen_->AddSlowPath(slow_path);
@@ -6518,7 +6518,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
   // Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path used to mark the object `ref` when it is gray.
+  // Slow path marking the object `ref` when it is gray.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
   AddSlowPath(slow_path);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 122c174ea..cc8985d0b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -5041,7 +5041,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path used to mark the GC root `root`.
+      // Slow path marking the GC root `root`.
       SlowPathCodeARM64* slow_path =
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
       codegen_->AddSlowPath(slow_path);
@@ -5239,7 +5239,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
   // Object* ref = ref_addr->AsMirrorPtr()
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path used to mark the object `ref` when it is gray.
+  // Slow path marking the object `ref` when it is gray.
   SlowPathCodeARM64* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
   AddSlowPath(slow_path);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7aca16f86..f50eb5cb7 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -445,8 +445,8 @@ class ArraySetSlowPathX86 : public SlowPathCode {
 
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj)
-      : SlowPathCode(instruction), obj_(obj) {
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison)
+      : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
     DCHECK(kEmitCompilerReadBarrier);
   }
@@ -470,6 +470,10 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
+    if (unpoison_) {
+      // Object* ref = ref_addr->AsMirrorPtr()
+      __ MaybeUnpoisonHeapReference(reg);
+    }
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
@@ -499,6 +503,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
 
  private:
   const Location obj_;
+  const bool unpoison_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
 };
@@ -4631,10 +4636,6 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
     // load the temp into the XMM and then copy the XMM into the
     // output, 32 bits at a time).
     locations->AddTemp(Location::RequiresFpuRegister());
-  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -4678,11 +4679,10 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
     case Primitive::kPrimNot: {
       // /* HeapReference<Object> */ out = *(base + offset)
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp_loc = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
         codegen_->GenerateFieldLoadWithBakerReadBarrier(
-            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+            instruction, out, base, offset, /* needs_null_check */ true);
         if (is_volatile) {
           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
         }
@@ -5093,11 +5093,6 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
                           Location::kOutputOverlap :
                           Location::kNoOutputOverlap);
   }
-  // We need a temporary register for the read barrier marking slow
-  // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
-  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -5172,11 +5167,10 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
       // /* HeapReference<Object> */ out =
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
         codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+            instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
       } else {
         Register out = out_loc.AsRegister<Register>();
         if (index.IsConstant()) {
@@ -6281,8 +6275,8 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
 
 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   return kEmitCompilerReadBarrier &&
-      (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+      !kUseBakerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
        type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
@@ -6343,7 +6337,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -6565,7 +6559,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -6601,8 +6595,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6641,8 +6634,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6674,8 +6666,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6683,8 +6674,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6875,17 +6865,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
                                                                    Location maybe_temp) {
   Register out_reg = out.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
-    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+          instruction, out, out_reg, offset, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // Save the value of `out` into `maybe_temp` before overwriting it
       // in the following move operation, as we will need it for the
       // read barrier below.
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       __ movl(maybe_temp.AsRegister<Register>(), out_reg);
       // /* HeapReference<Object> */ out = *(out + offset)
       __ movl(out_reg, Address(out_reg, offset));
@@ -6902,17 +6892,15 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                                                     Location out,
                                                                     Location obj,
-                                                                    uint32_t offset,
-                                                                    Location maybe_temp) {
+                                                                    uint32_t offset) {
   Register out_reg = out.AsRegister<Register>();
   Register obj_reg = obj.AsRegister<Register>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
-      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+          instruction, out, obj_reg, offset, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6955,9 +6943,9 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path used to mark the GC root `root`.
-      SlowPathCode* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root);
+      // Slow path marking the GC root `root`.
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+          instruction, root, /* unpoison */ false);
       codegen_->AddSlowPath(slow_path);
 
       __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
@@ -6991,14 +6979,13 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
                                                              Location ref,
                                                              Register obj,
                                                              uint32_t offset,
-                                                             Location temp,
                                                              bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Address src(obj, offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
 }
 
 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -7006,7 +6993,6 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstr
                                                              Register obj,
                                                              uint32_t data_offset,
                                                              Location index,
-                                                             Location temp,
                                                              bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -7019,14 +7005,13 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstr
   Address src = index.IsConstant() ?
       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
       Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
 }
 
 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                  Location ref,
                                                                  Register obj,
                                                                  const Address& src,
-                                                                 Location temp,
                                                                  bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -7056,17 +7041,23 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
   //   performance reasons.
 
   Register ref_reg = ref.AsRegister<Register>();
-  Register temp_reg = temp.AsRegister<Register>();
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
-  // /* int32_t */ monitor = obj->monitor_
-  __ movl(temp_reg, Address(obj, monitor_offset));
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+  constexpr int32_t test_value = static_cast<int32_t>(1 << gray_bit_position);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // At this point, just do the "if" and make sure that flags are preserved until the branch.
+  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
   if (needs_null_check) {
     MaybeRecordImplicitNullCheck(instruction);
   }
-  // /* LockWord */ lock_word = LockWord(monitor)
-  static_assert(sizeof(LockWord) == sizeof(int32_t),
-                "art::LockWord and int32_t have different sizes.");
 
   // Load fence to prevent load-load reordering.
   // Note that this is a no-op, thanks to the x86 memory model.
@@ -7074,25 +7065,20 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
 
   // The actual reference load.
   // /* HeapReference<Object> */ ref = *src
-  __ movl(ref_reg, src);
+  __ movl(ref_reg, src);  // Flags are unaffected.
+
+  // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
+  // Slow path marking the object `ref` when it is gray.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+      instruction, ref, /* unpoison */ true);
+  AddSlowPath(slow_path);
+
+  // We have done the "if" of the gray bit check above, now branch based on the flags.
+  __ j(kNotZero, slow_path->GetEntryLabel());
 
   // Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path used to mark the object `ref` when it is gray.
-  SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref);
-  AddSlowPath(slow_path);
-
-  // if (rb_state == ReadBarrier::gray_ptr_)
-  //   ref = ReadBarrier::Mark(ref);
-  // Given the numeric representation, it's enough to check the low bit of the
-  // rb_state. We do that by shifting the bit out of the lock word with SHR.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
-  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
-  __ j(kCarrySet, slow_path->GetEntryLabel());
 
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 894f2e8f4..c644e401f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -254,8 +254,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset,
-                                         Location maybe_temp);
+                                         uint32_t offset);
   // Generate a GC root reference load:
   //
   //   root <- *address
@@ -487,7 +486,6 @@ class CodeGeneratorX86 : public CodeGenerator {
                                              Location ref,
                                              Register obj,
                                              uint32_t offset,
-                                             Location temp,
                                              bool needs_null_check);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
@@ -496,7 +494,6 @@ class CodeGeneratorX86 : public CodeGenerator {
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
-                                             Location temp,
                                              bool needs_null_check);
   // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
@@ -504,7 +501,6 @@ class CodeGeneratorX86 : public CodeGenerator {
                                                  Location ref,
                                                  Register obj,
                                                  const Address& src,
-                                                 Location temp,
                                                  bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 0c55ae44d..ec37e5db2 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -466,8 +466,8 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
 
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj)
-      : SlowPathCode(instruction), obj_(obj) {
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison)
+      : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
     DCHECK(kEmitCompilerReadBarrier);
   }
@@ -491,6 +491,10 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
+    if (unpoison_) {
+      // Object* ref = ref_addr->AsMirrorPtr()
+      __ MaybeUnpoisonHeapReference(obj_.AsRegister<CpuRegister>());
+    }
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
@@ -520,6 +524,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 
  private:
   const Location obj_;
+  const bool unpoison_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
 };
@@ -4152,11 +4157,6 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
         Location::RequiresRegister(),
         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
-  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -4200,11 +4200,10 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
     case Primitive::kPrimNot: {
       // /* HeapReference<Object> */ out = *(base + offset)
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp_loc = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
         codegen_->GenerateFieldLoadWithBakerReadBarrier(
-            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+            instruction, out, base, offset, /* needs_null_check */ true);
         if (is_volatile) {
           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
         }
@@ -4588,11 +4587,6 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
-  // We need a temporary register for the read barrier marking slow
-  // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
-  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4667,11 +4661,10 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
      // /* HeapReference<Object> */ out =
      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-        Location temp = locations->GetTemp(0);
         // Note that a potential implicit null check is handled in this
         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
         codegen_->GenerateArrayLoadWithBakerReadBarrier(
-            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+            instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
       } else {
         CpuRegister out = out_loc.AsRegister<CpuRegister>();
         if (index.IsConstant()) {
@@ -5687,8 +5680,8 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
 
 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   return kEmitCompilerReadBarrier &&
-      (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+      !kUseBakerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
       type_check_kind == TypeCheckKind::kArrayObjectCheck);
 }
@@ -5749,7 +5742,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
@@ -5979,8 +5972,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(
-      instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
   if (cls.IsRegister()) {
     __ cmpl(temp, cls.AsRegister<CpuRegister>());
@@ -6004,8 +5996,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       }
 
      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
@@ -6025,8 +6016,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6050,8 +6040,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
@@ -6077,8 +6066,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
      // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&done);
       break;
@@ -6097,8 +6085,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
       // Do an exact check.
       NearLabel check_non_primitive_component_type;
@@ -6126,8 +6113,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
      // going into the slow path, as it has been overwritten in the
      // meantime.
      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6135,8 +6121,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&done);
       break;
@@ -6152,8 +6137,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
       }
 
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
 
       // We always go into the type check slow path for the unresolved
       // and interface check cases.
@@ -6321,17 +6305,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
                                                                       Location maybe_temp) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   if (kEmitCompilerReadBarrier) {
-    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+          instruction, out, out_reg, offset, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // Save the value of `out` into `maybe_temp` before overwriting it
       // in the following move operation, as we will need it for the
       // read barrier below.
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
       // /* HeapReference<Object> */ out = *(out + offset)
       __ movl(out_reg, Address(out_reg, offset));
@@ -6348,17 +6332,15 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                                                        Location out,
                                                                        Location obj,
-                                                                       uint32_t offset,
-                                                                       Location maybe_temp) {
+                                                                       uint32_t offset) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
   if (kEmitCompilerReadBarrier) {
     if (kUseBakerReadBarrier) {
-      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+          instruction, out, obj_reg, offset, /* needs_null_check */ false);
     } else {
       // Load with slow path based read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6401,9 +6383,9 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path used to mark the GC root `root`.
-      SlowPathCode* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root);
+      // Slow path marking the GC root `root`.
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+          instruction, root, /* unpoison */ false);
       codegen_->AddSlowPath(slow_path);
 
       __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
@@ -6438,14 +6420,13 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
                                                                 Location ref,
                                                                 CpuRegister obj,
                                                                 uint32_t offset,
-                                                                Location temp,
                                                                 bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Address src(obj, offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
 }
 
 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6453,7 +6434,6 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
                                                                 CpuRegister obj,
                                                                 uint32_t data_offset,
                                                                 Location index,
-                                                                Location temp,
                                                                 bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -6466,14 +6446,13 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
   Address src = index.IsConstant() ?
       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
       Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
-  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
 }
 
 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                     Location ref,
                                                                     CpuRegister obj,
                                                                     const Address& src,
-                                                                    Location temp,
                                                                     bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
@@ -6503,17 +6482,23 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
   //   performance reasons.
 
   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
-  CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
-  // /* int32_t */ monitor = obj->monitor_
-  __ movl(temp_reg, Address(obj, monitor_offset));
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+  constexpr int32_t test_value = static_cast<int32_t>(1 << gray_bit_position);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  // At this point, just do the "if" and make sure that flags are preserved until the branch.
+  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
   if (needs_null_check) {
     MaybeRecordImplicitNullCheck(instruction);
   }
-  // /* LockWord */ lock_word = LockWord(monitor)
-  static_assert(sizeof(LockWord) == sizeof(int32_t),
-                "art::LockWord and int32_t have different sizes.");
 
   // Load fence to prevent load-load reordering.
   // Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6521,25 +6506,20 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
 
   // The actual reference load.
   // /* HeapReference<Object> */ ref = *src
-  __ movl(ref_reg, src);
+  __ movl(ref_reg, src);  // Flags are unaffected.
+
+  // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
+  // Slow path marking the object `ref` when it is gray.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+      instruction, ref, /* unpoison */ true);
+  AddSlowPath(slow_path);
+
+  // We have done the "if" of the gray bit check above, now branch based on the flags.
+  __ j(kNotZero, slow_path->GetEntryLabel());
 
   // Object* ref = ref_addr->AsMirrorPtr()
   __ MaybeUnpoisonHeapReference(ref_reg);
 
-  // Slow path used to mark the object `ref` when it is gray.
-  SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref);
-  AddSlowPath(slow_path);
-
-  // if (rb_state == ReadBarrier::gray_ptr_)
-  //   ref = ReadBarrier::Mark(ref);
-  // Given the numeric representation, it's enough to check the low bit of the
-  // rb_state. We do that by shifting the bit out of the lock word with SHR.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
-  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
-  __ j(kCarrySet, slow_path->GetEntryLabel());
 
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 4e0e34ce3..44844ac67 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -248,8 +248,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset,
-                                         Location maybe_temp);
+                                         uint32_t offset);
   // Generate a GC root reference load:
   //
   //   root <- *address
@@ -427,7 +426,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
                                              Location ref,
                                              CpuRegister obj,
                                              uint32_t offset,
-                                             Location temp,
                                              bool needs_null_check);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
@@ -436,7 +434,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
                                              CpuRegister obj,
                                              uint32_t data_offset,
                                              Location index,
-                                             Location temp,
                                              bool needs_null_check);
   // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
@@ -444,7 +441,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
                                                  Location ref,
                                                  CpuRegister obj,
                                                  const Address& src,
-                                                 Location temp,
                                                  bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 49d6c1952..cf4a04055 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1934,10 +1934,9 @@ static void GenUnsafeGet(HInvoke* invoke,
       Register output = output_loc.AsRegister<Register>();
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
-          Location temp = locations->GetTemp(0);
           Address src(base, offset, ScaleFactor::TIMES_1, 0);
           codegen->GenerateReferenceLoadWithBakerReadBarrier(
-              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
+              invoke, output_loc, base, src, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -2000,11 +1999,6 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
     locations->SetOut(Location::RequiresRegister(),
                       can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
-  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
@@ -2933,11 +2927,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // /* HeapReference<Class> */ temp1 = src->klass_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
       // Bail out if the source is not a non primitive array.
       // /* HeapReference<Class> */ temp1 = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
       __ testl(temp1, temp1);
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
       // If heap poisoning is enabled, `temp1` has been unpoisoned
@@ -2970,7 +2964,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
 
     // /* HeapReference<Class> */ temp1 = dest->klass_
     codegen_->GenerateFieldLoadWithBakerReadBarrier(
-        invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+        invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
 
     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
       // Bail out if the destination is not a non primitive array.
@@ -2982,7 +2976,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
       // temporaries such as `temp1`.
      // /* HeapReference<Class> */ temp2 = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+          invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
       __ testl(temp2, temp2);
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
       // If heap poisoning is enabled, `temp2` has been unpoisoned
@@ -2995,7 +2989,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
     // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
     // /* HeapReference<Class> */ temp2 = src->klass_
     codegen_->GenerateFieldLoadWithBakerReadBarrier(
-        invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+        invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
     // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
     __ cmpl(temp1, temp2);
@@ -3004,7 +2998,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
       __ j(kEqual, &do_copy);
       // /* HeapReference<Class> */ temp1 = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
       // We do not need to emit a read barrier for the following
       // heap reference load, as `temp1` is only used in a
       // comparison with null below, and this reference is not
@@ -3058,10 +3052,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
       // /* HeapReference<Class> */ temp1 = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
       __ testl(temp1, temp1);
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
       // If heap poisoning is enabled, `temp1` has been unpoisoned
@@ -3139,11 +3133,18 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
     __ cmpl(temp1, temp3);
     __ j(kEqual, &done);
 
-    // /* int32_t */ monitor = src->monitor_
-    __ movl(temp2, Address(src, monitor_offset));
-    // /* LockWord */ lock_word = LockWord(monitor)
-    static_assert(sizeof(LockWord) == sizeof(int32_t),
-                  "art::LockWord and int32_t have different sizes.");
+    // Given the numeric representation, it's enough to check the low bit of the rb_state.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+    constexpr int32_t test_value = static_cast<int32_t>(1 << gray_bit_position);
+
+    // if (rb_state == ReadBarrier::gray_ptr_)
+    //   goto slow_path;
+    // At this point, just do the "if" and make sure that flags are preserved until the branch.
+    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
 
     // Load fence to prevent load-load reordering.
     // Note that this is a no-op, thanks to the x86 memory model.
@@ -3154,13 +3155,8 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
         new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
     codegen_->AddSlowPath(read_barrier_slow_path);
 
-    // Given the numeric representation, it's enough to check the low bit of the
-    // rb_state. We do that by shifting the bit out of the lock word with SHR.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
-    __ shrl(temp2, Immediate(LockWord::kReadBarrierStateShift + 1));
-    __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
+    // We have done the "if" of the gray bit check above, now branch based on the flags.
+    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
 
     // Fast-path copy.
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 311e1cd6e..a4ee54623 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1241,7 +1241,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // /* HeapReference<Class> */ temp1 = dest->klass_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, dest, class_offset, temp3_loc, /* needs_null_check */ false);
+          invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
       // Register `temp1` is not trashed by the read barrier emitted
       // by GenerateFieldLoadWithBakerReadBarrier below, as that
       // method produces a call to a ReadBarrierMarkRegX entry point,
@@ -1249,7 +1249,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
       // temporaries such as `temp1`.
      // /* HeapReference<Class> */ temp2 = src->klass_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+          invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
       // If heap poisoning is enabled, `temp1` and `temp2` have been
       // unpoisoned by the previous calls to
       // GenerateFieldLoadWithBakerReadBarrier.
@@ -1273,7 +1273,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // /* HeapReference<Class> */ TMP = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+          invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
       __ testl(CpuRegister(TMP), CpuRegister(TMP));
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
       // If heap poisoning is enabled, `TMP` has been unpoisoned by
@@ -1296,7 +1296,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
       // /* HeapReference<Class> */ TMP = temp2->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, TMP_loc, temp2, component_offset, temp3_loc, /* needs_null_check */ false);
+          invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
       __ testl(CpuRegister(TMP), CpuRegister(TMP));
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
       // If heap poisoning is enabled, `TMP` has been unpoisoned by
@@ -1320,7 +1320,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // /* HeapReference<Class> */ temp1 = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
       // We do not need to emit a read barrier for the following
       // heap reference load, as `temp1` is only used in a
       // comparison with null below, and this reference is not
@@ -1348,10 +1348,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, temp1_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
       // /* HeapReference<Class> */ TMP = temp1->component_type_
       codegen_->GenerateFieldLoadWithBakerReadBarrier(
-          invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+          invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
       __ testl(CpuRegister(TMP), CpuRegister(TMP));
       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
     } else {
@@ -1421,11 +1421,18 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
     __ cmpl(temp1, temp3);
     __ j(kEqual, &done);
 
-    // /* int32_t */ monitor = src->monitor_
-    __ movl(CpuRegister(TMP), Address(src, monitor_offset));
-    // /* LockWord */ lock_word = LockWord(monitor)
-    static_assert(sizeof(LockWord) == sizeof(int32_t),
-                  "art::LockWord and int32_t have different sizes.");
+    // Given the numeric representation, it's enough to check the low bit of the rb_state.
+    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+    constexpr int32_t test_value = static_cast<int32_t>(1 << gray_bit_position);
+
+    // if (rb_state == ReadBarrier::gray_ptr_)
+    //   goto slow_path;
+    // At this point, just do the "if" and make sure that flags are preserved until the branch.
+    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
 
     // Load fence to prevent load-load reordering.
     // Note that this is a no-op, thanks to the x86-64 memory model.
@@ -1436,13 +1443,8 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
         new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
     codegen_->AddSlowPath(read_barrier_slow_path);
 
-    // Given the numeric representation, it's enough to check the low bit of the
-    // rb_state. We do that by shifting the bit out of the lock word with SHR.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
-    __ shrl(CpuRegister(TMP), Immediate(LockWord::kReadBarrierStateShift + 1));
-    __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
+    // We have done the "if" of the gray bit check above, now branch based on the flags.
+    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
 
     // Fast-path copy.
     // Iterate over the arrays and do a raw copy of the objects. We don't need to
@@ -2087,10 +2089,9 @@ static void GenUnsafeGet(HInvoke* invoke,
     case Primitive::kPrimNot: {
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
-          Location temp = locations->GetTemp(0);
           Address src(base, offset, ScaleFactor::TIMES_1, 0);
           codegen->GenerateReferenceLoadWithBakerReadBarrier(
-              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
+              invoke, output_loc, base, src, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -2113,9 +2114,7 @@ static void GenUnsafeGet(HInvoke* invoke,
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
-                                          HInvoke* invoke,
-                                          Primitive::Type type) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -2129,30 +2128,25 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(),
                     can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
-  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier.
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+  CreateIntIntIntToIntLocations(arena_, invoke);
 }
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index f1a991574..f2ef41f40 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1148,6 +1148,23 @@ void X86Assembler::testl(Register reg, const Immediate& immediate) {
 }
 
 
+void X86Assembler::testb(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF6);
+  EmitOperand(EAX, dst);
+  CHECK(imm.is_int8());
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
+void X86Assembler::testl(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF7);
+  EmitOperand(0, dst);
+  EmitImmediate(imm);
+}
+
+
 void X86Assembler::andl(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x23);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 63aa4a4b8..2ddcd760d 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -496,6 +496,9 @@ class X86Assembler FINAL : public Assembler {
   void testl(Register reg, const Immediate& imm);
   void testl(Register reg1, const Address& address);
 
+  void testb(const Address& dst, const Immediate& imm);
+  void testl(const Address& dst, const Immediate& imm);
+
   void andl(Register dst, const Immediate& imm);
   void andl(Register dst, Register src);
   void andl(Register dst, const Address& address);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 307e034b7..61d70d714 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -375,6 +375,42 @@ TEST_F(AssemblerX86Test, CmovlAddress) {
   DriverStr(expected, "cmovl_address");
 }
 
+TEST_F(AssemblerX86Test, TestbAddressImmediate) {
+  GetAssembler()->testb(
+      x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
+      x86::Immediate(1));
+  GetAssembler()->testb(
+      x86::Address(x86::Register(x86::ESP), FrameOffset(7)),
+      x86::Immediate(-128));
+  GetAssembler()->testb(
+      x86::Address(x86::Register(x86::EBX), MemberOffset(130)),
+      x86::Immediate(127));
+  const char* expected =
+      "testb $1, 0xc(%EDI,%EBX,4)\n"
+      "testb $-128, 0x7(%ESP)\n"
+      "testb $127, 0x82(%EBX)\n";
+
+  DriverStr(expected, "TestbAddressImmediate");
+}
+
+TEST_F(AssemblerX86Test, TestlAddressImmediate) {
+  GetAssembler()->testl(
+      x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
+      x86::Immediate(1));
+  GetAssembler()->testl(
+      x86::Address(x86::Register(x86::ESP), FrameOffset(7)),
+      x86::Immediate(-100000));
+  GetAssembler()->testl(
+      x86::Address(x86::Register(x86::EBX), MemberOffset(130)),
+      x86::Immediate(77777777));
+  const char* expected =
+      "testl $1, 0xc(%EDI,%EBX,4)\n"
+      "testl $-100000, 0x7(%ESP)\n"
+      "testl $77777777, 0x82(%EBX)\n";
+
+  DriverStr(expected, "TestlAddressImmediate");
+}
+
 /////////////////
 // Near labels //
 /////////////////
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index ddc824425..1f73aa737 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1389,6 +1389,25 @@ void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
 }
 
 
+void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst);
+  EmitUint8(0xF6);
+  EmitOperand(Register::RAX, dst);
+  CHECK(imm.is_int8());
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
+void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst);
+  EmitUint8(0xF7);
+  EmitOperand(0, dst);
+  EmitImmediate(imm);
+}
+
+
 void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index a4166f965..3a4bfca6b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -528,6 +528,9 @@ class X86_64Assembler FINAL : public Assembler {
   void testq(CpuRegister reg1, CpuRegister reg2);
   void testq(CpuRegister reg, const Address& address);
 
+  void testb(const Address& address, const Immediate& imm);
+  void testl(const Address& address, const Immediate& imm);
+
   void andl(CpuRegister dst, const Immediate& imm);
   void andl(CpuRegister dst, CpuRegister src);
   void andl(CpuRegister reg, const Address& address);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 36c966b3c..48a18760f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1526,6 +1526,48 @@ TEST_F(AssemblerX86_64Test, Cmpb) {
   DriverStr(expected, "cmpb");
 }
 
+TEST_F(AssemblerX86_64Test, TestbAddressImmediate) {
+  GetAssembler()->testb(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
+                      x86_64::CpuRegister(x86_64::RBX),
+                      x86_64::TIMES_4,
+                      12),
+      x86_64::Immediate(1));
+  GetAssembler()->testb(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
+      x86_64::Immediate(-128));
+  GetAssembler()->testb(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
+      x86_64::Immediate(127));
+  const char* expected =
+      "testb $1, 0xc(%RDI,%RBX,4)\n"
+      "testb $-128, 0x7(%RSP)\n"
+      "testb $127, 0x82(%RBX)\n";
+
+  DriverStr(expected, "TestbAddressImmediate");
+}
+
+TEST_F(AssemblerX86_64Test, TestlAddressImmediate) {
+  GetAssembler()->testl(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
+                      x86_64::CpuRegister(x86_64::RBX),
+                      x86_64::TIMES_4,
+                      12),
+      x86_64::Immediate(1));
+  GetAssembler()->testl(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
+      x86_64::Immediate(-100000));
+  GetAssembler()->testl(
+      x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
+      x86_64::Immediate(77777777));
+  const char* expected =
+      "testl $1, 0xc(%RDI,%RBX,4)\n"
+      "testl $-100000, 0x7(%RSP)\n"
+      "testl $77777777, 0x82(%RBX)\n";
+
+  DriverStr(expected, "TestlAddressImmediate");
+}
+
 class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> {
  public:
   using Base = JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler>;
diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java
index 7c124caa8..95a11ca01 100644
--- a/test/537-checker-arraycopy/src/Main.java
+++ b/test/537-checker-arraycopy/src/Main.java
@@ -51,10 +51,10 @@ public class Main {
 
   /// CHECK-START-X86_64: void Main.arraycopy() disassembly (after)
   /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
-  /// CHECK-NOT:      test
+  /// CHECK-NOT:      test {{^[^\[].*}}, {{^[^\[].*}}
   /// CHECK-NOT:      call
   /// CHECK:          ReturnVoid
 
-  // Checks that the call is intrinsified and that there is no test instruction
+  // Checks that the call is intrinsified and that there is no register test instruction
   // when we know the source and destination are not null.
   public static void arraycopy() {
     Object[] obj = new Object[4];
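
Note on the gray bit check (illustrative, not part of the change): with white_ptr_ == 0, gray_ptr_ == 1 and black_ptr_ == 2, only a gray lock word has the low rb_state bit set, so a one-byte TEST against the byte of the lock word holding that bit replaces the old movl+shrl+jc sequence and frees the temporary register. The following standalone C++ sketch (host-side; it assumes kReadBarrierStateShift == 28, i.e. 16 thin-lock owner bits plus 12 count bits below the rb_state, and relies on the little-endian byte order of x86) checks that the two predicates agree:

    #include <cassert>
    #include <cstdint>

    // Assumed lock word layout: rb_state starts at bit 28 of the 32-bit
    // monitor word (16 owner bits + 12 count bits below it).
    constexpr uint32_t kReadBarrierStateShift = 28;
    constexpr uint32_t kBitsPerByte = 8;

    constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;  // 3
    constexpr uint32_t gray_bit_position = kReadBarrierStateShift % kBitsPerByte;   // 4
    constexpr int32_t test_value = static_cast<int32_t>(1 << gray_bit_position);    // 0x10

    // Old fast path: copy the 32-bit monitor word into a temporary register,
    // shift the gray bit out into the carry flag (SHR), branch on kCarrySet.
    bool IsGrayViaShift(uint32_t monitor) {
      return ((monitor >> kReadBarrierStateShift) & 1u) != 0;
    }

    // New fast path: TESTB reads only the byte that contains the gray bit and
    // sets ZF directly, so no temporary is needed; the branch is j(kNotZero).
    bool IsGrayViaByteTest(const uint32_t* monitor_addr) {
      const uint8_t* bytes = reinterpret_cast<const uint8_t*>(monitor_addr);  // x86 is little-endian
      return (bytes[gray_byte_position] & test_value) != 0;
    }

    int main() {
      for (uint32_t rb_state = 0; rb_state <= 2; ++rb_state) {  // white, gray, black
        const uint32_t monitor = rb_state << kReadBarrierStateShift;
        assert(IsGrayViaShift(monitor) == IsGrayViaByteTest(&monitor));
      }
      return 0;
    }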
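
Two design points follow from this. First, the byte form keeps the encoding short: the new X86Assembler::testb emits opcode 0xF6 with a single immediate byte (hence the CHECK(imm.is_int8())), whereas the testl form emits 0xF7 followed by a full 32-bit immediate. Second, TEST sets EFLAGS, but MaybeUnpoisonHeapReference also modifies them; that is why the unpoisoning is delayed until after the branch on the fast path, or moved into the mark slow path via the new `unpoison` constructor flag, while the reference load itself (movl) may sit between the TEST and the branch because MOV leaves the flags untouched.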