OSDN Git Service

Re-enable the ArraySet fast path with Baker read barriers.
authorRoland Levillain <rpl@google.com>
Thu, 25 Aug 2016 16:27:56 +0000 (17:27 +0100)
committerRoland Levillain <rpl@google.com>
Thu, 25 Aug 2016 16:27:56 +0000 (17:27 +0100)
Benchmarks (ARM64) score variations on Nexus 5X with CPU
cores clamped at 960000 Hz (aosp_bullhead-userdebug build):
- Ritzperf - average (lower is better):       -0.95% (virtually unchanged)
- CaffeineMark - average (higher is better):  +2.50% (slightly better)
- DeltaBlue (lower is better):                -0.55% (virtually unchanged)
- Richards - average (lower is better):       +0.67% (virtually unchanged)
- SciMark2 - average (higher is better):      -0.10% (virtually unchanged)

Details about Ritzperf benchmarks with meaningful variations
(lower is better):
- GenericCalcActions.MemAllocTest:            -5.05% (better)

Details about CaffeineMark benchmarks with meaningful variations
(higher is better):
- Method:                                    +16.88% (better)

Details about Richards benchmarks with meaningful variations
(lower is better):
- deutsch_acc_interface:                      +9.86% (worse)

Boot image code size variation on Nexus 5X
(aosp_bullhead-userdebug build):
- total ARM64 framework Oat files size change:
  105933472 bytes -> 106027680 bytes (+0.09%)
- total ARM framework Oat files size change:
  89157936 bytes -> 89239856 bytes (+0.09%)

Test: ART host and target (ARM, ARM64) tests.
Bug: 29516974
Bug: 29506760
Bug: 12687968
Change-Id: Ib9e9709712295e17804b8888ac10e3d518ff2e70

compiler/optimizing/code_generator.cc
compiler/optimizing/code_generator_arm.cc
compiler/optimizing/code_generator_arm64.cc
compiler/optimizing/code_generator_x86.cc
compiler/optimizing/code_generator_x86_64.cc
compiler/optimizing/intrinsics_arm.cc

index c532e72..0d3f849 100644 (file)
@@ -1224,6 +1224,7 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in
   DCHECK(instruction->IsInstanceFieldGet() ||
          instruction->IsStaticFieldGet() ||
          instruction->IsArrayGet() ||
+         instruction->IsArraySet() ||
          instruction->IsLoadClass() ||
          instruction->IsLoadString() ||
          instruction->IsInstanceOf() ||
index 6d9c55c..9d5aabc 100644 (file)
@@ -425,6 +425,7 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
@@ -4660,6 +4661,7 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) {
   }
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
+    // These registers may be used for Baker read barriers too.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
     locations->AddTemp(Location::RequiresRegister());
   }
@@ -4744,8 +4746,10 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
       }
 
       DCHECK(needs_write_barrier);
-      Register temp1 = locations->GetTemp(0).AsRegister<Register>();
-      Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+      Location temp1_loc = locations->GetTemp(0);
+      Register temp1 = temp1_loc.AsRegister<Register>();
+      Location temp2_loc = locations->GetTemp(1);
+      Register temp2 = temp2_loc.AsRegister<Register>();
       uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -4776,33 +4780,97 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
         }
 
         if (kEmitCompilerReadBarrier) {
-          // When read barriers are enabled, the type checking
-          // instrumentation requires two read barriers:
-          //
-          //   __ Mov(temp2, temp1);
-          //   // /* HeapReference<Class> */ temp1 = temp1->component_type_
-          //   __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
-          //   codegen_->GenerateReadBarrierSlow(
-          //       instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
-          //
-          //   // /* HeapReference<Class> */ temp2 = value->klass_
-          //   __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
-          //   codegen_->GenerateReadBarrierSlow(
-          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
-          //
-          //   __ cmp(temp1, ShifterOperand(temp2));
-          //
-          // However, the second read barrier may trash `temp`, as it
-          // is a temporary register, and as such would not be saved
-          // along with live registers before calling the runtime (nor
-          // restored afterwards).  So in this case, we bail out and
-          // delegate the work to the array set slow path.
-          //
-          // TODO: Extend the register allocator to support a new
-          // "(locally) live temp" location so as to avoid always
-          // going into the slow path when read barriers are enabled.
-          __ b(slow_path->GetEntryLabel());
+          if (!kUseBakerReadBarrier) {
+            // When (non-Baker) read barriers are enabled, the type
+            // checking instrumentation requires two read barriers
+            // generated by CodeGeneratorARM::GenerateReadBarrierSlow:
+            //
+            //   __ Mov(temp2, temp1);
+            //   // /* HeapReference<Class> */ temp1 = temp1->component_type_
+            //   __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+            //   codegen_->GenerateReadBarrierSlow(
+            //       instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
+            //
+            //   // /* HeapReference<Class> */ temp2 = value->klass_
+            //   __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+            //   codegen_->GenerateReadBarrierSlow(
+            //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
+            //
+            //   __ cmp(temp1, ShifterOperand(temp2));
+            //
+            // However, the second read barrier may trash `temp`, as it
+            // is a temporary register, and as such would not be saved
+            // along with live registers before calling the runtime (nor
+            // restored afterwards).  So in this case, we bail out and
+            // delegate the work to the array set slow path.
+            //
+            // TODO: Extend the register allocator to support a new
+            // "(locally) live temp" location so as to avoid always
+            // going into the slow path when read barriers are enabled?
+            //
+            // There is no such problem with Baker read barriers (see below).
+            __ b(slow_path->GetEntryLabel());
+          } else {
+            Register temp3 = IP;
+            Location temp3_loc = Location::RegisterLocation(temp3);
+
+            // Note: `temp3` (scratch register IP) cannot be used as
+            // `ref` argument of GenerateFieldLoadWithBakerReadBarrier
+            // calls below (see ReadBarrierMarkSlowPathARM for more
+            // details).
+
+            // /* HeapReference<Class> */ temp1 = array->klass_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                            temp1_loc,
+                                                            array,
+                                                            class_offset,
+                                                            temp3_loc,
+                                                            /* needs_null_check */ true);
+
+            // /* HeapReference<Class> */ temp1 = temp1->component_type_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                            temp1_loc,
+                                                            temp1,
+                                                            component_offset,
+                                                            temp3_loc,
+                                                            /* needs_null_check */ false);
+            // Register `temp1` is not trashed by the read barrier
+            // emitted by GenerateFieldLoadWithBakerReadBarrier below,
+            // as that method produces a call to a ReadBarrierMarkRegX
+            // entry point, which saves all potentially live registers,
+            // including temporaries such a `temp1`.
+            // /* HeapReference<Class> */ temp2 = value->klass_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                            temp2_loc,
+                                                            value,
+                                                            class_offset,
+                                                            temp3_loc,
+                                                            /* needs_null_check */ false);
+            // If heap poisoning is enabled, `temp1` and `temp2` have
+            // been unpoisoned by the the previous calls to
+            // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+            __ cmp(temp1, ShifterOperand(temp2));
+
+            if (instruction->StaticTypeOfArrayIsObjectArray()) {
+              Label do_put;
+              __ b(&do_put, EQ);
+              // We do not need to emit a read barrier for the
+              // following heap reference load, as `temp1` is only used
+              // in a comparison with null below, and this reference
+              // is not kept afterwards.
+              // /* HeapReference<Class> */ temp1 = temp1->super_class_
+              __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+              // If heap poisoning is enabled, no need to unpoison
+              // `temp`, as we are comparing against null below.
+              __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+              __ Bind(&do_put);
+            } else {
+              __ b(slow_path->GetEntryLabel(), NE);
+            }
+          }
         } else {
+          // Non read barrier code.
+
           // /* HeapReference<Class> */ temp1 = array->klass_
           __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
           codegen_->MaybeRecordImplicitNullCheck(instruction);
index cc8985d..578c7e7 100644 (file)
@@ -591,6 +591,7 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
@@ -2176,6 +2177,11 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
   } else {
     locations->SetInAt(2, Location::RequiresRegister());
   }
+  if (object_array_set_with_read_barrier && kUseBakerReadBarrier) {
+    // Additional temporary registers for a Baker read barrier.
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
@@ -2225,7 +2231,6 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
     codegen_->Store(value_type, value, destination);
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   } else {
-    DCHECK(needs_write_barrier);
     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     vixl::aarch64::Label done;
     SlowPathCodeARM64* slow_path = nullptr;
@@ -2264,33 +2269,112 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
         }
 
         if (kEmitCompilerReadBarrier) {
-          // When read barriers are enabled, the type checking
-          // instrumentation requires two read barriers:
-          //
-          //   __ Mov(temp2, temp);
-          //   // /* HeapReference<Class> */ temp = temp->component_type_
-          //   __ Ldr(temp, HeapOperand(temp, component_offset));
-          //   codegen_->GenerateReadBarrierSlow(
-          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
-          //
-          //   // /* HeapReference<Class> */ temp2 = value->klass_
-          //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-          //   codegen_->GenerateReadBarrierSlow(
-          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
-          //
-          //   __ Cmp(temp, temp2);
-          //
-          // However, the second read barrier may trash `temp`, as it
-          // is a temporary register, and as such would not be saved
-          // along with live registers before calling the runtime (nor
-          // restored afterwards).  So in this case, we bail out and
-          // delegate the work to the array set slow path.
-          //
-          // TODO: Extend the register allocator to support a new
-          // "(locally) live temp" location so as to avoid always
-          // going into the slow path when read barriers are enabled.
-          __ B(slow_path->GetEntryLabel());
+          if (!kUseBakerReadBarrier) {
+            // When (non-Baker) read barriers are enabled, the type
+            // checking instrumentation requires two read barriers
+            // generated by CodeGeneratorARM64::GenerateReadBarrierSlow:
+            //
+            //   __ Mov(temp2, temp);
+            //   // /* HeapReference<Class> */ temp = temp->component_type_
+            //   __ Ldr(temp, HeapOperand(temp, component_offset));
+            //   codegen_->GenerateReadBarrierSlow(
+            //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+            //
+            //   // /* HeapReference<Class> */ temp2 = value->klass_
+            //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+            //   codegen_->GenerateReadBarrierSlow(
+            //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
+            //
+            //   __ Cmp(temp, temp2);
+            //
+            // However, the second read barrier may trash `temp`, as it
+            // is a temporary register, and as such would not be saved
+            // along with live registers before calling the runtime (nor
+            // restored afterwards).  So in this case, we bail out and
+            // delegate the work to the array set slow path.
+            //
+            // TODO: Extend the register allocator to support a new
+            // "(locally) live temp" location so as to avoid always
+            // going into the slow path when read barriers are enabled?
+            //
+            // There is no such problem with Baker read barriers (see below).
+            __ B(slow_path->GetEntryLabel());
+          } else {
+            // Note that we cannot use `temps` (instance of VIXL's
+            // UseScratchRegisterScope) to allocate `temp2` because
+            // the Baker read barriers generated by
+            // GenerateFieldLoadWithBakerReadBarrier below also use
+            // that facility to allocate a temporary register, thus
+            // making VIXL's scratch register pool empty.
+            Location temp2_loc = locations->GetTemp(0);
+            Register temp2 = WRegisterFrom(temp2_loc);
+
+            // Note: Because it is acquired from VIXL's scratch register
+            // pool, `temp` might be IP0, and thus cannot be used as
+            // `ref` argument of GenerateFieldLoadWithBakerReadBarrier
+            // calls below (see ReadBarrierMarkSlowPathARM64 for more
+            // details).
+
+            // /* HeapReference<Class> */ temp2 = array->klass_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                            temp2_loc,
+                                                            array,
+                                                            class_offset,
+                                                            temp,
+                                                            /* needs_null_check */ true,
+                                                            /* use_load_acquire */ false);
+
+            // /* HeapReference<Class> */ temp2 = temp2->component_type_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                            temp2_loc,
+                                                            temp2,
+                                                            component_offset,
+                                                            temp,
+                                                            /* needs_null_check */ false,
+                                                            /* use_load_acquire */ false);
+            // For the same reason that we request `temp2` from the
+            // register allocator above, we cannot get `temp3` from
+            // VIXL's scratch register pool.
+            Location temp3_loc = locations->GetTemp(1);
+            Register temp3 = WRegisterFrom(temp3_loc);
+            // Register `temp2` is not trashed by the read barrier
+            // emitted by GenerateFieldLoadWithBakerReadBarrier below,
+            // as that method produces a call to a ReadBarrierMarkRegX
+            // entry point, which saves all potentially live registers,
+            // including temporaries such a `temp2`.
+            // /* HeapReference<Class> */ temp3 = register_value->klass_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                            temp3_loc,
+                                                            value.W(),
+                                                            class_offset,
+                                                            temp,
+                                                            /* needs_null_check */ false,
+                                                            /* use_load_acquire */ false);
+            // If heap poisoning is enabled, `temp2` and `temp3` have
+            // been unpoisoned by the the previous calls to
+            // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
+            __ Cmp(temp2, temp3);
+
+            if (instruction->StaticTypeOfArrayIsObjectArray()) {
+              vixl::aarch64::Label do_put;
+              __ B(eq, &do_put);
+              // We do not need to emit a read barrier for the
+              // following heap reference load, as `temp2` is only used
+              // in a comparison with null below, and this reference
+              // is not kept afterwards.
+              // /* HeapReference<Class> */ temp = temp2->super_class_
+              __ Ldr(temp, HeapOperand(temp2, super_offset));
+              // If heap poisoning is enabled, no need to unpoison
+              // `temp`, as we are comparing against null below.
+              __ Cbnz(temp, slow_path->GetEntryLabel());
+              __ Bind(&do_put);
+            } else {
+              __ B(ne, slow_path->GetEntryLabel());
+            }
+          }
         } else {
+          // Non read barrier code.
+
           Register temp2 = temps.AcquireSameSizeAs(array);
           // /* HeapReference<Class> */ temp = array->klass_
           __ Ldr(temp, HeapOperand(array, class_offset));
@@ -2304,6 +2388,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
           // If heap poisoning is enabled, no need to unpoison `temp`
           // nor `temp2`, as we are comparing two poisoned references.
           __ Cmp(temp, temp2);
+          temps.Release(temp2);
 
           if (instruction->StaticTypeOfArrayIsObjectArray()) {
             vixl::aarch64::Label do_put;
@@ -2321,7 +2406,6 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
           } else {
             __ B(ne, slow_path->GetEntryLabel());
           }
-          temps.Release(temp2);
         }
       }
 
index f50eb5c..e8740a5 100644 (file)
@@ -460,6 +460,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
@@ -5279,6 +5280,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
   }
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
+    // These registers may be used for Baker read barriers too.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
     // Ensure the card is in a byte register.
     locations->AddTemp(Location::RegisterLocation(ECX));
@@ -5349,9 +5351,13 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
 
       DCHECK(needs_write_barrier);
       Register register_value = value.AsRegister<Register>();
-      NearLabel done, not_null, do_put;
+      // We cannot use a NearLabel for `done`, as its range may be too
+      // short when Baker read barriers are enabled.
+      Label done;
+      NearLabel not_null, do_put;
       SlowPathCode* slow_path = nullptr;
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
+      Location temp_loc = locations->GetTemp(0);
+      Register temp = temp_loc.AsRegister<Register>();
       if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
         codegen_->AddSlowPath(slow_path);
@@ -5365,33 +5371,77 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
         }
 
         if (kEmitCompilerReadBarrier) {
-          // When read barriers are enabled, the type checking
-          // instrumentation requires two read barriers:
-          //
-          //   __ movl(temp2, temp);
-          //   // /* HeapReference<Class> */ temp = temp->component_type_
-          //   __ movl(temp, Address(temp, component_offset));
-          //   codegen_->GenerateReadBarrierSlow(
-          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
-          //
-          //   // /* HeapReference<Class> */ temp2 = register_value->klass_
-          //   __ movl(temp2, Address(register_value, class_offset));
-          //   codegen_->GenerateReadBarrierSlow(
-          //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
-          //
-          //   __ cmpl(temp, temp2);
-          //
-          // However, the second read barrier may trash `temp`, as it
-          // is a temporary register, and as such would not be saved
-          // along with live registers before calling the runtime (nor
-          // restored afterwards).  So in this case, we bail out and
-          // delegate the work to the array set slow path.
-          //
-          // TODO: Extend the register allocator to support a new
-          // "(locally) live temp" location so as to avoid always
-          // going into the slow path when read barriers are enabled.
-          __ jmp(slow_path->GetEntryLabel());
+          if (!kUseBakerReadBarrier) {
+            // When (non-Baker) read barriers are enabled, the type
+            // checking instrumentation requires two read barriers
+            // generated by CodeGeneratorX86::GenerateReadBarrierSlow:
+            //
+            //   __ movl(temp2, temp);
+            //   // /* HeapReference<Class> */ temp = temp->component_type_
+            //   __ movl(temp, Address(temp, component_offset));
+            //   codegen_->GenerateReadBarrierSlow(
+            //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+            //
+            //   // /* HeapReference<Class> */ temp2 = register_value->klass_
+            //   __ movl(temp2, Address(register_value, class_offset));
+            //   codegen_->GenerateReadBarrierSlow(
+            //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
+            //
+            //   __ cmpl(temp, temp2);
+            //
+            // However, the second read barrier may trash `temp`, as it
+            // is a temporary register, and as such would not be saved
+            // along with live registers before calling the runtime (nor
+            // restored afterwards).  So in this case, we bail out and
+            // delegate the work to the array set slow path.
+            //
+            // TODO: Extend the register allocator to support a new
+            // "(locally) live temp" location so as to avoid always
+            // going into the slow path when read barriers are enabled?
+            //
+            // There is no such problem with Baker read barriers (see below).
+            __ jmp(slow_path->GetEntryLabel());
+          } else {
+            Location temp2_loc = locations->GetTemp(1);
+            Register temp2 = temp2_loc.AsRegister<Register>();
+            // /* HeapReference<Class> */ temp = array->klass_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(
+                instruction, temp_loc, array, class_offset, /* needs_null_check */ true);
+
+            // /* HeapReference<Class> */ temp = temp->component_type_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(
+                instruction, temp_loc, temp, component_offset, /* needs_null_check */ false);
+            // Register `temp` is not trashed by the read barrier
+            // emitted by GenerateFieldLoadWithBakerReadBarrier below,
+            // as that method produces a call to a ReadBarrierMarkRegX
+            // entry point, which saves all potentially live registers,
+            // including temporaries such a `temp`.
+            // /* HeapReference<Class> */ temp2 = register_value->klass_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(
+                instruction, temp2_loc, register_value, class_offset, /* needs_null_check */ false);
+            // If heap poisoning is enabled, `temp` and `temp2` have
+            // been unpoisoned by the the previous calls to
+            // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+            __ cmpl(temp, temp2);
+
+            if (instruction->StaticTypeOfArrayIsObjectArray()) {
+              __ j(kEqual, &do_put);
+              // We do not need to emit a read barrier for the
+              // following heap reference load, as `temp` is only used
+              // in a comparison with null below, and this reference
+              // is not kept afterwards. Also, if heap poisoning is
+              // enabled, there is no need to unpoison that heap
+              // reference for the same reason (comparison with null).
+              __ cmpl(Address(temp, super_offset), Immediate(0));
+              __ j(kNotEqual, slow_path->GetEntryLabel());
+              __ Bind(&do_put);
+            } else {
+              __ j(kNotEqual, slow_path->GetEntryLabel());
+            }
+          }
         } else {
+          // Non read barrier code.
+
           // /* HeapReference<Class> */ temp = array->klass_
           __ movl(temp, Address(array, class_offset));
           codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -5410,11 +5460,10 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
             // not been unpoisoned yet; unpoison it now.
             __ MaybeUnpoisonHeapReference(temp);
 
-            // /* HeapReference<Class> */ temp = temp->super_class_
-            __ movl(temp, Address(temp, super_offset));
-            // If heap poisoning is enabled, no need to unpoison
-            // `temp`, as we are comparing against null below.
-            __ testl(temp, temp);
+            // If heap poisoning is enabled, no need to unpoison the
+            // heap reference loaded below, as it is only used for a
+            // comparison with null.
+            __ cmpl(Address(temp, super_offset), Immediate(0));
             __ j(kNotEqual, slow_path->GetEntryLabel());
             __ Bind(&do_put);
           } else {
index ec37e5d..bc5ffcc 100644 (file)
@@ -481,6 +481,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
@@ -4760,10 +4761,8 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
 
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
-
-    // This first temporary register is possibly used for heap
-    // reference poisoning and/or read barrier emission too.
-    locations->AddTemp(Location::RequiresRegister());
+    // These registers may be used for Baker read barriers too.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -4833,9 +4832,13 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
 
       DCHECK(needs_write_barrier);
       CpuRegister register_value = value.AsRegister<CpuRegister>();
-      NearLabel done, not_null, do_put;
+      // We cannot use a NearLabel for `done`, as its range may be too
+      // short when Baker read barriers are enabled.
+      Label done;
+      NearLabel not_null, do_put;
       SlowPathCode* slow_path = nullptr;
-      CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+      Location temp_loc = locations->GetTemp(0);
+      CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
       if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
         codegen_->AddSlowPath(slow_path);
@@ -4849,33 +4852,77 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
         }
 
         if (kEmitCompilerReadBarrier) {
-          // When read barriers are enabled, the type checking
-          // instrumentation requires two read barriers:
-          //
-          //   __ movl(temp2, temp);
-          //   // /* HeapReference<Class> */ temp = temp->component_type_
-          //   __ movl(temp, Address(temp, component_offset));
-          //   codegen_->GenerateReadBarrierSlow(
-          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
-          //
-          //   // /* HeapReference<Class> */ temp2 = register_value->klass_
-          //   __ movl(temp2, Address(register_value, class_offset));
-          //   codegen_->GenerateReadBarrierSlow(
-          //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
-          //
-          //   __ cmpl(temp, temp2);
-          //
-          // However, the second read barrier may trash `temp`, as it
-          // is a temporary register, and as such would not be saved
-          // along with live registers before calling the runtime (nor
-          // restored afterwards).  So in this case, we bail out and
-          // delegate the work to the array set slow path.
-          //
-          // TODO: Extend the register allocator to support a new
-          // "(locally) live temp" location so as to avoid always
-          // going into the slow path when read barriers are enabled.
-          __ jmp(slow_path->GetEntryLabel());
+          if (!kUseBakerReadBarrier) {
+            // When (non-Baker) read barriers are enabled, the type
+            // checking instrumentation requires two read barriers
+            // generated by CodeGeneratorX86_64::GenerateReadBarrierSlow:
+            //
+            //   __ movl(temp2, temp);
+            //   // /* HeapReference<Class> */ temp = temp->component_type_
+            //   __ movl(temp, Address(temp, component_offset));
+            //   codegen_->GenerateReadBarrierSlow(
+            //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+            //
+            //   // /* HeapReference<Class> */ temp2 = register_value->klass_
+            //   __ movl(temp2, Address(register_value, class_offset));
+            //   codegen_->GenerateReadBarrierSlow(
+            //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
+            //
+            //   __ cmpl(temp, temp2);
+            //
+            // However, the second read barrier may trash `temp`, as it
+            // is a temporary register, and as such would not be saved
+            // along with live registers before calling the runtime (nor
+            // restored afterwards).  So in this case, we bail out and
+            // delegate the work to the array set slow path.
+            //
+            // TODO: Extend the register allocator to support a new
+            // "(locally) live temp" location so as to avoid always
+            // going into the slow path when read barriers are enabled?
+            //
+            // There is no such problem with Baker read barriers (see below).
+            __ jmp(slow_path->GetEntryLabel());
+          } else {
+            Location temp2_loc = locations->GetTemp(1);
+            CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+            // /* HeapReference<Class> */ temp = array->klass_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(
+                instruction, temp_loc, array, class_offset, /* needs_null_check */ true);
+
+            // /* HeapReference<Class> */ temp = temp->component_type_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(
+                instruction, temp_loc, temp, component_offset, /* needs_null_check */ false);
+            // Register `temp` is not trashed by the read barrier
+            // emitted by GenerateFieldLoadWithBakerReadBarrier below,
+            // as that method produces a call to a ReadBarrierMarkRegX
+            // entry point, which saves all potentially live registers,
+            // including temporaries such a `temp`.
+            // /* HeapReference<Class> */ temp2 = register_value->klass_
+            codegen_->GenerateFieldLoadWithBakerReadBarrier(
+                instruction, temp2_loc, register_value, class_offset, /* needs_null_check */ false);
+            // If heap poisoning is enabled, `temp` and `temp2` have
+            // been unpoisoned by the the previous calls to
+            // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+            __ cmpl(temp, temp2);
+
+            if (instruction->StaticTypeOfArrayIsObjectArray()) {
+              __ j(kEqual, &do_put);
+              // We do not need to emit a read barrier for the
+              // following heap reference load, as `temp` is only used
+              // in a comparison with null below, and this reference
+              // is not kept afterwards. Also, if heap poisoning is
+              // enabled, there is no need to unpoison that heap
+              // reference for the same reason (comparison with null).
+              __ cmpl(Address(temp, super_offset), Immediate(0));
+              __ j(kNotEqual, slow_path->GetEntryLabel());
+              __ Bind(&do_put);
+            } else {
+              __ j(kNotEqual, slow_path->GetEntryLabel());
+            }
+          }
         } else {
+          // Non read barrier code.
+
           // /* HeapReference<Class> */ temp = array->klass_
           __ movl(temp, Address(array, class_offset));
           codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -4894,11 +4941,10 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
             // not been unpoisoned yet; unpoison it now.
             __ MaybeUnpoisonHeapReference(temp);
 
-            // /* HeapReference<Class> */ temp = temp->super_class_
-            __ movl(temp, Address(temp, super_offset));
-            // If heap poisoning is enabled, no need to unpoison
-            // `temp`, as we are comparing against null below.
-            __ testl(temp, temp);
+            // If heap poisoning is enabled, no need to unpoison the
+            // heap reference loaded below, as it is only used for a
+            // comparison with null.
+            __ cmpl(Address(temp, super_offset), Immediate(0));
             __ j(kNotEqual, slow_path->GetEntryLabel());
             __ Bind(&do_put);
           } else {
index 0bbc0e5..e1a6454 100644 (file)
@@ -1450,7 +1450,7 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
   }
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Temporary register IP cannot be used in
-    // ReadBarrierSystemArrayCopySlowPathARM64 (because that register
+    // ReadBarrierSystemArrayCopySlowPathARM (because that register
     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
     // temporary register from the register allocator.
     locations->AddTemp(Location::RequiresRegister());