OSDN Git Service

[optimizing compiler] Compute live spill size
author Mark Mendell <mark.p.mendell@intel.com>
Tue, 13 Jan 2015 14:20:58 +0000 (09:20 -0500)
committer Mark Mendell <mark.p.mendell@intel.com>
Thu, 15 Jan 2015 16:21:37 +0000 (11:21 -0500)
The current stack frame calculation assumes that each live register to
be saved/restored is the size of one machine word.  This fails for X86,
where a double in an XMM register takes up 8 bytes (two words).  Change
the calculation to keep track of the number of live core registers and
the number of live fp registers separately to handle this distinction.

This is slightly pessimal, as the registers may not be active at the
same time, but the only way to handle this would be to allocate both
classes of registers simultaneously, or to remember all the active
intervals, match them up, and compute the size of each safepoint
interval.

Change-Id: If7860aa319b625c214775347728cdf49a56946eb
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
compiler/optimizing/code_generator.cc
compiler/optimizing/code_generator.h
compiler/optimizing/code_generator_arm.h
compiler/optimizing/code_generator_arm64.h
compiler/optimizing/code_generator_x86.h
compiler/optimizing/code_generator_x86_64.h
compiler/optimizing/register_allocator.cc
compiler/optimizing/register_allocator.h

index 0c1ff9b..9e89070 100644 (file)
@@ -54,6 +54,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
                      + GetGraph()->GetTemporariesVRegSlots()
                      + 1 /* filler */,
                    0, /* the baseline compiler does not have live registers at slow path */
+                   0, /* the baseline compiler does not have live registers at slow path */
                    GetGraph()->GetMaximumNumberOfOutVRegs()
                      + 1 /* current method */);
   GenerateFrameEntry();
@@ -136,14 +137,16 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l
 }
 
 void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
-                                     size_t maximum_number_of_live_registers,
+                                     size_t maximum_number_of_live_core_registers,
+                                     size_t maximum_number_of_live_fp_registers,
                                      size_t number_of_out_slots) {
   first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
 
   SetFrameSize(RoundUp(
       number_of_spill_slots * kVRegSize
       + number_of_out_slots * kVRegSize
-      + maximum_number_of_live_registers * GetWordSize()
+      + maximum_number_of_live_core_registers * GetWordSize()
+      + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
       + FrameEntrySpillSize(),
       kStackAlignment));
 }
index 8d28f3d..88e50b6 100644 (file)
@@ -109,9 +109,11 @@ class CodeGenerator {
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
   virtual Assembler* GetAssembler() = 0;
   virtual size_t GetWordSize() const = 0;
+  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
   virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
   void ComputeFrameSize(size_t number_of_spill_slots,
-                        size_t maximum_number_of_live_registers,
+                        size_t maximum_number_of_live_core_registers,
+                        size_t maximum_number_of_live_fp_registers,
                         size_t number_of_out_slots);
   virtual size_t FrameEntrySpillSize() const = 0;
   int32_t GetStackSlot(HLocal* local) const;
index c1b4eda..a753ec3 100644 (file)
@@ -175,6 +175,11 @@ class CodeGeneratorARM : public CodeGenerator {
     return kArmWordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    // Allocated in S registers, which are word sized.
+    return kArmWordSize;
+  }
+
   size_t FrameEntrySpillSize() const OVERRIDE;
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
index e4da07b..590bc1d 100644 (file)
@@ -191,6 +191,11 @@ class CodeGeneratorARM64 : public CodeGenerator {
     return kArm64WordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    // Allocated in D registers, which are word sized.
+    return kArm64WordSize;
+  }
+
   uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
     vixl::Label* block_entry_label = GetLabelOf(block);
     DCHECK(block_entry_label->IsBound());
index acde122..2d8adb2 100644 (file)
@@ -166,6 +166,11 @@ class CodeGeneratorX86 : public CodeGenerator {
     return kX86WordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    // 8 bytes == 2 words for each spill.
+    return 2 * kX86WordSize;
+  }
+
   size_t FrameEntrySpillSize() const OVERRIDE;
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
index 87f6b0f..343fba3 100644 (file)
@@ -169,6 +169,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
     return kX86_64WordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    return kX86_64WordSize;
+  }
+
   size_t FrameEntrySpillSize() const OVERRIDE;
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
index 1efc52b..d2f4f9b 100644 (file)
@@ -56,7 +56,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
         blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
         blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
         reserved_out_slots_(0),
-        maximum_number_of_live_registers_(0) {
+        maximum_number_of_live_core_registers_(0),
+        maximum_number_of_live_fp_registers_(0) {
   codegen->SetupBlockedRegisters();
   physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
   physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
@@ -185,9 +186,6 @@ void RegisterAllocator::AllocateRegistersInternal() {
   }
   LinearScan();
 
-  size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_;
-  maximum_number_of_live_registers_ = 0;
-
   inactive_.Reset();
   active_.Reset();
   handled_.Reset();
@@ -207,7 +205,6 @@ void RegisterAllocator::AllocateRegistersInternal() {
     }
   }
   LinearScan();
-  maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers;
 }
 
 void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
@@ -602,8 +599,13 @@ void RegisterAllocator::LinearScan() {
     if (current->IsSlowPathSafepoint()) {
       // Synthesized interval to record the maximum number of live registers
       // at safepoints. No need to allocate a register for it.
-      maximum_number_of_live_registers_ =
-          std::max(maximum_number_of_live_registers_, active_.Size());
+      if (processing_core_registers_) {
+        maximum_number_of_live_core_registers_ =
+          std::max(maximum_number_of_live_core_registers_, active_.Size());
+      } else {
+        maximum_number_of_live_fp_registers_ =
+          std::max(maximum_number_of_live_fp_registers_, active_.Size());
+      }
       DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart());
       continue;
     }
@@ -1255,8 +1257,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
       switch (source.GetKind()) {
         case Location::kRegister: {
           locations->AddLiveRegister(source);
-          DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_);
-
+          DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+                    maximum_number_of_live_core_registers_ +
+                    maximum_number_of_live_fp_registers_);
           if (current->GetType() == Primitive::kPrimNot) {
             locations->SetRegisterBit(source.reg());
           }
@@ -1349,7 +1352,8 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
 
 void RegisterAllocator::Resolve() {
   codegen_->ComputeFrameSize(
-      spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
+      spill_slots_.Size(), maximum_number_of_live_core_registers_,
+      maximum_number_of_live_fp_registers_, reserved_out_slots_);
 
   // Adjust the Out Location of instructions.
   // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
index c152a8b..70841b8 100644 (file)
@@ -190,8 +190,11 @@ class RegisterAllocator {
   // Slots reserved for out arguments.
   size_t reserved_out_slots_;
 
-  // The maximum live registers at safepoints.
-  size_t maximum_number_of_live_registers_;
+  // The maximum live core registers at safepoints.
+  size_t maximum_number_of_live_core_registers_;
+
+  // The maximum live FP registers at safepoints.
+  size_t maximum_number_of_live_fp_registers_;
 
   ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);