[optimizing] Add RIP support for x86_64

author Mark Mendell <mark.p.mendell@intel.com>

Fri, 27 Mar 2015 01:07:46 +0000 (21:07 -0400)

committer Mark Mendell <mark.p.mendell@intel.com>

Thu, 9 Apr 2015 14:47:11 +0000 (10:47 -0400)
author Mark Mendell <mark.p.mendell@intel.com>
Fri, 27 Mar 2015 01:07:46 +0000 (21:07 -0400)
committer Mark Mendell <mark.p.mendell@intel.com>
Thu, 9 Apr 2015 14:47:11 +0000 (10:47 -0400)
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc

index 9e08558..1604a7c 100644 (file)
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -429,7 +429,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
          location_builder_(graph, this),
          instruction_visitor_(graph, this),
          move_resolver_(graph->GetArena(), this),
-        isa_features_(isa_features) {
+        isa_features_(isa_features),
+        constant_area_start_(0) {
    AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
  }
  
@@ -1952,7 +1953,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
      case Primitive::kPrimDouble:
      case Primitive::kPrimFloat: {
        locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
        locations->SetOut(Location::SameAsFirstInput());
        break;
      }
@@ -2016,12 +2017,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
      }
  
      case Primitive::kPrimFloat: {
-      __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ addss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ addss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
        break;
      }
  
      case Primitive::kPrimDouble: {
-      __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ addsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ addsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
        break;
      }
  
@@ -2049,7 +2068,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) {
      case Primitive::kPrimFloat:
      case Primitive::kPrimDouble: {
        locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
        locations->SetOut(Location::SameAsFirstInput());
        break;
      }
@@ -2087,12 +2106,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
      }
  
      case Primitive::kPrimFloat: {
-      __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ subss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ subss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
        break;
      }
  
      case Primitive::kPrimDouble: {
-      __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ subsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ subsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
        break;
      }
  
@@ -2125,7 +2162,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) {
      case Primitive::kPrimFloat:
      case Primitive::kPrimDouble: {
        locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
        locations->SetOut(Location::SameAsFirstInput());
        break;
      }
@@ -2170,13 +2207,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
  
      case Primitive::kPrimFloat: {
        DCHECK(first.Equals(locations->Out()));
-      __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ mulss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ mulss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
        break;
      }
  
      case Primitive::kPrimDouble: {
        DCHECK(first.Equals(locations->Out()));
-      __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ mulsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ mulsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
        break;
      }
  
@@ -2566,7 +2621,7 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
      case Primitive::kPrimFloat:
      case Primitive::kPrimDouble: {
        locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
        locations->SetOut(Location::SameAsFirstInput());
        break;
      }
@@ -2591,12 +2646,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
      }
  
      case Primitive::kPrimFloat: {
-      __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ divss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ divss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
        break;
      }
  
      case Primitive::kPrimDouble: {
-      __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ divsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ divsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
        break;
      }
  
@@ -4135,5 +4208,65 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
    LOG(FATAL) << "Unreachable";
  }
  
+void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+  // Append the constant area at the current end of the code, but only if a literal was added.
+  if (!__ IsConstantAreaEmpty()) {
+    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
+    // byte values.  If used for vectors at a later time, this will need to be
+    // updated to 16 bytes with the appropriate offset.
+    __ Align(4, 0);
+    constant_area_start_ = __ CodeSize();  // Taken after aligning; RIPFixup reads it back.
+    __ AddConstantArea();
+  }
+
+  // And finish up (presumably where the recorded fixups get processed - TODO confirm).
+  CodeGenerator::Finalize(allocator);
+}
+
+/**
+ * Class to handle late fixup of offsets into constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+  public:
+    RIPFixup(CodeGeneratorX86_64& codegen, int offset)
+      : codegen_(codegen), offset_into_constant_area_(offset) {}
+
+  private:
+    void Process(const MemoryRegion& region, int pos) OVERRIDE {
+      // Patch the correct offset for the instruction.  We use the address of the
+      // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+      int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
+      int relative_position = constant_offset - pos;
+
+      // Patch in the right value.
+      region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+    }
+
+    CodeGeneratorX86_64& codegen_;
+
+    // Location in constant area that the fixup refers to.
+    int offset_into_constant_area_;
+};
+
+Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {  // Pools 'v'; returns its operand.
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
+  return Address::RIP(fixup);  // Displacement stays a placeholder until the fixup runs.
+}
+
+Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {  // Pools 'v'; returns its operand.
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
+  return Address::RIP(fixup);  // Displacement stays a placeholder until the fixup runs.
+}
+
+Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {  // Pools 'v'; returns its operand.
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
+  return Address::RIP(fixup);  // Displacement stays a placeholder until the fixup runs.
+}
+
+Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {  // Pools 'v'; returns its operand.
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
+  return Address::RIP(fixup);  // Displacement stays a placeholder until the fixup runs.
+}
+
  }  // namespace x86_64
  }  // namespace art
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h

index be2a79e..c819eec 100644 (file)
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -246,6 +246,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
    Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
    void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
    void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+  void Finalize(CodeAllocator* allocator) OVERRIDE;
  
    InstructionSet GetInstructionSet() const OVERRIDE {
      return InstructionSet::kX86_64;
@@ -277,6 +278,15 @@ class CodeGeneratorX86_64 : public CodeGenerator {
      return isa_features_;
    }
  
+  int ConstantAreaStart() const {
+    return constant_area_start_;  // Code offset of the constant area; assigned in Finalize().
+  }
+
+  Address LiteralDoubleAddress(double v);
+  Address LiteralFloatAddress(float v);
+  Address LiteralInt32Address(int32_t v);
+  Address LiteralInt64Address(int64_t v);
+
   private:
    // Labels for each block that will be compiled.
    GrowableArray<Label> block_labels_;
@@ -287,6 +297,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
    X86_64Assembler assembler_;
    const X86_64InstructionSetFeatures& isa_features_;
  
+  // Offset to start of the constant area in the assembled code.
+  // Used for fixups to the constant area.
+  int constant_area_start_;
+
    DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
  };
  
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc

index 5122a00..c0c4ff3 100644 (file)
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -298,25 +298,23 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke)
    // TODO: Allow x86 to work with memory. This requires assembler support, see below.
    // locations->SetInAt(0, Location::Any());               // X86 can work on memory directly.
    locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
-  locations->AddTemp(Location::RequiresFpuRegister());  // FP version of above.
+  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
  }
  
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations, bool is64bit,
+                      X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
    Location output = locations->Out();
-  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
  
    if (output.IsFpuRegister()) {
      // In-register
-    XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  
+    // TODO: Can mask directly with constant area if we align on 16 bytes.
      if (is64bit) {
-      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
-      __ movd(xmm_temp, cpu_temp);
+      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
        __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
      } else {
-      __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
-      __ movd(xmm_temp, cpu_temp);
+      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
        __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
      }
    } else {
@@ -341,7 +339,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  }
  
  void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
  }
  
  void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -349,7 +347,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  }
  
  void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
  }
  
  static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -400,7 +398,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  }
  
  static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
-                        X86_64Assembler* assembler) {
+                        X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
    Location op1_loc = locations->InAt(0);
    Location op2_loc = locations->InAt(1);
    Location out_loc = locations->Out();
@@ -427,7 +425,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
    //
    // This removes one jmp, but needs to copy one input (op1) to out.
    //
-  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
  
    XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
  
@@ -461,14 +459,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
  
    // NaN handling.
    __ Bind(&nan);
-  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-  // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
    if (is_double) {
-    __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
    } else {
-    __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
    }
-  __ movd(out, cpu_temp, is_double);
    __ jmp(&done);
  
    // out := op2;
@@ -483,7 +478,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
    __ Bind(&done);
  }
  
-static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
@@ -492,39 +487,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo
    // The following is sub-optimal, but all we can do for now. It would be fine to also accept
    // the second input to be the output (we can simply swap inputs).
    locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
  }
  
  void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
  }
  
  void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
  }
  
  void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
  }
  
  void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
  }
  
  void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
  }
  
  void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
  }
  
  void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
  }
  
  void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
  }
  
  static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc

index 30e8218..a9e8c50 100644 (file)
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1975,6 +1975,10 @@ void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand)
    for (int i = 1; i < length; i++) {
      EmitUint8(operand.encoding_[i]);
    }
+  AssemblerFixup* fixup = operand.GetFixup();
+  if (fixup != nullptr) {
+    EmitFixup(fixup);
+  }
  }
  
  
@@ -2702,5 +2706,55 @@ void X86_64ExceptionSlowPath::Emit(Assembler *sasm) {
  #undef __
  }
  
+void X86_64Assembler::AddConstantArea() {  // Emits every pooled 32-bit word, in pool order.
+  const std::vector<int32_t>& area = constant_area_.GetBuffer();
+  for (size_t i = 0, u = area.size(); i < u; i++) {
+    AssemblerBuffer::EnsureCapacity ensured(&buffer_);  // Constructed anew for each word.
+    EmitInt32(area[i]);
+  }
+}
+
+int ConstantArea::AddInt32(int32_t v) {  // Returns the byte offset of 'v' within the pool.
+  for (size_t i = 0, u = buffer_.size(); i < u; i++) {
+    if (v == buffer_[i]) {
+      return i * elem_size_;  // Reuse the existing word instead of adding a duplicate.
+    }
+  }
+
+  // Didn't match anything: append 'v' and return the offset of the new word.
+  int result = buffer_.size() * elem_size_;
+  buffer_.push_back(v);
+  return result;
+}
+
+int ConstantArea::AddInt64(int64_t v) {  // Returns the byte offset of the low word of 'v'.
+  int32_t v_low = v;
+  int32_t v_high = v >> 32;
+  if (buffer_.size() > 1) {
+    // Stop one short of the end: each candidate reads buffer_[i] and buffer_[i+1].
+    for (size_t i = 0, u = buffer_.size() - 1; i < u; i++) {
+      if (v_low == buffer_[i] && v_high == buffer_[i+1]) {
+        return i * elem_size_;  // Any adjacent pair matches; offset need not be a multiple of 8.
+      }
+    }
+  }
+
+  // Didn't match anything: append the low word, then the high word.
+  int result = buffer_.size() * elem_size_;
+  buffer_.push_back(v_low);
+  buffer_.push_back(v_high);
+  return result;
+}
+
+int ConstantArea::AddDouble(double v) {
+  // Pool the raw 64-bit pattern via AddInt64, so equal bit patterns share one entry.
+  return AddInt64(bit_cast<int64_t, double>(v));
+}
+
+int ConstantArea::AddFloat(float v) {
+  // Pool the raw 32-bit pattern via AddInt32, so equal bit patterns share one entry.
+  return AddInt32(bit_cast<int32_t, float>(v));
+}
+
  }  // namespace x86_64
  }  // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h

index d357a81..ef6205c 100644 (file)
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -97,9 +97,13 @@ class Operand : public ValueObject {
          && (reg.NeedsRex() == ((rex_ & 1) != 0));  // REX.000B bits match.
    }
  
+  AssemblerFixup* GetFixup() const {
+    return fixup_;  // nullptr when no fixup is attached; EmitOperand checks for that.
+  }
+
   protected:
    // Operand can be sub classed (e.g: Address).
-  Operand() : rex_(0), length_(0) { }
+  Operand() : rex_(0), length_(0), fixup_(nullptr) { }
  
    void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
      CHECK_EQ(mod_in & ~3, 0);
@@ -136,12 +140,17 @@ class Operand : public ValueObject {
      length_ += disp_size;
    }
  
+  void SetFixup(AssemblerFixup* fixup) {
+    fixup_ = fixup;  // Kept as a plain pointer; EmitOperand later hands it to EmitFixup.
+  }
+
   private:
    uint8_t rex_;
    uint8_t length_;
    uint8_t encoding_[6];
+  AssemblerFixup* fixup_;
  
-  explicit Operand(CpuRegister reg) : rex_(0), length_(0) { SetModRM(3, reg); }
+  explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
  
    // Get the operand encoding byte at the given index.
    uint8_t encoding_at(int index_in) const {
@@ -232,6 +241,15 @@ class Address : public Operand {
      return result;
    }
  
+  // An RIP relative address whose 32-bit displacement is patched later through 'fixup'.
+  static Address RIP(AssemblerFixup* fixup) {
+    Address result;
+    result.SetModRM(0, CpuRegister(RBP));  // mod=00, rm=RBP: the [RIP + disp32] form.
+    result.SetDisp32(0);  // Placeholder; the real distance is not known yet.
+    result.SetFixup(fixup);
+    return result;
+  }
+
    // If no_rip is true then the Absolute address isn't RIP relative.
    static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) {
      return Absolute(addr.Int32Value(), no_rip);
@@ -242,6 +260,55 @@ class Address : public Operand {
  };
  
  
+/**
+ * Pool of literal values kept as 32-bit words; a value already present is reused.
+ */
+class ConstantArea {
+  public:
+    ConstantArea() {}
+
+    /**
+     * Add a double to the constant area (pooled as its 64-bit pattern).
+     * @param v literal to be added to the constant area.
+     * @returns the byte offset in the constant area where the literal resides.
+     */
+    int AddDouble(double v);
+
+    /**
+     * Add a float to the constant area (pooled as its 32-bit pattern).
+     * @param v literal to be added to the constant area.
+     * @returns the byte offset in the constant area where the literal resides.
+     */
+    int AddFloat(float v);
+
+    /**
+     * Add an int32_t to the constant area, reusing an equal word if one exists.
+     * @param v literal to be added to the constant area.
+     * @returns the byte offset in the constant area where the literal resides.
+     */
+    int AddInt32(int32_t v);
+
+    /**
+     * Add an int64_t to the constant area as two words, low word first.
+     * @param v literal to be added to the constant area.
+     * @returns the byte offset in the constant area where the low word resides.
+     */
+    int AddInt64(int64_t v);
+
+    int GetSize() const {
+      return buffer_.size() * elem_size_;  // Size in bytes, not in words.
+    }
+
+    const std::vector<int32_t>& GetBuffer() const {
+      return buffer_;  // Read-only view of the pooled words, in insertion order.
+    }
+
+  private:
+    static constexpr size_t elem_size_ = sizeof(int32_t);  // Bytes per pooled word.
+    std::vector<int32_t> buffer_;  // 64-bit literals occupy two consecutive words.
+};
+
+
  class X86_64Assembler FINAL : public Assembler {
   public:
    X86_64Assembler() {}
@@ -669,6 +736,45 @@ class X86_64Assembler FINAL : public Assembler {
    // and branch to a ExceptionSlowPath if it is.
    void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
  
+  /**
+   * Add a double to the constant area (forwards to ConstantArea::AddDouble).
+   * @param v literal to be added to the constant area.
+   * @returns the byte offset in the constant area where the literal resides.
+   */
+  int AddDouble(double v) { return constant_area_.AddDouble(v); }
+
+  /**
+   * Add a float to the constant area (forwards to ConstantArea::AddFloat).
+   * @param v literal to be added to the constant area.
+   * @returns the byte offset in the constant area where the literal resides.
+   */
+  int AddFloat(float v)   { return constant_area_.AddFloat(v); }
+
+  /**
+   * Add an int32_t to the constant area (forwards to ConstantArea::AddInt32).
+   * @param v literal to be added to the constant area.
+   * @returns the byte offset in the constant area where the literal resides.
+   */
+  int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+
+  /**
+   * Add an int64_t to the constant area (forwards to ConstantArea::AddInt64).
+   * @param v literal to be added to the constant area.
+   * @returns the byte offset in the constant area where the literal resides.
+   */
+  int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+
+  /**
+   * Add the contents of the constant area to the assembler buffer.
+   */
+  void AddConstantArea();
+
+  /**
+   * Is the constant area empty (i.e. its size in bytes is zero)?
+   * @returns 'true' if there are no literals in the constant area.
+   */
+  bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
+
   private:
    void EmitUint8(uint8_t value);
    void EmitInt32(int32_t value);
@@ -714,6 +820,8 @@ class X86_64Assembler FINAL : public Assembler {
    void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
    void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
  
+  ConstantArea constant_area_;
+
    DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
  };
author	Mark Mendell <mark.p.mendell@intel.com>
	Fri, 27 Mar 2015 01:07:46 +0000 (21:07 -0400)
committer	Mark Mendell <mark.p.mendell@intel.com>
	Thu, 9 Apr 2015 14:47:11 +0000 (10:47 -0400)
compiler/optimizing/code_generator_x86_64.cc		patch \| blob \| history
compiler/optimizing/code_generator_x86_64.h		patch \| blob \| history
compiler/optimizing/intrinsics_x86_64.cc		patch \| blob \| history
compiler/utils/x86_64/assembler_x86_64.cc		patch \| blob \| history
compiler/utils/x86_64/assembler_x86_64.h		patch \| blob \| history