From b4ba354cf8d22b261205494875cc014f18587b50 Mon Sep 17 00:00:00 2001 From: Nicolas Geoffray Date: Thu, 5 Mar 2015 11:28:58 +0000 Subject: [PATCH] [optimizing] Enable x86 long support. Change-Id: I9006972a65a1f191c45691104a960366747f9d16 --- compiler/optimizing/code_generator.h | 16 +- compiler/optimizing/code_generator_x86.cc | 235 +++++++++++++++++++++++----- compiler/optimizing/code_generator_x86.h | 2 + compiler/optimizing/locations.h | 22 ++- compiler/optimizing/nodes.h | 15 +- compiler/optimizing/optimizing_compiler.cc | 2 +- compiler/optimizing/register_allocator.cc | 110 ++++++++----- compiler/optimizing/register_allocator.h | 24 +-- compiler/optimizing/ssa_liveness_analysis.h | 6 +- compiler/utils/x86/assembler_x86.cc | 30 ++++ compiler/utils/x86/assembler_x86.h | 5 + 11 files changed, 354 insertions(+), 113 deletions(-) diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 5146afad8..b8f4572ab 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -153,17 +153,13 @@ class CodeGenerator { virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; // Restores the register from the stack. Returns the size taken on stack. virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; - virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - UNUSED(stack_index, reg_id); - UNIMPLEMENTED(FATAL); - UNREACHABLE(); - } - virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - UNUSED(stack_index, reg_id); - UNIMPLEMENTED(FATAL); - UNREACHABLE(); - } + + virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; + virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; + virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; + // Returns whether we should split long moves in parallel moves. 
+ virtual bool ShouldSplitLongMoves() const { return false; } bool IsCoreCalleeSaveRegister(int reg) const { return (core_callee_save_mask_ & (1 << reg)) != 0; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4b8adddfe..7f2ea021d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -673,8 +673,19 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { source.AsRegisterPairHigh()); } else if (source.IsFpuRegister()) { __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int64_t value; + if (constant->IsLongConstant()) { + value = constant->AsLongConstant()->GetValue(); + } else { + DCHECK(constant->IsDoubleConstant()); + value = bit_cast(constant->AsDoubleConstant()->GetValue()); + } + __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value))); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value))); } else { - DCHECK(source.IsDoubleStackSlot()); + DCHECK(source.IsDoubleStackSlot()) << source; EmitParallelMoves( Location::StackSlot(source.GetStackIndex()), Location::StackSlot(destination.GetStackIndex()), @@ -1555,8 +1566,6 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio // Processing a Dex `int-to-byte' instruction. 
if (in.IsRegister()) { __ movsxb(out.AsRegister(), in.AsRegister()); - } else if (in.IsStackSlot()) { - __ movsxb(out.AsRegister(), Address(ESP, in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsIntConstant()); int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); @@ -1892,10 +1901,15 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { if (second.IsRegisterPair()) { __ addl(first.AsRegisterPairLow(), second.AsRegisterPairLow()); __ adcl(first.AsRegisterPairHigh(), second.AsRegisterPairHigh()); - } else { + } else if (second.IsDoubleStackSlot()) { __ addl(first.AsRegisterPairLow(), Address(ESP, second.GetStackIndex())); __ adcl(first.AsRegisterPairHigh(), Address(ESP, second.GetHighStackIndex(kX86WordSize))); + } else { + DCHECK(second.IsConstant()) << second; + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + __ addl(first.AsRegisterPairLow(), Immediate(Low32Bits(value))); + __ adcl(first.AsRegisterPairHigh(), Immediate(High32Bits(value))); } break; } @@ -1965,10 +1979,15 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { if (second.IsRegisterPair()) { __ subl(first.AsRegisterPairLow(), second.AsRegisterPairLow()); __ sbbl(first.AsRegisterPairHigh(), second.AsRegisterPairHigh()); - } else { + } else if (second.IsDoubleStackSlot()) { __ subl(first.AsRegisterPairLow(), Address(ESP, second.GetStackIndex())); __ sbbl(first.AsRegisterPairHigh(), Address(ESP, second.GetHighStackIndex(kX86WordSize))); + } else { + DCHECK(second.IsConstant()) << second; + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + __ subl(first.AsRegisterPairLow(), Immediate(Low32Bits(value))); + __ sbbl(first.AsRegisterPairHigh(), Immediate(High32Bits(value))); } break; } @@ -1999,12 +2018,6 @@ void LocationsBuilderX86::VisitMul(HMul* mul) { break; case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // TODO: Currently this handles only stack operands: - // - we don't have enough registers 
because we currently use Quick ABI. - // - by the time we have a working register allocator we will probably change the ABI - // and fix the above. - // - we don't have a way yet to request operands on stack but the base line compiler - // will leave the operands on the stack with Any(). locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); // Needed for imul on 32bits with 64bits output. @@ -2046,39 +2059,83 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } case Primitive::kPrimLong: { - DCHECK(second.IsDoubleStackSlot()); - Register in1_hi = first.AsRegisterPairHigh(); Register in1_lo = first.AsRegisterPairLow(); - Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize)); - Address in2_lo(ESP, second.GetStackIndex()); Register eax = locations->GetTemp(0).AsRegister(); Register edx = locations->GetTemp(1).AsRegister(); DCHECK_EQ(EAX, eax); DCHECK_EQ(EDX, edx); - // input: in1 - 64 bits, in2 - 64 bits + // input: in1 - 64 bits, in2 - 64 bits. 
// output: in1 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32] // parts: in1.lo = (in1.lo * in2.lo)[31:0] - - __ movl(eax, in2_hi); - // eax <- in1.lo * in2.hi - __ imull(eax, in1_lo); - // in1.hi <- in1.hi * in2.lo - __ imull(in1_hi, in2_lo); - // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo - __ addl(in1_hi, eax); - // move in1_lo to eax to prepare for double precision - __ movl(eax, in1_lo); - // edx:eax <- in1.lo * in2.lo - __ mull(in2_lo); - // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] - __ addl(in1_hi, edx); - // in1.lo <- (in1.lo * in2.lo)[31:0]; - __ movl(in1_lo, eax); + if (second.IsConstant()) { + DCHECK(second.GetConstant()->IsLongConstant()); + + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + int32_t low_value = Low32Bits(value); + int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate high(high_value); + + __ movl(eax, high); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, low); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in2_lo to eax to prepare for double precision + __ movl(eax, low); + // edx:eax <- in1.lo * in2.lo + __ mull(in1_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + } else if (second.IsRegisterPair()) { + Register in2_hi = second.AsRegisterPairHigh(); + Register in2_lo = second.AsRegisterPairLow(); + + __ movl(eax, in2_hi); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, in2_lo); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in1_lo to eax to prepare for double precision + __ movl(eax, in1_lo); + // edx:eax <- in1.lo * in2.lo + __ 
mull(in2_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + } else { + DCHECK(second.IsDoubleStackSlot()) << second; + Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize)); + Address in2_lo(ESP, second.GetStackIndex()); + + __ movl(eax, in2_hi); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, in2_lo); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in1_lo to eax to prepare for double precision + __ movl(eax, in1_lo); + // edx:eax <- in1.lo * in2.lo + __ mull(in2_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + } break; } @@ -2674,18 +2731,24 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { case Primitive::kPrimLong: { if (right.IsRegisterPair()) { __ cmpl(left.AsRegisterPairHigh(), right.AsRegisterPairHigh()); - } else { - DCHECK(right.IsDoubleStackSlot()); + } else if (right.IsDoubleStackSlot()) { __ cmpl(left.AsRegisterPairHigh(), Address(ESP, right.GetHighStackIndex(kX86WordSize))); + } else { + DCHECK(right.IsConstant()) << right; + __ cmpl(left.AsRegisterPairHigh(), + Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue()))); } __ j(kLess, &less); // Signed compare. __ j(kGreater, &greater); // Signed compare. 
if (right.IsRegisterPair()) { __ cmpl(left.AsRegisterPairLow(), right.AsRegisterPairLow()); - } else { - DCHECK(right.IsDoubleStackSlot()); + } else if (right.IsDoubleStackSlot()) { __ cmpl(left.AsRegisterPairLow(), Address(ESP, right.GetStackIndex())); + } else { + DCHECK(right.IsConstant()) << right; + __ cmpl(left.AsRegisterPairLow(), + Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue()))); } break; } @@ -2770,7 +2833,12 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + // The output overlaps in case of long: we don't want the low move to overwrite + // the object's location. + locations->SetOut(Location::RequiresRegister(), + (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap + : Location::kNoOutputOverlap); if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { // Long values can be loaded atomically into an XMM using movsd. 
@@ -2827,6 +2895,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, __ psrlq(temp, Immediate(32)); __ movd(out.AsRegisterPairHigh(), temp); } else { + DCHECK_NE(base, out.AsRegisterPairLow()); __ movl(out.AsRegisterPairLow(), Address(base, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(out.AsRegisterPairHigh(), Address(base, kX86WordSize + offset)); @@ -3064,7 +3133,11 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in case of long: we don't want the low move to overwrite + // the array's location. + locations->SetOut(Location::RequiresRegister(), + (instruction->GetType() == Primitive::kPrimLong) ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { @@ -3138,6 +3211,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); Location out = locations->Out(); + DCHECK_NE(obj, out.AsRegisterPairLow()); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; __ movl(out.AsRegisterPairLow(), Address(obj, offset)); @@ -3569,8 +3643,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); } - } else { - DCHECK(constant->IsFloatConstant()); + } else if (constant->IsFloatConstant()) { float value = constant->AsFloatConstant()->GetValue(); Immediate imm(bit_cast(value)); if (destination.IsFpuRegister()) { @@ -3583,6 +3656,43 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(ESP, destination.GetStackIndex()), imm); } + } else if (constant->IsLongConstant()) { + int64_t value = constant->AsLongConstant()->GetValue(); + int32_t low_value = Low32Bits(value); + int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate high(high_value); + if (destination.IsDoubleStackSlot()) { + __ movl(Address(ESP, destination.GetStackIndex()), low); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high); + } else { + __ movl(destination.AsRegisterPairLow(), low); + __ movl(destination.AsRegisterPairHigh(), high); + } + } else { + DCHECK(constant->IsDoubleConstant()); + double dbl_value = constant->AsDoubleConstant()->GetValue(); + int64_t value = bit_cast(dbl_value); + int32_t low_value = Low32Bits(value); + int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate 
high(high_value); + if (destination.IsFpuRegister()) { + XmmRegister dest = destination.AsFpuRegister(); + if (value == 0) { + // Easy handling of 0.0. + __ xorpd(dest, dest); + } else { + __ pushl(high); + __ pushl(low); + __ movsd(dest, Address(ESP, 0)); + __ addl(ESP, Immediate(8)); + } + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ movl(Address(ESP, destination.GetStackIndex()), low); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high); + } } } else { LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source; @@ -3650,6 +3760,33 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Exchange32(source.AsFpuRegister(), destination.GetStackIndex()); } else if (destination.IsFpuRegister() && source.IsStackSlot()) { Exchange32(destination.AsFpuRegister(), source.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { + // Take advantage of the 16 bytes in the XMM register. + XmmRegister reg = source.AsFpuRegister(); + Address stack(ESP, destination.GetStackIndex()); + // Load the stack double into the high quadword of the register. + __ movhpd(reg, stack); + + // Store the register's original low double into the stack slot. + __ movsd(stack, reg); + + // Move the high double to the low double. + __ psrldq(reg, Immediate(8)); + } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) { + // Take advantage of the 16 bytes in the XMM register. + XmmRegister reg = destination.AsFpuRegister(); + Address stack(ESP, source.GetStackIndex()); + // Load the stack double into the high quadword of the register. + __ movhpd(reg, stack); + + // Store the register's original low double into the stack slot. + __ movsd(stack, reg); + + // Move the high double to the low double. 
+ __ psrldq(reg, Immediate(8)); + } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) { + Exchange(destination.GetStackIndex(), source.GetStackIndex()); + Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize)); } else { LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination; } @@ -3951,7 +4088,7 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr __ xorl(first.AsRegisterPairLow(), second.AsRegisterPairLow()); __ xorl(first.AsRegisterPairHigh(), second.AsRegisterPairHigh()); } - } else { + } else if (second.IsDoubleStackSlot()) { if (instruction->IsAnd()) { __ andl(first.AsRegisterPairLow(), Address(ESP, second.GetStackIndex())); __ andl(first.AsRegisterPairHigh(), @@ -3966,6 +4103,22 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr __ xorl(first.AsRegisterPairHigh(), Address(ESP, second.GetHighStackIndex(kX86WordSize))); } + } else { + DCHECK(second.IsConstant()) << second; + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + Immediate low(Low32Bits(value)); + Immediate high(High32Bits(value)); + if (instruction->IsAnd()) { + __ andl(first.AsRegisterPairLow(), low); + __ andl(first.AsRegisterPairHigh(), high); + } else if (instruction->IsOr()) { + __ orl(first.AsRegisterPairLow(), low); + __ orl(first.AsRegisterPairHigh(), high); + } else { + DCHECK(instruction->IsXor()); + __ xorl(first.AsRegisterPairLow(), low); + __ xorl(first.AsRegisterPairHigh(), high); + } } } } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f5a9b7d1f..c5763de05 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -245,6 +245,8 @@ class CodeGeneratorX86 : public CodeGenerator { return type == Primitive::kPrimLong; } + bool ShouldSplitLongMoves() const OVERRIDE { return true; } + Label* GetFrameEntryLabel() { 
return &frame_entry_label_; } private: diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 198cc15cc..566c0daf1 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -211,15 +211,25 @@ class Location : public ValueObject { } Location ToLow() const { - return IsRegisterPair() - ? Location::RegisterLocation(low()) - : Location::FpuRegisterLocation(low()); + if (IsRegisterPair()) { + return Location::RegisterLocation(low()); + } else if (IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(low()); + } else { + DCHECK(IsDoubleStackSlot()); + return Location::StackSlot(GetStackIndex()); + } } Location ToHigh() const { - return IsRegisterPair() - ? Location::RegisterLocation(high()) - : Location::FpuRegisterLocation(high()); + if (IsRegisterPair()) { + return Location::RegisterLocation(high()); + } else if (IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(high()); + } else { + DCHECK(IsDoubleStackSlot()); + return Location::StackSlot(GetHighStackIndex(4)); + } } static uintptr_t EncodeStackIndex(intptr_t stack_index) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b7dd75645..6945ff0d5 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -3289,8 +3289,19 @@ class HParallelMove : public HTemplateInstruction<0> { if (kIsDebugBuild) { if (instruction != nullptr) { for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) - << "Doing parallel moves for the same instruction."; + if (moves_.Get(i).GetInstruction() == instruction) { + // Special case the situation where the move is for the spill slot + // of the instruction. 
+ if ((GetPrevious() == instruction) + || ((GetPrevious() == nullptr) + && instruction->IsPhi() + && instruction->GetBlock() == GetBlock())) { + DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind()) + << "Doing parallel moves for the same instruction."; + } else { + DCHECK(false) << "Doing parallel moves for the same instruction."; + } + } } } for (size_t i = 0, e = moves_.Size(); i < e; ++i) { diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index eb984248a..9971daf37 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -523,7 +523,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, dex_file, dex_compilation_unit, &pass_info_printer); - } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { + } else if (shouldOptimize && can_allocate_registers) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); } else { diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 54e62a5b2..c1760d192 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -32,6 +32,9 @@ static constexpr size_t kDefaultNumberOfSpillSlots = 4; // allocate SRegister. 
static int GetHighForLowRegister(int reg) { return reg + 1; } static bool IsLowRegister(int reg) { return (reg & 1) == 0; } +static bool IsLowOfUnalignedPairInterval(LiveInterval* low) { + return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister(); +} RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, CodeGenerator* codegen, @@ -70,28 +73,13 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); } -bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, +bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED, InstructionSet instruction_set) { - if (!Supports(instruction_set)) { - return false; - } - if (instruction_set == kArm64 + return instruction_set == kArm64 || instruction_set == kX86_64 || instruction_set == kArm - || instruction_set == kThumb2) { - return true; - } - for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) { - for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions()); - !it.Done(); - it.Advance()) { - HInstruction* current = it.Current(); - if (instruction_set == kX86 && current->GetType() == Primitive::kPrimLong) { - return false; - } - } - } - return true; + || instruction_set == kX86 + || instruction_set == kThumb2; } static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) { @@ -771,8 +759,15 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { return false; } - if (current->IsLowInterval() && free_until[GetHighForLowRegister(reg)] == 0) { - return false; + if (current->IsLowInterval()) { + // If the high register of this interval is not available, we need to spill. 
+ int high_reg = current->GetHighInterval()->GetRegister(); + if (high_reg == kNoRegister) { + high_reg = GetHighForLowRegister(reg); + } + if (free_until[high_reg] == 0) { + return false; + } } current->SetRegister(reg); @@ -831,16 +826,18 @@ int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { return reg; } -bool RegisterAllocator::TrySplitNonPairIntervalAt(size_t position, - size_t first_register_use, - size_t* next_use) { +bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use) { for (size_t i = 0, e = active_.Size(); i < e; ++i) { LiveInterval* active = active_.Get(i); DCHECK(active->HasRegister()); + if (active->IsFixed()) continue; + if (active->IsHighInterval()) continue; + if (first_register_use > next_use[active->GetRegister()]) continue; + // Split the first interval found. - if (first_register_use <= next_use[active->GetRegister()] - && !active->IsLowInterval() - && !active->IsHighInterval()) { + if (!active->IsLowInterval() || IsLowOfUnalignedPairInterval(active)) { LiveInterval* split = Split(active, position); active_.DeleteAt(i); if (split != active) { @@ -934,14 +931,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { DCHECK_NE(reg, kNoRegister); if (should_spill) { DCHECK(!current->IsHighInterval()); - bool is_allocation_at_use_site = (current->GetStart() == (first_register_use - 1)); + bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1)); if (current->IsLowInterval() && is_allocation_at_use_site - && TrySplitNonPairIntervalAt(current->GetStart(), first_register_use, next_use)) { + && TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(), + first_register_use, + next_use)) { // If we're allocating a register for `current` because the instruction at // that position requires it, but we think we should spill, then there are - // non-pair intervals blocking the allocation. 
We split the first - // interval found, and put ourselves first in the `unhandled_` list. + // non-pair intervals or unaligned pair intervals blocking the allocation. + // We split the first interval found, and put ourselves first in the + // `unhandled_` list. LiveInterval* existing = unhandled_->Peek(); DCHECK(existing->IsHighInterval()); DCHECK_EQ(existing->GetLowInterval(), current); @@ -1203,7 +1203,24 @@ static bool IsValidDestination(Location destination) { || destination.IsDoubleStackSlot(); } -void RegisterAllocator::AddInputMoveFor(HInstruction* user, +void RegisterAllocator::AddMove(HParallelMove* move, + Location source, + Location destination, + HInstruction* instruction, + Primitive::Type type) const { + if (type == Primitive::kPrimLong + && codegen_->ShouldSplitLongMoves() + // The parallel move resolver knows how to deal with long constants. + && !source.IsConstant()) { + move->AddMove(source.ToLow(), destination.ToLow(), instruction); + move->AddMove(source.ToHigh(), destination.ToHigh(), nullptr); + } else { + move->AddMove(source, destination, instruction); + } +} + +void RegisterAllocator::AddInputMoveFor(HInstruction* input, + HInstruction* user, Location source, Location destination) const { if (source.Equals(destination)) return; @@ -1222,7 +1239,7 @@ void RegisterAllocator::AddInputMoveFor(HInstruction* user, move = previous->AsParallelMove(); } DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition()); - move->AddMove(source, destination, nullptr); + AddMove(move, source, destination, nullptr, input->GetType()); } static bool IsInstructionStart(size_t position) { @@ -1251,8 +1268,16 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, at = liveness_.GetInstructionFromPosition((position + 1) / 2); // Note that parallel moves may have already been inserted, so we explicitly // ask for the first instruction of the block: `GetInstructionFromPosition` does - // not contain the moves. 
+ // not contain the `HParallelMove` instructions. at = at->GetBlock()->GetFirstInstruction(); + + if (at->GetLifetimePosition() < position) { + // We may insert moves for split siblings and phi spills at the beginning of the block. + // Since this is a different lifetime position, we need to go to the next instruction. + DCHECK(at->IsParallelMove()); + at = at->GetNext(); + } + if (at->GetLifetimePosition() != position) { DCHECK_GT(at->GetLifetimePosition(), position); move = new (allocator_) HParallelMove(allocator_); @@ -1294,7 +1319,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, } } DCHECK_EQ(move->GetLifetimePosition(), position); - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, @@ -1324,7 +1349,7 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, } else { move = previous->AsParallelMove(); } - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, @@ -1336,14 +1361,15 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, HInstruction* first = block->GetFirstInstruction(); HParallelMove* move = first->AsParallelMove(); + size_t position = block->GetLifetimeStart(); // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and input moves. 
- if (move == nullptr || move->GetLifetimePosition() != block->GetLifetimeStart()) { + if (move == nullptr || move->GetLifetimePosition() != position) { move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(block->GetLifetimeStart()); + move->SetLifetimePosition(position); block->InsertInstructionBefore(move, first); } - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, @@ -1367,7 +1393,7 @@ void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, move->SetLifetimePosition(position); instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); } - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { @@ -1401,7 +1427,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { if (expected_location.IsUnallocated()) { locations->SetInAt(use->GetInputIndex(), source); } else if (!expected_location.IsConstant()) { - AddInputMoveFor(use->GetUser(), source, expected_location); + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); } } else { DCHECK(use->GetUser()->IsInvoke()); @@ -1648,7 +1674,7 @@ void RegisterAllocator::Resolve() { Location source = input->GetLiveInterval()->GetLocationAt( predecessor->GetLifetimeEnd() - 1); Location destination = phi->GetLiveInterval()->ToLocation(); - InsertParallelMoveAtExitOf(predecessor, nullptr, source, destination); + InsertParallelMoveAtExitOf(predecessor, phi, source, destination); } } } diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 579f069f5..fcc61128a 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -66,13 +66,6 @@ class RegisterAllocator { 
bool log_fatal_on_failure); static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); - static bool Supports(InstructionSet instruction_set) { - return instruction_set == kArm - || instruction_set == kArm64 - || instruction_set == kThumb2 - || instruction_set == kX86 - || instruction_set == kX86_64; - } size_t GetNumberOfSpillSlots() const { return int_spill_slots_.Size() @@ -121,12 +114,21 @@ class RegisterAllocator { Location source, Location destination) const; void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const; - void AddInputMoveFor(HInstruction* user, Location source, Location destination) const; + void AddInputMoveFor(HInstruction* input, + HInstruction* user, + Location source, + Location destination) const; void InsertParallelMoveAt(size_t position, HInstruction* instruction, Location source, Location destination) const; + void AddMove(HParallelMove* move, + Location source, + Location destination, + HInstruction* instruction, + Primitive::Type type) const; + // Helper methods. void AllocateRegistersInternal(); void ProcessInstruction(HInstruction* instruction); @@ -136,9 +138,11 @@ class RegisterAllocator { int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const; int FindAvailableRegister(size_t* next_use) const; - // Try splitting an active non-pair interval at the given `position`. + // Try splitting an active non-pair or unaligned pair interval at the given `position`. // Returns whether it was successful at finding such an interval. 
- bool TrySplitNonPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use); + bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use); ArenaAllocator* const allocator_; CodeGenerator* const codegen_; diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 9ff2f205d..5787f0cc4 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -373,13 +373,17 @@ class LiveInterval : public ArenaObject { if (location.IsUnallocated()) { if ((location.GetPolicy() == Location::kRequiresRegister) || (location.GetPolicy() == Location::kSameAsFirstInput - && locations->InAt(0).GetPolicy() == Location::kRequiresRegister)) { + && (locations->InAt(0).IsRegister() + || locations->InAt(0).IsRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { return position; } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) || (location.GetPolicy() == Location::kSameAsFirstInput && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) { return position; } + } else if (location.IsRegister() || location.IsRegisterPair()) { + return position; } } diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 8f4208b41..90170ceed 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -451,6 +451,36 @@ void X86Assembler::movsd(XmmRegister dst, XmmRegister src) { } +void X86Assembler::movhpd(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x16); + EmitOperand(dst, src); +} + + +void X86Assembler::movhpd(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x17); + EmitOperand(src, dst); +} + + +void 
X86Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(3, reg); + EmitUint8(shift_count.value()); +} + + void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { DCHECK(shift_count.is_uint8()); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 2dde90744..4d20db03a 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -277,6 +277,11 @@ class X86Assembler FINAL : public Assembler { void psrlq(XmmRegister reg, const Immediate& shift_count); void punpckldq(XmmRegister dst, XmmRegister src); + void movhpd(XmmRegister dst, const Address& src); + void movhpd(const Address& dst, XmmRegister src); + + void psrldq(XmmRegister reg, const Immediate& shift_count); + void addsd(XmmRegister dst, XmmRegister src); void addsd(XmmRegister dst, const Address& src); void subsd(XmmRegister dst, XmmRegister src); -- 2.11.0