From 840e5461a85f8908f51e7f6cd562a9129ff0e7ce Mon Sep 17 00:00:00 2001 From: Nicolas Geoffray Date: Wed, 7 Jan 2015 16:01:24 +0000 Subject: [PATCH] Implement double and float support for arm in register allocator. The basic approach is: - An instruction that needs two registers gets two intervals. - When allocating the low part, we also allocate the high part. - When splitting a low (or high) interval, we also split the high (or low) equivalent. - Allocation follows the (S/D register) requirement that low registers are always even and the high equivalent is low + 1. Change-Id: I06a5148e05a2ffc7e7555d08e871ed007b4c2797 --- compiler/optimizing/code_generator.cc | 8 ++ compiler/optimizing/code_generator.h | 1 + compiler/optimizing/code_generator_arm.cc | 148 +++++++++++++++++--- compiler/optimizing/code_generator_arm.h | 6 + compiler/optimizing/code_generator_arm64.h | 4 + compiler/optimizing/code_generator_x86.h | 4 + compiler/optimizing/code_generator_x86_64.h | 4 + compiler/optimizing/graph_visualizer.cc | 15 +- compiler/optimizing/locations.h | 49 ++++++- compiler/optimizing/nodes.h | 2 +- compiler/optimizing/register_allocator.cc | 199 ++++++++++++++++++++++----- compiler/optimizing/register_allocator.h | 2 + compiler/optimizing/ssa_liveness_analysis.cc | 17 ++- compiler/optimizing/ssa_liveness_analysis.h | 81 ++++++++++- compiler/utils/arm/assembler_arm.h | 7 + 15 files changed, 480 insertions(+), 67 deletions(-) diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 4d8154e6a..ada0fb75d 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -620,6 +620,14 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { break; } + case Location::kFpuRegisterPair : { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.low()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.high()); + ++i; + DCHECK_LT(i, environment_size); + break; + } + default: LOG(FATAL) << "Unexpected kind " << location.GetKind(); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 4205ebebf..9880239c8 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -142,6 +142,7 @@ class CodeGenerator : public ArenaObject { UNIMPLEMENTED(FATAL); UNREACHABLE(); } + virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3b3fb6476..63f5f94e7 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -373,6 +373,16 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kArmWordSize; } +size_t CodeGeneratorARM::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ StoreSToOffset(static_cast(reg_id), SP, stack_index); + return kArmWordSize; +} + +size_t CodeGeneratorARM::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ LoadSFromOffset(static_cast(reg_id), SP, stack_index); + return kArmWordSize; +} + CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features) : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs), @@ -802,7 +812,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr __ LoadImmediate(IP, value); __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex()); } - } else if (const_to_move->IsLongConstant()) { + } else { + DCHECK(const_to_move->IsLongConstant()) << const_to_move; int64_t value = const_to_move->AsLongConstant()->GetValue(); if (location.IsRegisterPair()) { __ LoadImmediate(location.AsRegisterPairLow(), Low32Bits(value)); @@ -2994,10 +3005,34 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + Location out = locations->Out(); + DCHECK(out.IsFpuRegister()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadSFromOffset(out.AsFpuRegister(), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister(), LSL, TIMES_4)); + __ LoadSFromOffset(out.AsFpuRegister(), IP, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + Location out = locations->Out(); + DCHECK(out.IsFpuRegisterPair()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow()), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister(), LSL, TIMES_8)); + __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow()), IP, data_offset); + } + break; + } + case Primitive::kPrimVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -3114,12 +3149,36 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + Location value = locations->InAt(2); + DCHECK(value.IsFpuRegister()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ StoreSToOffset(value.AsFpuRegister(), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister(), LSL, TIMES_4)); + __ StoreSToOffset(value.AsFpuRegister(), IP, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + Location value = locations->InAt(2); + DCHECK(value.IsFpuRegisterPair()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow()), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister(), LSL, TIMES_8)); + __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow()), IP, data_offset); + } + break; + } + case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << value_type; UNREACHABLE(); } } @@ -3247,21 +3306,62 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { if (destination.IsRegister()) { __ LoadFromOffset(kLoadWord, destination.AsRegister(), SP, source.GetStackIndex()); + } else if (destination.IsFpuRegister()) { + __ LoadSFromOffset(destination.AsFpuRegister(), SP, source.GetStackIndex()); } else { DCHECK(destination.IsStackSlot()); __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); } - } else { - DCHECK(source.IsConstant()); - DCHECK(source.GetConstant()->IsIntConstant()); - int32_t value = source.GetConstant()->AsIntConstant()->GetValue(); - if (destination.IsRegister()) { - __ LoadImmediate(destination.AsRegister(), value); + } else if (source.IsFpuRegister()) { + if (destination.IsFpuRegister()) { + __ vmovs(destination.AsFpuRegister(), source.AsFpuRegister()); } else { DCHECK(destination.IsStackSlot()); - __ LoadImmediate(IP, value); + __ StoreSToOffset(source.AsFpuRegister(), SP, destination.GetStackIndex()); + } + } else if (source.IsFpuRegisterPair()) { + if (destination.IsFpuRegisterPair()) { + __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow()), + FromLowSToD(source.AsFpuRegisterPairLow())); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow()), + SP, destination.GetStackIndex()); + } + } else if (source.IsDoubleStackSlot()) { + if (destination.IsFpuRegisterPair()) { + __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow()), + SP, source.GetStackIndex()); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + } + } else { + DCHECK(source.IsConstant()) << source; + HInstruction* constant = source.GetConstant(); + if (constant->IsIntConstant()) { + int32_t value = constant->AsIntConstant()->GetValue(); + if (destination.IsRegister()) { + __ LoadImmediate(destination.AsRegister(), value); + } else { + DCHECK(destination.IsStackSlot()); + __ LoadImmediate(IP, value); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + } + } else { + DCHECK(constant->IsFloatConstant()); + float value = constant->AsFloatConstant()->GetValue(); + if (destination.IsFpuRegister()) { + __ LoadSImmediate(destination.AsFpuRegister(), value); + } else { + DCHECK(destination.IsStackSlot()); + __ LoadImmediate(IP, bit_cast(value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + } } } } @@ -3300,6 +3400,20 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { Exchange(destination.AsRegister(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { Exchange(source.GetStackIndex(), destination.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { + __ vmovrs(IP, source.AsFpuRegister()); + __ vmovs(source.AsFpuRegister(), destination.AsFpuRegister()); + __ vmovsr(destination.AsFpuRegister(), IP); + } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { + SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister() + : destination.AsFpuRegister(); + int mem = source.IsFpuRegister() + ? destination.GetStackIndex() + : source.GetStackIndex(); + + __ vmovrs(IP, reg); + __ LoadSFromOffset(reg, SP, mem); + __ StoreToOffset(kStoreWord, IP, SP, mem); } else { LOG(FATAL) << "Unimplemented"; } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 40f4edc4e..c1b4eda3a 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -168,6 +168,8 @@ class CodeGeneratorARM : public CodeGenerator { void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t GetWordSize() const OVERRIDE { return kArmWordSize; @@ -237,6 +239,10 @@ class CodeGeneratorARM : public CodeGenerator { return isa_features_; } + bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { + return type == Primitive::kPrimDouble || type == Primitive::kPrimLong; + } + private: // Labels for each block that will be compiled. GrowableArray