From 102cbed1e52b7c5f09458b44903fe97bb3e14d5f Mon Sep 17 00:00:00 2001 From: Nicolas Geoffray Date: Wed, 15 Oct 2014 18:31:05 +0100 Subject: [PATCH] Implement register allocator for floating point registers. Also: - Fix misuses of emitting the rex prefix in the x86_64 assembler. - Fix movaps code generation in the x86_64 assembler. Change-Id: Ib6dcf6e7c4a9c43368cfc46b02ba50f69ae69cbe --- compiler/optimizing/code_generator.cc | 34 ++--- compiler/optimizing/code_generator.h | 15 +- compiler/optimizing/code_generator_arm.cc | 30 +++- compiler/optimizing/code_generator_arm.h | 4 +- compiler/optimizing/code_generator_x86.cc | 30 +++- compiler/optimizing/code_generator_x86.h | 4 +- compiler/optimizing/code_generator_x86_64.cc | 184 +++++++++++++++++++++---- compiler/optimizing/code_generator_x86_64.h | 8 +- compiler/optimizing/graph_visualizer.cc | 18 ++- compiler/optimizing/nodes.cc | 25 ++++ compiler/optimizing/nodes.h | 72 +++++++++- compiler/optimizing/register_allocator.cc | 125 ++++++++++++----- compiler/optimizing/register_allocator.h | 8 +- compiler/optimizing/ssa_builder.cc | 106 +++++++++++++- compiler/optimizing/ssa_builder.h | 4 + compiler/optimizing/ssa_liveness_analysis.cc | 18 ++- compiler/optimizing/ssa_liveness_analysis.h | 15 +- compiler/optimizing/ssa_phi_elimination.cc | 15 +- compiler/optimizing/ssa_type_propagation.cc | 28 +++- compiler/optimizing/ssa_type_propagation.h | 1 + compiler/utils/assembler_test.h | 2 +- compiler/utils/x86_64/assembler_x86_64.cc | 10 +- compiler/utils/x86_64/assembler_x86_64.h | 3 +- compiler/utils/x86_64/assembler_x86_64_test.cc | 26 ++++ runtime/check_reference_map_visitor.h | 6 +- runtime/stack_map.h | 1 + test/410-floats/src/Main.java | 38 ++++- 27 files changed, 688 insertions(+), 142 deletions(-) diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 408e13e36..d5cd490d1 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -473,8 
+473,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { case Location::kRegister : { int id = location.reg(); stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); - if (current->GetType() == Primitive::kPrimDouble - || current->GetType() == Primitive::kPrimLong) { + if (current->GetType() == Primitive::kPrimLong) { stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); ++i; DCHECK_LT(i, environment_size); @@ -482,52 +481,55 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { break; } + case Location::kFpuRegister : { + int id = location.reg(); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id); + if (current->GetType() == Primitive::kPrimDouble) { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id); + ++i; + DCHECK_LT(i, environment_size); + } + break; + } + default: LOG(FATAL) << "Unexpected kind " << location.GetKind(); } } } -size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) { - return first_register_slot_in_slow_path_ + index * GetWordSize(); -} - void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) { RegisterSet* register_set = locations->GetLiveRegisters(); - uint32_t count = 0; + size_t stack_offset = first_register_slot_in_slow_path_; for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { if (register_set->ContainsCoreRegister(i)) { - size_t stack_offset = GetStackOffsetOfSavedRegister(count); - ++count; - SaveCoreRegister(Location::StackSlot(stack_offset), i); // If the register holds an object, update the stack mask. 
if (locations->RegisterContainsObject(i)) { locations->SetStackBit(stack_offset / kVRegSize); } + stack_offset += SaveCoreRegister(stack_offset, i); } } for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (register_set->ContainsFloatingPointRegister(i)) { - LOG(FATAL) << "Unimplemented"; + stack_offset += SaveFloatingPointRegister(stack_offset, i); } } } void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) { RegisterSet* register_set = locations->GetLiveRegisters(); - uint32_t count = 0; + size_t stack_offset = first_register_slot_in_slow_path_; for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { if (register_set->ContainsCoreRegister(i)) { - size_t stack_offset = GetStackOffsetOfSavedRegister(count); - ++count; - RestoreCoreRegister(Location::StackSlot(stack_offset), i); + stack_offset += RestoreCoreRegister(stack_offset, i); } } for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (register_set->ContainsFloatingPointRegister(i)) { - LOG(FATAL) << "Unimplemented"; + stack_offset += RestoreFloatingPointRegister(stack_offset, i); } } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 7aaf99108..220d74556 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -110,8 +110,18 @@ class CodeGenerator : public ArenaObject { virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; virtual InstructionSet GetInstructionSet() const = 0; - virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) = 0; - virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) = 0; + // Saves the register in the stack. Returns the size taken on stack. + virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; + // Restores the register from the stack. Returns the size taken on stack. 
+ virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; + virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + LOG(FATAL) << "Unimplemented"; + return 0u; + } + virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + LOG(FATAL) << "Unimplemented"; + return 0u; + } void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); @@ -145,6 +155,7 @@ class CodeGenerator : public ArenaObject { void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const; bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } + bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } protected: CodeGenerator(HGraph* graph, diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ebb1d6ae4..24b7c2dd4 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -210,12 +210,14 @@ void CodeGeneratorARM::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << ArmManagedRegister::FromDRegister(DRegister(reg)); } -void CodeGeneratorARM::SaveCoreRegister(Location stack_location, uint32_t reg_id) { - __ StoreToOffset(kStoreWord, static_cast(reg_id), SP, stack_location.GetStackIndex()); +size_t CodeGeneratorARM::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ StoreToOffset(kStoreWord, static_cast(reg_id), SP, stack_index); + return kArmWordSize; } -void CodeGeneratorARM::RestoreCoreRegister(Location stack_location, uint32_t reg_id) { - __ LoadFromOffset(kLoadWord, static_cast(reg_id), SP, stack_location.GetStackIndex()); +size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + __ LoadFromOffset(kLoadWord, static_cast(reg_id), SP, stack_index); + return kArmWordSize; } CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) @@ -859,6 +861,26 @@ void InstructionCodeGeneratorARM::VisitLongConstant(HLongConstant* constant) 
{ // Will be generated at use site. } +void LocationsBuilderARM::VisitFloatConstant(HFloatConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM::VisitFloatConstant(HFloatConstant* constant) { + // Will be generated at use site. +} + +void LocationsBuilderARM::VisitDoubleConstant(HDoubleConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant) { + // Will be generated at use site. +} + void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 57b289c80..1fe8a7eac 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -142,8 +142,8 @@ class CodeGeneratorARM : public CodeGenerator { virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(HBasicBlock* block) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; - virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; - virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; + virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; virtual size_t GetWordSize() const OVERRIDE { return kArmWordSize; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index cc2be82b9..2550518db 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc 
@@ -182,12 +182,14 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg)); } -void CodeGeneratorX86::SaveCoreRegister(Location stack_location, uint32_t reg_id) { - __ movl(Address(ESP, stack_location.GetStackIndex()), static_cast(reg_id)); +size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ movl(Address(ESP, stack_index), static_cast(reg_id)); + return kX86WordSize; } -void CodeGeneratorX86::RestoreCoreRegister(Location stack_location, uint32_t reg_id) { - __ movl(static_cast(reg_id), Address(ESP, stack_location.GetStackIndex())); +size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + __ movl(static_cast(reg_id), Address(ESP, stack_index)); + return kX86WordSize; } CodeGeneratorX86::CodeGeneratorX86(HGraph* graph) @@ -795,6 +797,26 @@ void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant) { // Will be generated at use site. } +void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant) { + // Will be generated at use site. +} + +void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant) { + // Will be generated at use site. 
+} + void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index db8b9abd9..fff91d179 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -144,8 +144,8 @@ class CodeGeneratorX86 : public CodeGenerator { virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(HBasicBlock* block) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; - virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; - virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; + virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; virtual size_t GetWordSize() const OVERRIDE { return kX86WordSize; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 9df9d417d..9e63f8bc5 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -191,12 +191,24 @@ void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int re stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg)); } -void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) { - __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id)); +size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id)); + return kX86_64WordSize; } -void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) { - __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex())); +size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t 
stack_index, uint32_t reg_id) { + __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + return kX86_64WordSize; +} + +size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); + return kX86_64WordSize; +} + +size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + return kX86_64WordSize; } CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph) @@ -727,6 +739,26 @@ void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) // Will be generated at use site. } +void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant) { + // Will be generated at use site. +} + +void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86_64::VisitDoubleConstant(HDoubleConstant* constant) { + // Will be generated at use site. 
+} + void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } @@ -995,7 +1027,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: case Primitive::kPrimFloat: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::Any()); + locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -1032,21 +1064,12 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { } case Primitive::kPrimFloat: { - if (second.IsFpuRegister()) { - __ addss(first.As(), second.As()); - } else { - __ addss(first.As(), - Address(CpuRegister(RSP), second.GetStackIndex())); - } + __ addss(first.As(), second.As()); break; } case Primitive::kPrimDouble: { - if (second.IsFpuRegister()) { - __ addsd(first.As(), second.As()); - } else { - __ addsd(first.As(), Address(CpuRegister(RSP), second.GetStackIndex())); - } + __ addsd(first.As(), second.As()); break; } @@ -1482,10 +1505,30 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + XmmRegister out = locations->Out().As(); + if (index.IsConstant()) { + __ movss(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); + } else { + __ movss(out, Address(obj, index.As(), TIMES_4, data_offset)); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + XmmRegister out = locations->Out().As(); + if (index.IsConstant()) { + __ movsd(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); + } else { + __ movsd(out, Address(obj, 
index.As(), TIMES_8, data_offset)); + } + break; + } + case Primitive::kPrimVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -1509,6 +1552,8 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { locations->SetInAt(2, Location::RequiresRegister()); if (value_type == Primitive::kPrimLong) { locations->SetInAt(2, Location::RequiresRegister()); + } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) { + locations->SetInAt(2, Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } @@ -1581,6 +1626,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movl(Address(obj, index.As(), TIMES_4, data_offset), value.As()); } else { + DCHECK(value.IsConstant()) << value; __ movl(Address(obj, index.As(), TIMES_4, data_offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } @@ -1609,10 +1655,34 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + DCHECK(value.IsFpuRegister()); + __ movss(Address(obj, offset), value.As()); + } else { + DCHECK(value.IsFpuRegister()); + __ movss(Address(obj, index.As(), TIMES_4, data_offset), + value.As()); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + DCHECK(value.IsFpuRegister()); + __ movsd(Address(obj, offset), 
value.As()); + } else { + DCHECK(value.IsFpuRegister()); + __ movsd(Address(obj, index.As(), TIMES_8, data_offset), + value.As()); + } + break; + } + case Primitive::kPrimVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -1746,6 +1816,9 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (destination.IsRegister()) { __ movl(destination.As(), Address(CpuRegister(RSP), source.GetStackIndex())); + } else if (destination.IsFpuRegister()) { + __ movss(destination.As(), + Address(CpuRegister(RSP), source.GetStackIndex())); } else { DCHECK(destination.IsStackSlot()); __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -1755,6 +1828,8 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (destination.IsRegister()) { __ movq(destination.As(), Address(CpuRegister(RSP), source.GetStackIndex())); + } else if (destination.IsFpuRegister()) { + __ movsd(destination.As(), Address(CpuRegister(RSP), source.GetStackIndex())); } else { DCHECK(destination.IsDoubleStackSlot()); __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -1767,6 +1842,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (destination.IsRegister()) { __ movl(destination.As(), imm); } else { + DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); } } else if (constant->IsLongConstant()) { @@ -1774,14 +1850,42 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (destination.IsRegister()) { __ movq(destination.As(), Immediate(value)); } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; __ movq(CpuRegister(TMP), Immediate(value)); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } + } else if (constant->IsFloatConstant()) { + Immediate imm(bit_cast(constant->AsFloatConstant()->GetValue())); + if (destination.IsFpuRegister()) { + __ movl(CpuRegister(TMP), imm); + __ 
movd(destination.As(), CpuRegister(TMP)); + } else { + DCHECK(destination.IsStackSlot()) << destination; + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); + } } else { - LOG(FATAL) << "Unimplemented constant type"; + DCHECK(constant->IsDoubleConstant()) << constant->DebugName(); + Immediate imm(bit_cast(constant->AsDoubleConstant()->GetValue())); + if (destination.IsFpuRegister()) { + __ movq(CpuRegister(TMP), imm); + __ movd(destination.As(), CpuRegister(TMP)); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ movq(CpuRegister(TMP), imm); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } + } + } else if (source.IsFpuRegister()) { + if (destination.IsFpuRegister()) { + __ movaps(destination.As(), source.As()); + } else if (destination.IsStackSlot()) { + __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.As()); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.As()); } - } else { - LOG(FATAL) << "Unimplemented"; } } @@ -1823,6 +1927,18 @@ void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { CpuRegister(ensure_scratch.GetRegister())); } +void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); + __ movss(Address(CpuRegister(RSP), mem), reg); + __ movd(reg, CpuRegister(TMP)); +} + +void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); + __ movsd(Address(CpuRegister(RSP), mem), reg); + __ movd(reg, CpuRegister(TMP)); +} + void ParallelMoveResolverX86_64::EmitSwap(size_t index) { MoveOperands* move = moves_.Get(index); Location source = move->GetSource(); @@ -1842,8 +1958,20 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Exchange64(destination.As(), source.GetStackIndex()); } else if 
(source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { Exchange64(destination.GetStackIndex(), source.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { + __ movd(CpuRegister(TMP), source.As()); + __ movaps(source.As(), destination.As()); + __ movd(destination.As(), CpuRegister(TMP)); + } else if (source.IsFpuRegister() && destination.IsStackSlot()) { + Exchange32(source.As(), destination.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsFpuRegister()) { + Exchange32(destination.As(), source.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { + Exchange64(source.As(), destination.GetStackIndex()); + } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) { + Exchange64(destination.As(), source.GetStackIndex()); } else { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination; } } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 5ac0189b5..e04a8d8ab 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -80,8 +80,10 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver { private: void Exchange32(CpuRegister reg, int mem); + void Exchange32(XmmRegister reg, int mem); void Exchange32(int mem1, int mem2); void Exchange64(CpuRegister reg, int mem); + void Exchange64(XmmRegister reg, int mem); void Exchange64(int mem1, int mem2); CodeGeneratorX86_64* const codegen_; @@ -146,8 +148,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(HBasicBlock* block) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; - virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; - virtual void RestoreCoreRegister(Location stack_location, 
uint32_t reg_id) OVERRIDE; + virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; virtual size_t GetWordSize() const OVERRIDE { return kX86_64WordSize; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 459010dcf..4ed215624 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -120,13 +120,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_<< std::endl; } - void DumpLocation(Location location, Primitive::Type type) { + void DumpLocation(Location location) { if (location.IsRegister()) { - if (type == Primitive::kPrimDouble || type == Primitive::kPrimFloat) { - codegen_.DumpFloatingPointRegister(output_, location.reg()); - } else { - codegen_.DumpCoreRegister(output_, location.reg()); - } + codegen_.DumpCoreRegister(output_, location.reg()); + } else if (location.IsFpuRegister()) { + codegen_.DumpFloatingPointRegister(output_, location.reg()); } else if (location.IsConstant()) { output_ << "constant"; HConstant* constant = location.GetConstant(); @@ -150,9 +148,9 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << " ("; for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) { MoveOperands* move = instruction->MoveOperandsAt(i); - DumpLocation(move->GetSource(), Primitive::kPrimInt); + DumpLocation(move->GetSource()); output_ << " -> "; - DumpLocation(move->GetDestination(), Primitive::kPrimInt); + DumpLocation(move->GetDestination()); if (i + 1 != e) { output_ << ", "; } @@ -183,13 +181,13 @@ class HGraphVisualizerPrinter : public HGraphVisitor { if (locations != nullptr) { output_ << " ( "; for (size_t i = 0; i < instruction->InputCount(); ++i) 
{ - DumpLocation(locations->InAt(i), instruction->InputAt(i)->GetType()); + DumpLocation(locations->InAt(i)); output_ << " "; } output_ << ")"; if (locations->Out().IsValid()) { output_ << " -> "; - DumpLocation(locations->Out(), instruction->GetType()); + DumpLocation(locations->Out()); } } output_ << " (liveness: " << instruction->GetLifetimePosition() << ")"; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index a219b97cc..050551021 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -363,6 +363,25 @@ void HBasicBlock::AddPhi(HPhi* phi) { Add(&phis_, this, phi); } +void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) { + DCHECK_EQ(phi->GetId(), -1); + DCHECK_NE(cursor->GetId(), -1); + DCHECK_EQ(cursor->GetBlock(), this); + if (cursor->next_ == nullptr) { + cursor->next_ = phi; + phi->previous_ = cursor; + DCHECK(phi->next_ == nullptr); + } else { + phi->next_ = cursor->next_; + phi->previous_ = cursor; + cursor->next_ = phi; + phi->next_->previous_ = phi; + } + phi->SetBlock(this); + phi->SetId(GetGraph()->GetNextInstructionId()); + UpdateInputsUsers(phi); +} + static void Remove(HInstructionList* instruction_list, HBasicBlock* block, HInstruction* instruction) { @@ -531,6 +550,12 @@ void HInstruction::ReplaceWith(HInstruction* other) { env_uses_ = nullptr; } +void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { + InputAt(index)->RemoveUser(this, index); + SetRawInputAt(index, replacement); + replacement->AddUseAt(this, index); +} + size_t HInstruction::EnvironmentSize() const { return HasEnvironment() ? 
environment_->Size() : 0; } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 3f29e53d6..7c933aa4f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -399,6 +399,7 @@ class HBasicBlock : public ArenaObject { void ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement); void AddPhi(HPhi* phi); + void InsertPhiAfter(HPhi* instruction, HPhi* cursor); void RemovePhi(HPhi* phi); bool IsLoopHeader() const { @@ -503,7 +504,9 @@ class HBasicBlock : public ArenaObject { M(Temporary, Instruction) \ M(SuspendCheck, Instruction) \ M(Mul, BinaryOperation) \ - M(Neg, UnaryOperation) + M(Neg, UnaryOperation) \ + M(FloatConstant, Constant) \ + M(DoubleConstant, Constant) \ #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ @@ -710,6 +713,7 @@ class HInstruction : public ArenaObject { void SetLocations(LocationSummary* locations) { locations_ = locations; } void ReplaceWith(HInstruction* instruction); + void ReplaceInput(HInstruction* replacement, size_t index); bool HasOnlyOneUse() const { return uses_ != nullptr && uses_->GetTail() == nullptr; @@ -995,8 +999,8 @@ class HExpression : public HTemplateInstruction { virtual Primitive::Type GetType() const { return type_; } - private: - const Primitive::Type type_; + protected: + Primitive::Type type_; }; // Represents dex's RETURN_VOID opcode. 
A HReturnVoid is a control flow @@ -1401,6 +1405,48 @@ class HConstant : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HConstant); }; +class HFloatConstant : public HConstant { + public: + explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {} + + float GetValue() const { return value_; } + + virtual bool InstructionDataEquals(HInstruction* other) const { + return bit_cast(other->AsFloatConstant()->value_) == + bit_cast(value_); + } + + virtual size_t ComputeHashCode() const { return static_cast(GetValue()); } + + DECLARE_INSTRUCTION(FloatConstant); + + private: + const float value_; + + DISALLOW_COPY_AND_ASSIGN(HFloatConstant); +}; + +class HDoubleConstant : public HConstant { + public: + explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {} + + double GetValue() const { return value_; } + + virtual bool InstructionDataEquals(HInstruction* other) const { + return bit_cast(other->AsDoubleConstant()->value_) == + bit_cast(value_); + } + + virtual size_t ComputeHashCode() const { return static_cast(GetValue()); } + + DECLARE_INSTRUCTION(DoubleConstant); + + private: + const double value_; + + DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); +}; + // Constants of the type int. Those can be from Dex instructions, or // synthesized (for example with the if-eqz instruction). 
class HIntConstant : public HConstant { @@ -1794,6 +1840,7 @@ class HArrayGet : public HExpression<2> { virtual bool CanBeMoved() const { return true; } virtual bool InstructionDataEquals(HInstruction* other) const { return true; } + void SetType(Primitive::Type type) { type_ = type; } DECLARE_INSTRUCTION(ArrayGet); @@ -1806,11 +1853,11 @@ class HArraySet : public HTemplateInstruction<3> { HArraySet(HInstruction* array, HInstruction* index, HInstruction* value, - Primitive::Type component_type, + Primitive::Type expected_component_type, uint32_t dex_pc) : HTemplateInstruction(SideEffects::ChangesSomething()), dex_pc_(dex_pc), - component_type_(component_type) { + expected_component_type_(expected_component_type) { SetRawInputAt(0, array); SetRawInputAt(1, index); SetRawInputAt(2, value); @@ -1824,13 +1871,24 @@ class HArraySet : public HTemplateInstruction<3> { uint32_t GetDexPc() const { return dex_pc_; } - Primitive::Type GetComponentType() const { return component_type_; } + HInstruction* GetValue() const { return InputAt(2); } + + Primitive::Type GetComponentType() const { + // The Dex format does not type floating point index operations. Since the + // `expected_component_type_` is set during building and may therefore be + // incorrect, we also check the type of the value. If it is a floating + // point type, we must use that type. + Primitive::Type value_type = GetValue()->GetType(); + return ((value_type == Primitive::kPrimFloat) || (value_type == Primitive::kPrimDouble)) + ?
value_type + : expected_component_type_; + } DECLARE_INSTRUCTION(ArraySet); private: const uint32_t dex_pc_; - const Primitive::Type component_type_; + const Primitive::Type expected_component_type_; DISALLOW_COPY_AND_ASSIGN(HArraySet); }; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 719c06972..3b51bfb2d 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -37,18 +37,21 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, handled_(allocator, 0), active_(allocator, 0), inactive_(allocator, 0), - physical_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()), + physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()), + physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()), temp_intervals_(allocator, 4), spill_slots_(allocator, kDefaultNumberOfSpillSlots), safepoints_(allocator, 0), processing_core_registers_(false), number_of_registers_(-1), registers_array_(nullptr), - blocked_registers_(codegen->GetBlockedCoreRegisters()), + blocked_core_registers_(codegen->GetBlockedCoreRegisters()), + blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()), reserved_out_slots_(0), maximum_number_of_live_registers_(0) { codegen->SetupBlockedRegisters(); - physical_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); + physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); + physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters()); // Always reserve for the current method and the graph's max out registers. // TODO: compute it instead. 
reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); @@ -65,8 +68,10 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, it.Advance()) { HInstruction* current = it.Current(); if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false; - if (current->GetType() == Primitive::kPrimFloat) return false; - if (current->GetType() == Primitive::kPrimDouble) return false; + if ((current->GetType() == Primitive::kPrimFloat || current->GetType() == Primitive::kPrimDouble) + && instruction_set != kX86_64) { + return false; + } } } return true; @@ -93,14 +98,22 @@ void RegisterAllocator::AllocateRegisters() { void RegisterAllocator::BlockRegister(Location location, size_t start, - size_t end, - Primitive::Type type) { + size_t end) { int reg = location.reg(); - LiveInterval* interval = physical_register_intervals_.Get(reg); + DCHECK(location.IsRegister() || location.IsFpuRegister()); + LiveInterval* interval = location.IsRegister() + ? physical_core_register_intervals_.Get(reg) + : physical_fp_register_intervals_.Get(reg); + Primitive::Type type = location.IsRegister() + ? 
Primitive::kPrimInt + : Primitive::kPrimDouble; if (interval == nullptr) { interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); - physical_register_intervals_.Put(reg, interval); - inactive_.Add(interval); + if (location.IsRegister()) { + physical_core_register_intervals_.Put(reg, interval); + } else { + physical_fp_register_intervals_.Put(reg, interval); + } } DCHECK(interval->GetRegister() == reg); interval->AddRange(start, end); @@ -123,8 +136,17 @@ void RegisterAllocator::AllocateRegistersInternal() { registers_array_ = allocator_->AllocArray(number_of_registers_); processing_core_registers_ = true; unhandled_ = &unhandled_core_intervals_; + for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_core_register_intervals_.Get(i); + if (fixed != nullptr) { + inactive_.Add(fixed); + } + } LinearScan(); + size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_; + maximum_number_of_live_registers_ = 0; + inactive_.Reset(); active_.Reset(); handled_.Reset(); @@ -133,9 +155,14 @@ void RegisterAllocator::AllocateRegistersInternal() { registers_array_ = allocator_->AllocArray(number_of_registers_); processing_core_registers_ = false; unhandled_ = &unhandled_fp_intervals_; - // TODO: Enable FP register allocation. 
- DCHECK(unhandled_->IsEmpty()); + for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_fp_register_intervals_.Get(i); + if (fixed != nullptr) { + inactive_.Add(fixed); + } + } LinearScan(); + maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers; } void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { @@ -148,8 +175,9 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { for (size_t i = 0; i < locations->GetTempCount(); ++i) { Location temp = locations->GetTemp(i); if (temp.IsRegister()) { - BlockRegister(temp, position, position + 1, Primitive::kPrimInt); + BlockRegister(temp, position, position + 1); } else { + DCHECK(temp.IsUnallocated()); LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); temp_intervals_.Add(interval); interval->AddRange(position, position + 1); @@ -160,10 +188,6 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { bool core_register = (instruction->GetType() != Primitive::kPrimDouble) && (instruction->GetType() != Primitive::kPrimFloat); - GrowableArray& unhandled = core_register - ? unhandled_core_intervals_ - : unhandled_fp_intervals_; - if (locations->CanCall()) { if (!instruction->IsSuspendCheck()) { codegen_->MarkNotLeaf(); @@ -180,7 +204,8 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { // maximum before updating locations. 
LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction); interval->AddRange(position, position + 1); - unhandled.Add(interval); + unhandled_core_intervals_.Add(interval); + unhandled_fp_intervals_.Add(interval); } } @@ -189,21 +214,29 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { BlockRegister(Location::RegisterLocation(i), position, - position + 1, - Primitive::kPrimInt); + position + 1); + } + for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { + BlockRegister(Location::FpuRegisterLocation(i), + position, + position + 1); } } for (size_t i = 0; i < instruction->InputCount(); ++i) { Location input = locations->InAt(i); - if (input.IsRegister()) { - BlockRegister(input, position, position + 1, instruction->InputAt(i)->GetType()); + if (input.IsRegister() || input.IsFpuRegister()) { + BlockRegister(input, position, position + 1); } } LiveInterval* current = instruction->GetLiveInterval(); if (current == nullptr) return; + GrowableArray& unhandled = core_register + ? unhandled_core_intervals_ + : unhandled_fp_intervals_; + DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); // Some instructions define their output in fixed register/stack slot. We need // to ensure we know these locations before doing register allocation. For a @@ -213,11 +246,11 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { // // The backwards walking ensures the ranges are ordered on increasing start positions. Location output = locations->Out(); - if (output.IsRegister()) { + if (output.IsRegister() || output.IsFpuRegister()) { // Shift the interval's start by one to account for the blocked register. 
current->SetFrom(position + 1); current->SetRegister(output.reg()); - BlockRegister(output, position, position + 1, instruction->GetType()); + BlockRegister(output, position, position + 1); } else if (!locations->OutputOverlapsWithInputs()) { // Shift the interval's start by one to not interfere with the inputs. current->SetFrom(position + 1); @@ -281,10 +314,19 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { } } - for (size_t i = 0, e = physical_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_register_intervals_.Get(i); - if (fixed != nullptr && ShouldProcess(processing_core_registers_, fixed)) { - intervals.Add(fixed); + if (processing_core_registers_) { + for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_core_register_intervals_.Get(i); + if (fixed != nullptr) { + intervals.Add(fixed); + } + } + } else { + for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_fp_register_intervals_.Get(i); + if (fixed != nullptr) { + intervals.Add(fixed); + } } } @@ -377,10 +419,10 @@ void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interva interval->Dump(stream); stream << ": "; if (interval->HasRegister()) { - if (processing_core_registers_) { - codegen_->DumpCoreRegister(stream, interval->GetRegister()); - } else { + if (interval->IsFloatingPoint()) { codegen_->DumpFloatingPointRegister(stream, interval->GetRegister()); + } else { + codegen_->DumpCoreRegister(stream, interval->GetRegister()); } } else { stream << "spilled"; @@ -522,10 +564,9 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } bool RegisterAllocator::IsBlocked(int reg) const { - // TODO: This only works for core registers and needs to be adjusted for - // floating point registers. - DCHECK(processing_core_registers_); - return blocked_registers_[reg]; + return processing_core_registers_ + ? 
blocked_core_registers_[reg] + : blocked_fp_registers_[reg]; } // Find the register that is used the last, and spill the interval @@ -727,7 +768,10 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { } static bool IsValidDestination(Location destination) { - return destination.IsRegister() || destination.IsStackSlot() || destination.IsDoubleStackSlot(); + return destination.IsRegister() + || destination.IsFpuRegister() + || destination.IsStackSlot() + || destination.IsDoubleStackSlot(); } void RegisterAllocator::AddInputMoveFor(HInstruction* user, @@ -877,7 +921,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { if (current->HasSpillSlot() && current->HasRegister()) { // We spill eagerly, so move must be at definition. InsertMoveAfter(interval->GetDefinedBy(), - Location::RegisterLocation(interval->GetRegister()), + interval->IsFloatingPoint() + ? Location::FpuRegisterLocation(interval->GetRegister()) + : Location::RegisterLocation(interval->GetRegister()), interval->NeedsTwoSpillSlots() ? 
Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) : Location::StackSlot(interval->GetParent()->GetSpillSlot())); @@ -935,6 +981,10 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } break; } + case Location::kFpuRegister: { + locations->AddLiveRegister(source); + break; + } case Location::kStackSlot: // Fall-through case Location::kDoubleStackSlot: // Fall-through case Location::kConstant: { @@ -1098,6 +1148,7 @@ void RegisterAllocator::Resolve() { current = at; } LocationSummary* locations = at->GetLocations(); + DCHECK(temp->GetType() == Primitive::kPrimInt); locations->SetTempAt( temp_index++, Location::RegisterLocation(temp->GetRegister())); } diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 0c3a9b381..b88153969 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -94,7 +94,7 @@ class RegisterAllocator { bool IsBlocked(int reg) const; // Update the interval for the register in `location` to cover [start, end). - void BlockRegister(Location location, size_t start, size_t end, Primitive::Type type); + void BlockRegister(Location location, size_t start, size_t end); // Allocate a spill slot for the given interval. void AllocateSpillSlotFor(LiveInterval* interval); @@ -156,7 +156,8 @@ class RegisterAllocator { // Fixed intervals for physical registers. Such intervals cover the positions // where an instruction requires a specific register. - GrowableArray physical_register_intervals_; + GrowableArray physical_core_register_intervals_; + GrowableArray physical_fp_register_intervals_; // Intervals for temporaries. Such intervals cover the positions // where an instruction requires a temporary. @@ -179,7 +180,8 @@ class RegisterAllocator { size_t* registers_array_; // Blocked registers, as decided by the code generator. 
- bool* const blocked_registers_; + bool* const blocked_core_registers_; + bool* const blocked_fp_registers_; // Slots reserved for out arguments. size_t reserved_out_slots_; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index be2c03957..a0cc8a94e 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -129,8 +129,112 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { } } +/** + * Constants in the Dex format are not typed. So the builder types them as + * integers, but when doing the SSA form, we might realize the constant + * is used for floating point operations. We create a floating-point equivalent + * constant to make the operations correctly typed. + */ +static HFloatConstant* GetFloatEquivalent(HIntConstant* constant) { + // We place the floating point constant next to this constant. + HFloatConstant* result = constant->GetNext()->AsFloatConstant(); + if (result == nullptr) { + HGraph* graph = constant->GetBlock()->GetGraph(); + ArenaAllocator* allocator = graph->GetArena(); + result = new (allocator) HFloatConstant(bit_cast(constant->GetValue())); + constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); + } else { + // If there is already a constant with the expected type, we know it is + // the floating point equivalent of this constant. + DCHECK_EQ((bit_cast(result->GetValue())), constant->GetValue()); + } + return result; +} + +/** + * Wide constants in the Dex format are not typed. So the builder types them as + * longs, but when doing the SSA form, we might realize the constant + * is used for floating point operations. We create a floating-point equivalent + * constant to make the operations correctly typed. + */ +static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) { + // We place the floating point constant next to this constant. 
+ HDoubleConstant* result = constant->GetNext()->AsDoubleConstant(); + if (result == nullptr) { + HGraph* graph = constant->GetBlock()->GetGraph(); + ArenaAllocator* allocator = graph->GetArena(); + result = new (allocator) HDoubleConstant(bit_cast(constant->GetValue())); + constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); + } else { + // If there is already a constant with the expected type, we know it is + // the floating point equivalent of this constant. + DCHECK_EQ((bit_cast(result->GetValue())), constant->GetValue()); + } + return result; +} + +/** + * Because of the Dex format, we might end up having the same phi being + * used for non floating point operations and floating point operations. Because + * we want the graph to be correctly typed (and thereby avoid moves between + * floating point registers and core registers), we need to create a copy of the + * phi with a floating point type. + */ +static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) { + // We place the floating point phi next to this phi. + HInstruction* next = phi->GetNext(); + if (next == nullptr + || (next->GetType() != Primitive::kPrimDouble && next->GetType() != Primitive::kPrimFloat)) { + ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena(); + HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type); + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + // Copy the inputs. Note that the graph may not be correctly typed by doing this copy, + // but the type propagation phase will fix it. + new_phi->SetRawInputAt(i, phi->InputAt(i)); + } + phi->GetBlock()->InsertPhiAfter(new_phi, phi); + return new_phi; + } else { + // If there is already a phi with the expected type, we know it is the floating + // point equivalent of this phi.
+ DCHECK_EQ(next->AsPhi()->GetRegNumber(), phi->GetRegNumber()); + return next->AsPhi(); + } +} + +HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, + HInstruction* value, + Primitive::Type type) { + if (value->IsArrayGet()) { + // The verifier has checked that values in arrays cannot be used for both + // floating point and non-floating point operations. It is therefore safe to just + // change the type of the operation. + value->AsArrayGet()->SetType(type); + return value; + } else if (value->IsLongConstant()) { + return GetDoubleEquivalent(value->AsLongConstant()); + } else if (value->IsIntConstant()) { + return GetFloatEquivalent(value->AsIntConstant()); + } else if (value->IsPhi()) { + return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type); + } else { + // For other instructions, we assume the verifier has checked that the dex format is correctly + // typed and the value in a dex register will not be used for both floating point and + // non-floating point operations. So the only reason an instruction would want a floating + // point equivalent is for an unused phi that will be removed by the dead phi elimination phase. + DCHECK(user->IsPhi()); + return value; + } +} + void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { - load->ReplaceWith(current_locals_->Get(load->GetLocal()->GetRegNumber())); + HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber()); + if (load->GetType() != value->GetType() + && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) { + // If the operation requests a specific type, we make sure its input is of that type. 
+ value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); + } + load->ReplaceWith(value); load->GetBlock()->RemoveInstruction(load); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 9d8c0729a..24f5ac55f 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -52,6 +52,10 @@ class SsaBuilder : public HGraphVisitor { void VisitStoreLocal(HStoreLocal* store); void VisitInstruction(HInstruction* instruction); + static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user, + HInstruction* instruction, + Primitive::Type type); + private: // Locals for the current block being visited. GrowableArray* current_locals_; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index f0edc6422..1e34670d7 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -319,7 +319,7 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { if (user->IsPhi()) { // If the phi has a register, try to use the same. Location phi_location = user->GetLiveInterval()->ToLocation(); - if (phi_location.IsRegister() && free_until[phi_location.reg()] >= use_position) { + if (SameRegisterKind(phi_location) && free_until[phi_location.reg()] >= use_position) { return phi_location.reg(); } const GrowableArray& predecessors = user->GetBlock()->GetPredecessors(); @@ -345,7 +345,7 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { // We use the user's lifetime position - 1 (and not `use_position`) because the // register is blocked at the beginning of the user. 
size_t position = user->GetLifetimePosition() - 1; - if (expected.IsRegister() && free_until[expected.reg()] >= position) { + if (SameRegisterKind(expected) && free_until[expected.reg()] >= position) { return expected.reg(); } } @@ -368,7 +368,7 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the end of the predecessor, we know its register can // be reused. Location input_location = input_interval.ToLocation(); - if (input_location.IsRegister()) { + if (SameRegisterKind(input_location)) { return input_location.reg(); } } @@ -384,7 +384,7 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the start of this instruction, we know its register can // be reused. Location location = input_interval.ToLocation(); - if (location.IsRegister()) { + if (SameRegisterKind(location)) { return location.reg(); } } @@ -393,13 +393,21 @@ int LiveInterval::FindHintAtDefinition() const { return kNoRegister; } +bool LiveInterval::SameRegisterKind(Location other) const { + return IsFloatingPoint() + ? other.IsFpuRegister() + : other.IsRegister(); +} + bool LiveInterval::NeedsTwoSpillSlots() const { return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble; } Location LiveInterval::ToLocation() const { if (HasRegister()) { - return Location::RegisterLocation(GetRegister()); + return IsFloatingPoint() + ? 
Location::FpuRegisterLocation(GetRegister()) + : Location::RegisterLocation(GetRegister()); } else { HInstruction* defined_by = GetParent()->GetDefinedBy(); if (defined_by->IsConstant()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index d3e1c0e81..8ce5ce902 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -358,6 +358,10 @@ class LiveInterval : public ArenaObject { || (location.GetPolicy() == Location::kSameAsFirstInput && locations->InAt(0).GetPolicy() == Location::kRequiresRegister)) { return position; + } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) { + return position; } } } @@ -368,7 +372,9 @@ class LiveInterval : public ArenaObject { size_t use_position = use->GetPosition(); if (use_position >= position && !use->GetIsEnvironment()) { Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); - if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { + if (location.IsUnallocated() + && (location.GetPolicy() == Location::kRequiresRegister + || location.GetPolicy() == Location::kRequiresFpuRegister)) { // Return the lifetime just before the user, so that the interval has a register // when entering the user. return use->GetUser()->GetLifetimePosition() - 1; @@ -502,6 +508,10 @@ class LiveInterval : public ArenaObject { // slots for spilling. bool NeedsTwoSpillSlots() const; + bool IsFloatingPoint() const { + return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble; + } + // Converts the location of the interval to a `Location` object. 
Location ToLocation() const; @@ -513,6 +523,9 @@ class LiveInterval : public ArenaObject { bool IsTemp() const { return is_temp_; } + // Returns whether `other` and `this` share the same kind of register. + bool SameRegisterKind(Location other) const; + private: ArenaAllocator* const allocator_; diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index e02a182ec..4eda0f375 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -24,18 +24,13 @@ void SsaDeadPhiElimination::Run() { HBasicBlock* block = it.Current(); for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - if (phi->HasEnvironmentUses()) { - // TODO: Do we want to keep that phi alive? - worklist_.Add(phi); - phi->SetLive(); - continue; - } for (HUseIterator it(phi->GetUses()); !it.Done(); it.Advance()) { HUseListNode* current = it.Current(); HInstruction* user = current->GetUser(); if (!user->IsPhi()) { worklist_.Add(phi); phi->SetLive(); + break; } else { phi->SetDead(); } @@ -76,6 +71,14 @@ void SsaDeadPhiElimination::Run() { current->RemoveUser(user, user_node->GetIndex()); } } + if (current->HasEnvironmentUses()) { + for (HUseIterator it(current->GetEnvUses()); !it.Done(); it.Advance()) { + HUseListNode* user_node = it.Current(); + HEnvironment* user = user_node->GetUser(); + user->SetRawEnvAt(user_node->GetIndex(), nullptr); + current->RemoveEnvironmentUser(user, user_node->GetIndex()); + } + } block->RemovePhi(current->AsPhi()); } current = next; diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc index a860cb7cf..3828142ed 100644 --- a/compiler/optimizing/ssa_type_propagation.cc +++ b/compiler/optimizing/ssa_type_propagation.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "ssa_builder.h" #include "ssa_type_propagation.h" #include "nodes.h" @@ -38,15 +39,31 @@ static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_ // Re-compute and update the type of the instruction. Returns // whether or not the type was changed. -static bool UpdateType(HPhi* phi) { +bool SsaTypePropagation::UpdateType(HPhi* phi) { Primitive::Type existing = phi->GetType(); - Primitive::Type new_type = Primitive::kPrimVoid; + Primitive::Type new_type = existing; for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { Primitive::Type input_type = phi->InputAt(i)->GetType(); new_type = MergeTypes(new_type, input_type); } phi->SetType(new_type); + + if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) { + // If the phi is of floating point type, we need to update its inputs to that + // type. For inputs that are phis, we need to recompute their types. + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (input->GetType() != new_type) { + HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); + phi->ReplaceInput(equivalent, i); + if (equivalent->IsPhi()) { + AddToWorklist(equivalent->AsPhi()); + } + } + } + } + return existing != new_type; } @@ -63,7 +80,12 @@ void SsaTypePropagation::VisitBasicBlock(HBasicBlock* block) { HPhi* phi = it.Current()->AsPhi(); // Set the initial type for the phi. Use the non back edge input for reaching // a fixed point faster. - phi->SetType(phi->InputAt(0)->GetType()); + Primitive::Type phi_type = phi->GetType(); + // We merge with the existing type, that has been set by the SSA builder. 
+ DCHECK(phi_type == Primitive::kPrimVoid + || phi_type == Primitive::kPrimFloat + || phi_type == Primitive::kPrimDouble); + phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType())); AddToWorklist(phi); } } else { diff --git a/compiler/optimizing/ssa_type_propagation.h b/compiler/optimizing/ssa_type_propagation.h index 5f471a981..f4d3d6344 100644 --- a/compiler/optimizing/ssa_type_propagation.h +++ b/compiler/optimizing/ssa_type_propagation.h @@ -34,6 +34,7 @@ class SsaTypePropagation : public ValueObject { void ProcessWorklist(); void AddToWorklist(HPhi* phi); void AddDependentInstructionsToWorklist(HPhi* phi); + bool UpdateType(HPhi* phi); HGraph* const graph_; GrowableArray worklist_; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 37429131e..5bfa462d7 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -373,7 +373,7 @@ class AssemblerTest : public testing::Test { } } else { // This will output the assembly. 
- EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical."; + EXPECT_EQ(*res.code, *data) << "Outputs (and disassembly) not identical."; } } } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index db7151c3c..f4c9862f9 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -317,7 +317,7 @@ void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) { EmitOptionalRex32(dst, src); EmitUint8(0x0F); EmitUint8(0x28); - EmitXmmRegisterOperand(src.LowBits(), dst); + EmitXmmRegisterOperand(dst.LowBits(), src); } @@ -354,7 +354,7 @@ void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) { void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); - EmitOptionalRex32(dst, src); + EmitRex64(dst, src); EmitUint8(0x0F); EmitUint8(0x6E); EmitOperand(dst.LowBits(), Operand(src)); @@ -364,7 +364,7 @@ void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) { void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); - EmitOptionalRex32(src, dst); + EmitRex64(src, dst); EmitUint8(0x0F); EmitUint8(0x7E); EmitOperand(src.LowBits(), Operand(dst)); @@ -1748,6 +1748,10 @@ void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) { EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex()); } +void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) { + EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex()); +} + void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) { uint8_t rex = 0x48 | operand.rex(); // REX.W000 if (dst.NeedsRex()) { diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 4ffb6b566..7e5859cc4 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ 
-666,6 +666,7 @@ class X86_64Assembler FINAL : public Assembler { void EmitRex64(CpuRegister reg); void EmitRex64(CpuRegister dst, CpuRegister src); void EmitRex64(CpuRegister dst, const Operand& operand); + void EmitRex64(XmmRegister dst, CpuRegister src); // Emit a REX prefix to normalize byte registers plus necessary register bit encodings. void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); @@ -692,7 +693,7 @@ inline void X86_64Assembler::EmitInt64(int64_t value) { inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) { CHECK_GE(rm, 0); CHECK_LT(rm, 8); - buffer_.Emit(0xC0 + (rm << 3) + reg); + buffer_.Emit((0xC0 | (reg & 7)) + (rm << 3)); } inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) { diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 69a5fa0db..37a09328f 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -134,6 +134,32 @@ TEST_F(AssemblerX86_64Test, XorqImm) { DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi"); } +TEST_F(AssemblerX86_64Test, Movaps) { + GetAssembler()->movaps(x86_64::XmmRegister(x86_64::XMM0), x86_64::XmmRegister(x86_64::XMM8)); + DriverStr("movaps %xmm8, %xmm0", "movaps"); +} + +TEST_F(AssemblerX86_64Test, Movd) { + GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::R11)); + GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::R11)); + GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM0)); + GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM0)); + 
GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM8)); + GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM8)); + const char* expected = + "movd %r11, %xmm0\n" + "movd %rax, %xmm0\n" + "movd %r11, %xmm8\n" + "movd %rax, %xmm8\n" + "movd %xmm0, %r11\n" + "movd %xmm0, %rax\n" + "movd %xmm8, %r11\n" + "movd %xmm8, %rax\n"; + DriverStr(expected, "movd"); +} + TEST_F(AssemblerX86_64Test, Movl) { GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::CpuRegister(x86_64::R11)); GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::CpuRegister(x86_64::R11)); diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h index 1a78d7265..8c2293f86 100644 --- a/runtime/check_reference_map_visitor.h +++ b/runtime/check_reference_map_visitor.h @@ -84,8 +84,12 @@ class CheckReferenceMapVisitor : public StackVisitor { case DexRegisterMap::kInRegister: CHECK_NE(register_mask & dex_register_map.GetValue(reg), 0u); break; + case DexRegisterMap::kInFpuRegister: + // In Fpu register, should not be a reference. 
+ CHECK(false); + break; case DexRegisterMap::kConstant: - CHECK_EQ(dex_register_map.GetValue(0), 0); + CHECK_EQ(dex_register_map.GetValue(reg), 0); break; } } diff --git a/runtime/stack_map.h b/runtime/stack_map.h index 9b49d31ed..b1c46a977 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -88,6 +88,7 @@ class DexRegisterMap { kNone, kInStack, kInRegister, + kInFpuRegister, kConstant }; diff --git a/test/410-floats/src/Main.java b/test/410-floats/src/Main.java index d8d6fac30..230045784 100644 --- a/test/410-floats/src/Main.java +++ b/test/410-floats/src/Main.java @@ -17,9 +17,10 @@ public class Main { public static void main(String[] args) { assertEquals(4.2f, returnFloat()); - float[] a = new float[1]; + float[] a = new float[2]; a[0] = 42.2f; - assertEquals(42.2f, returnFloat(a)); + a[1] = 3.2f; + assertEquals(45.4f, returnFloat(a)); assertEquals(4.4, returnDouble()); double[] b = new double[1]; @@ -36,6 +37,9 @@ public class Main { assertEquals(3.1, invokeTakeADouble(3.1)); assertEquals(12.7, invokeTakeThreeDouble(3.1, 4.4, 5.2)); assertEquals(12.7f, invokeTakeThreeFloat(3.1f, 4.4f, 5.2f)); + + testArrayOperations(new float[2], 0, 1.2f, 3.4f); + testArrayOperations(new double[2], 0, 4.1, 7.6); } public static float invokeReturnFloat() { @@ -51,7 +55,7 @@ public class Main { } public static float returnFloat(float[] a) { - return a[0]; + return a[0] + a[1]; } public static double returnDouble() { @@ -94,6 +98,34 @@ public class Main { return takeThreeFloat(a, b, c); } + // Test simple operations on a float array to ensure the register allocator works + // properly. 
+ public static void testArrayOperations(float[] a, int index, float value1, float value2) { + a[0] = value1; + a[1] = value2; + assertEquals(value1 + value2, a[0] + a[1]); + a[0] = 0.0f; + a[1] = 0.0f; + assertEquals(0.0f, a[0] + a[1]); + a[index] = value1; + a[index + 1] = value2; + assertEquals(value1 + value2, a[0] + a[1]); + } + + // Test simple operations on a double array to ensure the register allocator works + // properly. + public static void testArrayOperations(double[] a, int index, double value1, double value2) { + a[0] = value1; + a[1] = value2; + assertEquals(value1 + value2, a[0] + a[1]); + a[0] = 0.0; + a[1] = 0.0; + assertEquals(0.0, a[0] + a[1]); + a[index] = value1; + a[index + 1] = value2; + assertEquals(value1 + value2, a[0] + a[1]); + } + public static void assertEquals(float expected, float actual) { if (expected != actual) { throw new AssertionError("Expected " + expected + " got " + actual); -- 2.11.0