From 09b8463493aeb6ea2bce05f67d3457d5fcc8a7d9 Mon Sep 17 00:00:00 2001 From: Mark Mendell Date: Fri, 13 Feb 2015 17:48:38 -0500 Subject: [PATCH] [optimizing compiler] x86 goodness Implement the x86 version of https://android-review.googlesource.com/#/c/129560/, which made some enhancements to x86_64 code. - Use leal to implement 3 operand adds - Use testl rather than cmpl to 0 for registers - Use leaq for x86_64 for adds with constant in int32_t range Note: - The range and register allocator tests seem quite fragile. I had to change ADD_INT_LIT8 to XOR_INT_LIT8 for the register allocator test to get the code to run. It seems like this is a bit hard-coded to expected code generation sequences. I also changes BuildTwoAdds to BuildTwoSubs for the same reason. - For the live range test, I just changed the expected output, as the Locations were different. Change-Id: I402f2e95ddc8be4eb0befb3dae1b29feadfa29ab Signed-off-by: Mark Mendell --- compiler/optimizing/code_generator_x86.cc | 69 ++++++++++++++++++-------- compiler/optimizing/code_generator_x86_64.cc | 35 +++++++++++-- compiler/optimizing/live_ranges_test.cc | 4 +- compiler/optimizing/register_allocator_test.cc | 60 +++++++++++----------- 4 files changed, 112 insertions(+), 56 deletions(-) diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 7b35cfded..116dd158d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -819,7 +819,7 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { // Materialized condition, compare against 0. Location lhs = if_instr->GetLocations()->InAt(0); if (lhs.IsRegister()) { - __ cmpl(lhs.AsRegister(), Immediate(0)); + __ testl(lhs.AsRegister(), lhs.AsRegister()); } else { __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); } @@ -836,9 +836,12 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister(), rhs.AsRegister()); } else if (rhs.IsConstant()) { - HIntConstant* instruction = rhs.GetConstant()->AsIntConstant(); - Immediate imm(instruction->AsIntConstant()->GetValue()); - __ cmpl(lhs.AsRegister(), imm); + int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + if (constant == 0) { + __ testl(lhs.AsRegister(), lhs.AsRegister()); + } else { + __ cmpl(lhs.AsRegister(), Immediate(constant)); + } } else { __ cmpl(lhs.AsRegister(), Address(ESP, rhs.GetStackIndex())); } @@ -914,16 +917,19 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) { Register reg = locations->Out().AsRegister(); // Clear register: setcc only sets the low byte. __ xorl(reg, reg); - if (locations->InAt(1).IsRegister()) { - __ cmpl(locations->InAt(0).AsRegister(), - locations->InAt(1).AsRegister()); - } else if (locations->InAt(1).IsConstant()) { - HConstant* instruction = locations->InAt(1).GetConstant(); - Immediate imm(CodeGenerator::GetInt32ValueOf(instruction)); - __ cmpl(locations->InAt(0).AsRegister(), imm); + Location lhs = locations->InAt(0); + Location rhs = locations->InAt(1); + if (rhs.IsRegister()) { + __ cmpl(lhs.AsRegister(), rhs.AsRegister()); + } else if (rhs.IsConstant()) { + int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + if (constant == 0) { + __ testl(lhs.AsRegister(), lhs.AsRegister()); + } else { + __ cmpl(lhs.AsRegister(), Immediate(constant)); + } } else { - __ cmpl(locations->InAt(0).AsRegister(), - Address(ESP, locations->InAt(1).GetStackIndex())); + __ cmpl(lhs.AsRegister(), Address(ESP, rhs.GetStackIndex())); } __ setb(X86Condition(comp->GetCondition()), reg); } @@ -1798,7 +1804,13 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { - case Primitive::kPrimInt: + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); @@ -1824,15 +1836,26 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { LocationSummary* locations = add->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); - DCHECK(first.Equals(locations->Out())); + Location out = locations->Out(); + switch (add->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ addl(first.AsRegister(), second.AsRegister()); + if (out.AsRegister() == first.AsRegister()) { + __ addl(out.AsRegister(), second.AsRegister()); + } else { + __ leal(out.AsRegister(), Address( + first.AsRegister(), second.AsRegister(), TIMES_1, 0)); + } } else if (second.IsConstant()) { - __ addl(first.AsRegister(), - Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + int32_t value = second.GetConstant()->AsIntConstant()->GetValue(); + if (out.AsRegister() == first.AsRegister()) { + __ addl(out.AsRegister(), Immediate(value)); + } else { + __ leal(out.AsRegister(), Address(first.AsRegister(), value)); + } } else { + DCHECK(first.Equals(locations->Out())); __ addl(first.AsRegister(), Address(ESP, second.GetStackIndex())); } break; @@ -3502,12 +3525,16 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); if (constant->IsIntConstant() || constant->IsNullConstant()) { - Immediate imm(CodeGenerator::GetInt32ValueOf(constant)); + int32_t value = CodeGenerator::GetInt32ValueOf(constant); if (destination.IsRegister()) { - __ movl(destination.AsRegister(), imm); + if (value == 0) { + __ xorl(destination.AsRegister(), destination.AsRegister()); + } else { + __ movl(destination.AsRegister(), Immediate(value)); + } } else { DCHECK(destination.IsStackSlot()) << destination; - __ movl(Address(ESP, destination.GetStackIndex()), imm); + __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); } } else { DCHECK(constant->IsFloatConstant()); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 74adb3142..adc022a2c 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1868,8 +1868,19 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); + // We can use a leaq or addq if the constant can fit in an immediate. + HInstruction* rhs = add->InputAt(1); + bool is_int32_constant = false; + if (rhs->IsLongConstant()) { + int64_t value = rhs->AsLongConstant()->GetValue(); + if (static_cast(value) == value) { + is_int32_constant = true; + } + } + locations->SetInAt(1, + is_int32_constant ? Location::RegisterOrConstant(rhs) : + Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -1917,7 +1928,25 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { } case Primitive::kPrimLong: { - __ addq(first.AsRegister(), second.AsRegister()); + if (second.IsRegister()) { + if (out.AsRegister() == first.AsRegister()) { + __ addq(out.AsRegister(), second.AsRegister()); + } else { + __ leaq(out.AsRegister(), Address( + first.AsRegister(), second.AsRegister(), TIMES_1, 0)); + } + } else { + DCHECK(second.IsConstant()); + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + int32_t int32_value = Low32Bits(value); + DCHECK_EQ(int32_value, value); + if (out.AsRegister() == first.AsRegister()) { + __ addq(out.AsRegister(), Immediate(int32_value)); + } else { + __ leaq(out.AsRegister(), Address( + first.AsRegister(), int32_value)); + } + } break; } diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 17914e820..c102c4f02 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -399,11 +399,11 @@ TEST(LiveRangesTest, CFG4) { LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); LiveRange* range = interval->GetFirstRange(); ASSERT_EQ(2u, range->GetStart()); - ASSERT_EQ(16u, range->GetEnd()); + ASSERT_EQ(17u, range->GetEnd()); range = range->GetNext(); ASSERT_TRUE(range != nullptr); ASSERT_EQ(20u, range->GetStart()); - ASSERT_EQ(22u, range->GetEnd()); + ASSERT_EQ(23u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); // Test for the 4 constant. diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index e5d06a9f8..b757a3b9b 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -322,9 +322,9 @@ TEST(RegisterAllocatorTest, Loop3) { TEST(RegisterAllocatorTest, FirstRegisterUse) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, - Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8, - Instruction::ADD_INT_LIT8 | 0 << 8, 1 << 8, - Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8 | 1, + Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8, + Instruction::XOR_INT_LIT8 | 0 << 8, 1 << 8, + Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8 | 1, Instruction::RETURN_VOID); ArenaPool pool; @@ -334,27 +334,27 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); - HAdd* first_add = graph->GetBlocks().Get(1)->GetFirstInstruction()->AsAdd(); - HAdd* last_add = graph->GetBlocks().Get(1)->GetLastInstruction()->GetPrevious()->AsAdd(); - ASSERT_EQ(last_add->InputAt(0), first_add); - LiveInterval* interval = first_add->GetLiveInterval(); - ASSERT_EQ(interval->GetEnd(), last_add->GetLifetimePosition()); + HXor* first_xor = graph->GetBlocks().Get(1)->GetFirstInstruction()->AsXor(); + HXor* last_xor = graph->GetBlocks().Get(1)->GetLastInstruction()->GetPrevious()->AsXor(); + ASSERT_EQ(last_xor->InputAt(0), first_xor); + LiveInterval* interval = first_xor->GetLiveInterval(); + ASSERT_EQ(interval->GetEnd(), last_xor->GetLifetimePosition()); ASSERT_TRUE(interval->GetNextSibling() == nullptr); // We need a register for the output of the instruction. - ASSERT_EQ(interval->FirstRegisterUse(), first_add->GetLifetimePosition()); + ASSERT_EQ(interval->FirstRegisterUse(), first_xor->GetLifetimePosition()); // Split at the next instruction. - interval = interval->SplitAt(first_add->GetLifetimePosition() + 2); + interval = interval->SplitAt(first_xor->GetLifetimePosition() + 2); // The user of the split is the last add. - ASSERT_EQ(interval->FirstRegisterUse(), last_add->GetLifetimePosition()); + ASSERT_EQ(interval->FirstRegisterUse(), last_xor->GetLifetimePosition()); // Split before the last add. - LiveInterval* new_interval = interval->SplitAt(last_add->GetLifetimePosition() - 1); + LiveInterval* new_interval = interval->SplitAt(last_xor->GetLifetimePosition() - 1); // Ensure the current interval has no register use... ASSERT_EQ(interval->FirstRegisterUse(), kNoLifetime); // And the new interval has it for the last add. - ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition()); + ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition()); } TEST(RegisterAllocatorTest, DeadPhi) { @@ -634,9 +634,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { } } -static HGraph* BuildTwoAdds(ArenaAllocator* allocator, - HInstruction** first_add, - HInstruction** second_add) { +static HGraph* BuildTwoSubs(ArenaAllocator* allocator, + HInstruction** first_sub, + HInstruction** second_sub) { HGraph* graph = new (allocator) HGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -652,10 +652,10 @@ static HGraph* BuildTwoAdds(ArenaAllocator* allocator, graph->AddBlock(block); entry->AddSuccessor(block); - *first_add = new (allocator) HAdd(Primitive::kPrimInt, parameter, constant1); - block->AddInstruction(*first_add); - *second_add = new (allocator) HAdd(Primitive::kPrimInt, *first_add, constant2); - block->AddInstruction(*second_add); + *first_sub = new (allocator) HSub(Primitive::kPrimInt, parameter, constant1); + block->AddInstruction(*first_sub); + *second_sub = new (allocator) HSub(Primitive::kPrimInt, *first_sub, constant2); + block->AddInstruction(*second_sub); block->AddInstruction(new (allocator) HExit()); return graph; @@ -664,10 +664,10 @@ static HGraph* BuildTwoAdds(ArenaAllocator* allocator, TEST(RegisterAllocatorTest, SameAsFirstInputHint) { ArenaPool pool; ArenaAllocator allocator(&pool); - HInstruction *first_add, *second_add; + HInstruction *first_sub, *second_sub; { - HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add); + HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -676,27 +676,27 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { register_allocator.AllocateRegisters(); // Sanity check that in normal conditions, the registers are the same. - ASSERT_EQ(first_add->GetLiveInterval()->GetRegister(), 1); - ASSERT_EQ(second_add->GetLiveInterval()->GetRegister(), 1); + ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 1); + ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 1); } { - HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add); + HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); // check that both adds get the same register. // Don't use UpdateOutput because output is already allocated. - first_add->InputAt(0)->GetLocations()->output_ = Location::RegisterLocation(2); - ASSERT_EQ(first_add->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); - ASSERT_EQ(second_add->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); + first_sub->InputAt(0)->GetLocations()->output_ = Location::RegisterLocation(2); + ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); + ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.AllocateRegisters(); - ASSERT_EQ(first_add->GetLiveInterval()->GetRegister(), 2); - ASSERT_EQ(second_add->GetLiveInterval()->GetRegister(), 2); + ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2); + ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 2); } } -- 2.11.0