From 222fcf96c9b73bbb739012575e7e413caf9348ec Mon Sep 17 00:00:00 2001 From: Mark Mendell Date: Mon, 30 Mar 2015 14:13:30 -0400 Subject: [PATCH] [optimizing] Improve x86 shifts Support memory operands for integer shifts. Generate better code for long shifts by constants. Change-Id: Icc92fa1b59cc280d4894af6f054e19b01977d5ce Signed-off-by: Mark Mendell --- compiler/optimizing/code_generator_x86.cc | 162 +++++++++++++++++++++++++----- compiler/optimizing/code_generator_x86.h | 3 + compiler/utils/x86/assembler_x86.cc | 70 +++++++++++-- compiler/utils/x86/assembler_x86.h | 12 ++- 4 files changed, 208 insertions(+), 39 deletions(-) diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 845c6c25f..ed47f6524 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -2713,16 +2713,16 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. + locations->SetInAt(0, Location::Any()); + // The shift count needs to be in CL or a constant. locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::RegisterLocation(ECX)); + // The shift count needs to be in CL or a constant. 
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2741,38 +2741,87 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - Register first_reg = first.AsRegister(); - if (second.IsRegister()) { - Register second_reg = second.AsRegister(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - __ shll(first_reg, second_reg); - } else if (op->IsShr()) { - __ sarl(first_reg, second_reg); + if (first.IsRegister()) { + Register first_reg = first.AsRegister(); + if (second.IsRegister()) { + Register second_reg = second.AsRegister(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(first_reg, second_reg); + } else if (op->IsShr()) { + __ sarl(first_reg, second_reg); + } else { + __ shrl(first_reg, second_reg); + } } else { - __ shrl(first_reg, second_reg); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(first_reg, imm); + } else if (op->IsShr()) { + __ sarl(first_reg, imm); + } else { + __ shrl(first_reg, imm); + } } } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); - if (op->IsShl()) { - __ shll(first_reg, imm); - } else if (op->IsShr()) { - __ sarl(first_reg, imm); + DCHECK(first.IsStackSlot()) << first; + Address addr(ESP, first.GetStackIndex()); + if (second.IsRegister()) { + Register second_reg = second.AsRegister(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(addr, second_reg); + } else if (op->IsShr()) { + __ sarl(addr, second_reg); + } else { + __ shrl(addr, second_reg); + } } else { - __ shrl(first_reg, imm); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(addr, imm); + 
} else if (op->IsShr()) { + __ sarl(addr, imm); + } else { + __ shrl(addr, imm); + } } } + break; } case Primitive::kPrimLong: { - Register second_reg = second.AsRegister(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - GenerateShlLong(first, second_reg); - } else if (op->IsShr()) { - GenerateShrLong(first, second_reg); + if (second.IsRegister()) { + Register second_reg = second.AsRegister(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } } else { - GenerateUShrLong(first, second_reg); + // Shift by a constant. + int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue; + // Nothing to do if the shift is 0, as the input is already the output. + if (shift != 0) { + if (op->IsShl()) { + GenerateShlLong(first, shift); + } else if (op->IsShr()) { + GenerateShrLong(first, shift); + } else { + GenerateUShrLong(first, shift); + } + } } break; } @@ -2781,6 +2830,26 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } } +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow(); + Register high = loc.AsRegisterPairHigh(); + if (shift == 32) { + // Shift by 32 is easy. High gets low, and low gets 0. + codegen_->EmitParallelMoves( + loc.ToLow(), loc.ToHigh(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow()); + } else if (shift > 32) { + // Low part becomes 0. High part is low part << (shift-32). + __ movl(high, low); + __ shll(high, Immediate(shift - 32)); + __ xorl(low, low); + } else { + // Between 1 and 31. 
+ __ shld(high, low, Immediate(shift)); + __ shll(low, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { Label done; __ shld(loc.AsRegisterPairHigh(), loc.AsRegisterPairLow(), shifter); @@ -2792,6 +2861,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow(); + Register high = loc.AsRegisterPairHigh(); + if (shift == 32) { + // Need to copy the sign. + DCHECK_NE(low, high); + __ movl(low, high); + __ sarl(high, Immediate(31)); + } else if (shift > 32) { + DCHECK_NE(low, high); + // High part becomes sign. Low part is high >> (shift - 32), sign-filled (sarl, not shrl). + __ movl(low, high); + __ sarl(high, Immediate(31)); + __ sarl(low, Immediate(shift - 32)); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ sarl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow(), loc.AsRegisterPairHigh(), shifter); @@ -2803,6 +2893,26 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow(); + Register high = loc.AsRegisterPairHigh(); + if (shift == 32) { + // Shift by 32 is easy. Low gets high, and high gets 0. + codegen_->EmitParallelMoves( + loc.ToHigh(), loc.ToLow(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh()); + } else if (shift > 32) { + // Low part is high >> (shift - 32). High part becomes 0. + __ movl(low, high); + __ shrl(low, Immediate(shift - 32)); + __ xorl(high, high); + } else { + // Between 1 and 31. 
+ __ shrd(low, high, Immediate(shift)); + __ shrl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow(), loc.AsRegisterPairHigh(), shifter); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 20f14fb3f..e6e7fb7b4 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -171,6 +171,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateShlLong(const Location& loc, int shift); + void GenerateShrLong(const Location& loc, int shift); + void GenerateUShrLong(const Location& loc, int shift); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 51cc7acbd..f8bba07f8 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1285,32 +1285,62 @@ void X86Assembler::decl(const Address& address) { void X86Assembler::shll(Register reg, const Immediate& imm) { - EmitGenericShift(4, reg, imm); + EmitGenericShift(4, Operand(reg), imm); } void X86Assembler::shll(Register operand, Register shifter) { - EmitGenericShift(4, operand, shifter); + EmitGenericShift(4, Operand(operand), shifter); +} + + +void X86Assembler::shll(const Address& address, const Immediate& imm) { + EmitGenericShift(4, address, imm); +} + + +void X86Assembler::shll(const Address& address, Register shifter) { + EmitGenericShift(4, address, shifter); } void X86Assembler::shrl(Register reg, const Immediate& imm) { - 
EmitGenericShift(5, reg, imm); + EmitGenericShift(5, Operand(reg), imm); } void X86Assembler::shrl(Register operand, Register shifter) { - EmitGenericShift(5, operand, shifter); + EmitGenericShift(5, Operand(operand), shifter); +} + + +void X86Assembler::shrl(const Address& address, const Immediate& imm) { + EmitGenericShift(5, address, imm); +} + + +void X86Assembler::shrl(const Address& address, Register shifter) { + EmitGenericShift(5, address, shifter); } void X86Assembler::sarl(Register reg, const Immediate& imm) { - EmitGenericShift(7, reg, imm); + EmitGenericShift(7, Operand(reg), imm); } void X86Assembler::sarl(Register operand, Register shifter) { - EmitGenericShift(7, operand, shifter); + EmitGenericShift(7, Operand(operand), shifter); +} + + +void X86Assembler::sarl(const Address& address, const Immediate& imm) { + EmitGenericShift(7, address, imm); +} + + +void X86Assembler::sarl(const Address& address, Register shifter) { + EmitGenericShift(7, address, shifter); } @@ -1323,6 +1353,15 @@ void X86Assembler::shld(Register dst, Register src, Register shifter) { } +void X86Assembler::shld(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xA4); + EmitRegisterOperand(src, dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::shrd(Register dst, Register src, Register shifter) { DCHECK_EQ(ECX, shifter); AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -1332,6 +1371,15 @@ void X86Assembler::shrd(Register dst, Register src, Register shifter) { } +void X86Assembler::shrd(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xAC); + EmitRegisterOperand(src, dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::negl(Register reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF7); @@ -1615,28 +1663,28 @@ void X86Assembler::EmitLabelLink(Label* label) { 
void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register reg, + const Operand& operand, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int8()); if (imm.value() == 1) { EmitUint8(0xD1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); } else { EmitUint8(0xC1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); EmitUint8(imm.value() & 0xFF); } } void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register operand, + const Operand& operand, Register shifter) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK_EQ(shifter, ECX); EmitUint8(0xD3); - EmitOperand(reg_or_opcode, Operand(operand)); + EmitOperand(reg_or_opcode, operand); } static dwarf::Reg DWARFReg(Register reg) { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index f3675aece..37acb6ef1 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -429,12 +429,20 @@ class X86Assembler FINAL : public Assembler { void shll(Register reg, const Immediate& imm); void shll(Register operand, Register shifter); + void shll(const Address& address, const Immediate& imm); + void shll(const Address& address, Register shifter); void shrl(Register reg, const Immediate& imm); void shrl(Register operand, Register shifter); + void shrl(const Address& address, const Immediate& imm); + void shrl(const Address& address, Register shifter); void sarl(Register reg, const Immediate& imm); void sarl(Register operand, Register shifter); + void sarl(const Address& address, const Immediate& imm); + void sarl(const Address& address, Register shifter); void shld(Register dst, Register src, Register shifter); + void shld(Register dst, Register src, const Immediate& imm); void shrd(Register dst, Register src, Register shifter); + void shrd(Register dst, Register src, const Immediate& imm); void negl(Register reg); void notl(Register reg); @@ -619,8 
+627,8 @@ class X86Assembler FINAL : public Assembler { void EmitLabelLink(Label* label); void EmitNearLabelLink(Label* label); - void EmitGenericShift(int rm, Register reg, const Immediate& imm); - void EmitGenericShift(int rm, Register operand, Register shifter); + void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); + void EmitGenericShift(int rm, const Operand& operand, Register shifter); DISALLOW_COPY_AND_ASSIGN(X86Assembler); }; -- 2.11.0