From 24f2dfae084b2382c053f5d688fd6bb26cb8a328 Mon Sep 17 00:00:00 2001 From: Mark Mendell Date: Wed, 14 Jan 2015 19:51:45 -0500 Subject: [PATCH] [optimizing compiler] Implement inline x86 FP '%' Replace the calls to fmod/fmodf by inline code as is done in the Quick compiler. Remove the quick fmod/fmodf runtime entries, as they are no longer in use. 64 bit code generator Move() routine needed to be enhanced to handle constants, as Location::Any() allows them to be generated. Change-Id: I6b6a42f6faeed4b0b3c940453e487daf5b25d184 Signed-off-by: Mark Mendell --- compiler/optimizing/code_generator_x86.cc | 114 ++++++++++++++++------ compiler/optimizing/code_generator_x86.h | 3 + compiler/optimizing/code_generator_x86_64.cc | 125 +++++++++++++++++++++---- compiler/optimizing/code_generator_x86_64.h | 3 + compiler/utils/x86/assembler_x86.cc | 36 +++++++ compiler/utils/x86/assembler_x86.h | 7 ++ compiler/utils/x86_64/assembler_x86_64.cc | 35 +++++++ compiler/utils/x86_64/assembler_x86_64.h | 7 ++ runtime/arch/x86/entrypoints_init_x86.cc | 8 +- runtime/arch/x86/quick_entrypoints_x86.S | 29 ------ runtime/arch/x86_64/entrypoints_init_x86_64.cc | 4 +- runtime/arch/x86_64/quick_entrypoints_x86_64.S | 2 - 12 files changed, 288 insertions(+), 85 deletions(-) diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 5b09fc190..57f01e8e1 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -40,6 +40,8 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength = static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { }; static constexpr size_t kRuntimeParameterFpuRegistersLength = 0; +static constexpr int kC2ConditionMask = 0x400; + // Marker for places that can be updated once we don't follow the quick ABI. 
static constexpr bool kFollowsQuickABI = true; @@ -2076,6 +2078,81 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } } +void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float) { + if (source.IsStackSlot()) { + DCHECK(is_float); + __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else if (source.IsDoubleStackSlot()) { + DCHECK(!is_float); + __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + // Write the value to the temporary location on the stack and load to FP stack. + if (is_float) { + Location stack_temp = Location::StackSlot(temp_offset); + codegen_->Move32(stack_temp, source); + __ flds(Address(ESP, temp_offset)); + } else { + Location stack_temp = Location::DoubleStackSlot(temp_offset); + codegen_->Move64(stack_temp, source); + __ fldl(Address(ESP, temp_offset)); + } + } +} + +void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { + Primitive::Type type = rem->GetResultType(); + bool is_float = type == Primitive::kPrimFloat; + size_t elem_size = Primitive::ComponentSize(type); + LocationSummary* locations = rem->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location out = locations->Out(); + + // Create stack space for 2 elements. + // TODO: enhance register allocator to ask for stack temporaries. + __ subl(ESP, Immediate(2 * elem_size)); + + // Load the values to the FP stack in reverse order, using temporaries if needed. + PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); + PushOntoFPStack(first, 0, 2 * elem_size, is_float); + + // Loop doing FPREM until we stabilize. + Label retry; + __ Bind(&retry); + __ fprem(); + + // Move FP status to AX. + __ fstsw(); + + // And see if the argument reduction is complete. This is signaled by the + // C2 FPU flag bit set to 0. 
+ __ andl(EAX, Immediate(kC2ConditionMask)); + __ j(kNotEqual, &retry); + + // We have settled on the final value. Retrieve it into an XMM register. + // Store FP top of stack to real stack. + if (is_float) { + __ fsts(Address(ESP, 0)); + } else { + __ fstl(Address(ESP, 0)); + } + + // Pop the 2 items from the FP stack. + __ fucompp(); + + // Load the value from the stack into an XMM register. + DCHECK(out.IsFpuRegister()) << out; + if (is_float) { + __ movss(out.AsFpuRegister(), Address(ESP, 0)); + } else { + __ movsd(out.AsFpuRegister(), Address(ESP, 0)); + } + + // And remove the temporary stack space we allocated. + __ addl(ESP, Immediate(2 * elem_size)); +} + void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); @@ -2209,10 +2286,8 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { void LocationsBuilderX86::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = type == Primitive::kPrimInt - ? LocationSummary::kNoCall - : LocationSummary::kCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); switch (type) { case Primitive::kPrimInt: { @@ -2231,24 +2306,12 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); break; } + case Primitive::kPrimDouble: case Primitive::kPrimFloat: { - InvokeRuntimeCallingConvention calling_convention; - // x86 floating-point parameters are passed through core registers (EAX, ECX). - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - // The runtime helper puts the result in XMM0. 
- locations->SetOut(Location::FpuRegisterLocation(XMM0)); - break; - } - case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - // x86 floating-point parameters are passed through core registers (EAX_ECX, EDX_EBX). - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - // The runtime helper puts the result in XMM0. - locations->SetOut(Location::FpuRegisterLocation(XMM0)); + locations->SetInAt(0, Location::Any()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RegisterLocation(EAX)); break; } @@ -2265,14 +2328,9 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmodf))); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); - break; - } + case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmod))); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); + GenerateRemFP(rem); break; } default: diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index b77a1aa85..a9086f887 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -137,6 +137,7 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateRemFP(HRem *rem); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const Location& loc, Register 
shifter); void GenerateShrLong(const Location& loc, Register shifter); @@ -144,6 +145,8 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 196e0cf66..dd6861f67 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -47,6 +47,8 @@ static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +static constexpr int kC2ConditionMask = 0x400; + class InvokeRuntimeCallingConvention : public CallingConvention { public: InvokeRuntimeCallingConvention() @@ -583,8 +585,18 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { } else if (source.IsFpuRegister()) { __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsFpuRegister()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int32_t value; + if (constant->IsFloatConstant()) { + value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); + } else { + DCHECK(constant->IsIntConstant()); + value = constant->AsIntConstant()->GetValue(); + } + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); } else { - DCHECK(source.IsStackSlot()); + DCHECK(source.IsStackSlot()) << source; __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); __ movl(Address(CpuRegister(RSP),
destination.GetStackIndex()), CpuRegister(TMP)); } @@ -596,6 +608,17 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { } else if (source.IsFpuRegister()) { __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsFpuRegister()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int64_t value; + if (constant->IsDoubleConstant()) { + value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()); + } else { + DCHECK(constant->IsLongConstant()); + value = constant->AsLongConstant()->GetValue(); + } + __ movq(CpuRegister(TMP), Immediate(value)); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } else { DCHECK(source.IsDoubleStackSlot()); __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -2000,6 +2023,81 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { } } +void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float) { + if (source.IsStackSlot()) { + DCHECK(is_float); + __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); + } else if (source.IsDoubleStackSlot()) { + DCHECK(!is_float); + __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); + } else { + // Write the value to the temporary location on the stack and load to FP stack.
+ if (is_float) { + Location stack_temp = Location::StackSlot(temp_offset); + codegen_->Move(stack_temp, source); + __ flds(Address(CpuRegister(RSP), temp_offset)); + } else { + Location stack_temp = Location::DoubleStackSlot(temp_offset); + codegen_->Move(stack_temp, source); + __ fldl(Address(CpuRegister(RSP), temp_offset)); + } + } +} + +void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { + Primitive::Type type = rem->GetResultType(); + bool is_float = type == Primitive::kPrimFloat; + size_t elem_size = Primitive::ComponentSize(type); + LocationSummary* locations = rem->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location out = locations->Out(); + + // Create stack space for 2 elements. + // TODO: enhance register allocator to ask for stack temporaries. + __ subq(CpuRegister(RSP), Immediate(2 * elem_size)); + + // Load the values to the FP stack in reverse order, using temporaries if needed. + PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); + PushOntoFPStack(first, 0, 2 * elem_size, is_float); + + // Loop doing FPREM until we stabilize. + Label retry; + __ Bind(&retry); + __ fprem(); + + // Move FP status to AX. + __ fstsw(); + + // And see if the argument reduction is complete. This is signaled by the + // C2 FPU flag bit set to 0. + __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask)); + __ j(kNotEqual, &retry); + + // We have settled on the final value. Retrieve it into an XMM register. + // Store FP top of stack to real stack. + if (is_float) { + __ fsts(Address(CpuRegister(RSP), 0)); + } else { + __ fstl(Address(CpuRegister(RSP), 0)); + } + + // Pop the 2 items from the FP stack. + __ fucompp(); + + // Load the value from the stack into an XMM register. 
+ DCHECK(out.IsFpuRegister()) << out; + if (is_float) { + __ movss(out.AsFpuRegister(), Address(CpuRegister(RSP), 0)); + } else { + __ movsd(out.AsFpuRegister(), Address(CpuRegister(RSP), 0)); + } + + // And remove the temporary stack space we allocated. + __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); +} + void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); @@ -2099,11 +2197,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { void LocationsBuilderX86_64::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = - (type == Primitive::kPrimInt) || (type == Primitive::kPrimLong) - ? LocationSummary::kNoCall - : LocationSummary::kCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); switch (type) { case Primitive::kPrimInt: @@ -2117,11 +2212,10 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - // The runtime helper puts the result in XMM0. 
- locations->SetOut(Location::FpuRegisterLocation(XMM0)); + locations->SetInAt(0, Location::Any()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RegisterLocation(RAX)); break; } @@ -2138,14 +2232,9 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: { - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmodf), true)); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); - break; - } + case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmod), true)); - codegen_->RecordPcInfo(rem, rem->GetDexPc()); + GenerateRemFP(rem); break; } default: diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index befe994ef..ead771a1f 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -155,6 +155,7 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); + void GenerateRemFP(HRem *rem); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); void GenerateMemoryBarrier(MemBarrierKind kind); @@ -162,6 +163,8 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void PushOntoFPStack(Location source, uint32_t temp_offset, + uint32_t stack_adjustment, bool is_float); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; diff 
--git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 83584a2dc..3f266fecf 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -409,6 +409,13 @@ void X86Assembler::flds(const Address& src) { } +void X86Assembler::fsts(const Address& dst) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xD9); + EmitOperand(2, dst); +} + + void X86Assembler::fstps(const Address& dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xD9); @@ -719,6 +726,13 @@ void X86Assembler::fldl(const Address& src) { } +void X86Assembler::fstl(const Address& dst) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xDD); + EmitOperand(2, dst); +} + + void X86Assembler::fstpl(const Address& dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xDD); @@ -726,6 +740,14 @@ void X86Assembler::fstpl(const Address& dst) { } +void X86Assembler::fstsw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x9B); + EmitUint8(0xDF); + EmitUint8(0xE0); +} + + void X86Assembler::fnstcw(const Address& dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xD9); @@ -797,6 +819,20 @@ void X86Assembler::fptan() { } +void X86Assembler::fucompp() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xDA); + EmitUint8(0xE9); +} + + +void X86Assembler::fprem() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xD9); + EmitUint8(0xF8); +} + + void X86Assembler::xchgl(Register dst, Register src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x87); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index ad070673e..3a44ace64 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -317,9 +317,15 @@ class X86Assembler FINAL : public Assembler { void flds(const Address& src); void fstps(const Address& dst); + void fsts(const Address& dst); 
void fldl(const Address& src); void fstpl(const Address& dst); + void fstl(const Address& dst); + + void fstsw(); + + void fucompp(); void fnstcw(const Address& dst); void fldcw(const Address& src); @@ -334,6 +340,7 @@ class X86Assembler FINAL : public Assembler { void fsin(); void fcos(); void fptan(); + void fprem(); void xchgl(Register dst, Register src); void xchgl(Register reg, const Address& address); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 906eabf4b..5afa603bb 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -496,6 +496,13 @@ void X86_64Assembler::flds(const Address& src) { } +void X86_64Assembler::fsts(const Address& dst) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xD9); + EmitOperand(2, dst); +} + + void X86_64Assembler::fstps(const Address& dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xD9); @@ -888,6 +895,13 @@ void X86_64Assembler::fldl(const Address& src) { } +void X86_64Assembler::fstl(const Address& dst) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xDD); + EmitOperand(2, dst); +} + + void X86_64Assembler::fstpl(const Address& dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xDD); @@ -895,6 +909,14 @@ void X86_64Assembler::fstpl(const Address& dst) { } +void X86_64Assembler::fstsw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x9B); + EmitUint8(0xDF); + EmitUint8(0xE0); +} + + void X86_64Assembler::fnstcw(const Address& dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xD9); @@ -965,6 +987,19 @@ void X86_64Assembler::fptan() { EmitUint8(0xF2); } +void X86_64Assembler::fucompp() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xDA); + EmitUint8(0xE9); +} + + +void X86_64Assembler::fprem() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xD9); + EmitUint8(0xF8); +} + 
void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 4a509faa0..e24fa1b9e 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -373,9 +373,15 @@ class X86_64Assembler FINAL : public Assembler { void flds(const Address& src); void fstps(const Address& dst); + void fsts(const Address& dst); void fldl(const Address& src); void fstpl(const Address& dst); + void fstl(const Address& dst); + + void fstsw(); + + void fucompp(); void fnstcw(const Address& dst); void fldcw(const Address& src); @@ -390,6 +396,7 @@ class X86_64Assembler FINAL : public Assembler { void fsin(); void fcos(); void fptan(); + void fprem(); void xchgl(CpuRegister dst, CpuRegister src); void xchgq(CpuRegister dst, CpuRegister src); diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index 2ac5279a5..7cdd2fc74 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -28,10 +28,6 @@ namespace art { extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass, const mirror::Class* ref_class); -// fmod entrypointes. -extern "C" double art_quick_fmod(double, double); -extern "C" float art_quick_fmodf(float, float); - void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Interpreter @@ -104,9 +100,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // points->pCmpgFloat = NULL; // Not needed on x86. // points->pCmplDouble = NULL; // Not needed on x86. // points->pCmplFloat = NULL; // Not needed on x86. - qpoints->pFmod = art_quick_fmod; + // qpoints->pFmod = NULL; // Not needed on x86. // qpoints->pL2d = NULL; // Not needed on x86. 
- qpoints->pFmodf = art_quick_fmodf; + // qpoints->pFmodf = NULL; // Not needed on x86. // qpoints->pL2f = NULL; // Not needed on x86. // points->pD2iz = NULL; // Not needed on x86. // points->pF2iz = NULL; // Not needed on x86. diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 302b9f85c..4a0d7f8f5 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -801,35 +801,6 @@ END_FUNCTION art_quick_memcpy NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret -DEFINE_FUNCTION art_quick_fmod - subl LITERAL(12), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(12) - PUSH ebx // pass arg4 b.hi - PUSH edx // pass arg3 b.lo - PUSH ecx // pass arg2 a.hi - PUSH eax // pass arg1 a.lo - SETUP_GOT_NOSAVE ebx // clobbers EBX - call PLT_SYMBOL(fmod) // (jdouble a, jdouble b) - fstpl (%esp) // pop return value off fp stack - movsd (%esp), %xmm0 // place into %xmm0 - addl LITERAL(28), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-28) - ret -END_FUNCTION art_quick_fmod - -DEFINE_FUNCTION art_quick_fmodf - PUSH eax // alignment padding - PUSH ecx // pass arg2 b - PUSH eax // pass arg1 a - SETUP_GOT_NOSAVE ebx // clobbers EBX - call PLT_SYMBOL(fmodf) // (jfloat a, jfloat b) - fstps (%esp) // pop return value off fp stack - movss (%esp), %xmm0 // place into %xmm0 - addl LITERAL(12), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-12) - ret -END_FUNCTION art_quick_fmodf - DEFINE_FUNCTION art_quick_d2l PUSH eax // alignment padding PUSH ecx // pass arg2 a.hi diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index 3f1e4b594..b25d7a7c8 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -105,9 +105,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // points->pCmpgFloat = NULL; // Not needed on x86. 
// points->pCmplDouble = NULL; // Not needed on x86. // points->pCmplFloat = NULL; // Not needed on x86. - qpoints->pFmod = fmod; + // qpoints->pFmod = NULL; // Not needed on x86. // qpoints->pL2d = NULL; // Not needed on x86. - qpoints->pFmodf = fmodf; + // qpoints->pFmodf = NULL; // Not needed on x86. // qpoints->pL2f = NULL; // Not needed on x86. // points->pD2iz = NULL; // Not needed on x86. // points->pF2iz = NULL; // Not needed on x86. diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 5ae65db0f..48f5e850d 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1121,8 +1121,6 @@ UNIMPLEMENTED art_quick_lmul UNIMPLEMENTED art_quick_lshl UNIMPLEMENTED art_quick_lshr UNIMPLEMENTED art_quick_lushr -UNIMPLEMENTED art_quick_fmod -UNIMPLEMENTED art_quick_fmodf THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_EAX_ZERO THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_EAX_ZERO -- 2.11.0