From: buzbee Date: Tue, 27 May 2014 20:59:04 +0000 (-0700) Subject: Art compiler: remove unnecessary sqrt call X-Git-Tag: android-x86-7.1-r1~889^2~3942^2~9^2 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=055c29fd0f752328981f1b7ccadb1862eecedd40;p=android-x86%2Fart.git Art compiler: remove unnecessary sqrt call For reasons lost in the mists of time, the Dalvik JIT tested the results of an inlined sqrt for NaN on Arm targets, and then called an out-of-line routine to recompute if true. The Quick compiler inherited this behavior. It is not necessary, and the CL purges it (along with the out-of-line sqrt entrypoint). Change-Id: I8c8fa6feacf9b7c3b9e190dfc6f728932fd948c6 --- diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc index bb02f7475..18d4391f9 100644 --- a/compiler/dex/quick/arm/fp_arm.cc +++ b/compiler/dex/quick/arm/fp_arm.cc @@ -334,22 +334,11 @@ void ArmMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) { DCHECK_EQ(cu_->instruction_set, kThumb2); - LIR *branch; RegLocation rl_src = info->args[0]; RegLocation rl_dest = InlineTargetWide(info); // double place for result rl_src = LoadValueWide(rl_src, kFPReg); RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg()); - NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg()); - NewLIR0(kThumb2Fmstat); - branch = NewLIR2(kThumbBCond, 0, kArmCondEq); - ClobberCallerSave(); - LockCallTemps(); // Using fixed registers - RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt)); - NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg()); - NewLIR1(kThumbBlxR, r_tgt.GetReg()); - NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg()); - branch->target = NewLIR0(kPseudoTargetLabel); StoreValueWide(rl_dest, rl_result); return true; } diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 6812f3c9d..49cf71b7e 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -180,7 +180,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(80U, sizeof(OatHeader)); EXPECT_EQ(8U, sizeof(OatMethodOffsets)); EXPECT_EQ(24U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(80 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, OatHeaderIsValid) { diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index 23e343311..340a83e4e 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -196,7 +196,6 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pCmplDouble = CmplDouble; qpoints->pCmplFloat = CmplFloat; qpoints->pFmod = fmod; - qpoints->pSqrt = sqrt; qpoints->pL2d = __aeabi_l2d; qpoints->pFmodf = fmodf; qpoints->pL2f = __aeabi_l2f; diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index cb9f53b72..46e819eff 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -182,7 +182,6 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pCmplDouble = CmplDouble; qpoints->pCmplFloat = CmplFloat; qpoints->pFmod = fmod; - qpoints->pSqrt = sqrt; qpoints->pL2d = NULL; qpoints->pFmodf = fmodf; qpoints->pL2f = NULL; diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index 8ad29dd8b..c53fa1eab 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -177,7 +177,6 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // points->pCmplDouble = NULL; // Not needed on x86. // points->pCmplFloat = NULL; // Not needed on x86. qpoints->pFmod = art_quick_fmod; - // qpoints->pSqrt = NULL; // Not needed on x86. // qpoints->pL2d = NULL; // Not needed on x86. qpoints->pFmodf = art_quick_fmodf; // qpoints->pL2f = NULL; // Not needed on x86. diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index 86dcf36c7..aeda07262 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -176,7 +176,6 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // points->pCmplDouble = NULL; // Not needed on x86. // points->pCmplFloat = NULL; // Not needed on x86. qpoints->pFmod = fmod; - // qpoints->pSqrt = NULL; // Not needed on x86. // qpoints->pL2d = NULL; // Not needed on x86. qpoints->pFmodf = fmodf; // qpoints->pL2f = NULL; // Not needed on x86. diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index ec69e287a..7bd15828c 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -98,7 +98,6 @@ struct PACKED(4) QuickEntryPoints { int32_t (*pCmplDouble)(double, double); int32_t (*pCmplFloat)(float, float); double (*pFmod)(double, double); - double (*pSqrt)(double); double (*pL2d)(int64_t); float (*pFmodf)(float, float); float (*pL2f)(int64_t); diff --git a/runtime/oat.cc b/runtime/oat.cc index 10d335eec..9c14a4fb8 100644 --- a/runtime/oat.cc +++ b/runtime/oat.cc @@ -22,7 +22,7 @@ namespace art { const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' }; -const uint8_t OatHeader::kOatVersion[] = { '0', '2', '9', '\0' }; +const uint8_t OatHeader::kOatVersion[] = { '0', '3', '0', '\0' }; OatHeader::OatHeader() { memset(this, 0, sizeof(*this)); diff --git a/runtime/thread.cc b/runtime/thread.cc index 41cfc5884..55bec1e9f 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -1816,7 +1816,6 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) { QUICK_ENTRY_POINT_INFO(pCmplDouble) QUICK_ENTRY_POINT_INFO(pCmplFloat) QUICK_ENTRY_POINT_INFO(pFmod) - QUICK_ENTRY_POINT_INFO(pSqrt) QUICK_ENTRY_POINT_INFO(pL2d) QUICK_ENTRY_POINT_INFO(pFmodf) QUICK_ENTRY_POINT_INFO(pL2f)