From 63fe93d9f9d2956b1ee2b98cdd6ddd2153f5f9cf Mon Sep 17 00:00:00 2001 From: Serban Constantinescu Date: Mon, 30 Jun 2014 17:10:28 +0100 Subject: [PATCH] AArch64: Enable Inlining. This patch fixes the remaining issues with inlining for ARM64. Change-Id: I2d85b7c4f3fb2b667bf6029fbc271ab954378889 Signed-off-by: Serban Constantinescu Signed-off-by: Matteo Franchin --- compiler/dex/frontend.cc | 3 +- compiler/dex/quick/arm64/codegen_arm64.h | 1 + compiler/dex/quick/arm64/fp_arm64.cc | 10 +++++++ compiler/dex/quick/arm64/int_arm64.cc | 38 ++++++++++++++----------- compiler/dex/quick/arm64/utility_arm64.cc | 34 +++++++++++++++++++---- compiler/dex/quick/gen_invoke.cc | 46 ++++++++++++++----------------- 6 files changed, 84 insertions(+), 48 deletions(-) diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index 07bbf15e0..8021fa43f 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -868,7 +868,8 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, cu.disable_opt |= (1 << kLoadStoreElimination); } else if (cu.instruction_set == kArm64) { // TODO(Arm64): enable optimizations once backend is mature enough. - cu.disable_opt = ~(uint32_t)0; + cu.disable_opt = ~((1 << kSuppressMethodInlining) | + (1 << kNullCheckElimination)); } cu.StartTimingSplit("BuildMIRGraph"); diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index b070c8a28..294b9ea7e 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -177,6 +177,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { RegLocation rl_src2); void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); bool GenInlinedReverseBits(CallInfo* info, OpSize size); + bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE; bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object); bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long); bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double); diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index 0f9de5b60..6594c4b7a 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -323,6 +323,16 @@ void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { StoreValueWide(rl_dest, rl_result); } +bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) { + RegLocation rl_src = info->args[0]; + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_dest = InlineTargetWide(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62); + StoreValueWide(rl_dest, rl_result); + return true; +} + bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) { RegLocation rl_src = info->args[0]; RegLocation rl_dest = InlineTargetWide(info); // double place for result diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index bab549955..86dddae0d 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -451,9 +451,8 @@ bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { RegLocation rl_src_address = info->args[0]; // long address - rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] ? 
- RegLocation rl_dest = InlineTarget(info); - RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg + RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info); + RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); @@ -468,9 +467,8 @@ bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { RegLocation rl_src_address = info->args[0]; // long address - rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] RegLocation rl_src_value = info->args[2]; // [size] value - RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg + RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg); RegLocation rl_value; if (size == k64) { @@ -497,11 +495,9 @@ void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) { bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { DCHECK_EQ(cu_->instruction_set, kArm64); - ArmOpcode wide = is_long ? WIDE(0) : UNWIDE(0); // Unused - RegLocation rl_src_unsafe = info->args[0]; RegLocation rl_src_obj = info->args[1]; // Object - known non-null RegLocation rl_src_offset = info->args[2]; // long low - rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] //TODO: do we really need this RegLocation rl_src_expected = info->args[4]; // int, long or Object // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object @@ -510,7 +506,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // Load Object and offset RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); - RegLocation rl_offset = LoadValue(rl_src_offset, kRefReg); + RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg); RegLocation rl_new_value; RegLocation rl_expected; @@ -542,28 +538,38 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // result = tmp != 0; RegStorage r_tmp; + RegStorage r_tmp_stored; + RegStorage rl_new_value_stored = rl_new_value.reg; + ArmOpcode wide = UNWIDE(0); if (is_long) { - r_tmp = AllocTempWide(); + r_tmp_stored = r_tmp = AllocTempWide(); + wide = WIDE(0); } else if (is_object) { + // References use 64-bit registers, but are stored as compressed 32-bit values. + // This means r_tmp_stored != r_tmp. r_tmp = AllocTempRef(); + r_tmp_stored = As32BitReg(r_tmp); + rl_new_value_stored = As32BitReg(rl_new_value_stored); } else { - r_tmp = AllocTemp(); + r_tmp_stored = r_tmp = AllocTemp(); } + RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? 
r_tmp : As32BitReg(r_tmp); LIR* loop = NewLIR0(kPseudoTargetLabel); - NewLIR2(kA64Ldaxr2rX | wide, r_tmp.GetReg(), r_ptr.GetReg()); + NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg()); OpRegReg(kOpCmp, r_tmp, rl_expected.reg); DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); LIR* early_exit = OpCondBranch(kCondNe, NULL); - - NewLIR3(kA64Stlxr3wrX | wide, As32BitReg(r_tmp).GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg()); - NewLIR3(kA64Cmp3RdT, As32BitReg(r_tmp).GetReg(), 0, ENCODE_NO_SHIFT); + NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg()); + NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT); DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); OpCondBranch(kCondNe, loop); + LIR* exit_loop = NewLIR0(kPseudoTargetLabel); + early_exit->target = exit_loop; + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - LIR* exit = NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe); - early_exit->target = exit; + NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe); FreeTemp(r_tmp); // Now unneeded. FreeTemp(r_ptr); // Now unneeded. diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index aaee91b81..eb084047f 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -893,7 +893,14 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; r_base = Check64BitReg(r_base); - r_index = Check64BitReg(r_index); + + // TODO(Arm64): The sign extension of r_index should be carried out by using an extended + // register offset load (rather than doing the sign extension in a separate instruction). + if (r_index.Is32Bit()) { + // Assemble: ``sxtw xN, wN''. + r_index = As64BitReg(r_index); + NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31); + } if (r_dest.IsFloat()) { if (r_dest.IsDouble()) { @@ -920,9 +927,11 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto opcode = WIDE(kA64Ldr4rXxG); expected_scale = 3; break; + case kReference: + // TODO(Arm64): r_dest must be 64-bit below. Remove the hack below. + r_dest = (r_dest.Is64Bit()) ? As32BitReg(r_dest) : r_dest; case kSingle: case k32: - case kReference: r_dest = Check32BitReg(r_dest); opcode = kA64Ldr4rXxG; expected_scale = 2; @@ -973,7 +982,14 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; r_base = Check64BitReg(r_base); - r_index = Check64BitReg(r_index); + + // TODO(Arm64): The sign extension of r_index should be carried out by using an extended + // register offset store (rather than doing the sign extension in a separate instruction). + if (r_index.Is32Bit()) { + // Assemble: ``sxtw xN, wN''. + r_index = As64BitReg(r_index); + NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31); + } if (r_src.IsFloat()) { if (r_src.IsDouble()) { @@ -1000,9 +1016,11 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt opcode = WIDE(kA64Str4rXxG); expected_scale = 3; break; + case kReference: + // TODO(Arm64): r_src must be 64-bit below. Remove the hack below. + r_src = (r_src.Is64Bit()) ? As32BitReg(r_src) : r_src; case kSingle: // Intentional fall-trough. case k32: // Intentional fall-trough. 
- case kReference: r_src = Check32BitReg(r_src); opcode = kA64Str4rXxG; expected_scale = 2; @@ -1066,9 +1084,11 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor alt_opcode = WIDE(kA64Ldur3rXd); } break; + case kReference: + // TODO(Arm64): r_dest must be 64-bit below. Remove the hack below. + r_dest = (r_dest.Is64Bit()) ? As32BitReg(r_dest) : r_dest; case kSingle: // Intentional fall-through. case k32: // Intentional fall-trough. - case kReference: r_dest = Check32BitReg(r_dest); scale = 2; if (r_dest.IsFloat()) { @@ -1165,9 +1185,11 @@ LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegSto alt_opcode = FWIDE(kA64Stur3rXd); } break; + case kReference: + // TODO(Arm64): r_src must be 64-bit below. Remove the hack below. + r_src = (r_src.Is64Bit()) ? As32BitReg(r_src) : r_src; case kSingle: // Intentional fall-through. case k32: // Intentional fall-trough. - case kReference: r_src = Check32BitReg(r_src); scale = 2; if (r_src.IsFloat()) { diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 6c670cdeb..e26270393 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -1280,7 +1280,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { } Load32Disp(rl_obj.reg, offset_offset, reg_off); MarkPossibleNullPointerException(info->opt_flags); - Load32Disp(rl_obj.reg, value_offset, reg_ptr); + LoadRefDisp(rl_obj.reg, value_offset, reg_ptr, kNotVolatile); if (range_check) { // Set up a slow path to allow retry in case of bounds violation */ OpRegReg(kOpCmp, rl_idx.reg, reg_max); @@ -1367,8 +1367,8 @@ bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) { } bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) { - if (cu_->instruction_set == kMips) { - // TODO - add Mips implementation + if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) { + // TODO - add Mips implementation; Enable Arm64. return false; } RegLocation rl_src_i = info->args[0]; @@ -1499,14 +1499,8 @@ bool Mir2Lir::GenInlinedAbsDouble(CallInfo* info) { RegLocation rl_dest = InlineTargetWide(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - if (cu_->instruction_set == kArm64) { - // TODO - Can ecode ? UBXF otherwise - // OpRegRegImm(kOpAnd, rl_result.reg, 0x7fffffffffffffff); - return false; - } else { - OpRegCopyWide(rl_result.reg, rl_src.reg); - OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff); - } + OpRegCopyWide(rl_result.reg, rl_src.reg); + OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff); StoreValueWide(rl_dest, rl_result); return true; } @@ -1651,7 +1645,8 @@ bool Mir2Lir::GenInlinedCurrentThread(CallInfo* info) { break; case kArm64: - Load32Disp(TargetPtrReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg); + LoadRefDisp(TargetPtrReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg, + kNotVolatile); break; case kX86: @@ -1685,10 +1680,11 @@ bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info, RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); - RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? 
kRefReg : kCoreReg, true); + RegLocation rl_result = EvalLoc(rl_dest, LocToRegClass(rl_dest), true); if (is_long) { - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { - LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg, k64); + if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64 + || cu_->instruction_set == kArm64) { + LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k64); } else { RegStorage rl_temp_offset = AllocTemp(); OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg); @@ -1696,7 +1692,8 @@ bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info, FreeTemp(rl_temp_offset); } } else { - LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k32); + LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, + (rl_result.ref) ? kReference : k32); } if (is_volatile) { @@ -1734,8 +1731,9 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, RegLocation rl_value; if (is_long) { rl_value = LoadValueWide(rl_src_value, kCoreReg); - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { - StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg, k64); + if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64 + || cu_->instruction_set == kArm64) { + StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k64); } else { RegStorage rl_temp_offset = AllocTemp(); OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg); @@ -1744,7 +1742,8 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, } } else { rl_value = LoadValue(rl_src_value); - StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32); + StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, + (rl_value.ref) ? kReference : k32); } // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard. @@ -1772,12 +1771,9 @@ void Mir2Lir::GenInvoke(CallInfo* info) { return; } DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); - // Temporary disable intrinsics for Arm64. We will enable them later step by step. - if (cu_->instruction_set != kArm64) { - if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file) - ->GenIntrinsic(this, info)) { - return; - } + if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file) + ->GenIntrinsic(this, info)) { + return; } GenInvokeNoInline(info); } -- 2.11.0
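
Notes on the techniques this patch relies on. The sketches below are standalone illustrations, not ART code; helper names and numeric constants they introduce are made up for the example.

The frontend.cc hunk replaces "disable everything" with a mask that clears only the kSuppressMethodInlining and kNullCheckElimination bits of cu.disable_opt, so method inlining is no longer suppressed and null-check elimination can run on ARM64 while every other bit stays set. A minimal sketch of that bit logic, assuming the convention that a set bit means the corresponding pass is disabled, and using invented bit positions:

// Standalone sketch, not ART code: illustrative bit positions only; the real
// values come from ART's optimization-control enum.
#include <cstdint>
#include <cstdio>

enum : int { kNullCheckElimination = 3, kSuppressMethodInlining = 11 };  // made-up positions

int main() {
  // Assumption: a set bit in disable_opt means the pass is disabled.
  uint32_t disable_opt = ~((1u << kSuppressMethodInlining) |
                           (1u << kNullCheckElimination));
  std::printf("inlining suppressed:      %u\n", (disable_opt >> kSuppressMethodInlining) & 1u);  // 0
  std::printf("null-check elim disabled: %u\n", (disable_opt >> kNullCheckElimination) & 1u);    // 0
  std::printf("some other pass (bit 0):  %u\n", disable_opt & 1u);                               // 1
  return 0;
}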
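
The new Arm64Mir2Lir::GenInlinedAbsDouble loads the double into a core register and emits UBFM Xd, Xn, #0, #62, which copies bits 62..0 and zeroes bit 63, the IEEE-754 sign bit. A standalone sketch of the same bit manipulation in portable C++ (AbsViaBitmask is a made-up name):

// Standalone sketch, not ART code: clearing bit 63 of an IEEE-754 double
// yields its absolute value, which is what "ubfm xd, xn, #0, #62" does to
// the value held in a core register.
#include <cstdint>
#include <cstdio>
#include <cstring>

static double AbsViaBitmask(double v) {      // made-up helper name
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof(bits));      // view the double as raw bits
  bits &= ~(UINT64_C(1) << 63);              // keep bits [62:0], drop the sign bit
  std::memcpy(&v, &bits, sizeof(v));
  return v;
}

int main() {
  std::printf("%f %f\n", AbsViaBitmask(-2.5), AbsViaBitmask(3.0));  // prints 2.500000 3.000000
  return 0;
}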
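
In LoadBaseIndexed and StoreBaseIndexed, a 32-bit index register is now widened with WIDE(kA64Sbfm4rrdd) ..., 0, 31, which is the sxtw alias, before being used as a 64-bit register offset (the TODO notes this could instead use an extended-register addressing mode). A standalone sketch of what that sign extension computes (SignExtend32To64 is a made-up name):

// Standalone sketch, not ART code: what "sbfm xN, xN, #0, #31" (alias: sxtw)
// computes before the index is used as a 64-bit register offset.
#include <cstdint>
#include <cstdio>

static int64_t SignExtend32To64(uint32_t w) {            // made-up helper name
  return static_cast<int64_t>(static_cast<int32_t>(w));  // bit 31 fills bits [63:32]
}

int main() {
  std::printf("%lld\n", static_cast<long long>(SignExtend32To64(0xffffffffu)));  // -1
  std::printf("%lld\n", static_cast<long long>(SignExtend32To64(0x7fffffffu)));  // 2147483647
  return 0;
}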
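
The reworked GenInlinedCas builds a load-acquire-exclusive / store-release-exclusive loop (kA64Ldaxr2rX, compare, kA64Stlxr3wrX, retry, then kA64Csinc4rrrc to materialize the boolean), keeping a separate 32-bit view of the temp for the reference case because references are stored as compressed 32-bit values. A hedged sketch of the semantics the 64-bit path provides, expressed with std::atomic; on AArch64 without LSE atomics a compiler typically lowers this to much the same ldaxr/cmp/stlxr loop:

// Standalone sketch, not ART code: the compare-and-swap semantics that the
// generated ldaxr/cmp/stlxr/csinc sequence provides for the 64-bit case.
#include <atomic>
#include <cstdint>
#include <cstdio>

static bool CasLong(std::atomic<int64_t>* addr, int64_t expected, int64_t desired) {
  // True iff *addr held `expected` and was replaced by `desired`; acq_rel
  // matches the ordering given by the load-acquire/store-release exclusives.
  return addr->compare_exchange_strong(expected, desired,
                                       std::memory_order_acq_rel,
                                       std::memory_order_acquire);
}

int main() {
  std::atomic<int64_t> value{41};
  bool ok = CasLong(&value, 41, 42);
  std::printf("%d %lld\n", ok, static_cast<long long>(value.load()));  // 1 42
  return 0;
}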