From 7c2ad5af0bdd3cc1069038f8e3422d99aeb5f44c Mon Sep 17 00:00:00 2001 From: Vladimir Marko Date: Wed, 24 Sep 2014 12:42:55 +0100 Subject: [PATCH] Implement method calls using relative BL on ARM64. Change-Id: I9e5d0b6c100b6cddd6bbb7ab07cff77ab104ea31 --- compiler/dex/quick/arm64/arm64_lir.h | 2 + compiler/dex/quick/arm64/assemble_arm64.cc | 6 +- compiler/dex/quick/arm64/call_arm64.cc | 115 +++++++++++++++++ compiler/dex/quick/arm64/codegen_arm64.h | 24 ++++ compiler/dex/quick/arm64/target_arm64.cc | 20 ++- compiler/dex/quick/gen_invoke.cc | 6 +- compiler/oat_writer.cc | 198 ++++++++++++++++++++--------- compiler/oat_writer.h | 2 + 8 files changed, 307 insertions(+), 66 deletions(-) diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index ab7192143..a87b06aeb 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -117,6 +117,7 @@ enum A64ResourceEncodingPos { #define IS_SIGNED_IMM14(value) IS_SIGNED_IMM(14, value) #define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value) #define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value) +#define IS_SIGNED_IMM26(value) IS_SIGNED_IMM(26, value) // Quick macro used to define the registers. #define A64_REGISTER_CODE_LIST(R) \ @@ -240,6 +241,7 @@ enum A64Opcode { kA64B2ct, // b.cond [01010100] imm_19[23-5] [0] cond[3-0]. kA64Blr1x, // blr [1101011000111111000000] rn[9-5] [00000]. kA64Br1x, // br [1101011000011111000000] rn[9-5] [00000]. + kA64Bl1t, // bl [100101] imm26[25-0]. kA64Brk1d, // brk [11010100001] imm_16[20-5] [00000]. kA64B1t, // b [00010100] offset_26[25-0]. kA64Cbnz2rt, // cbnz[00110101] imm_19[23-5] rt[4-0]. diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index b1cf27968..7c663a941 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -155,6 +155,10 @@ const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH, "br", "!0x", kFixupNone), + ENCODING_MAP(kA64Bl1t, NO_VARIANTS(0x94000000), + kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, + "bl", "!0T", kFixupLabel), ENCODING_MAP(kA64Brk1d, NO_VARIANTS(0xd4200000), kFmtBitBlt, 20, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, @@ -873,7 +877,7 @@ void Arm64Mir2Lir::AssembleLIR() { ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); int32_t delta = target - pc; DCHECK_EQ(delta & 0x3, 0); - if (!IS_SIGNED_IMM19(delta >> 2)) { + if (!IS_SIGNED_IMM26(delta >> 2)) { LOG(FATAL) << "Invalid jump range in kFixupT1Branch"; } lir->operands[0] = delta >> 2; diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 6081f289e..e8de876d6 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -21,6 +21,8 @@ #include "dex/quick/mir_to_lir-inl.h" #include "gc/accounting/card_table.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "mirror/art_method.h" +#include "mirror/object_array-inl.h" namespace art { @@ -433,4 +435,117 @@ void Arm64Mir2Lir::GenSpecialExitSequence() { NewLIR0(kA64Ret); } +static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { + // Always emit relative calls. 
+  return true;
+}
+
+/*
+ * Bit of a hack here - in the absence of a real scheduling pass,
+ * emit the next instruction in static & direct invoke sequences.
+ */
+static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
+                               int state, const MethodReference& target_method,
+                               uint32_t unused,
+                               uintptr_t direct_code, uintptr_t direct_method,
+                               InvokeType type) {
+  Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
+  if (direct_code != 0 && direct_method != 0) {
+    switch (state) {
+    case 0:  // Get the current Method* [sets kArg0]
+      if (direct_code != static_cast<uintptr_t>(-1)) {
+        cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+      } else if (Arm64UseRelativeCall(cu, target_method)) {
+        // Defer to linker patch.
+      } else {
+        cg->LoadCodeAddress(target_method, type, kInvokeTgt);
+      }
+      if (direct_method != static_cast<uintptr_t>(-1)) {
+        cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
+      } else {
+        cg->LoadMethodAddress(target_method, type, kArg0);
+      }
+      break;
+    default:
+      return -1;
+    }
+  } else {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0:  // Get the current Method* [sets kArg0]
+      // TUNING: we can save a reg copy if Method* has been promoted.
+      cg->LoadCurrMethodDirect(arg0_ref);
+      break;
+    case 1:  // Get method->dex_cache_resolved_methods_
+      cg->LoadRefDisp(arg0_ref,
+                      mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+                      arg0_ref,
+                      kNotVolatile);
+      // Set up direct code if known.
+      if (direct_code != 0) {
+        if (direct_code != static_cast<uintptr_t>(-1)) {
+          cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+        } else if (Arm64UseRelativeCall(cu, target_method)) {
+          // Defer to linker patch.
+        } else {
+          CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
+          cg->LoadCodeAddress(target_method, type, kInvokeTgt);
+        }
+      }
+      break;
+    case 2:  // Grab target method*
+      CHECK_EQ(cu->dex_file, target_method.dex_file);
+      cg->LoadRefDisp(arg0_ref,
+                      mirror::ObjectArray<mirror::Object>::OffsetOfElement(
+                          target_method.dex_method_index).Int32Value(),
+                      arg0_ref,
+                      kNotVolatile);
+      break;
+    case 3:  // Grab the code from the method*
+      if (direct_code == 0) {
+        // kInvokeTgt := arg0_ref->entrypoint
+        cg->LoadWordDisp(arg0_ref,
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
+                         cg->TargetPtrReg(kInvokeTgt));
+      }
+      break;
+    default:
+      return -1;
+    }
+  }
+  return state + 1;
+}
+
+NextCallInsn Arm64Mir2Lir::GetNextSDCallInsn() {
+  return Arm64NextSDCallInsn;
+}
+
+LIR* Arm64Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
+  // For ARM64, just generate a relative BL instruction that will be filled in at 'link time'.
+  // If the target turns out to be too far, the linker will generate a thunk for dispatch.
+  int target_method_idx = target_method.dex_method_index;
+  const DexFile* target_dex_file = target_method.dex_file;
+
+  // Generate the call instruction and save index, dex_file, and type.
+  // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
+  // as a placeholder for the offset.
+  LIR* call = RawLIR(current_dalvik_offset_, kA64Bl1t, 0,
+                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
+  AppendLIR(call);
+  call_method_insns_.push_back(call);
+  return call;
+}
+
+LIR* Arm64Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
+  LIR* call_insn;
+  if (method_info.FastPath() && Arm64UseRelativeCall(cu_, method_info.GetTargetMethod()) &&
+      (method_info.GetSharpType() == kDirect || method_info.GetSharpType() == kStatic) &&
+      method_info.DirectCode() == static_cast<uintptr_t>(-1)) {
+    call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType());
+  } else {
+    call_insn = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
+  }
+  return call_insn;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 55cc93842..93d9b34be 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -259,6 +259,28 @@ class Arm64Mir2Lir FINAL : public Mir2Lir {
 
   size_t GetInstructionOffset(LIR* lir) OVERRIDE;
 
+  NextCallInsn GetNextSDCallInsn() OVERRIDE;
+
+  /*
+   * @brief Generate a relative call to the method that will be patched at link time.
+   * @param target_method The MethodReference of the method to be invoked.
+   * @param type How the method will be invoked.
+   * @returns Call instruction
+   */
+  LIR* CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
+
+  /*
+   * @brief Generate the actual call insn based on the method info.
+   * @param method_info the lowering info for the method call.
+   * @returns Call instruction
+   */
+  virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
+
+  /*
+   * @brief Handle ARM64 specific literals.
+   */
+  void InstallLiteralPools() OVERRIDE;
+
   LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
 
  private:
@@ -396,6 +418,8 @@ class Arm64Mir2Lir FINAL : public Mir2Lir {
   InToRegStorageMapping in_to_reg_storage_mapping_;
 
   static const A64EncodingMap EncodingMap[kA64Last];
+
+  ArenaVector<LIR*> call_method_insns_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 0462530a3..ba47883d9 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -585,7 +585,8 @@ RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volat
 }
 
 Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena) {
+    : Mir2Lir(cu, mir_graph, arena),
+      call_method_insns_(arena->Adapter()) {
   // Sanity check - make sure encoding map lines up.
   for (int i = 0; i < kA64Last; i++) {
     if (UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode) != i) {
@@ -1201,4 +1202,21 @@ int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
   return call_state;
 }
 
+void Arm64Mir2Lir::InstallLiteralPools() {
+  // PC-relative calls to methods.
+  patches_.reserve(call_method_insns_.size());
+  for (LIR* p : call_method_insns_) {
+    DCHECK_EQ(p->opcode, kA64Bl1t);
+    uint32_t target_method_idx = p->operands[1];
+    const DexFile* target_dex_file =
+        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
+
+    patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
+                                                      target_dex_file, target_method_idx));
+  }
+
+  // And do the normal processing.
+  Mir2Lir::InstallLiteralPools();
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index c308932bc..174e4e0bb 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -495,7 +495,8 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
                           uintptr_t direct_code, uintptr_t direct_method,
                           InvokeType type) {
   DCHECK(cu->instruction_set != kX86 && cu->instruction_set != kX86_64 &&
-         cu->instruction_set != kThumb2 && cu->instruction_set != kArm);
+         cu->instruction_set != kThumb2 && cu->instruction_set != kArm &&
+         cu->instruction_set != kArm64);
   Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
   if (direct_code != 0 && direct_method != 0) {
     switch (state) {
@@ -1751,7 +1752,8 @@ NextCallInsn Mir2Lir::GetNextSDCallInsn() {
 
 LIR* Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
   DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64 &&
-         cu_->instruction_set != kThumb2 && cu_->instruction_set != kArm);
+         cu_->instruction_set != kThumb2 && cu_->instruction_set != kArm &&
+         cu_->instruction_set != kArm64);
   return OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
 }
 
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index dd64368ab..e64d2ab27 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -38,6 +38,7 @@
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "utils/arm/assembler_thumb2.h"
+#include "utils/arm64/assembler_arm64.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -117,10 +118,14 @@ class OatWriter::X86RelativeCallPatcher FINAL : public RelativeCallPatcher {
   DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher);
 };
 
-class OatWriter::Thumb2RelativeCallPatcher FINAL : public RelativeCallPatcher {
+class OatWriter::ArmBaseRelativeCallPatcher : public RelativeCallPatcher {
  public:
-  explicit Thumb2RelativeCallPatcher(OatWriter* writer)
-      : writer_(writer), thunk_code_(CompileThunkCode()),
+  ArmBaseRelativeCallPatcher(OatWriter* writer,
+                             InstructionSet instruction_set, std::vector<uint8_t> thunk_code,
+                             uint32_t max_positive_displacement, uint32_t max_negative_displacement)
+      : writer_(writer), instruction_set_(instruction_set), thunk_code_(thunk_code),
+        max_positive_displacement_(max_positive_displacement),
+        max_negative_displacement_(max_negative_displacement),
         thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() {
   }
 
@@ -130,11 +135,11 @@ class OatWriter::Thumb2RelativeCallPatcher FINAL : public RelativeCallPatcher {
     // of code. To avoid any alignment discrepancies for the final chunk, we always align the
     // offset after reserving or writing any chunk.
     if (UNLIKELY(compiled_method == nullptr)) {
-      uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+      uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
       bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset);
       if (needs_thunk) {
         thunk_locations_.push_back(aligned_offset);
-        offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), kThumb2);
+        offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
       }
       return offset;
     }
@@ -143,14 +148,14 @@
     uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
     uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
     if (!unprocessed_patches_.empty() &&
-        next_aligned_offset - unprocessed_patches_.front().second > kMaxPositiveDisplacement) {
+        next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
       bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset);
       if (needs_thunk) {
         // A single thunk will cover all pending patches.
         unprocessed_patches_.clear();
         uint32_t thunk_location = compiled_method->AlignCode(offset);
         thunk_locations_.push_back(thunk_location);
-        offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), kThumb2);
+        offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
       }
     }
     for (const LinkerPatch& patch : compiled_method->GetPatches()) {
@@ -166,7 +171,7 @@
     if (current_thunk_to_write_ == thunk_locations_.size()) {
       return offset;
     }
-    uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+    uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
     if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) {
       ++current_thunk_to_write_;
       uint32_t aligned_code_delta = aligned_offset - offset;
@@ -179,7 +184,7 @@
       writer_->size_relative_call_thunks_ += thunk_code_.size();
       uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
       // Align after writing chunk, see the ReserveSpace() above.
-      offset = CompiledMethod::AlignCode(thunk_end_offset, kThumb2);
+      offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
       aligned_code_delta = offset - thunk_end_offset;
       if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) {
         return 0u;
       }
     }
     return offset;
   }
 
@@ -188,52 +193,27 @@
-  void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
-             uint32_t target_offset) OVERRIDE {
-    DCHECK_LE(literal_offset + 4u, code->size());
-    DCHECK_EQ(literal_offset & 1u, 0u);
-    DCHECK_EQ(patch_offset & 1u, 0u);
-    DCHECK_EQ(target_offset & 1u, 1u);  // Thumb2 mode bit.
+ protected:
+  uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset) {
     // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
-    uint32_t displacement = target_offset - 1u - patch_offset;
+    uint32_t displacement = target_offset - patch_offset;
     // NOTE: With unsigned arithmetic we do mean to use && rather than || below.
-    if (displacement > kMaxPositiveDisplacement && displacement < -kMaxNegativeDisplacement) {
+    if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) {
       // Unwritten thunks have higher offsets, check if it's within range.
       DCHECK(current_thunk_to_write_ == thunk_locations_.size() ||
              thunk_locations_[current_thunk_to_write_] > patch_offset);
       if (current_thunk_to_write_ != thunk_locations_.size() &&
-          thunk_locations_[current_thunk_to_write_] - patch_offset < kMaxPositiveDisplacement) {
+          thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) {
         displacement = thunk_locations_[current_thunk_to_write_] - patch_offset;
       } else {
         // We must have a previous thunk then.
         DCHECK_NE(current_thunk_to_write_, 0u);
         DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset);
         displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset;
-        DCHECK(displacement >= -kMaxNegativeDisplacement);
+        DCHECK(displacement >= -max_negative_displacement_);
       }
     }
-    displacement -= kPcDisplacement;  // The base PC is at the end of the 4-byte patch.
-    DCHECK_EQ(displacement & 1u, 0u);
-    DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u);  // 25-bit signed.
-    uint32_t signbit = (displacement >> 31) & 0x1;
-    uint32_t i1 = (displacement >> 23) & 0x1;
-    uint32_t i2 = (displacement >> 22) & 0x1;
-    uint32_t imm10 = (displacement >> 12) & 0x03ff;
-    uint32_t imm11 = (displacement >> 1) & 0x07ff;
-    uint32_t j1 = i1 ^ (signbit ^ 1);
-    uint32_t j2 = i2 ^ (signbit ^ 1);
-    uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11;
-    value |= 0xf000d000;  // BL
-
-    uint8_t* addr = &(*code)[literal_offset];
-    // Check that we're just overwriting an existing BL.
-    DCHECK_EQ(addr[1] & 0xf8, 0xf0);
-    DCHECK_EQ(addr[3] & 0xd0, 0xd0);
-    // Write the new BL.
-    addr[0] = (value >> 16) & 0xff;
-    addr[1] = (value >> 24) & 0xff;
-    addr[2] = (value >> 0) & 0xff;
-    addr[3] = (value >> 8) & 0xff;
+    return displacement;
   }
 
  private:
@@ -246,18 +226,18 @@ class OatWriter::Thumb2RelativeCallPatcher FINAL : public RelativeCallPatcher {
       // If still unresolved, check if we have a thunk within range.
       DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
       if (thunk_locations_.empty() ||
-          patch_offset - thunk_locations_.back() > kMaxNegativeDisplacement) {
-        return next_aligned_offset - patch_offset > kMaxPositiveDisplacement;
+          patch_offset - thunk_locations_.back() > max_negative_displacement_) {
+        return next_aligned_offset - patch_offset > max_positive_displacement_;
       }
     } else if (it->second >= patch_offset) {
-      DCHECK_LE(it->second - patch_offset, kMaxPositiveDisplacement);
+      DCHECK_LE(it->second - patch_offset, max_positive_displacement_);
     } else {
       // When calling back, check if we have a thunk that's closer than the actual target.
       uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back()) ?
          it->second : thunk_locations_.back();
       DCHECK_GT(patch_offset, target_offset);
-      if (patch_offset - target_offset > kMaxNegativeDisplacement) {
+      if (patch_offset - target_offset > max_negative_displacement_) {
         return true;
       }
     }
@@ -266,6 +246,60 @@
     return false;
   }
 
+  OatWriter* const writer_;
+  const InstructionSet instruction_set_;
+  const std::vector<uint8_t> thunk_code_;
+  const uint32_t max_positive_displacement_;
+  const uint32_t max_negative_displacement_;
+  std::vector<uint32_t> thunk_locations_;
+  size_t current_thunk_to_write_;
+
+  // ReserveSpace() tracks unprocessed patches.
+  typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
+  std::deque<UnprocessedPatch> unprocessed_patches_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativeCallPatcher);
+};
+
+class OatWriter::Thumb2RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
+ public:
+  explicit Thumb2RelativeCallPatcher(OatWriter* writer)
+      : ArmBaseRelativeCallPatcher(writer, kThumb2, CompileThunkCode(),
+                                   kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+  }
+
+  void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+             uint32_t target_offset) OVERRIDE {
+    DCHECK_LE(literal_offset + 4u, code->size());
+    DCHECK_EQ(literal_offset & 1u, 0u);
+    DCHECK_EQ(patch_offset & 1u, 0u);
+    DCHECK_EQ(target_offset & 1u, 1u);  // Thumb2 mode bit.
+    uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
+    displacement -= kPcDisplacement;  // The base PC is at the end of the 4-byte patch.
+    DCHECK_EQ(displacement & 1u, 0u);
+    DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u);  // 25-bit signed.
+    uint32_t signbit = (displacement >> 31) & 0x1;
+    uint32_t i1 = (displacement >> 23) & 0x1;
+    uint32_t i2 = (displacement >> 22) & 0x1;
+    uint32_t imm10 = (displacement >> 12) & 0x03ff;
+    uint32_t imm11 = (displacement >> 1) & 0x07ff;
+    uint32_t j1 = i1 ^ (signbit ^ 1);
+    uint32_t j2 = i2 ^ (signbit ^ 1);
+    uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11;
+    value |= 0xf000d000;  // BL
+
+    uint8_t* addr = &(*code)[literal_offset];
+    // Check that we're just overwriting an existing BL.
+    DCHECK_EQ(addr[1] & 0xf8, 0xf0);
+    DCHECK_EQ(addr[3] & 0xd0, 0xd0);
+    // Write the new BL.
+    addr[0] = (value >> 16) & 0xff;
+    addr[1] = (value >> 24) & 0xff;
+    addr[2] = (value >> 0) & 0xff;
+    addr[3] = (value >> 8) & 0xff;
+  }
+
+ private:
   static std::vector<uint8_t> CompileThunkCode() {
     // The thunk just uses the entry point in the ArtMethod. This works even for calls
     // to the generic JNI and interpreter trampolines.
@@ -289,16 +323,58 @@ class OatWriter::Thumb2RelativeCallPatcher FINAL : public RelativeCallPatcher {
   static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
   static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement;
 
-  OatWriter* const writer_;
-  const std::vector<uint8_t> thunk_code_;
-  std::vector<uint32_t> thunk_locations_;
-  size_t current_thunk_to_write_;
+  DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher);
+};
 
-  // ReserveSpace() tracks unprocessed patches.
-  typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
-  std::deque<UnprocessedPatch> unprocessed_patches_;
+class OatWriter::Arm64RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
+ public:
+  explicit Arm64RelativeCallPatcher(OatWriter* writer)
+      : ArmBaseRelativeCallPatcher(writer, kArm64, CompileThunkCode(),
+                                   kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+  }
 
-  DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher);
+  void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+             uint32_t target_offset) OVERRIDE {
+    DCHECK_LE(literal_offset + 4u, code->size());
+    DCHECK_EQ(literal_offset & 3u, 0u);
+    DCHECK_EQ(patch_offset & 3u, 0u);
+    DCHECK_EQ(target_offset & 3u, 0u);
+    uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
+    DCHECK_EQ(displacement & 3u, 0u);
+    DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u);  // 28-bit signed.
+    uint32_t value = (displacement & 0x0fffffffu) >> 2;
+    value |= 0x94000000;  // BL
+
+    uint8_t* addr = &(*code)[literal_offset];
+    // Check that we're just overwriting an existing BL.
+    DCHECK_EQ(addr[3] & 0xfc, 0x94);
+    // Write the new BL.
+    addr[0] = (value >> 0) & 0xff;
+    addr[1] = (value >> 8) & 0xff;
+    addr[2] = (value >> 16) & 0xff;
+    addr[3] = (value >> 24) & 0xff;
+  }
+
+ private:
+  static std::vector<uint8_t> CompileThunkCode() {
+    // The thunk just uses the entry point in the ArtMethod. This works even for calls
+    // to the generic JNI and interpreter trampolines.
+    arm64::Arm64Assembler assembler;
+    Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+    assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+    std::vector<uint8_t> thunk_code(assembler.CodeSize());
+    MemoryRegion code(thunk_code.data(), thunk_code.size());
+    assembler.FinalizeInstructions(code);
+    return thunk_code;
+  }
+
+  // Maximum positive and negative displacement measured from the patch location.
+  // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from
+  // the ARM64 PC pointing to the BL.)
+  static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u;
+  static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27);
+
+  DISALLOW_COPY_AND_ASSIGN(Arm64RelativeCallPatcher);
 };
 
 #define DCHECK_OFFSET() \
@@ -373,7 +449,8 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
       relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this));
       break;
     case kArm64:
-      // TODO: Implement relative calls for arm64.
+      relative_call_patcher_.reset(new Arm64RelativeCallPatcher(this));
+      break;
     default:
       relative_call_patcher_.reset(new NoRelativeCallPatcher);
       break;
@@ -868,8 +945,8 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
     : OatDexMethodVisitor(writer, relative_offset),
       out_(out),
       file_offset_(file_offset),
-      self_(Thread::Current()),
-      old_no_thread_suspension_cause_(self_->StartAssertNoThreadSuspension("OatWriter patching")),
+      soa_(Thread::Current()),
+      no_thread_suspension_(soa_.Self(), "OatWriter patching"),
       class_linker_(Runtime::Current()->GetClassLinker()),
       dex_cache_(nullptr) {
     if (writer_->image_writer_ != nullptr) {
@@ -877,12 +954,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
       CHECK(writer_->image_writer_->IsImageAddressSpaceReady());
       patched_code_.reserve(16 * KB);
     }
-    self_->TransitionFromSuspendedToRunnable();
   }
 
   ~WriteCodeMethodVisitor() UNLOCK_FUNCTION(Locks::mutator_lock_) {
-    self_->EndAssertNoThreadSuspension(old_no_thread_suspension_cause_);
-    self_->TransitionFromRunnableToSuspended(kNative);
   }
 
   bool StartClass(const DexFile* dex_file, size_t class_def_index)
@@ -997,9 +1071,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
 
  private:
   OutputStream* const out_;
-  size_t const file_offset_;
-  Thread* const self_;
-  const char* const old_no_thread_suspension_cause_;  // TODO: Use ScopedAssertNoThreadSuspension.
+  const size_t file_offset_;
+  const ScopedObjectAccess soa_;
+  const ScopedAssertNoThreadSuspension no_thread_suspension_;
   ClassLinker* const class_linker_;
   mirror::DexCache* dex_cache_;
   std::vector<uint8_t> patched_code_;
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5545ba817..a1e61b936 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -330,7 +330,9 @@ class OatWriter {
   class RelativeCallPatcher;
   class NoRelativeCallPatcher;
   class X86RelativeCallPatcher;
+  class ArmBaseRelativeCallPatcher;
   class Thumb2RelativeCallPatcher;
+  class Arm64RelativeCallPatcher;
 
   std::unique_ptr<RelativeCallPatcher> relative_call_patcher_;
-- 
2.11.0
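
Sanity check for the new A64 BL patching above: the 26-bit immediate written by Arm64RelativeCallPatcher::Patch() is a word (4-byte) offset, which is where the [-2^27, 2^27-4] byte range of kMaxPositiveDisplacement/kMaxNegativeDisplacement comes from. The standalone sketch below round-trips that encoding; EncodeBl/DecodeBl are hypothetical helper names for illustration only, not part of this patch or of ART.

// encode_bl_sketch.cc - round-trips the A64 BL encoding used by
// Arm64RelativeCallPatcher::Patch(). Hypothetical helpers, not ART code.
#include <cassert>
#include <cstdint>

uint32_t EncodeBl(int32_t displacement) {
  assert((displacement & 3) == 0);  // BL targets are 4-byte aligned.
  assert(displacement >= -(1 << 27) && displacement <= (1 << 27) - 4);  // 28-bit signed range.
  uint32_t imm26 = (static_cast<uint32_t>(displacement) & 0x0fffffffu) >> 2;
  return 0x94000000u | imm26;  // BL opcode in bits [31:26].
}

int32_t DecodeBl(uint32_t insn) {
  assert((insn & 0xfc000000u) == 0x94000000u);  // Must be a BL.
  // Sign-extend the 26-bit immediate (two's complement assumed).
  int32_t imm26 = static_cast<int32_t>(insn << 6) >> 6;
  return imm26 * 4;  // Word offset back to byte offset.
}

int main() {
  for (int32_t d : {0, 4, -4, (1 << 27) - 4, -(1 << 27)}) {
    assert(DecodeBl(EncodeBl(d)) == d);
  }
  return 0;
}

This also shows why ArmBaseRelativeCallPatcher reserves thunks: a displacement outside that range simply does not fit in the instruction, so the call must be redirected through a nearby thunk instead.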