From 69a503050fb8a7b3a79b2cd2cdc2d8fbc594575d Mon Sep 17 00:00:00 2001
From: Zheng Xu
Date: Tue, 14 Apr 2015 20:04:41 +0800
Subject: [PATCH] ARM64: Remove suspend register.

It also cleans up the frame build/remove code used by the JNI compiler and
generates stp/ldp instead of str/ldr. x19 has also been unblocked in both the
Quick and Optimizing compilers.

Change-Id: Idbeac0942265f493266b2ef9b7a65bb4054f0e2d
---
 compiler/dex/quick/arm64/arm64_lir.h               |   7 +-
 compiler/dex/quick/arm64/int_arm64.cc              |   8 +-
 compiler/dex/quick/arm64/target_arm64.cc           |   9 +-
 compiler/dex/quick/quick_cfi_test_expected.inc     |  20 +-
 compiler/jni/jni_cfi_test_expected.inc             | 213 +++++++++------------
 .../jni/quick/arm64/calling_convention_arm64.cc    |  38 ++--
 compiler/optimizing/code_generator_arm64.cc        |  57 +-----
 compiler/optimizing/code_generator_arm64.h         |   7 +-
 compiler/utils/arm64/assembler_arm64.cc            | 171 ++++++++++-------
 compiler/utils/arm64/assembler_arm64.h             |   3 +
 runtime/arch/arm64/asm_support_arm64.S             |   6 -
 runtime/arch/arm64/asm_support_arm64.h             |   2 +-
 runtime/arch/arm64/quick_entrypoints_arm64.S       | 166 ++++++++--------
 runtime/arch/arm64/quick_method_frame_info_arm64.h |  12 +-
 runtime/asm_support.h                              |   4 +-
 .../quick/quick_trampoline_entrypoints.cc          |   2 +-
 16 files changed, 346 insertions(+), 379 deletions(-)

diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index f6fa9389d..5bf77aae5 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -36,8 +36,7 @@ namespace art {
  * the linker, by the trampolines and other stubs (the backend uses
  * these as temporary registers).
  * r18 : (rxSELF) is reserved (pointer to thread-local storage).
- * r19 : (rwSUSPEND) is reserved (suspend check/debugger assist).
- * r20-r29: Callee save registers (promotion targets).
+ * r19-r29: Callee save registers (promotion targets).
  * r30 : (lr) is reserved (the link register).
  * rsp : (sp) is reserved (the stack pointer).
  * rzr : (zr) is reserved (the zero register).
@@ -146,7 +145,6 @@ enum A64NativeRegisterPool {  // private marker to avoid generate-operator-out.p
   // Aliases which are not defined in "ARM Architecture Reference, register names".
   rxIP0 = rx16,
   rxIP1 = rx17,
-  rxSUSPEND = rx19,
   rxSELF = rx18,
   rxLR = rx30,
   /*
@@ -156,7 +154,6 @@ enum A64NativeRegisterPool {  // private marker to avoid generate-operator-out.p
   */
   rwIP0 = rw16,
   rwIP1 = rw17,
-  rwSUSPEND = rw19,
   rwSELF = rw18,
   rwLR = rw30,
 };
@@ -176,12 +173,10 @@ constexpr RegStorage rs_wIP0(RegStorage::kValid | rwIP0);
 constexpr RegStorage rs_xIP1(RegStorage::kValid | rxIP1);
 constexpr RegStorage rs_wIP1(RegStorage::kValid | rwIP1);
 // Reserved registers.
-constexpr RegStorage rs_xSUSPEND(RegStorage::kValid | rxSUSPEND);
 constexpr RegStorage rs_xSELF(RegStorage::kValid | rxSELF);
 constexpr RegStorage rs_sp(RegStorage::kValid | rsp);
 constexpr RegStorage rs_xLR(RegStorage::kValid | rxLR);
 // TODO: eliminate the need for these.
-constexpr RegStorage rs_wSUSPEND(RegStorage::kValid | rwSUSPEND); constexpr RegStorage rs_wSELF(RegStorage::kValid | rwSELF); constexpr RegStorage rs_wsp(RegStorage::kValid | rwsp); constexpr RegStorage rs_wLR(RegStorage::kValid | rwLR); diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 20f61f226..b7dbd0a97 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -1010,8 +1010,12 @@ void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg ATTRIBUTE_UNUSED) { // Test suspend flag, return target of taken suspend branch LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) { - NewLIR3(kA64Subs3rRd, rwSUSPEND, rwSUSPEND, 1); - return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target); + RegStorage r_tmp = AllocTemp(); + LoadBaseDisp(rs_xSELF, Thread::ThreadFlagsOffset().Int32Value(), r_tmp, + kUnsignedHalf, kNotVolatile); + LIR* cmp_branch = OpCmpImmBranch(target == nullptr ? kCondNe: kCondEq, r_tmp, 0, target); + FreeTemp(r_tmp); + return cmp_branch; } // Decrement register and branch on condition diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index c5c0dc544..fc32ecd95 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -53,10 +53,9 @@ static constexpr RegStorage dp_regs_arr[] = rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31}; // Note: we are not able to call to C function since rs_xSELF is a special register need to be // preserved but would be scratched by native functions follow aapcs64. -static constexpr RegStorage reserved_regs_arr[] = - {rs_wSUSPEND, rs_wSELF, rs_wsp, rs_wLR, rs_wzr}; -static constexpr RegStorage reserved64_regs_arr[] = - {rs_xSUSPEND, rs_xSELF, rs_sp, rs_xLR, rs_xzr}; +static constexpr RegStorage reserved_regs_arr[] = {rs_wSELF, rs_wsp, rs_wLR, rs_wzr}; +static constexpr RegStorage reserved64_regs_arr[] = {rs_xSELF, rs_sp, rs_xLR, rs_xzr}; + static constexpr RegStorage core_temps_arr[] = {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7, rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16, @@ -110,7 +109,7 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { RegStorage res_reg = RegStorage::InvalidReg(); switch (reg) { case kSelf: res_reg = rs_wSELF; break; - case kSuspend: res_reg = rs_wSUSPEND; break; + case kSuspend: res_reg = RegStorage::InvalidReg(); break; case kLr: res_reg = rs_wLR; break; case kPc: res_reg = RegStorage::InvalidReg(); break; case kSp: res_reg = rs_wsp; break; diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc index 634fdeead..48109d2f4 100644 --- a/compiler/dex/quick/quick_cfi_test_expected.inc +++ b/compiler/dex/quick/quick_cfi_test_expected.inc @@ -33,15 +33,15 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000014: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kArm64[] = { - 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9, + 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D, - 0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, + 0xF3, 0xD3, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; static constexpr uint8_t expected_cfi_kArm64[] = { - 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94, - 0x06, 0x95, 0x04, 0x44, 0x9E, 
0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06, - 0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, + 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x93, + 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06, + 0x49, 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; // 0x00000000: sub sp, sp, #0x40 (64) @@ -49,9 +49,9 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000004: stp d8, d9, [sp, #24] // 0x00000008: .cfi_offset_extended: r72 at cfa-40 // 0x00000008: .cfi_offset_extended: r73 at cfa-32 -// 0x00000008: stp x20, x21, [sp, #40] -// 0x0000000c: .cfi_offset: r20 at cfa-24 -// 0x0000000c: .cfi_offset: r21 at cfa-16 +// 0x00000008: stp x19, x20, [sp, #40] +// 0x0000000c: .cfi_offset: r19 at cfa-24 +// 0x0000000c: .cfi_offset: r20 at cfa-16 // 0x0000000c: str lr, [sp, #56] // 0x00000010: .cfi_offset: r30 at cfa-8 // 0x00000010: str w0, [sp] @@ -59,9 +59,9 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000014: ldp d8, d9, [sp, #24] // 0x00000018: .cfi_restore_extended: r72 // 0x00000018: .cfi_restore_extended: r73 -// 0x00000018: ldp x20, x21, [sp, #40] +// 0x00000018: ldp x19, x20, [sp, #40] +// 0x0000001c: .cfi_restore: r19 // 0x0000001c: .cfi_restore: r20 -// 0x0000001c: .cfi_restore: r21 // 0x0000001c: ldr lr, [sp, #56] // 0x00000020: .cfi_restore: r30 // 0x00000020: add sp, sp, #0x40 (64) diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc index 47e6f106c..eaf7872a0 100644 --- a/compiler/jni/jni_cfi_test_expected.inc +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -81,132 +81,109 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x0000002a: .cfi_def_cfa_offset: 128 static constexpr uint8_t expected_asm_kArm64[] = { - 0xFF, 0x03, 0x03, 0xD1, 0xFE, 0x5F, 0x00, 0xF9, 0xFD, 0x5B, 0x00, 0xF9, - 0xFC, 0x57, 0x00, 0xF9, 0xFB, 0x53, 0x00, 0xF9, 0xFA, 0x4F, 0x00, 0xF9, - 0xF9, 0x4B, 0x00, 0xF9, 0xF8, 0x47, 0x00, 0xF9, 0xF7, 0x43, 0x00, 0xF9, - 0xF6, 0x3F, 0x00, 0xF9, 0xF5, 0x3B, 0x00, 0xF9, 0xF4, 0x37, 0x00, 0xF9, - 0xEF, 0x33, 0x00, 0xFD, 0xEE, 0x2F, 0x00, 0xFD, 0xED, 0x2B, 0x00, 0xFD, - 0xEC, 0x27, 0x00, 0xFD, 0xEB, 0x23, 0x00, 0xFD, 0xEA, 0x1F, 0x00, 0xFD, - 0xE9, 0x1B, 0x00, 0xFD, 0xE8, 0x17, 0x00, 0xFD, 0xF5, 0x03, 0x12, 0xAA, + 0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9, + 0xF7, 0x63, 0x08, 0xA9, 0xF9, 0x6B, 0x09, 0xA9, 0xFB, 0x73, 0x0A, 0xA9, + 0xFD, 0x7B, 0x0B, 0xA9, 0xE8, 0x27, 0x02, 0x6D, 0xEA, 0x2F, 0x03, 0x6D, + 0xEC, 0x37, 0x04, 0x6D, 0xEE, 0x3F, 0x05, 0x6D, 0xF5, 0x03, 0x12, 0xAA, 0xE0, 0x03, 0x00, 0xB9, 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD, 0xE2, 0xCF, 0x00, 0xB9, 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1, - 0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xFE, 0x5F, 0x40, 0xF9, - 0xFD, 0x5B, 0x40, 0xF9, 0xFC, 0x57, 0x40, 0xF9, 0xFB, 0x53, 0x40, 0xF9, - 0xFA, 0x4F, 0x40, 0xF9, 0xF9, 0x4B, 0x40, 0xF9, 0xF8, 0x47, 0x40, 0xF9, - 0xF7, 0x43, 0x40, 0xF9, 0xF6, 0x3F, 0x40, 0xF9, 0xF5, 0x3B, 0x40, 0xF9, - 0xF4, 0x37, 0x40, 0xF9, 0xEF, 0x33, 0x40, 0xFD, 0xEE, 0x2F, 0x40, 0xFD, - 0xED, 0x2B, 0x40, 0xFD, 0xEC, 0x27, 0x40, 0xFD, 0xEB, 0x23, 0x40, 0xFD, - 0xEA, 0x1F, 0x40, 0xFD, 0xE9, 0x1B, 0x40, 0xFD, 0xE8, 0x17, 0x40, 0xFD, + 0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xF3, 0x53, 0x46, 0xA9, + 0xF5, 0x5B, 0x47, 0xA9, 0xF7, 0x63, 0x48, 0xA9, 0xF9, 0x6B, 0x49, 0xA9, + 0xFB, 0x73, 0x4A, 0xA9, 0xFD, 0x7B, 0x4B, 0xA9, 0xE8, 0x27, 0x42, 0x6D, + 0xEA, 0x2F, 0x43, 0x6D, 0xEC, 0x37, 0x44, 0x6D, 
0xEE, 0x3F, 0x45, 0x6D, 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; static constexpr uint8_t expected_cfi_kArm64[] = { - 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x9E, 0x02, 0x44, 0x9D, 0x04, 0x44, 0x9C, - 0x06, 0x44, 0x9B, 0x08, 0x44, 0x9A, 0x0A, 0x44, 0x99, 0x0C, 0x44, 0x98, - 0x0E, 0x44, 0x97, 0x10, 0x44, 0x96, 0x12, 0x44, 0x95, 0x14, 0x44, 0x94, - 0x16, 0x44, 0x05, 0x4F, 0x18, 0x44, 0x05, 0x4E, 0x1A, 0x44, 0x05, 0x4D, - 0x1C, 0x44, 0x05, 0x4C, 0x1E, 0x44, 0x05, 0x4B, 0x20, 0x44, 0x05, 0x4A, - 0x22, 0x44, 0x05, 0x49, 0x24, 0x44, 0x05, 0x48, 0x26, 0x5C, 0x0E, 0xE0, - 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x0A, 0x48, 0xDE, 0x44, 0xDD, 0x44, 0xDC, - 0x44, 0xDB, 0x44, 0xDA, 0x44, 0xD9, 0x44, 0xD8, 0x44, 0xD7, 0x44, 0xD6, - 0x44, 0xD5, 0x44, 0xD4, 0x44, 0x06, 0x4F, 0x44, 0x06, 0x4E, 0x44, 0x06, - 0x4D, 0x44, 0x06, 0x4C, 0x44, 0x06, 0x4B, 0x44, 0x06, 0x4A, 0x44, 0x06, - 0x49, 0x44, 0x06, 0x48, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01, + 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x93, 0x18, 0x94, 0x16, 0x44, 0x95, 0x14, + 0x96, 0x12, 0x44, 0x97, 0x10, 0x98, 0x0E, 0x44, 0x99, 0x0C, 0x9A, 0x0A, + 0x44, 0x9B, 0x08, 0x9C, 0x06, 0x44, 0x9D, 0x04, 0x9E, 0x02, 0x44, 0x05, + 0x48, 0x28, 0x05, 0x49, 0x26, 0x44, 0x05, 0x4A, 0x24, 0x05, 0x4B, 0x22, + 0x44, 0x05, 0x4C, 0x20, 0x05, 0x4D, 0x1E, 0x44, 0x05, 0x4E, 0x1C, 0x05, + 0x4F, 0x1A, 0x5C, 0x0E, 0xE0, 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x0A, + 0x44, 0xD3, 0xD4, 0x44, 0xD5, 0xD6, 0x44, 0xD7, 0xD8, 0x44, 0xD9, 0xDA, + 0x44, 0xDB, 0xDC, 0x44, 0xDD, 0xDE, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44, + 0x06, 0x4A, 0x06, 0x4B, 0x44, 0x06, 0x4C, 0x06, 0x4D, 0x44, 0x06, 0x4E, + 0x06, 0x4F, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01, }; // 0x00000000: sub sp, sp, #0xc0 (192) // 0x00000004: .cfi_def_cfa_offset: 192 -// 0x00000004: str lr, [sp, #184] -// 0x00000008: .cfi_offset: r30 at cfa-8 -// 0x00000008: str x29, [sp, #176] -// 0x0000000c: .cfi_offset: r29 at cfa-16 -// 0x0000000c: str x28, [sp, #168] -// 0x00000010: .cfi_offset: r28 at cfa-24 -// 0x00000010: str x27, [sp, #160] -// 0x00000014: .cfi_offset: r27 at cfa-32 -// 0x00000014: str x26, [sp, #152] -// 0x00000018: .cfi_offset: r26 at cfa-40 -// 0x00000018: str x25, [sp, #144] -// 0x0000001c: .cfi_offset: r25 at cfa-48 -// 0x0000001c: str x24, [sp, #136] -// 0x00000020: .cfi_offset: r24 at cfa-56 -// 0x00000020: str x23, [sp, #128] -// 0x00000024: .cfi_offset: r23 at cfa-64 -// 0x00000024: str x22, [sp, #120] -// 0x00000028: .cfi_offset: r22 at cfa-72 -// 0x00000028: str x21, [sp, #112] -// 0x0000002c: .cfi_offset: r21 at cfa-80 -// 0x0000002c: str x20, [sp, #104] -// 0x00000030: .cfi_offset: r20 at cfa-88 -// 0x00000030: str d15, [sp, #96] -// 0x00000034: .cfi_offset_extended: r79 at cfa-96 -// 0x00000034: str d14, [sp, #88] -// 0x00000038: .cfi_offset_extended: r78 at cfa-104 -// 0x00000038: str d13, [sp, #80] -// 0x0000003c: .cfi_offset_extended: r77 at cfa-112 -// 0x0000003c: str d12, [sp, #72] -// 0x00000040: .cfi_offset_extended: r76 at cfa-120 -// 0x00000040: str d11, [sp, #64] -// 0x00000044: .cfi_offset_extended: r75 at cfa-128 -// 0x00000044: str d10, [sp, #56] -// 0x00000048: .cfi_offset_extended: r74 at cfa-136 -// 0x00000048: str d9, [sp, #48] -// 0x0000004c: .cfi_offset_extended: r73 at cfa-144 -// 0x0000004c: str d8, [sp, #40] -// 0x00000050: .cfi_offset_extended: r72 at cfa-152 -// 0x00000050: mov x21, tr -// 0x00000054: str w0, [sp] -// 0x00000058: str w1, [sp, #196] -// 0x0000005c: str s0, [sp, #200] -// 0x00000060: str w2, [sp, #204] -// 0x00000064: str w3, [sp, #208] -// 0x00000068: sub sp, sp, #0x20 (32) 
-// 0x0000006c: .cfi_def_cfa_offset: 224 -// 0x0000006c: add sp, sp, #0x20 (32) -// 0x00000070: .cfi_def_cfa_offset: 192 -// 0x00000070: .cfi_remember_state -// 0x00000070: mov tr, x21 -// 0x00000074: ldr lr, [sp, #184] -// 0x00000078: .cfi_restore: r30 -// 0x00000078: ldr x29, [sp, #176] -// 0x0000007c: .cfi_restore: r29 -// 0x0000007c: ldr x28, [sp, #168] -// 0x00000080: .cfi_restore: r28 -// 0x00000080: ldr x27, [sp, #160] -// 0x00000084: .cfi_restore: r27 -// 0x00000084: ldr x26, [sp, #152] -// 0x00000088: .cfi_restore: r26 -// 0x00000088: ldr x25, [sp, #144] -// 0x0000008c: .cfi_restore: r25 -// 0x0000008c: ldr x24, [sp, #136] -// 0x00000090: .cfi_restore: r24 -// 0x00000090: ldr x23, [sp, #128] -// 0x00000094: .cfi_restore: r23 -// 0x00000094: ldr x22, [sp, #120] -// 0x00000098: .cfi_restore: r22 -// 0x00000098: ldr x21, [sp, #112] -// 0x0000009c: .cfi_restore: r21 -// 0x0000009c: ldr x20, [sp, #104] -// 0x000000a0: .cfi_restore: r20 -// 0x000000a0: ldr d15, [sp, #96] -// 0x000000a4: .cfi_restore_extended: r79 -// 0x000000a4: ldr d14, [sp, #88] -// 0x000000a8: .cfi_restore_extended: r78 -// 0x000000a8: ldr d13, [sp, #80] -// 0x000000ac: .cfi_restore_extended: r77 -// 0x000000ac: ldr d12, [sp, #72] -// 0x000000b0: .cfi_restore_extended: r76 -// 0x000000b0: ldr d11, [sp, #64] -// 0x000000b4: .cfi_restore_extended: r75 -// 0x000000b4: ldr d10, [sp, #56] -// 0x000000b8: .cfi_restore_extended: r74 -// 0x000000b8: ldr d9, [sp, #48] -// 0x000000bc: .cfi_restore_extended: r73 -// 0x000000bc: ldr d8, [sp, #40] -// 0x000000c0: .cfi_restore_extended: r72 -// 0x000000c0: add sp, sp, #0xc0 (192) -// 0x000000c4: .cfi_def_cfa_offset: 0 -// 0x000000c4: ret -// 0x000000c8: .cfi_restore_state -// 0x000000c8: .cfi_def_cfa_offset: 192 +// 0x00000004: stp x19, x20, [sp, #96] +// 0x00000008: .cfi_offset: r19 at cfa-96 +// 0x00000008: .cfi_offset: r20 at cfa-88 +// 0x00000008: stp x21, x22, [sp, #112] +// 0x0000000c: .cfi_offset: r21 at cfa-80 +// 0x0000000c: .cfi_offset: r22 at cfa-72 +// 0x0000000c: stp x23, x24, [sp, #128] +// 0x00000010: .cfi_offset: r23 at cfa-64 +// 0x00000010: .cfi_offset: r24 at cfa-56 +// 0x00000010: stp x25, x26, [sp, #144] +// 0x00000014: .cfi_offset: r25 at cfa-48 +// 0x00000014: .cfi_offset: r26 at cfa-40 +// 0x00000014: stp x27, x28, [sp, #160] +// 0x00000018: .cfi_offset: r27 at cfa-32 +// 0x00000018: .cfi_offset: r28 at cfa-24 +// 0x00000018: stp x29, lr, [sp, #176] +// 0x0000001c: .cfi_offset: r29 at cfa-16 +// 0x0000001c: .cfi_offset: r30 at cfa-8 +// 0x0000001c: stp d8, d9, [sp, #32] +// 0x00000020: .cfi_offset_extended: r72 at cfa-160 +// 0x00000020: .cfi_offset_extended: r73 at cfa-152 +// 0x00000020: stp d10, d11, [sp, #48] +// 0x00000024: .cfi_offset_extended: r74 at cfa-144 +// 0x00000024: .cfi_offset_extended: r75 at cfa-136 +// 0x00000024: stp d12, d13, [sp, #64] +// 0x00000028: .cfi_offset_extended: r76 at cfa-128 +// 0x00000028: .cfi_offset_extended: r77 at cfa-120 +// 0x00000028: stp d14, d15, [sp, #80] +// 0x0000002c: .cfi_offset_extended: r78 at cfa-112 +// 0x0000002c: .cfi_offset_extended: r79 at cfa-104 +// 0x0000002c: mov x21, tr +// 0x00000030: str w0, [sp] +// 0x00000034: str w1, [sp, #196] +// 0x00000038: str s0, [sp, #200] +// 0x0000003c: str w2, [sp, #204] +// 0x00000040: str w3, [sp, #208] +// 0x00000044: sub sp, sp, #0x20 (32) +// 0x00000048: .cfi_def_cfa_offset: 224 +// 0x00000048: add sp, sp, #0x20 (32) +// 0x0000004c: .cfi_def_cfa_offset: 192 +// 0x0000004c: mov tr, x21 +// 0x00000050: .cfi_remember_state +// 0x00000050: ldp x19, x20, [sp, #96] 
+// 0x00000054: .cfi_restore: r19 +// 0x00000054: .cfi_restore: r20 +// 0x00000054: ldp x21, x22, [sp, #112] +// 0x00000058: .cfi_restore: r21 +// 0x00000058: .cfi_restore: r22 +// 0x00000058: ldp x23, x24, [sp, #128] +// 0x0000005c: .cfi_restore: r23 +// 0x0000005c: .cfi_restore: r24 +// 0x0000005c: ldp x25, x26, [sp, #144] +// 0x00000060: .cfi_restore: r25 +// 0x00000060: .cfi_restore: r26 +// 0x00000060: ldp x27, x28, [sp, #160] +// 0x00000064: .cfi_restore: r27 +// 0x00000064: .cfi_restore: r28 +// 0x00000064: ldp x29, lr, [sp, #176] +// 0x00000068: .cfi_restore: r29 +// 0x00000068: .cfi_restore: r30 +// 0x00000068: ldp d8, d9, [sp, #32] +// 0x0000006c: .cfi_restore_extended: r72 +// 0x0000006c: .cfi_restore_extended: r73 +// 0x0000006c: ldp d10, d11, [sp, #48] +// 0x00000070: .cfi_restore_extended: r74 +// 0x00000070: .cfi_restore_extended: r75 +// 0x00000070: ldp d12, d13, [sp, #64] +// 0x00000074: .cfi_restore_extended: r76 +// 0x00000074: .cfi_restore_extended: r77 +// 0x00000074: ldp d14, d15, [sp, #80] +// 0x00000078: .cfi_restore_extended: r78 +// 0x00000078: .cfi_restore_extended: r79 +// 0x00000078: add sp, sp, #0xc0 (192) +// 0x0000007c: .cfi_def_cfa_offset: 0 +// 0x0000007c: ret +// 0x00000080: .cfi_restore_state +// 0x00000080: .cfi_def_cfa_offset: 192 static constexpr uint8_t expected_asm_kX86[] = { 0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3, diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index 05eb80a51..a6caff1c7 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -157,27 +157,25 @@ const ManagedRegisterEntrySpills& Arm64ManagedRuntimeCallingConvention::EntrySpi Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) { - // TODO: Ugly hard code... - // Should generate these according to the spill mask automatically. 
- callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X20)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X21)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X22)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X23)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X24)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X25)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X26)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X27)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X28)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X29)); - callee_save_regs_.push_back(Arm64ManagedRegister::FromXRegister(X30)); + uint32_t core_spill_mask = CoreSpillMask(); + for (int x_reg = 0; x_reg < kNumberOfXRegisters; ++x_reg) { + if (((1 << x_reg) & core_spill_mask) != 0) { + callee_save_regs_.push_back( + Arm64ManagedRegister::FromXRegister(static_cast(x_reg))); + } + } - for (size_t i = 0; i < arraysize(kDCalleeSaveRegisters); ++i) { - callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(kDCalleeSaveRegisters[i])); + uint32_t fp_spill_mask = FpSpillMask(); + for (int d_reg = 0; d_reg < kNumberOfDRegisters; ++d_reg) { + if (((1 << d_reg) & fp_spill_mask) != 0) { + callee_save_regs_.push_back( + Arm64ManagedRegister::FromDRegister(static_cast(d_reg))); + } } } uint32_t Arm64JniCallingConvention::CoreSpillMask() const { - // Compute spill mask to agree with callee saves initialized in the constructor + // Compute spill mask to agree with callee saves initialized in the constructor. // Note: The native jni function may call to some VM runtime functions which may suspend // or trigger GC. And the jni method frame will become top quick frame in those cases. // So we need to satisfy GC to save LR and callee-save registers which is similar to @@ -186,12 +184,14 @@ uint32_t Arm64JniCallingConvention::CoreSpillMask() const { // Jni method is the method that compiled by jni compiler. // Call chain: managed code(java) --> jni method --> jni function. // Thread register(X18, scratched by aapcs64) is not saved on stack, it is saved in ETR(X21). - // Suspend register(x19) is preserved by aapcs64 and it is not used in Jni method. - return 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | 1 << X25 | - 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR; + return 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | + 1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR; } uint32_t Arm64JniCallingConvention::FpSpillMask() const { + // Considering the case, java_method_1 --> jni method --> jni function --> java_method_2, we may + // break on java_method_2 and we still need to find out the values of DEX registers in + // java_method_1. So all callee-saves(in managed code) need to be saved. uint32_t result = 0; for (size_t i = 0; i < arraysize(kDCalleeSaveRegisters); ++i) { result |= (1 << kDCalleeSaveRegisters[i]); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 33eacbaf0..263f83fd3 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -466,8 +466,10 @@ void CodeGeneratorARM64::GenerateFrameEntry() { // sp[0] : current method. 
__ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); GetAssembler()->cfi().AdjustCFAOffset(frame_size); - SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); - SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(), + frame_size - GetCoreSpillSize()); + GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(), + frame_size - FrameEntrySpillSize()); } } @@ -475,8 +477,10 @@ void CodeGeneratorARM64::GenerateFrameExit() { GetAssembler()->cfi().RememberState(); if (!HasEmptyFrame()) { int frame_size = GetFrameSize(); - UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); - UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(), + frame_size - FrameEntrySpillSize()); + GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(), + frame_size - GetCoreSpillSize()); __ Drop(frame_size); GetAssembler()->cfi().AdjustCFAOffset(-frame_size); } @@ -485,51 +489,6 @@ void CodeGeneratorARM64::GenerateFrameExit() { GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); } -static inline dwarf::Reg DWARFReg(CPURegister reg) { - if (reg.IsFPRegister()) { - return dwarf::Reg::Arm64Fp(reg.code()); - } else { - DCHECK_LT(reg.code(), 31u); // X0 - X30. - return dwarf::Reg::Arm64Core(reg.code()); - } -} - -void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) { - int size = registers.RegisterSizeInBytes(); - while (registers.Count() >= 2) { - const CPURegister& dst0 = registers.PopLowestIndex(); - const CPURegister& dst1 = registers.PopLowestIndex(); - __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset)); - GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); - GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size); - offset += 2 * size; - } - if (!registers.IsEmpty()) { - const CPURegister& dst0 = registers.PopLowestIndex(); - __ Str(dst0, MemOperand(__ StackPointer(), offset)); - GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); - } - DCHECK(registers.IsEmpty()); -} - -void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) { - int size = registers.RegisterSizeInBytes(); - while (registers.Count() >= 2) { - const CPURegister& dst0 = registers.PopLowestIndex(); - const CPURegister& dst1 = registers.PopLowestIndex(); - __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset)); - GetAssembler()->cfi().Restore(DWARFReg(dst0)); - GetAssembler()->cfi().Restore(DWARFReg(dst1)); - offset += 2 * size; - } - if (!registers.IsEmpty()) { - const CPURegister& dst0 = registers.PopLowestIndex(); - __ Ldr(dst0, MemOperand(__ StackPointer(), offset)); - GetAssembler()->cfi().Restore(DWARFReg(dst0)); - } - DCHECK(registers.IsEmpty()); -} - void CodeGeneratorARM64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 9430e3103..07c6dd059 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -46,14 +46,11 @@ static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegi const vixl::Register tr = vixl::x18; // Thread Register static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke. 
-const vixl::Register kQuickSuspendRegister = vixl::x19; const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1); const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31); -// TODO: When the runtime does not use kQuickSuspendRegister as a suspend -// counter remove it from the reserved registers list. -const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr); +const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr); // Callee-saved registers defined by AAPCS64. const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister, @@ -227,8 +224,6 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; - void SpillRegisters(vixl::CPURegList registers, int offset); - void UnspillRegisters(vixl::CPURegList registers, int offset); vixl::CPURegList GetFramePreservedCoreRegisters() const { return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index fbd04114e..98702a23a 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -640,56 +640,89 @@ void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { ___ Brk(); } -static dwarf::Reg DWARFReg(XRegister reg) { - return dwarf::Reg::Arm64Core(static_cast(reg)); +static inline dwarf::Reg DWARFReg(CPURegister reg) { + if (reg.IsFPRegister()) { + return dwarf::Reg::Arm64Fp(reg.code()); + } else { + DCHECK_LT(reg.code(), 31u); // X0 - X30. + return dwarf::Reg::Arm64Core(reg.code()); + } } -static dwarf::Reg DWARFReg(DRegister reg) { - return dwarf::Reg::Arm64Fp(static_cast(reg)); +void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + const Register sp = vixl_masm_->StackPointer(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + ___ Stp(dst0, dst1, MemOperand(sp, offset)); + cfi_.RelOffset(DWARFReg(dst0), offset); + cfi_.RelOffset(DWARFReg(dst1), offset + size); + offset += 2 * size; + } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + ___ Str(dst0, MemOperand(sp, offset)); + cfi_.RelOffset(DWARFReg(dst0), offset); + } + DCHECK(registers.IsEmpty()); +} + +void Arm64Assembler::UnspillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + const Register sp = vixl_masm_->StackPointer(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + ___ Ldp(dst0, dst1, MemOperand(sp, offset)); + cfi_.Restore(DWARFReg(dst0)); + cfi_.Restore(DWARFReg(dst1)); + offset += 2 * size; + } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + ___ Ldr(dst0, MemOperand(sp, offset)); + cfi_.Restore(DWARFReg(dst0)); + } + DCHECK(registers.IsEmpty()); } -constexpr size_t kFramePointerSize = 8; -constexpr unsigned int kJniRefSpillRegsSize = 11 + 8; - void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector& callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) { - CHECK_ALIGNED(frame_size, kStackAlignment); - CHECK(X0 == method_reg.AsArm64().AsXRegister()); - - // TODO: *create APCS FP - end of FP chain; - // *add support for saving a 
different set of callee regs. - // For now we check that the size of callee regs vector is 11 core registers and 8 fp registers. - CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize); - // Increase frame to required size - must be at least space to push StackReference. - CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize); + const std::vector& callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) { + // Setup VIXL CPURegList for callee-saves. + CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); + CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); + for (auto r : callee_save_regs) { + Arm64ManagedRegister reg = r.AsArm64(); + if (reg.IsXRegister()) { + core_reg_list.Combine(reg_x(reg.AsXRegister()).code()); + } else { + DCHECK(reg.IsDRegister()); + fp_reg_list.Combine(reg_d(reg.AsDRegister()).code()); + } + } + size_t core_reg_size = core_reg_list.TotalSizeInBytes(); + size_t fp_reg_size = fp_reg_list.TotalSizeInBytes(); + + // Increase frame to required size. + DCHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK_GE(frame_size, core_reg_size + fp_reg_size + sizeof(StackReference)); IncreaseFrameSize(frame_size); - // TODO: Ugly hard code... - // Should generate these according to the spill mask automatically. - // TUNING: Use stp. - // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). - size_t reg_offset = frame_size; - static constexpr XRegister x_spills[] = { - LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 }; - for (size_t i = 0; i < arraysize(x_spills); i++) { - XRegister reg = x_spills[i]; - reg_offset -= 8; - StoreToOffset(reg, SP, reg_offset); - cfi_.RelOffset(DWARFReg(reg), reg_offset); - } - for (int d = 15; d >= 8; d--) { - DRegister reg = static_cast(d); - reg_offset -= 8; - StoreDToOffset(reg, SP, reg_offset); - cfi_.RelOffset(DWARFReg(reg), reg_offset); - } + // Save callee-saves. + SpillRegisters(core_reg_list, frame_size - core_reg_size); + SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); - // Move TR(Caller saved) to ETR(Callee saved). The original (ETR)X21 has been saved on stack. - // This way we make sure that TR is not trashed by native code. + // Note: This is specific to JNI method frame. + // We will need to move TR(Caller saved in AAPCS) to ETR(Callee saved in AAPCS). The original + // (ETR)X21 has been saved on stack. In this way, we can restore TR later. + DCHECK(!core_reg_list.IncludesAliasOf(reg_x(TR))); + DCHECK(core_reg_list.IncludesAliasOf(reg_x(ETR))); ___ Mov(reg_x(ETR), reg_x(TR)); // Write StackReference. + DCHECK(X0 == method_reg.AsArm64().AsXRegister()); DCHECK_EQ(4U, sizeof(StackReference)); StoreWToOffset(StoreOperandType::kStoreWord, W0, SP, 0); @@ -717,37 +750,39 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, } } -void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector& callee_save_regs) { - CHECK_ALIGNED(frame_size, kStackAlignment); - cfi_.RememberState(); - - // For now we only check that the size of the frame is greater than the spill size. - CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize); - CHECK_GT(frame_size, kJniRefSpillRegsSize * kFramePointerSize); - - // We move ETR(aapcs64 callee saved) back to TR(aapcs64 caller saved) which might have - // been trashed in the native call. The original ETR(X21) is restored from stack. +void Arm64Assembler::RemoveFrame(size_t frame_size, + const std::vector& callee_save_regs) { + // Setup VIXL CPURegList for callee-saves. 
+ CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); + CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); + for (auto r : callee_save_regs) { + Arm64ManagedRegister reg = r.AsArm64(); + if (reg.IsXRegister()) { + core_reg_list.Combine(reg_x(reg.AsXRegister()).code()); + } else { + DCHECK(reg.IsDRegister()); + fp_reg_list.Combine(reg_d(reg.AsDRegister()).code()); + } + } + size_t core_reg_size = core_reg_list.TotalSizeInBytes(); + size_t fp_reg_size = fp_reg_list.TotalSizeInBytes(); + + // For now we only check that the size of the frame is large enough to hold spills and method + // reference. + DCHECK_GE(frame_size, core_reg_size + fp_reg_size + sizeof(StackReference)); + DCHECK_ALIGNED(frame_size, kStackAlignment); + + // Note: This is specific to JNI method frame. + // Restore TR(Caller saved in AAPCS) from ETR(Callee saved in AAPCS). + DCHECK(!core_reg_list.IncludesAliasOf(reg_x(TR))); + DCHECK(core_reg_list.IncludesAliasOf(reg_x(ETR))); ___ Mov(reg_x(TR), reg_x(ETR)); - // TODO: Ugly hard code... - // Should generate these according to the spill mask automatically. - // TUNING: Use ldp. - // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). - size_t reg_offset = frame_size; - static constexpr XRegister x_spills[] = { - LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 }; - for (size_t i = 0; i < arraysize(x_spills); i++) { - XRegister reg = x_spills[i]; - reg_offset -= 8; - LoadFromOffset(reg, SP, reg_offset); - cfi_.Restore(DWARFReg(reg)); - } - for (int d = 15; d >= 8; d--) { - DRegister reg = static_cast(d); - reg_offset -= 8; - LoadDFromOffset(reg, SP, reg_offset); - cfi_.Restore(DWARFReg(reg)); - } + cfi_.RememberState(); + + // Restore callee-saves. + UnspillRegisters(core_reg_list, frame_size - core_reg_size); + UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); // Decrease frame size to start of callee saved regs. DecreaseFrameSize(frame_size); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index 8973b9ca8..b7715af6c 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -83,6 +83,9 @@ class Arm64Assembler FINAL : public Assembler { // Copy instructions out of assembly buffer into the given region of memory. void FinalizeInstructions(const MemoryRegion& region); + void SpillRegisters(vixl::CPURegList registers, int offset); + void UnspillRegisters(vixl::CPURegList registers, int offset); + // Emit code that will create an activation on the stack. void BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector& callee_save_regs, diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S index b3e9242c5..39a8aa5cf 100644 --- a/runtime/arch/arm64/asm_support_arm64.S +++ b/runtime/arch/arm64/asm_support_arm64.S @@ -21,12 +21,6 @@ // Define special registers. -// Register holding suspend check count down. -// 32-bit is enough for the suspend register. -#define wSUSPEND w19 -// xSUSPEND is 64-bit view of wSUSPEND. -// Used to save/restore the register scratched by managed code. -#define xSUSPEND x19 // Register holding Thread::Current(). #define xSELF x18 // x18 is not preserved by aapcs64, save it on xETR(External Thread reg) for restore and later use. 
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h index 989ecc6c5..998f5672f 100644 --- a/runtime/arch/arm64/asm_support_arm64.h +++ b/runtime/arch/arm64/asm_support_arm64.h @@ -20,7 +20,7 @@ #include "asm_support.h" #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176 -#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96 +#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 112 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224 #endif // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_ diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index b4de8796d..4079436c4 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -48,8 +48,8 @@ stp d12, d13, [sp, #40] stp d14, d15, [sp, #56] - // Reserved registers - stp xSELF, xSUSPEND, [sp, #72] + // Thread register and x19 (callee-save) + stp xSELF, x19, [sp, #72] .cfi_rel_offset x18, 72 .cfi_rel_offset x19, 80 @@ -99,38 +99,39 @@ THIS_LOAD_REQUIRES_READ_BARRIER ldr wIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ] - sub sp, sp, #96 - .cfi_adjust_cfa_offset 96 + sub sp, sp, #112 + .cfi_adjust_cfa_offset 112 // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 96) +#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 112) #error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM64) size not as expected." #endif // Callee-saves - stp x20, x21, [sp, #8] - .cfi_rel_offset x20, 8 - .cfi_rel_offset x21, 16 + stp x19, x20, [sp, #16] + .cfi_rel_offset x19, 16 + .cfi_rel_offset x20, 24 - stp x22, x23, [sp, #24] - .cfi_rel_offset x22, 24 - .cfi_rel_offset x23, 32 + stp x21, x22, [sp, #32] + .cfi_rel_offset x21, 32 + .cfi_rel_offset x22, 40 - stp x24, x25, [sp, #40] - .cfi_rel_offset x24, 40 - .cfi_rel_offset x25, 48 + stp x23, x24, [sp, #48] + .cfi_rel_offset x23, 48 + .cfi_rel_offset x24, 56 - stp x26, x27, [sp, #56] - .cfi_rel_offset x26, 56 - .cfi_rel_offset x27, 64 + stp x25, x26, [sp, #64] + .cfi_rel_offset x25, 64 + .cfi_rel_offset x26, 72 - stp x28, x29, [sp, #72] - .cfi_rel_offset x28, 72 - .cfi_rel_offset x29, 80 + stp x27, x28, [sp, #80] + .cfi_rel_offset x27, 80 + .cfi_rel_offset x28, 88 - // LR - str xLR, [sp, #88] - .cfi_rel_offset x30, 88 + // x29(callee-save) and LR + stp x29, xLR, [sp, #96] + .cfi_rel_offset x29, 96 + .cfi_rel_offset x30, 104 // Save xSELF to xETR. 
mov xETR, xSELF @@ -148,32 +149,33 @@ mov xSELF, xETR // Callee-saves - ldp x20, x21, [sp, #8] + ldp x19, x20, [sp, #16] + .cfi_restore x19 .cfi_restore x20 - .cfi_restore x21 - ldp x22, x23, [sp, #24] + ldp x21, x22, [sp, #32] + .cfi_restore x21 .cfi_restore x22 - .cfi_restore x23 - ldp x24, x25, [sp, #40] + ldp x23, x24, [sp, #48] + .cfi_restore x23 .cfi_restore x24 - .cfi_restore x25 - ldp x26, x27, [sp, #56] + ldp x25, x26, [sp, #64] + .cfi_restore x25 .cfi_restore x26 - .cfi_restore x27 - ldp x28, x29, [sp, #72] + ldp x27, x28, [sp, #80] + .cfi_restore x27 .cfi_restore x28 - .cfi_restore x29 - // LR - ldr xLR, [sp, #88] + // x29(callee-save) and LR + ldp x29, xLR, [sp, #96] + .cfi_restore x29 .cfi_restore x30 - add sp, sp, #96 - .cfi_adjust_cfa_offset -96 + add sp, sp, #112 + .cfi_adjust_cfa_offset -112 .endm .macro POP_REFS_ONLY_CALLEE_SAVE_FRAME @@ -183,8 +185,8 @@ ldr xETR, [sp, #16] .cfi_restore x21 - add sp, sp, #96 - .cfi_adjust_cfa_offset -96 + add sp, sp, #112 + .cfi_adjust_cfa_offset -112 .endm .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN @@ -202,30 +204,33 @@ #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected." #endif - // FP args - stp d0, d1, [sp, #16] - stp d2, d3, [sp, #32] - stp d4, d5, [sp, #48] - stp d6, d7, [sp, #64] + // FP args. + stp d0, d1, [sp, #8] + stp d2, d3, [sp, #24] + stp d4, d5, [sp, #40] + stp d6, d7, [sp, #56] - // args and x20(callee-save) - stp x1, x2, [sp, #80] - .cfi_rel_offset x1, 80 - .cfi_rel_offset x2, 88 + // Core args. + str x1, [sp, 72] + .cfi_rel_offset x1, 72 - stp x3, x4, [sp, #96] - .cfi_rel_offset x3, 96 - .cfi_rel_offset x4, 104 + stp x2, x3, [sp, #80] + .cfi_rel_offset x2, 80 + .cfi_rel_offset x3, 88 - stp x5, x6, [sp, #112] - .cfi_rel_offset x5, 112 - .cfi_rel_offset x6, 120 + stp x4, x5, [sp, #96] + .cfi_rel_offset x4, 96 + .cfi_rel_offset x5, 104 - stp x7, x20, [sp, #128] - .cfi_rel_offset x7, 128 - .cfi_rel_offset x20, 136 + stp x6, x7, [sp, #112] + .cfi_rel_offset x6, 112 + .cfi_rel_offset x7, 120 // Callee-saves. + stp x19, x20, [sp, #128] + .cfi_rel_offset x19, 128 + .cfi_rel_offset x20, 136 + stp x21, x22, [sp, #144] .cfi_rel_offset x21, 144 .cfi_rel_offset x22, 152 @@ -289,30 +294,33 @@ // Restore xSELF. mov xSELF, xETR - // FP args - ldp d0, d1, [sp, #16] - ldp d2, d3, [sp, #32] - ldp d4, d5, [sp, #48] - ldp d6, d7, [sp, #64] + // FP args. + ldp d0, d1, [sp, #8] + ldp d2, d3, [sp, #24] + ldp d4, d5, [sp, #40] + ldp d6, d7, [sp, #56] - // args and x20(callee-save) - ldp x1, x2, [sp, #80] + // Core args. + ldr x1, [sp, 72] .cfi_restore x1 - .cfi_restore x2 - ldp x3, x4, [sp, #96] + ldp x2, x3, [sp, #80] + .cfi_restore x2 .cfi_restore x3 - .cfi_restore x4 - ldp x5, x6, [sp, #112] + ldp x4, x5, [sp, #96] + .cfi_restore x4 .cfi_restore x5 - .cfi_restore x6 - ldp x7, x20, [sp, #128] + ldp x6, x7, [sp, #112] + .cfi_restore x6 .cfi_restore x7 - .cfi_restore x20 // Callee-saves. + ldp x19, x20, [sp, #128] + .cfi_restore x19 + .cfi_restore x20 + ldp x21, x22, [sp, #144] .cfi_restore x21 .cfi_restore x22 @@ -499,7 +507,7 @@ INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvo .macro INVOKE_STUB_CREATE_FRAME -SAVE_SIZE=15*8 // x4, x5, x20, x21, x22, x23, x24, x25, x26, x27, x28, xSUSPEND, SP, LR, FP saved. +SAVE_SIZE=15*8 // x4, x5, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved. 
SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE @@ -534,7 +542,7 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE .cfi_rel_offset x20, 48 .cfi_rel_offset x21, 56 - stp x9, xSUSPEND, [x10, #32] // Save old stack pointer and xSUSPEND + stp x9, x19, [x10, #32] // Save old stack pointer and x19. .cfi_rel_offset sp, 32 .cfi_rel_offset x19, 40 @@ -549,7 +557,6 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE mov xFP, x10 // Use xFP now, as it's callee-saved. .cfi_def_cfa_register x29 mov xSELF, x3 // Move thread pointer into SELF register. - mov wSUSPEND, #SUSPEND_CHECK_INTERVAL // reset wSUSPEND to suspend check interval // Copy arguments into stack frame. // Use simple copy routine for now. @@ -634,7 +641,7 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE str x0, [x4] .Lexit_art_quick_invoke_stub\@: - ldp x2, xSUSPEND, [xFP, #32] // Restore stack pointer and xSUSPEND. + ldp x2, x19, [xFP, #32] // Restore stack pointer and x19. .cfi_restore x19 mov sp, x2 .cfi_restore sp @@ -662,7 +669,9 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE * | FP'' | <- SP' * +----------------------+ * +----------------------+ - * | x19 | <- Used as wSUSPEND, won't be restored by managed code. + * | x28 | <- TODO: Remove callee-saves. + * | : | + * | x19 | * | SP' | * | X5 | * | X4 | Saved registers @@ -680,7 +689,6 @@ SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE * x1-x7 - integer parameters. * d0-d7 - Floating point parameters. * xSELF = self - * wSUSPEND = suspend count * SP = & of ArtMethod* * x1 = "this" pointer. * @@ -1388,12 +1396,11 @@ TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_R GENERATE_ALL_ALLOC_ENTRYPOINTS /* - * Called by managed code when the value in wSUSPEND has been decremented to 0. + * Called by managed code when the thread has been asked to suspend. */ .extern artTestSuspendFromCode ENTRY art_quick_test_suspend ldrh w0, [xSELF, #THREAD_FLAGS_OFFSET] // get xSELF->state_and_flags.as_struct.flags - mov wSUSPEND, #SUSPEND_CHECK_INTERVAL // reset wSUSPEND to SUSPEND_CHECK_INTERVAL cbnz w0, .Lneed_suspend // check flags == 0 ret // return if flags == 0 .Lneed_suspend: @@ -1477,6 +1484,7 @@ END art_quick_resolution_trampoline * | X22 | callee save * | X21 | callee save * | X20 | callee save + * | X19 | callee save * | X7 | arg7 * | X6 | arg6 * | X5 | arg5 diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h index 0e1e32b95..61b4dffb7 100644 --- a/runtime/arch/arm64/quick_method_frame_info_arm64.h +++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h @@ -33,10 +33,10 @@ static constexpr uint32_t kArm64CalleeSaveAlwaysSpills = (1 << art::arm64::LR); // Callee saved registers static constexpr uint32_t kArm64CalleeSaveRefSpills = - (1 << art::arm64::X20) | (1 << art::arm64::X21) | (1 << art::arm64::X22) | - (1 << art::arm64::X23) | (1 << art::arm64::X24) | (1 << art::arm64::X25) | - (1 << art::arm64::X26) | (1 << art::arm64::X27) | (1 << art::arm64::X28) | - (1 << art::arm64::X29); + (1 << art::arm64::X19) | (1 << art::arm64::X20) | (1 << art::arm64::X21) | + (1 << art::arm64::X22) | (1 << art::arm64::X23) | (1 << art::arm64::X24) | + (1 << art::arm64::X25) | (1 << art::arm64::X26) | (1 << art::arm64::X27) | + (1 << art::arm64::X28) | (1 << art::arm64::X29); // X0 is the method pointer. Not saved. 
static constexpr uint32_t kArm64CalleeSaveArgSpills = (1 << art::arm64::X1) | (1 << art::arm64::X2) | (1 << art::arm64::X3) | @@ -44,9 +44,7 @@ static constexpr uint32_t kArm64CalleeSaveArgSpills = (1 << art::arm64::X7); static constexpr uint32_t kArm64CalleeSaveAllSpills = // Thread register. - (1 << art::arm64::X18) | - // Suspend register. - 1 << art::arm64::X19; + (1 << art::arm64::X18); static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0; static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0; diff --git a/runtime/asm_support.h b/runtime/asm_support.h index 4c83e8856..8057dd16b 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -28,8 +28,8 @@ #include "read_barrier_c.h" -#if defined(__arm__) || defined(__aarch64__) || defined(__mips__) -// In quick code for ARM, ARM64 and MIPS we make poor use of registers and perform frequent suspend +#if defined(__arm__) || defined(__mips__) +// In quick code for ARM and MIPS we make poor use of registers and perform frequent suspend // checks in the event of loop back edges. The SUSPEND_CHECK_INTERVAL constant is loaded into a // register at the point of an up-call or after handling a suspend check. It reduces the number of // loads of the TLS suspend check value by the given amount (turning it into a decrement and compare diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 8351e22fa..2e813c808 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -88,7 +88,7 @@ class QuickArgumentVisitor { // | LR | // | X29 | // | : | - // | X20 | + // | X19 | // | X7 | // | : | // | X1 | -- 2.11.0
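
As a companion to the "stp/ldp instead of str/ldr" change described in the commit message, the following is a minimal standalone sketch (not ART or VIXL code; register names, offsets, and sizes are illustrative assumptions) of the pairing strategy that SpillRegisters() uses: callee-saves are consumed two at a time with stp, and a single trailing register falls back to str. UnspillRegisters() mirrors the same walk with ldp/ldr.

// sketch_stp_pairing.cc: illustrative only, assumes a hand-picked register list.
#include <cstdio>
#include <deque>
#include <string>

int main() {
  // Hypothetical callee-save set and spill-area offset within the frame.
  std::deque<std::string> regs = {"x19", "x20", "x21", "x22", "x23"};
  int offset = 96;
  const int kRegSize = 8;  // Each X register occupies 8 bytes.

  // Pairs go out as stp; each pair advances the offset by two slots.
  while (regs.size() >= 2) {
    std::string lo = regs.front(); regs.pop_front();
    std::string hi = regs.front(); regs.pop_front();
    std::printf("stp %s, %s, [sp, #%d]\n", lo.c_str(), hi.c_str(), offset);
    offset += 2 * kRegSize;
  }
  // An odd leftover register is stored with a plain str.
  if (!regs.empty()) {
    std::printf("str %s, [sp, #%d]\n", regs.front().c_str(), offset);
  }
  return 0;
}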