From 10ef6941648aad04d54527d4a7a6070bf7065e88 Mon Sep 17 00:00:00 2001 From: Vladimir Marko Date: Thu, 22 Oct 2015 15:25:54 +0100 Subject: [PATCH] Delay emitting CFI PC adjustments until after Thumb2/Mips fixup. On Mips also take into account out-of-order CFI data emitted from EmitBranches(). Change-Id: I03b0b0b4c2b1ea31a02699ef5fa1c55aa42c23c3 --- compiler/dwarf/debug_frame_opcode_writer.h | 23 +- compiler/jni/jni_cfi_test.cc | 1 + compiler/optimizing/optimizing_cfi_test.cc | 151 ++++++++--- .../optimizing/optimizing_cfi_test_expected.inc | 275 +++++++++++++++++++++ compiler/utils/arm/assembler_thumb2.cc | 27 ++ compiler/utils/arm/assembler_thumb2.h | 2 + compiler/utils/assembler.cc | 9 +- compiler/utils/assembler.h | 59 ++++- compiler/utils/mips/assembler_mips.cc | 60 +++++ compiler/utils/mips/assembler_mips.h | 5 +- 10 files changed, 567 insertions(+), 45 deletions(-) diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h index 60241f722..5a9964178 100644 --- a/compiler/dwarf/debug_frame_opcode_writer.h +++ b/compiler/dwarf/debug_frame_opcode_writer.h @@ -282,7 +282,12 @@ class DebugFrameOpCodeWriter : private Writer { bool IsEnabled() const { return enabled_; } - void SetEnabled(bool value) { enabled_ = value; } + void SetEnabled(bool value) { + enabled_ = value; + if (enabled_ && opcodes_.capacity() == 0u) { + opcodes_.reserve(kDefaultCapacity); + } + } int GetCurrentPC() const { return current_pc_; } @@ -292,24 +297,24 @@ class DebugFrameOpCodeWriter : private Writer { using Writer::data; - DebugFrameOpCodeWriter(bool enabled = true, - const typename Vector::allocator_type& alloc = - typename Vector::allocator_type()) + explicit DebugFrameOpCodeWriter(bool enabled = true, + const typename Vector::allocator_type& alloc = + typename Vector::allocator_type()) : Writer(&opcodes_), - enabled_(enabled), + enabled_(false), opcodes_(alloc), current_cfa_offset_(0), current_pc_(0), uses_dwarf3_features_(false) { - if 
(enabled) { - // Best guess based on couple of observed outputs. - opcodes_.reserve(16); - } + SetEnabled(enabled); } virtual ~DebugFrameOpCodeWriter() { } protected: + // Best guess based on couple of observed outputs. + static constexpr size_t kDefaultCapacity = 32u; + int FactorDataOffset(int offset) const { DCHECK_EQ(offset % kDataAlignmentFactor, 0); return offset / kDataAlignmentFactor; diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 0bfe8a276..8832c84ac 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -51,6 +51,7 @@ class JNICFITest : public CFITest { // Assemble the method. std::unique_ptr jni_asm(Assembler::Create(isa)); + jni_asm->cfi().SetEnabled(true); jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills()); jni_asm->IncreaseFrameSize(32); diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 05c6b2ca5..34f1fe594 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -23,6 +23,8 @@ #include "optimizing/code_generator.h" #include "optimizing/optimizing_unit_test.h" #include "utils/assembler.h" +#include "utils/arm/assembler_thumb2.h" +#include "utils/mips/assembler_mips.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -36,52 +38,62 @@ class OptimizingCFITest : public CFITest { // Enable this flag to generate the expected outputs. static constexpr bool kGenerateExpected = false; - void TestImpl(InstructionSet isa, const char* isa_str, - const std::vector& expected_asm, - const std::vector& expected_cfi) { + OptimizingCFITest() + : pool_(), + allocator_(&pool_), + opts_(), + isa_features_(), + graph_(nullptr), + code_gen_(), + blocks_(allocator_.Adapter()) {} + + void SetUpFrame(InstructionSet isa) { // Setup simple context. 
- ArenaPool pool; - ArenaAllocator allocator(&pool); - CompilerOptions opts; - std::unique_ptr isa_features; std::string error; - isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); - HGraph* graph = CreateGraph(&allocator); + isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + graph_ = CreateGraph(&allocator_); // Generate simple frame with some spills. - std::unique_ptr code_gen( - CodeGenerator::Create(graph, isa, *isa_features.get(), opts)); + code_gen_.reset(CodeGenerator::Create(graph_, isa, *isa_features_, opts_)); + code_gen_->GetAssembler()->cfi().SetEnabled(true); const int frame_size = 64; int core_reg = 0; int fp_reg = 0; for (int i = 0; i < 2; i++) { // Two registers of each kind. for (; core_reg < 32; core_reg++) { - if (code_gen->IsCoreCalleeSaveRegister(core_reg)) { + if (code_gen_->IsCoreCalleeSaveRegister(core_reg)) { auto location = Location::RegisterLocation(core_reg); - code_gen->AddAllocatedRegister(location); + code_gen_->AddAllocatedRegister(location); core_reg++; break; } } for (; fp_reg < 32; fp_reg++) { - if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) { + if (code_gen_->IsFloatingPointCalleeSaveRegister(fp_reg)) { auto location = Location::FpuRegisterLocation(fp_reg); - code_gen->AddAllocatedRegister(location); + code_gen_->AddAllocatedRegister(location); fp_reg++; break; } } } - ArenaVector blocks(allocator.Adapter()); - code_gen->block_order_ = &blocks; - code_gen->ComputeSpillMask(); - code_gen->SetFrameSize(frame_size); - code_gen->GenerateFrameEntry(); - code_gen->GenerateFrameExit(); + code_gen_->block_order_ = &blocks_; + code_gen_->ComputeSpillMask(); + code_gen_->SetFrameSize(frame_size); + code_gen_->GenerateFrameEntry(); + } + + void Finish() { + code_gen_->GenerateFrameExit(); + code_gen_->Finalize(&code_allocator_); + } + + void Check(InstructionSet isa, + const char* isa_str, + const std::vector& expected_asm, + const std::vector& expected_cfi) { // Get 
the outputs. - InternalCodeAllocator code_allocator; - code_gen->Finalize(&code_allocator); - const std::vector& actual_asm = code_allocator.GetMemory(); - Assembler* opt_asm = code_gen->GetAssembler(); + const std::vector& actual_asm = code_allocator_.GetMemory(); + Assembler* opt_asm = code_gen_->GetAssembler(); const std::vector& actual_cfi = *(opt_asm->cfi().data()); if (kGenerateExpected) { @@ -92,6 +104,19 @@ class OptimizingCFITest : public CFITest { } } + void TestImpl(InstructionSet isa, const char* + isa_str, + const std::vector& expected_asm, + const std::vector& expected_cfi) { + SetUpFrame(isa); + Finish(); + Check(isa, isa_str, expected_asm, expected_cfi); + } + + CodeGenerator* GetCodeGenerator() { + return code_gen_.get(); + } + private: class InternalCodeAllocator : public CodeAllocator { public: @@ -109,21 +134,83 @@ class OptimizingCFITest : public CFITest { DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); }; + + ArenaPool pool_; + ArenaAllocator allocator_; + CompilerOptions opts_; + std::unique_ptr isa_features_; + HGraph* graph_; + std::unique_ptr code_gen_; + ArenaVector blocks_; + InternalCodeAllocator code_allocator_; }; -#define TEST_ISA(isa) \ - TEST_F(OptimizingCFITest, isa) { \ - std::vector expected_asm(expected_asm_##isa, \ - expected_asm_##isa + arraysize(expected_asm_##isa)); \ - std::vector expected_cfi(expected_cfi_##isa, \ - expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ - TestImpl(isa, #isa, expected_asm, expected_cfi); \ +#define TEST_ISA(isa) \ + TEST_F(OptimizingCFITest, isa) { \ + std::vector expected_asm( \ + expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector expected_cfi( \ + expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ } TEST_ISA(kThumb2) TEST_ISA(kArm64) TEST_ISA(kX86) TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +TEST_F(OptimizingCFITest, kThumb2Adjust) { + std::vector 
expected_asm( + expected_asm_kThumb2_adjust, + expected_asm_kThumb2_adjust + arraysize(expected_asm_kThumb2_adjust)); + std::vector expected_cfi( + expected_cfi_kThumb2_adjust, + expected_cfi_kThumb2_adjust + arraysize(expected_cfi_kThumb2_adjust)); + SetUpFrame(kThumb2); +#define __ down_cast(GetCodeGenerator()->GetAssembler())-> + Label target; + __ CompareAndBranchIfZero(arm::R0, &target); + // Push the target out of range of CBZ. + for (size_t i = 0; i != 65; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi); +} + +TEST_F(OptimizingCFITest, kMipsAdjust) { + // One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. + static constexpr size_t kNumNops = 1u + (1u << 15); + std::vector expected_asm( + expected_asm_kMips_adjust_head, + expected_asm_kMips_adjust_head + arraysize(expected_asm_kMips_adjust_head)); + expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); + expected_asm.insert( + expected_asm.end(), + expected_asm_kMips_adjust_tail, + expected_asm_kMips_adjust_tail + arraysize(expected_asm_kMips_adjust_tail)); + std::vector expected_cfi( + expected_cfi_kMips_adjust, + expected_cfi_kMips_adjust + arraysize(expected_cfi_kMips_adjust)); + SetUpFrame(kMips); +#define __ down_cast(GetCodeGenerator()->GetAssembler())-> + mips::MipsLabel target; + __ Beqz(mips::A0, &target); + // Push the target out of range of BEQZ. 
+ for (size_t i = 0; i != kNumNops; ++i) { + __ Nop(); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kMips, "kMips_adjust", expected_asm, expected_cfi); +} #endif // __ANDROID__ diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 2c2c55f29..4571ebf2d 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -138,3 +138,278 @@ static constexpr uint8_t expected_cfi_kX86_64[] = { // 0x0000002c: ret // 0x0000002d: .cfi_restore_state // 0x0000002d: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF, + 0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7, + 0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27, + 0x00, 0x00, 0xA4, 0xAF, 0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7, + 0x04, 0x00, 0xB5, 0xC7, 0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7, + 0x10, 0x00, 0xB0, 0x8F, 0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F, + 0x1C, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, + 0x54, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1, + 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -28 +// 0x00000004: .cfi_def_cfa_offset: 28 +// 0x00000004: sw r31, +24(r29) +// 0x00000008: .cfi_offset: r31 at cfa-4 +// 0x00000008: sw r17, +20(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-8 +// 0x0000000c: sw r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-12 +// 0x00000010: swc1 f22, +8(r29) +// 0x00000014: swc1 f23, +12(r29) +// 0x00000018: swc1 f20, +0(r29) +// 0x0000001c: swc1 f21, +4(r29) +// 0x00000020: addiu r29, r29, -36 +// 0x00000024: .cfi_def_cfa_offset: 64 +// 0x00000024: sw r4, 
+0(r29) +// 0x00000028: .cfi_remember_state +// 0x00000028: addiu r29, r29, 36 +// 0x0000002c: .cfi_def_cfa_offset: 28 +// 0x0000002c: lwc1 f20, +0(r29) +// 0x00000030: lwc1 f21, +4(r29) +// 0x00000034: lwc1 f22, +8(r29) +// 0x00000038: lwc1 f23, +12(r29) +// 0x0000003c: lw r16, +16(r29) +// 0x00000040: .cfi_restore: r16 +// 0x00000040: lw r17, +20(r29) +// 0x00000044: .cfi_restore: r17 +// 0x00000044: lw r31, +24(r29) +// 0x00000048: .cfi_restore: r31 +// 0x00000048: addiu r29, r29, 28 +// 0x0000004c: .cfi_def_cfa_offset: 0 +// 0x0000004c: jr r31 +// 0x00000050: nop +// 0x00000054: .cfi_restore_state +// 0x00000054: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, + 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, + 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x18, 0x00, 0xBD, 0x67, + 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF, + 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67, + 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x4C, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x28, 0x4C, 0xD0, 0x44, 0xD1, + 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: daddiu r29, r29, -40 +// 0x00000004: .cfi_def_cfa_offset: 40 +// 0x00000004: sd r31, +32(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r17, +24(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-16 +// 0x0000000c: sd r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-24 +// 0x00000010: sdc1 f25, +8(r29) +// 0x00000014: sdc1 f24, +0(r29) +// 0x00000018: daddiu r29, r29, -24 +// 0x0000001c: .cfi_def_cfa_offset: 64 +// 0x0000001c: sd r4, +0(r29) +// 0x00000020: .cfi_remember_state +// 0x00000020: daddiu r29, r29, 24 +// 0x00000024: .cfi_def_cfa_offset: 40 +// 0x00000024: 
ldc1 f24, +0(r29) +// 0x00000028: ldc1 f25, +8(r29) +// 0x0000002c: ld r16, +16(r29) +// 0x00000030: .cfi_restore: r16 +// 0x00000030: ld r17, +24(r29) +// 0x00000034: .cfi_restore: r17 +// 0x00000034: ld r31, +32(r29) +// 0x00000038: .cfi_restore: r31 +// 0x00000038: daddiu r29, r29, 40 +// 0x0000003c: .cfi_def_cfa_offset: 0 +// 0x0000003c: jr r31 +// 0x00000040: nop +// 0x00000044: .cfi_restore_state +// 0x00000044: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kThumb2_adjust[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x00, 0x28, + 0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x0B, 0xB0, 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, +}; +static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A, + 0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, + 0x0E, 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 
0x00000006: .cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: cmp r0, #0 +// 0x0000000c: beq +128 (0x00000090) +// 0x0000000e: ldr r0, [r0, #0] +// 0x00000010: ldr r0, [r0, #0] +// 0x00000012: ldr r0, [r0, #0] +// 0x00000014: ldr r0, [r0, #0] +// 0x00000016: ldr r0, [r0, #0] +// 0x00000018: ldr r0, [r0, #0] +// 0x0000001a: ldr r0, [r0, #0] +// 0x0000001c: ldr r0, [r0, #0] +// 0x0000001e: ldr r0, [r0, #0] +// 0x00000020: ldr r0, [r0, #0] +// 0x00000022: ldr r0, [r0, #0] +// 0x00000024: ldr r0, [r0, #0] +// 0x00000026: ldr r0, [r0, #0] +// 0x00000028: ldr r0, [r0, #0] +// 0x0000002a: ldr r0, [r0, #0] +// 0x0000002c: ldr r0, [r0, #0] +// 0x0000002e: ldr r0, [r0, #0] +// 0x00000030: ldr r0, [r0, #0] +// 0x00000032: ldr r0, [r0, #0] +// 0x00000034: ldr r0, [r0, #0] +// 0x00000036: ldr r0, [r0, #0] +// 0x00000038: ldr r0, [r0, #0] +// 0x0000003a: ldr r0, [r0, #0] +// 0x0000003c: ldr r0, [r0, #0] +// 0x0000003e: ldr r0, [r0, #0] +// 0x00000040: ldr r0, [r0, #0] +// 0x00000042: ldr r0, [r0, #0] +// 0x00000044: ldr r0, [r0, #0] +// 0x00000046: ldr r0, [r0, #0] +// 0x00000048: ldr r0, [r0, #0] +// 0x0000004a: ldr r0, [r0, #0] +// 0x0000004c: ldr r0, [r0, #0] +// 0x0000004e: ldr r0, [r0, #0] +// 0x00000050: ldr r0, [r0, #0] +// 0x00000052: ldr r0, [r0, #0] +// 0x00000054: ldr r0, [r0, #0] +// 0x00000056: ldr r0, [r0, #0] +// 0x00000058: ldr r0, [r0, #0] +// 0x0000005a: ldr r0, [r0, #0] +// 0x0000005c: ldr r0, [r0, #0] +// 0x0000005e: ldr r0, [r0, #0] +// 0x00000060: ldr r0, [r0, #0] +// 0x00000062: ldr r0, [r0, #0] +// 0x00000064: ldr r0, [r0, #0] +// 0x00000066: ldr r0, [r0, #0] +// 0x00000068: ldr r0, [r0, #0] +// 0x0000006a: ldr r0, [r0, #0] +// 0x0000006c: ldr r0, [r0, #0] +// 0x0000006e: ldr r0, [r0, #0] +// 0x00000070: ldr r0, [r0, #0] +// 0x00000072: ldr r0, [r0, #0] +// 
0x00000074: ldr r0, [r0, #0] +// 0x00000076: ldr r0, [r0, #0] +// 0x00000078: ldr r0, [r0, #0] +// 0x0000007a: ldr r0, [r0, #0] +// 0x0000007c: ldr r0, [r0, #0] +// 0x0000007e: ldr r0, [r0, #0] +// 0x00000080: ldr r0, [r0, #0] +// 0x00000082: ldr r0, [r0, #0] +// 0x00000084: ldr r0, [r0, #0] +// 0x00000086: ldr r0, [r0, #0] +// 0x00000088: ldr r0, [r0, #0] +// 0x0000008a: ldr r0, [r0, #0] +// 0x0000008c: ldr r0, [r0, #0] +// 0x0000008e: ldr r0, [r0, #0] +// 0x00000090: .cfi_remember_state +// 0x00000090: add sp, sp, #44 +// 0x00000092: .cfi_def_cfa_offset: 20 +// 0x00000092: vpop.f32 {s16-s17} +// 0x00000096: .cfi_def_cfa_offset: 12 +// 0x00000096: .cfi_restore_extended: r80 +// 0x00000096: .cfi_restore_extended: r81 +// 0x00000096: pop {r5, r6, pc} +// 0x00000098: .cfi_restore_state +// 0x00000098: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips_adjust_head[] = { + 0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF, + 0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7, + 0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27, + 0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27, + 0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C, + 0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F, + 0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27, +}; +static constexpr uint8_t expected_asm_kMips_adjust_tail[] = { + 0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7, 0x04, 0x00, 0xB5, 0xC7, + 0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7, 0x10, 0x00, 0xB0, 0x8F, + 0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F, 0x1C, 0x00, 0xBD, 0x27, + 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips_adjust[] = { + 0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, + 0x54, 0x0E, 0x40, 0x4C, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, + 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 
0xD1, 0x44, 0xDF, + 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -28 +// 0x00000004: .cfi_def_cfa_offset: 28 +// 0x00000004: sw r31, +24(r29) +// 0x00000008: .cfi_offset: r31 at cfa-4 +// 0x00000008: sw r17, +20(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-8 +// 0x0000000c: sw r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-12 +// 0x00000010: swc1 f22, +8(r29) +// 0x00000014: swc1 f23, +12(r29) +// 0x00000018: swc1 f20, +0(r29) +// 0x0000001c: swc1 f21, +4(r29) +// 0x00000020: addiu r29, r29, -36 +// 0x00000024: .cfi_def_cfa_offset: 64 +// 0x00000024: sw r4, +0(r29) +// 0x00000028: bne r0, r4, 0x0000004c ; +36 +// 0x0000002c: addiu r29, r29, -4 +// 0x00000030: .cfi_def_cfa_offset: 68 +// 0x00000030: sw r31, +0(r29) +// 0x00000034: bltzal r0, 0x00000038 ; +4 +// 0x00000038: lui r1, 0x20000 +// 0x0000003c: ori r1, r1, 24 +// 0x00000040: addu r1, r1, r31 +// 0x00000044: lw r31, +0(r29) +// 0x00000048: jr r1 +// 0x0000004c: addiu r29, r29, 4 +// 0x00000050: .cfi_def_cfa_offset: 64 +// 0x00000050: nop +// ... 
+// 0x00020050: nop +// 0x00020054: .cfi_remember_state +// 0x00020054: addiu r29, r29, 36 +// 0x00020058: .cfi_def_cfa_offset: 28 +// 0x00020058: lwc1 f20, +0(r29) +// 0x0002005c: lwc1 f21, +4(r29) +// 0x00020060: lwc1 f22, +8(r29) +// 0x00020064: lwc1 f23, +12(r29) +// 0x00020068: lw r16, +16(r29) +// 0x0002006c: .cfi_restore: r16 +// 0x0002006c: lw r17, +20(r29) +// 0x00020070: .cfi_restore: r17 +// 0x00020070: lw r31, +24(r29) +// 0x00020074: .cfi_restore: r31 +// 0x00020074: addiu r29, r29, 28 +// 0x00020078: .cfi_def_cfa_offset: 0 +// 0x00020078: jr r31 +// 0x0002007c: nop +// 0x00020080: .cfi_restore_state +// 0x00020080: .cfi_def_cfa_offset: 64 diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index fb3aa1ea8..297cc54e2 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -282,6 +282,32 @@ void Thumb2Assembler::EmitJumpTables() { } } +void Thumb2Assembler::PatchCFI() { + if (cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with patched opcodes. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. 
+ cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) { DCHECK_ALIGNED(offset, 2); int16_t encoding = B15 | B14; @@ -463,6 +489,7 @@ void Thumb2Assembler::FinalizeCode() { EmitLiterals(); FinalizeTrackedLabels(); EmitJumpTables(); + PatchCFI(); } bool Thumb2Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) { diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 38fd24408..e18361300 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -44,6 +44,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { last_position_adjustment_(0u), last_old_position_(0u), last_fixup_id_(0u) { + cfi().DelayEmittingAdvancePCs(); } virtual ~Thumb2Assembler() { @@ -792,6 +793,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { void EmitFixups(uint32_t adjusted_code_size); void EmitLiterals(); void EmitJumpTables(); + void PatchCFI(); static int16_t BEncoding16(int32_t offset, Condition cond); static int32_t BEncoding32(int32_t offset, Condition cond); diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index b01b0fe4e..f784d2c3f 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -38,6 +38,7 @@ #ifdef ART_ENABLE_CODEGEN_x86_64 #include "x86_64/assembler_x86_64.h" #endif +#include "base/casts.h" #include "globals.h" #include "memory_region.h" @@ -119,7 +120,13 @@ void AssemblerBuffer::ExtendCapacity(size_t min_capacity) { } void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() { - this->AdvancePC(assembler_->CodeSize()); + uint32_t pc = dchecked_integral_cast<uint32_t>(assembler_->CodeSize()); + if (delay_emitting_advance_pc_) { + uint32_t stream_pos = dchecked_integral_cast<uint32_t>(opcodes_.size()); + delayed_advance_pcs_.push_back(DelayedAdvancePC {stream_pos, pc}); + } else { + AdvancePC(pc); + } } Assembler* Assembler::Create(InstructionSet instruction_set, 
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index dfe6babb2..1dbc14239 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -271,16 +271,71 @@ class AssemblerBuffer { class DebugFrameOpCodeWriterForAssembler FINAL : public dwarf::DebugFrameOpCodeWriter<> { public: + struct DelayedAdvancePC { + uint32_t stream_pos; + uint32_t pc; + }; + // This method is called the by the opcode writers. virtual void ImplicitlyAdvancePC() FINAL; explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer) - : dwarf::DebugFrameOpCodeWriter<>(), - assembler_(buffer) { + : dwarf::DebugFrameOpCodeWriter<>(false /* enabled */), + assembler_(buffer), + delay_emitting_advance_pc_(false), + delayed_advance_pcs_() { + } + + ~DebugFrameOpCodeWriterForAssembler() { + DCHECK(delayed_advance_pcs_.empty()); + } + + // Tell the writer to delay emitting advance PC info. + // The assembler must explicitly process all the delayed advances. + void DelayEmittingAdvancePCs() { + delay_emitting_advance_pc_ = true; + } + + // Override the last delayed PC. The new PC can be out of order. + void OverrideDelayedPC(size_t pc) { + DCHECK(delay_emitting_advance_pc_); + DCHECK(!delayed_advance_pcs_.empty()); + delayed_advance_pcs_.back().pc = pc; + } + + // Return the number of delayed advance PC entries. + size_t NumberOfDelayedAdvancePCs() const { + return delayed_advance_pcs_.size(); + } + + // Release the CFI stream and advance PC infos so that the assembler can patch it. + std::pair<std::vector<uint8_t>, std::vector<DelayedAdvancePC>> + ReleaseStreamAndPrepareForDelayedAdvancePC() { + DCHECK(delay_emitting_advance_pc_); + delay_emitting_advance_pc_ = false; + std::pair<std::vector<uint8_t>, std::vector<DelayedAdvancePC>> result; + result.first.swap(opcodes_); + result.second.swap(delayed_advance_pcs_); + return result; + } + + // Reserve space for the CFI stream. + void ReserveCFIStream(size_t capacity) { + opcodes_.reserve(capacity); + } + + // Append raw data to the CFI stream. 
+ void AppendRawData(const std::vector<uint8_t>& raw_data, size_t first, size_t last) { + DCHECK_LE(0u, first); + DCHECK_LE(first, last); + DCHECK_LE(last, raw_data.size()); + opcodes_.insert(opcodes_.end(), raw_data.begin() + first, raw_data.begin() + last); } private: Assembler* assembler_; + bool delay_emitting_advance_pc_; + std::vector<DelayedAdvancePC> delayed_advance_pcs_; }; class Assembler { diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 6f35e9ef5..aee64120a 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -43,8 +43,60 @@ void MipsAssembler::FinalizeCode() { } void MipsAssembler::FinalizeInstructions(const MemoryRegion& region) { + size_t number_of_delayed_adjust_pcs = cfi().NumberOfDelayedAdvancePCs(); EmitBranches(); Assembler::FinalizeInstructions(region); + PatchCFI(number_of_delayed_adjust_pcs); +} + +void MipsAssembler::PatchCFI(size_t number_of_delayed_adjust_pcs) { + if (cfi().NumberOfDelayedAdvancePCs() == 0u) { + DCHECK_EQ(number_of_delayed_adjust_pcs, 0u); + return; + } + + typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // PCs recorded before EmitBranches() need to be adjusted. + // PCs recorded during EmitBranches() are already adjusted. + // Both ranges are separately sorted but they may overlap. + if (kIsDebugBuild) { + auto cmp = [](const DelayedAdvancePC& lhs, const DelayedAdvancePC& rhs) { + return lhs.pc < rhs.pc; + }; + CHECK(std::is_sorted(advances.begin(), advances.begin() + number_of_delayed_adjust_pcs, cmp)); + CHECK(std::is_sorted(advances.begin() + number_of_delayed_adjust_pcs, advances.end(), cmp)); + } + + // Append initial CFI data if any. 
+ size_t size = advances.size(); + DCHECK_NE(size, 0u); + cfi().AppendRawData(old_stream, 0u, advances[0].stream_pos); + // Emit PC adjustments interleaved with the old CFI stream. + size_t adjust_pos = 0u; + size_t late_emit_pos = number_of_delayed_adjust_pcs; + while (adjust_pos != number_of_delayed_adjust_pcs || late_emit_pos != size) { + size_t adjusted_pc = (adjust_pos != number_of_delayed_adjust_pcs) + ? GetAdjustedPosition(advances[adjust_pos].pc) + : static_cast<size_t>(-1); + size_t late_emit_pc = (late_emit_pos != size) + ? advances[late_emit_pos].pc + : static_cast<size_t>(-1); + size_t advance_pc = std::min(adjusted_pc, late_emit_pc); + DCHECK_NE(advance_pc, static_cast<size_t>(-1)); + size_t entry = (adjusted_pc <= late_emit_pc) ? adjust_pos : late_emit_pos; + if (adjusted_pc <= late_emit_pc) { + ++adjust_pos; + } else { + ++late_emit_pos; + } + cfi().AdvancePC(advance_pc); + size_t end_pos = (entry + 1u == size) ? old_stream.size() : advances[entry + 1u].stream_pos; + cfi().AppendRawData(old_stream, advances[entry].stream_pos, end_pos); + } } void MipsAssembler::EmitBranches() { @@ -1770,6 +1822,7 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); // Increase frame to required size. IncreaseFrameSize(frame_size); @@ -1811,6 +1864,7 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void MipsAssembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); cfi_.RememberState(); // Pop callee saves and return address. 
@@ -1840,12 +1894,18 @@ void MipsAssembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); Addiu32(SP, SP, -adjust); cfi_.AdjustCFAOffset(adjust); + if (overwriting_) { + cfi_.OverrideDelayedPC(overwrite_location_); + } } void MipsAssembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); Addiu32(SP, SP, adjust); cfi_.AdjustCFAOffset(-adjust); + if (overwriting_) { + cfi_.OverrideDelayedPC(overwrite_location_); + } } void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index aa187b812..4038c1f1c 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -94,7 +94,9 @@ class MipsAssembler FINAL : public Assembler { last_position_adjustment_(0), last_old_position_(0), last_branch_id_(0), - isa_features_(instruction_set_features) {} + isa_features_(instruction_set_features) { + cfi().DelayEmittingAdvancePCs(); + } virtual ~MipsAssembler() { for (auto& branch : branches_) { @@ -599,6 +601,7 @@ class MipsAssembler FINAL : public Assembler { void PromoteBranches(); void EmitBranch(Branch* branch); void EmitBranches(); + void PatchCFI(size_t number_of_delayed_adjust_pcs); // Emits exception block. void EmitExceptionPoll(MipsExceptionSlowPath* exception); -- 2.11.0