From 27dee8bcd7b4a53840b60818da8d2c819ef199bd Mon Sep 17 00:00:00 2001
From: Mark Mendell
Date: Mon, 1 Dec 2014 19:06:12 -0500
Subject: [PATCH] X86_64 QBE: use RIP addressing

Take advantage of RIP addressing in 64-bit mode to improve the code
generation for accesses to the constant area as well as packed
switches.  This avoids computing the address of the start of the
method, which is only needed in 32-bit mode.

To do this, we add a new 'pseudo-register', kRIPReg, to minimize the
changes needed to generate the new addressing mode.

Change-Id: Ia28c93f98b09939806d91ff0bd7392e58996d108
Signed-off-by: Mark Mendell
---
 compiler/dex/compiler_enums.h          |  3 +-
 compiler/dex/quick/codegen_util.cc     |  5 ++-
 compiler/dex/quick/x86/assemble_x86.cc | 63 +++++++++++++++++++++++-------
 compiler/dex/quick/x86/call_x86.cc     | 64 +++++++++++++++++-------------
 compiler/dex/quick/x86/int_x86.cc      | 13 ++++++-
 compiler/dex/quick/x86/target_x86.cc   | 41 +++++++++++++-------
 compiler/dex/quick/x86/utility_x86.cc  | 71 ++++++++++++++++++++--------------
 compiler/dex/quick/x86/x86_lir.h       |  3 ++
 8 files changed, 175 insertions(+), 88 deletions(-)

diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 3b3170e51..a3fe8ad03 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -606,7 +606,7 @@ enum SelectInstructionKind {
 };
 std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind);
 
-// LIR fixup kinds for Arm
+// LIR fixup kinds for Arm and X86.
 enum FixupKind {
   kFixupNone,
   kFixupLabel,             // For labels we just adjust the offset.
@@ -624,6 +624,7 @@ enum FixupKind {
   kFixupMovImmHST,         // kThumb2MovImm16HST.
   kFixupAlign4,            // Align to 4-byte boundary.
   kFixupA53Erratum835769,  // Cortex A53 Erratum 835769.
+  kFixupSwitchTable,       // X86_64 packed switch table.
 };
 std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
 
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 00217549b..066041c6a 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -538,9 +538,12 @@ void Mir2Lir::InstallSwitchTables() {
       bx_offset = tab_rec->anchor->offset + 4;
       break;
     case kX86:
-    case kX86_64:
       bx_offset = 0;
       break;
+    case kX86_64:
+      // RIP relative to switch table.
+      bx_offset = tab_rec->offset;
+      break;
     case kArm64:
     case kMips:
       bx_offset = tab_rec->anchor->offset;
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 84d68d2b7..ad2ed01ad 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -553,7 +553,7 @@ std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs) {
 }
 
 static bool NeedsRex(int32_t raw_reg) {
-  return RegStorage::RegNum(raw_reg) > 7;
+  return raw_reg != kRIPReg && RegStorage::RegNum(raw_reg) > 7;
 }
 
 static uint8_t LowRegisterBits(int32_t raw_reg) {
@@ -689,7 +689,13 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int
           entry->opcode != kX86Lea32RM && entry->opcode != kX86Lea64RM) {
         DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name;
       }
-      size += IS_SIMM8(displacement) ? 1 : 4;
+      if (raw_base == kRIPReg) {
+        DCHECK(cu_->target64) <<
+          "Attempt to use 64-bit RIP addressing with instruction " << entry->name;
+        size += 4;
+      } else {
+        size += IS_SIMM8(displacement) ? 1 : 4;
+      }
     }
   }
   size += entry->skeleton.immediate_bytes;
@@ -1022,14 +1028,24 @@ void X86Mir2Lir::EmitModrmThread(uint8_t reg_or_opcode) {
 
 void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp) {
   DCHECK_LT(reg_or_opcode, 8);
-  DCHECK_LT(base, 8);
-  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
-  code_buffer_.push_back(modrm);
-  if (base == rs_rX86_SP_32.GetRegNum()) {
-    // Special SIB for SP base
-    code_buffer_.push_back(0 << 6 | rs_rX86_SP_32.GetRegNum() << 3 | rs_rX86_SP_32.GetRegNum());
+  if (base == kRIPReg) {
+    // x86_64 RIP handling: always 32 bit displacement.
+    uint8_t modrm = (0x0 << 6) | (reg_or_opcode << 3) | 0x5;
+    code_buffer_.push_back(modrm);
+    code_buffer_.push_back(disp & 0xFF);
+    code_buffer_.push_back((disp >> 8) & 0xFF);
+    code_buffer_.push_back((disp >> 16) & 0xFF);
+    code_buffer_.push_back((disp >> 24) & 0xFF);
+  } else {
+    DCHECK_LT(base, 8);
+    uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
+    code_buffer_.push_back(modrm);
+    if (base == rs_rX86_SP_32.GetRegNum()) {
+      // Special SIB for SP base
+      code_buffer_.push_back(0 << 6 | rs_rX86_SP_32.GetRegNum() << 3 | rs_rX86_SP_32.GetRegNum());
+    }
+    EmitDisp(base, disp);
   }
-  EmitDisp(base, disp);
 }
 
 void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index,
@@ -1141,7 +1157,7 @@ void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32
   CheckValidByteRegister(entry, raw_reg);
   EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base);
   uint8_t low_reg = LowRegisterBits(raw_reg);
-  uint8_t low_base = LowRegisterBits(raw_base);
+  uint8_t low_base = (raw_base == kRIPReg) ? raw_base : LowRegisterBits(raw_base);
   EmitModrmDisp(low_reg, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -1758,12 +1774,29 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) {
         LIR *target_lir = lir->target;
         DCHECK(target_lir != NULL);
         CodeOffset target = target_lir->offset;
-        lir->operands[2] = target;
-        int newSize = GetInsnSize(lir);
-        if (newSize != lir->flags.size) {
-          lir->flags.size = newSize;
-          res = kRetryAll;
+        // Handle 64 bit RIP addressing.
+        if (lir->operands[1] == kRIPReg) {
+          // Offset is relative to next instruction.
+          lir->operands[2] = target - (lir->offset + lir->flags.size);
+        } else {
+          lir->operands[2] = target;
+          int newSize = GetInsnSize(lir);
+          if (newSize != lir->flags.size) {
+            lir->flags.size = newSize;
+            res = kRetryAll;
+          }
         }
+      } else if (lir->flags.fixup == kFixupSwitchTable) {
+        DCHECK(cu_->target64);
+        DCHECK_EQ(lir->opcode, kX86Lea64RM) << "Unknown instruction: " << X86Mir2Lir::EncodingMap[lir->opcode].name;
+        DCHECK_EQ(lir->operands[1], static_cast<int>(kRIPReg));
+        // Grab the target offset from the saved data.
+        Mir2Lir::EmbeddedData* tab_rec =
+            reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(lir->operands[4]));
+        CodeOffset target = tab_rec->offset;
+        // Handle 64 bit RIP addressing.
+        // Offset is relative to next instruction.
+        lir->operands[2] = target - (lir->offset + lir->flags.size);
       }
       break;
     }
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index be10d93a9..544ac3b81 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -142,25 +142,7 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat
 
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
 
-  // NewLIR0(kX86Bkpt);
-  // Materialize a pointer to the switch table
-  RegStorage start_of_method_reg;
-  if (base_of_code_ != nullptr) {
-    // We can use the saved value.
-    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-    if (rl_method.wide) {
-      rl_method = LoadValueWide(rl_method, kCoreReg);
-    } else {
-      rl_method = LoadValue(rl_method, kCoreReg);
-    }
-    start_of_method_reg = rl_method.reg;
-    store_method_addr_used_ = true;
-  } else {
-    start_of_method_reg = AllocTempRef();
-    NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
-  }
-  DCHECK_EQ(start_of_method_reg.Is64Bit(), cu_->target64);
   int low_key = s4FromSwitchData(&table[2]);
   RegStorage keyReg;
   // Remove the bias, if necessary
@@ -170,19 +152,49 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat
     keyReg = AllocTemp();
     OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
   }
+
   // Bounds check - if < 0 or >= size continue following switch
   OpRegImm(kOpCmp, keyReg, size - 1);
   LIR* branch_over = OpCondBranch(kCondHi, NULL);
 
-  // Load the displacement from the switch table
-  RegStorage disp_reg = AllocTemp();
-  NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
-          2, WrapPointer(tab_rec));
-  // Add displacement to start of method
-  OpRegReg(kOpAdd, start_of_method_reg, cu_->target64 ? As64BitReg(disp_reg) : disp_reg);
+  RegStorage addr_for_jump;
+  if (cu_->target64) {
+    RegStorage table_base = AllocTempWide();
+    // Load the address of the table into table_base.
+    LIR* lea = RawLIR(current_dalvik_offset_, kX86Lea64RM, table_base.GetReg(), kRIPReg,
+                      256, 0, WrapPointer(tab_rec));
+    lea->flags.fixup = kFixupSwitchTable;
+    AppendLIR(lea);
+
+    // Load the offset from the table out of the table.
+    addr_for_jump = AllocTempWide();
+    NewLIR5(kX86MovsxdRA, addr_for_jump.GetReg(), table_base.GetReg(), keyReg.GetReg(), 2, 0);
+
+    // Add the offset from the table to the table base.
+    OpRegReg(kOpAdd, addr_for_jump, table_base);
+  } else {
+    // Materialize a pointer to the switch table.
+    RegStorage start_of_method_reg;
+    if (base_of_code_ != nullptr) {
+      // We can use the saved value.
+      RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+      rl_method = LoadValue(rl_method, kCoreReg);
+      start_of_method_reg = rl_method.reg;
+      store_method_addr_used_ = true;
+    } else {
+      start_of_method_reg = AllocTempRef();
+      NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
+    }
+    // Load the displacement from the switch table.
+    addr_for_jump = AllocTemp();
+    NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
+            2, WrapPointer(tab_rec));
+    // Add displacement to start of method.
+    OpRegReg(kOpAdd, addr_for_jump, start_of_method_reg);
+  }
+  // ..and go!
-  LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg());
-  tab_rec->anchor = switch_branch;
+  tab_rec->anchor = NewLIR1(kX86JmpR, addr_for_jump.GetReg());
 
   /* branch_over target here */
   LIR* target = NewLIR0(kPseudoTargetLabel);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 80cdc8349..85ab92bc0 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1289,6 +1289,18 @@ bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
 }
 
 LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+  if (cu_->target64) {
+    // We can do this directly using RIP addressing.
+    // We don't know the proper offset for the value, so pick one that will force
+    // 4 byte offset.  We will fix this up in the assembler later to have the right
+    // value.
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
+    LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256);
+    res->target = target;
+    res->flags.fixup = kFixupLoad;
+    return res;
+  }
+
   CHECK(base_of_code_ != nullptr);
 
   // Address the start of the method
@@ -1309,7 +1321,6 @@ LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
                      0, 0, target);
   res->target = target;
   res->flags.fixup = kFixupLoad;
-  store_method_addr_used_ = true;
   return res;
 }
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 998aeff36..ae80e9f1c 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -206,7 +206,7 @@ static const RegStorage RegStorage32FromSpecialTargetRegister_Target64[] {
     RegStorage::InvalidReg(),  // kSelf - Thread pointer.
     RegStorage::InvalidReg(),  // kSuspend - Used to reduce suspend checks for some targets.
    RegStorage::InvalidReg(),  // kLr - no register as the return address is pushed on entry.
-    RegStorage::InvalidReg(),  // kPc - TODO: RIP based addressing.
+    RegStorage(kRIPReg),       // kPc
     rs_rX86_SP_32,             // kSp
     rs_rDI,                    // kArg0
     rs_rSI,                    // kArg1
@@ -662,6 +662,12 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() {
     xp_reg_info->SetIsTemp(true);
   }
 
+  // Special Handling for x86_64 RIP addressing.
+  if (cu_->target64) {
+    RegisterInfo* info = new (arena_) RegisterInfo(RegStorage(kRIPReg), kEncodeNone);
+    reginfo_map_[kRIPReg] = info;
+  }
+
   // Alias single precision xmm to double xmms.
   // TODO: as needed, add larger vector sizes - alias all to the largest.
   for (RegisterInfo* info : reg_pool_->sp_regs_) {
@@ -1608,9 +1614,6 @@ void X86Mir2Lir::GenConst128(MIR* mir) {
 }
 
 void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) {
-  // The literal pool needs position independent logic.
-  store_method_addr_used_ = true;
-
   // To deal with correct memory ordering, reverse order of constants.
   int32_t constants[4];
   constants[3] = mir->dalvikInsn.arg[0];
@@ -1624,20 +1627,28 @@ void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) {
     data_target = AddVectorLiteral(constants);
   }
 
-  // Address the start of the method.
-  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-  if (rl_method.wide) {
-    rl_method = LoadValueWide(rl_method, kCoreReg);
-  } else {
-    rl_method = LoadValue(rl_method, kCoreReg);
-  }
-
   // Load the proper value from the literal area.
   // We don't know the proper offset for the value, so pick one that will force
-  // 4 byte offset.  We will fix this up in the assembler later to have the right
-  // value.
+  // 4 byte offset.  We will fix this up in the assembler later to have the
+  // right value.
+  LIR* load;
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR *load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */);
+  if (cu_->target64) {
+    load = NewLIR3(opcode, reg, kRIPReg, 256 /* bogus */);
+  } else {
+    // Address the start of the method.
+    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+    if (rl_method.wide) {
+      rl_method = LoadValueWide(rl_method, kCoreReg);
+    } else {
+      rl_method = LoadValue(rl_method, kCoreReg);
+    }
+
+    load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */);
+
+    // The literal pool needs position independent logic.
+    store_method_addr_used_ = true;
+  }
   load->flags.fixup = kFixupLoad;
   load->target = data_target;
 }
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index ad3222cd8..3b5869861 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -570,32 +570,36 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
   if (is_fp) {
     DCHECK(r_dest.IsDouble());
     if (value == 0) {
-      return NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val);
-    } else if (base_of_code_ != nullptr) {
+      return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val);
+    } else if (base_of_code_ != nullptr || cu_->target64) {
       // We will load the value from the literal area.
       LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
       if (data_target == NULL) {
         data_target = AddWideData(&literal_list_, val_lo, val_hi);
       }
 
-      // Address the start of the method
-      RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-      if (rl_method.wide) {
-        rl_method = LoadValueWide(rl_method, kCoreReg);
-      } else {
-        rl_method = LoadValue(rl_method, kCoreReg);
-      }
-
       // Load the proper value from the literal area.
-      // We don't know the proper offset for the value, so pick one that will force
-      // 4 byte offset.  We will fix this up in the assembler later to have the right
-      // value.
+      // We don't know the proper offset for the value, so pick one that
+      // will force 4 byte offset.  We will fix this up in the assembler
+      // later to have the right value.
       ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-      res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val),
-                         kDouble, kNotVolatile);
+      if (cu_->target64) {
+        res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */);
+      } else {
+        // Address the start of the method.
+        RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+        if (rl_method.wide) {
+          rl_method = LoadValueWide(rl_method, kCoreReg);
+        } else {
+          rl_method = LoadValue(rl_method, kCoreReg);
+        }
+
+        res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val),
+                           kDouble, kNotVolatile);
+        store_method_addr_used_ = true;
+      }
       res->target = data_target;
       res->flags.fixup = kFixupLoad;
-      store_method_addr_used_ = true;
     } else {
       if (r_dest.IsPair()) {
         if (val_lo == 0) {
@@ -960,12 +964,14 @@ void X86Mir2Lir::AnalyzeMIR() {
     curr_bb = iter.Next();
   }
 
-  // Did we need a pointer to the method code?
+  // Did we need a pointer to the method code?  Not in 64 bit mode.
+  base_of_code_ = nullptr;
+
+  // store_method_addr_ must be false for x86_64, since RIP addressing is used.
+  CHECK(!(cu_->target64 && store_method_addr_));
   if (store_method_addr_) {
-    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, cu_->target64 == true);
+    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
     DCHECK(base_of_code_ != nullptr);
-  } else {
-    base_of_code_ = nullptr;
   }
 }
 
@@ -994,19 +1000,22 @@ void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) {
       AnalyzeFPInstruction(opcode, bb, mir);
       break;
     case kMirOpConstVector:
-      store_method_addr_ = true;
+      if (!cu_->target64) {
+        store_method_addr_ = true;
+      }
       break;
     case kMirOpPackedMultiply:
     case kMirOpPackedShiftLeft:
     case kMirOpPackedSignedShiftRight:
-    case kMirOpPackedUnsignedShiftRight: {
-      // Byte emulation requires constants from the literal pool.
-      OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-      if (opsize == kSignedByte || opsize == kUnsignedByte) {
-        store_method_addr_ = true;
+    case kMirOpPackedUnsignedShiftRight:
+      if (!cu_->target64) {
+        // Byte emulation requires constants from the literal pool.
+        OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+        if (opsize == kSignedByte || opsize == kUnsignedByte) {
+          store_method_addr_ = true;
+        }
       }
       break;
-    }
     default:
       // Ignore the rest.
       break;
@@ -1016,6 +1025,7 @@ void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) {
 void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
   // Looking for
   //  - Do we need a pointer to the code (used for packed switches and double lits)?
+  //    64 bit uses RIP addressing instead.
 
   switch (opcode) {
     // Instructions referencing doubles.
@@ -1038,7 +1048,9 @@ void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
     // Packed switches and array fills need a pointer to the base of the method.
     case Instruction::FILL_ARRAY_DATA:
     case Instruction::PACKED_SWITCH:
-      store_method_addr_ = true;
+      if (!cu_->target64) {
+        store_method_addr_ = true;
+      }
       break;
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE:
@@ -1115,7 +1127,8 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc) {
 
 void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) {
   UNUSED(opcode, bb);
-  // For now this is only actual for x86-32.
+
+  // 64 bit RIP addressing doesn't need store_method_addr_ set.
   if (cu_->target64) {
     return;
   }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 76a67c4d6..3e0a8527a 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -217,6 +217,9 @@ enum X86NativeRegisterPool {
   xr14 = RegStorage::k128BitSolo | 14,
   xr15 = RegStorage::k128BitSolo | 15,
 
+  // Special value for RIP 64 bit addressing.
+  kRIPReg = 255,
+
   // TODO: as needed, add 256, 512 and 1024-bit xmm views.
 };
 
-- 
2.11.0
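
Note for reviewers: the two encoding rules the patch relies on can be illustrated outside the ART sources. A ModRM byte with mod == 0 and r/m == 0x5 selects RIP-relative addressing in 64-bit mode and always carries a 32-bit displacement (this is what the new branch in EmitModrmDisp emits), and that displacement is measured from the end of the current instruction, which is why the kFixupLoad and kFixupSwitchTable cases compute target - (lir->offset + lir->flags.size). The following is a minimal standalone C++ sketch of those rules only; the helper names EmitRipRelativeModrm and RipDisplacement are invented for the example and do not exist in the patch.

#include <cstdint>
#include <cstdio>
#include <vector>

// Append a RIP-relative ModRM byte plus a little-endian 32-bit displacement,
// mirroring the kRIPReg path added to EmitModrmDisp.
static void EmitRipRelativeModrm(std::vector<uint8_t>* buf, uint8_t reg_or_opcode, int32_t disp) {
  // mod = 00, r/m = 101 (0x5) means [RIP + disp32] in 64-bit mode.
  buf->push_back((0x0 << 6) | (reg_or_opcode << 3) | 0x5);
  buf->push_back(disp & 0xFF);
  buf->push_back((disp >> 8) & 0xFF);
  buf->push_back((disp >> 16) & 0xFF);
  buf->push_back((disp >> 24) & 0xFF);
}

// The displacement is relative to the next instruction (the end of the current
// one), matching the fixup computation in AssembleInstructions.
static int32_t RipDisplacement(uint32_t target_offset, uint32_t insn_offset, uint32_t insn_size) {
  return static_cast<int32_t>(target_offset - (insn_offset + insn_size));
}

int main() {
  std::vector<uint8_t> buf;
  // Example: a literal at code offset 0x200, referenced by a 7-byte load at offset 0x40.
  int32_t disp = RipDisplacement(0x200, 0x40, 7);
  EmitRipRelativeModrm(&buf, /*reg_or_opcode=*/0, disp);
  std::printf("disp32 = %d, modrm = 0x%02x\n", disp, buf[0]);
  return 0;
}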