From: Jean Christophe Beyler Date: Fri, 25 Jul 2014 19:32:18 +0000 (-0700) Subject: ART: Add non-temporal store support X-Git-Tag: android-x86-7.1-r1~889^2~3240^2 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=b5bce7cc9f1130ab4932ba8e6917c362bf871f24;p=android-x86%2Fart.git ART: Add non-temporal store support Added non-temporal store support as a hint from the ME. Added the implementation of the memory barrier extended instruction that supports non-temporal stores by explicitly serializing all previous store-to-memory instructions. Change-Id: I8205a92083f9725253d8ce893671a133a0b6849d Signed-off-by: Jean Christophe Beyler Signed-off-by: Chao-ying Fu --- diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 47cb4245d..5263b8de7 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -234,6 +234,10 @@ enum ExtendedMIROpcode { // @note: All currently reserved vector registers are returned to the temporary pool. kMirOpReturnVectorRegisters, + // @brief Create a memory barrier. + // vA: a constant defined by enum MemBarrierKind. + kMirOpMemBarrier, + kMirOpLast, }; @@ -249,6 +253,7 @@ enum MIROptimizationFlagPositions { kMIRIgnoreSuspendCheck, kMIRDup, kMIRMark, // Temporary node mark. + kMIRStoreNonTemporal, kMIRLastMIRFlag, }; @@ -453,12 +458,15 @@ std::ostream& operator<<(std::ostream& os, const DividePattern& pattern); * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load. * -# Use StoreStore barrier after all stores but before return from any constructor whose * class has final fields. + * -# Use NTStoreStore to order non-temporal stores with respect to all later + * store-to-memory instructions. Only generated together with non-temporal stores. 
*/ enum MemBarrierKind { kAnyStore, kLoadAny, kStoreStore, - kAnyAny + kAnyAny, + kNTStoreStore, }; std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind); diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 0b05bbea1..a8af92cbe 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -889,6 +889,9 @@ const uint32_t MIRGraph::analysis_attributes_[kMirOpLast] = { // 129 MirOpReturnVectorRegisters AN_NONE, + + // 130 MirOpMemBarrier + AN_NONE, }; struct MethodStats { diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index 55ccf64b9..a964cc720 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -888,6 +888,9 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { // 129 MirOpReturnVectorRegisters 0, + + // 130 MirOpMemBarrier + 0, }; /* Return the base virtual register for a SSA name */ diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 6a20908a2..49e5c7682 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -67,6 +67,7 @@ const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = { "PackedSet", "ReserveVectorRegisters", "ReturnVectorRegisters", + "MemBarrier", }; MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) @@ -941,7 +942,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff mir->next ? " | " : " "); } } else { - fprintf(file, " {%04x %s %s %s %s\\l}%s\\\n", mir->offset, + fprintf(file, " {%04x %s %s %s %s %s\\l}%s\\\n", mir->offset, mir->ssa_rep ? GetDalvikDisassembly(mir) : !MIR::DecodedInstruction::IsPseudoMirOp(opcode) ? Instruction::Name(mir->dalvikInsn.opcode) : @@ -949,6 +950,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ", (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? 
" no_nullcheck" : " ", (mir->optimization_flags & MIR_IGNORE_SUSPEND_CHECK) != 0 ? " no_suspendcheck" : " ", + (mir->optimization_flags & MIR_STORE_NON_TEMPORAL) != 0 ? " non_temporal" : " ", mir->next ? " | " : " "); } } diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 4e0dfc106..2943b9d9b 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -195,6 +195,7 @@ enum OatMethodAttributes { #define MIR_CALLEE (1 << kMIRCallee) #define MIR_IGNORE_SUSPEND_CHECK (1 << kMIRIgnoreSuspendCheck) #define MIR_DUP (1 << kMIRDup) +#define MIR_STORE_NON_TEMPORAL (1 << kMIRStoreNonTemporal) #define BLOCK_NAME_LEN 80 diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 6173163aa..46f5dd332 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -188,6 +188,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Mov32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32MR", "[!0r+!1d],!2r" }, { kX86Mov32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32AR", "[!0r+!1r<(mir->dalvikInsn.vA)); + break; default: break; } diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index f159beb64..037dfedfe 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -779,15 +779,20 @@ LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_ } LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, - int displacement, RegStorage r_src, OpSize size) { + int displacement, RegStorage r_src, OpSize size, + int opt_flags) { LIR *store = NULL; LIR *store2 = NULL; bool is_array = r_index.Valid(); bool pair = r_src.IsPair(); bool is64bit = (size == k64) || (size == kDouble); + bool consider_non_temporal = false; + X86OpCode opcode = kX86Nop; switch (size) { case k64: + 
consider_non_temporal = true; + // Fall through! case kDouble: if (r_src.IsFloat()) { opcode = is_array ? kX86MovsdAR : kX86MovsdMR; @@ -804,6 +809,7 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? kX86Mov64AR : kX86Mov64MR; CHECK_EQ(is_array, false); CHECK_EQ(r_src.IsFloat(), false); + consider_non_temporal = true; break; } // else fall-through to k32 case case k32: @@ -815,6 +821,7 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int DCHECK(r_src.IsSingle()); } DCHECK_EQ((displacement & 0x3), 0); + consider_non_temporal = true; break; case kUnsignedHalf: case kSignedHalf: @@ -829,6 +836,28 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int LOG(FATAL) << "Bad case in StoreBaseIndexedDispBody"; } + // Handle non temporal hint here. + if (consider_non_temporal && ((opt_flags & MIR_STORE_NON_TEMPORAL) != 0)) { + switch (opcode) { + // We currently only handle 32/64 bit moves here. + case kX86Mov64AR: + opcode = kX86Movnti64AR; + break; + case kX86Mov64MR: + opcode = kX86Movnti64MR; + break; + case kX86Mov32AR: + opcode = kX86Movnti32AR; + break; + case kX86Mov32MR: + opcode = kX86Movnti32MR; + break; + default: + // Do nothing here. 
+ break; + } + } + if (!is_array) { if (!pair) { store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetReg()); diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 9620cd129..15fe7e298 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -440,12 +440,12 @@ enum X86OpCode { kX86Mov16MR, kX86Mov16AR, kX86Mov16TR, kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT, kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI, - kX86Mov32MR, kX86Mov32AR, kX86Mov32TR, + kX86Mov32MR, kX86Mov32AR, kX86Movnti32MR, kX86Movnti32AR, kX86Mov32TR, kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT, kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Lea32RM, kX86Lea32RA, - kX86Mov64MR, kX86Mov64AR, kX86Mov64TR, + kX86Mov64MR, kX86Mov64AR, kX86Movnti64MR, kX86Movnti64AR, kX86Mov64TR, kX86Mov64RR, kX86Mov64RM, kX86Mov64RA, kX86Mov64RT, kX86Mov64RI32, kX86Mov64RI64, kX86Mov64MI, kX86Mov64AI, kX86Mov64TI, kX86Lea64RM, @@ -620,7 +620,12 @@ enum X86OpCode { kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm kX86MovsxdRR, kX86MovsxdRM, kX86MovsxdRA, // move 32 bit to 64 bit with sign extension kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand - kX86Mfence, // memory barrier + kX86Lfence, // memory barrier to serialize all previous + // load-from-memory instructions + kX86Mfence, // memory barrier to serialize all previous + // load-from-memory and store-to-memory instructions + kX86Sfence, // memory barrier to serialize all previous + // store-to-memory instructions Binary0fOpCode(kX86Imul16), // 16bit multiply Binary0fOpCode(kX86Imul32), // 32bit multiply Binary0fOpCode(kX86Imul64), // 64bit multiply diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 0bf758efb..7551add41 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -764,6 +764,7 @@ DISASSEMBLER_ENTRY(cmp, case 0xB7: opcode << 
"movzxw"; has_modrm = true; load = true; break; case 0xBE: opcode << "movsxb"; has_modrm = true; load = true; byte_second_operand = true; rex |= (rex == 0 ? 0 : 0b1000); break; case 0xBF: opcode << "movsxw"; has_modrm = true; load = true; break; + case 0xC3: opcode << "movnti"; store = true; has_modrm = true; break; case 0xC5: if (prefix[2] == 0x66) { opcode << "pextrw";