From: Nicolas Geoffray
Date: Thu, 11 Feb 2016 17:35:55 +0000 (+0000)
Subject: Re-enable OSR.
X-Git-Tag: android-x86-7.1-r1~408^2~7^2~37^2
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=d9994f069dfeaa32ba929ca78816b5b83e2a4134;p=android-x86%2Fart.git

Re-enable OSR.

Fixes two bugs:
- Dealing with proxy methods, which the compiler and code cache
  do not handle.
- Dealing with phi types that may have been speculatively optimized
  but do not hold once execution jumps to the compiled code.

Change-Id: I7dcd9976ef7b12128fff95d2b7ed3e69cc42e90a
---

diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index d2bf6c07c..3fe786141 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -203,6 +203,7 @@ JitCompiler::~JitCompiler() {
 }
 
 bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
+  DCHECK(!method->IsProxyMethod());
   TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit));
   const uint64_t start_time = NanoTime();
   StackHandleScope<2> hs(self);
@@ -220,20 +221,17 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
   bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
-    // If we get a request to compile a proxy method, we pass the actual Java method
-    // of that proxy method, as the compiler does not expect a proxy method.
-    ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
-    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile, osr);
+    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method, osr);
     if (success && (perf_file_ != nullptr)) {
-      const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
+      const void* ptr = method->GetEntryPointFromQuickCompiledCode();
       std::ostringstream stream;
       stream << std::hex
              << reinterpret_cast<uintptr_t>(ptr)
              << " "
              << code_cache->GetMemorySizeOfCodePointer(ptr)
              << " "
-             << PrettyMethod(method_to_compile)
+             << PrettyMethod(method)
              << std::endl;
       std::string str = stream.str();
       bool res = perf_file_->WriteFully(str.c_str(), str.size());
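
The hunks above remove proxy-method special-casing from the JIT compiler; the
runtime/jit/jit.cc hunk further down moves the unwrapping into
Jit::CompileMethod, so the compiler can simply assert that it never sees a
proxy. A minimal stand-alone sketch of that invariant, using illustrative
stand-in types rather than ART's real ArtMethod machinery:

    #include <cassert>
    #include <iostream>

    struct Method {
      const char* name;
      bool is_proxy;
      Method* interface_method;  // the real Java method a proxy forwards to

      // Mirrors ArtMethod::GetInterfaceMethodIfProxy: a proxy is replaced by
      // the interface method it forwards to; ordinary methods pass through.
      Method* GetInterfaceMethodIfProxy() {
        return is_proxy ? interface_method : this;
      }
    };

    // Compiler side (jit_compiler.cc after this change): receiving a proxy
    // is now a caller bug, not a case to handle.
    bool CompileMethod(Method* m) {
      assert(!m->is_proxy);  // models the new DCHECK(!method->IsProxyMethod())
      std::cout << "compiling " << m->name << "\n";
      return true;
    }

    // Runtime side (jit.cc after this change): unwrap once, then use the
    // unwrapped method consistently for the code cache and the compiler.
    bool JitCompile(Method* m) {
      return CompileMethod(m->GetInterfaceMethodIfProxy());
    }

    int main() {
      Method real{"Iface.run", /*is_proxy=*/false, nullptr};
      Method proxy{"$Proxy0.run", /*is_proxy=*/true, &real};
      JitCompile(&proxy);  // compiles Iface.run, never the proxy itself
      return 0;
    }
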
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index fa6aae81f..3740c405c 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -378,7 +378,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
 
   // Run type propagation to get the guard typed, and eventually propagate the
   // type of the receiver.
-  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
   rtp_fixup.Run();
 
   MaybeRecordStat(kInlinedMonomorphicCall);
@@ -420,6 +420,9 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
       actual_method = new_method;
     } else if (actual_method != new_method) {
       // Different methods, bailout.
+      VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                     << " from inline cache is not inlined because it resolves"
+                     << " to different methods";
       return false;
     }
   }
@@ -474,7 +477,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
   deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
 
   // Run type propagation to get the guard typed.
-  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
   rtp_fixup.Run();
 
   MaybeRecordStat(kInlinedPolymorphicCall);
@@ -727,7 +730,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
       // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
       /* dex_pc */ 0);
   if (iget->GetType() == Primitive::kPrimNot) {
-    ReferenceTypePropagation rtp(graph_, handles_);
+    ReferenceTypePropagation rtp(graph_, handles_, /* is_first_run */ false);
     rtp.Visit(iget);
   }
   return iget;
@@ -756,6 +759,7 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache,
       /* dex_pc */ 0);
   return iput;
 }
+
 bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
                                  HInvoke* invoke_instruction,
                                  bool same_dex_file,
@@ -988,12 +992,18 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
 
       if (current->IsNewInstance() &&
           (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) {
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " could not be inlined because it is using an entrypoint"
+                       << " with access checks";
         // Allocation entrypoint does not handle inlined frames.
         return false;
       }
 
       if (current->IsNewArray() &&
           (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) {
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " could not be inlined because it is using an entrypoint"
+                       << " with access checks";
         // Allocation entrypoint does not handle inlined frames.
         return false;
       }
@@ -1003,6 +1013,9 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
           current->IsUnresolvedStaticFieldSet() ||
           current->IsUnresolvedInstanceFieldSet()) {
         // Entrypoint for unresolved fields does not handle inlined frames.
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " could not be inlined because it is using an unresolved"
+                       << " entrypoint";
         return false;
       }
     }
@@ -1044,13 +1057,13 @@ void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method,
       if (invoke_rti.IsStrictSupertypeOf(return_rti)
           || (return_rti.IsExact() && !invoke_rti.IsExact())
           || !return_replacement->CanBeNull()) {
-        ReferenceTypePropagation(graph_, handles_).Run();
+        ReferenceTypePropagation(graph_, handles_, /* is_first_run */ false).Run();
       }
     }
   } else if (return_replacement->IsInstanceOf()) {
     if (do_rtp) {
       // Inlining InstanceOf into an If may put a tighter bound on reference types.
-      ReferenceTypePropagation(graph_, handles_).Run();
+      ReferenceTypePropagation(graph_, handles_, /* is_first_run */ false).Run();
     }
   }
 }
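
Two inliner changes above deserve a note: every ReferenceTypePropagation run
the inliner triggers is now flagged is_first_run = false, and polymorphic
inlining logs why it bails out when the inline cache does not resolve to a
single target. A toy model of that single-target check follows; MethodRef,
Klass and ResolveMonomorphicTarget are invented names, not ART's API:

    #include <cstdio>
    #include <vector>

    struct MethodRef { int id; };
    struct Klass { MethodRef vtable_entry; };

    // Returns the single target if every class in the inline cache resolves
    // the call to the same method, or nullptr if the call is polymorphic.
    const MethodRef* ResolveMonomorphicTarget(const std::vector<Klass*>& cache) {
      const MethodRef* target = nullptr;
      for (const Klass* k : cache) {
        if (target == nullptr) {
          target = &k->vtable_entry;
        } else if (target->id != k->vtable_entry.id) {
          // The bailout the inliner now reports via VLOG(compiler).
          std::puts("not inlined: inline cache resolves to different methods");
          return nullptr;
        }
      }
      return target;
    }

    int main() {
      Klass a{{1}}, b{{1}}, c{{2}};
      std::vector<Klass*> monomorphic{&a, &b};
      std::vector<Klass*> polymorphic{&a, &c};
      ResolveMonomorphicTarget(monomorphic);  // both entries resolve to method 1
      ResolveMonomorphicTarget(polymorphic);  // prints the bailout message
      return 0;
    }
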
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 1224a48fa..deaa415ed 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -55,10 +55,12 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
  public:
   RTPVisitor(HGraph* graph,
              HandleCache* handle_cache,
-             ArenaVector<HInstruction*>* worklist)
+             ArenaVector<HInstruction*>* worklist,
+             bool is_first_run)
     : HGraphDelegateVisitor(graph),
       handle_cache_(handle_cache),
-      worklist_(worklist) {}
+      worklist_(worklist),
+      is_first_run_(is_first_run) {}
 
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
@@ -86,14 +88,17 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
  private:
   HandleCache* handle_cache_;
   ArenaVector<HInstruction*>* worklist_;
+  const bool is_first_run_;
 };
 
 ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
                                                    StackHandleScopeCollection* handles,
+                                                   bool is_first_run,
                                                    const char* name)
     : HOptimization(graph, name),
       handle_cache_(handles),
-      worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)) {
+      worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)),
+      is_first_run_(is_first_run) {
 }
 
 void ReferenceTypePropagation::ValidateTypes() {
@@ -125,7 +130,7 @@ void ReferenceTypePropagation::ValidateTypes() {
 }
 
 void ReferenceTypePropagation::Visit(HInstruction* instruction) {
-  RTPVisitor visitor(graph_, &handle_cache_, &worklist_);
+  RTPVisitor visitor(graph_, &handle_cache_, &worklist_, is_first_run_);
   instruction->Accept(&visitor);
 }
 
@@ -144,7 +149,7 @@ void ReferenceTypePropagation::Run() {
 }
 
 void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
-  RTPVisitor visitor(graph_, &handle_cache_, &worklist_);
+  RTPVisitor visitor(graph_, &handle_cache_, &worklist_, is_first_run_);
   // Handle Phis first as there might be instructions in the same block who depend on them.
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     VisitPhi(it.Current()->AsPhi());
@@ -620,6 +625,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
   DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));
 
   if (class_rti.IsValid()) {
+    DCHECK(is_first_run_);
     // This is the first run of RTP and class is resolved.
     bound_type->SetUpperBound(class_rti, /* CheckCast succeeds for nulls. */ true);
   } else {
@@ -636,6 +642,12 @@ void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
+    if (!is_first_run_ && graph_->IsCompilingOsr()) {
+      // Don't update the type of a loop phi when compiling OSR: we may have done
+      // speculative optimizations dominating that phi, that do not hold at the
+      // point the interpreter jumps to that loop header.
+      return;
+    }
     ScopedObjectAccess soa(Thread::Current());
     // Set the initial type for the phi. Use the non back edge input for reaching
     // a fixed point faster.
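
The early return added to VisitPhi is the heart of the second bug fix: on any
run after the first, an OSR compilation must not retype loop-header phis,
because speculative optimizations dominating the phi need not hold at the
loop header the interpreter jumps to. A self-contained model, with Phi, Graph
and VisitPhi as assumed stand-ins for ART's classes:

    #include <iostream>
    #include <string>

    struct Phi {
      std::string type = "Object";   // conservative type from the first run
      bool is_loop_header_phi = false;
    };

    struct Graph { bool compiling_osr = false; };

    void VisitPhi(Phi* phi, const Graph& graph, bool is_first_run,
                  const std::string& speculated_type) {
      if (phi->is_loop_header_phi && !is_first_run && graph.compiling_osr) {
        // Mirrors the early return above: a type narrowed after the first
        // run may be wrong where the interpreter enters the loop.
        return;
      }
      phi->type = speculated_type;
    }

    int main() {
      Graph osr_graph{/*compiling_osr=*/true};
      Phi loop_phi;
      loop_phi.is_loop_header_phi = true;
      VisitPhi(&loop_phi, osr_graph, /*is_first_run=*/false, "SubMain");
      std::cout << loop_phi.type << "\n";  // still "Object" under OSR
      return 0;
    }
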
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index a7f10a65a..028a6fc51 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -33,6 +33,7 @@ class ReferenceTypePropagation : public HOptimization {
  public:
   ReferenceTypePropagation(HGraph* graph,
                            StackHandleScopeCollection* handles,
+                           bool is_first_run,
                            const char* name = kReferenceTypePropagationPassName);
 
   // Visit a single instruction.
@@ -93,6 +94,8 @@ class ReferenceTypePropagation : public HOptimization {
 
   ArenaVector<HInstruction*> worklist_;
 
+  // Whether this reference type propagation is the first run we are doing.
+  const bool is_first_run_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 165d09d1a..c08e5dd91 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -483,7 +483,7 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
 
   // 6) Compute type of reference type instructions. The pass assumes that
   // NullConstant has been fixed up.
-  ReferenceTypePropagation(GetGraph(), handles_).Run();
+  ReferenceTypePropagation(GetGraph(), handles_, /* is_first_run */ true).Run();
 
   // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float
   //    or long/double) and marked ArraySets with ambiguous input type. Now that RTP
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 949ad9926..c4e314b6c 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -444,11 +444,12 @@ ENTRY art_quick_osr_stub
     mov r10, r1                   @ Save size of stack
     ldr r9, [r11, #40]            @ Move managed thread pointer into r9
     mov r8, r2                    @ Save the pc to call
-    sub r7, sp, #12               @ Reserve space for stack pointer, JValue result, and ArtMethod* slot
+    sub r7, sp, #12               @ Reserve space for stack pointer,
+                                  @ JValue* result, and ArtMethod* slot.
     and r7, #0xFFFFFFF0           @ Align stack pointer
     mov sp, r7                    @ Update stack pointer
     str r11, [sp, #4]             @ Save old stack pointer
-    str r3, [sp, #8]              @ Save JValue result
+    str r3, [sp, #8]              @ Save JValue* result
     mov ip, #0
     str ip, [sp]                  @ Store null for ArtMethod* at bottom of frame
     sub sp, sp, r1                @ Reserve space for callee stack
@@ -457,9 +458,8 @@ ENTRY art_quick_osr_stub
     mov r0, sp
     bl memcpy                     @ memcpy (dest r0, src r1, bytes r2)
     bl .Losr_entry                @ Call the method
-    ldr r11, [sp, #4]             @ Restore saved stack pointer
-    ldr r10, [sp, #8]             @ Restore JValue result
-    mov sp, r11                   @ Restore stack pointer.
+    ldr r10, [sp, #8]             @ Restore JValue* result
+    ldr sp, [sp, #4]              @ Restore saved stack pointer
     ldr r4, [sp, #36]             @ load shorty
     ldrb r4, [r4, #0]             @ load return type
     cmp r4, #68                   @ Test if result type char == 'D'.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index d6e0f1c1a..69caec88f 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1755,6 +1755,8 @@ END_FUNCTION art_quick_read_barrier_for_root_slow
      * rcx = JValue* result
      * r8 = shorty
      * r9 = thread
+     *
+     * Note that the native C ABI has already aligned the stack to 16 bytes.
      */
 DEFINE_FUNCTION art_quick_osr_stub
     // Save the non-volatiles.
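
For readers not fluent in the ARM assembly above, the stub's frame arithmetic
can be expressed in C++ roughly as below. This is a hypothetical model only;
the real work happens in the hand-written stub, and on x86-64 the comment
added above relies on the native C ABI having aligned the stack already. The
three reserved slots are the saved stack pointer, the JValue* result, and the
null ArtMethod* at the bottom of the frame:

    #include <cstdint>
    #include <cstdio>

    // Returns the value left in sp before the stub memcpys the OSR frame.
    uintptr_t SetUpOsrFrame(uintptr_t sp, uint32_t frame_size) {
      uintptr_t reserved = sp - 12;                    // sub r7, sp, #12
      uintptr_t aligned = reserved & ~uintptr_t{0xF};  // and r7, #0xFFFFFFF0
      return aligned - frame_size;                     // sub sp, sp, r1
    }

    int main() {
      // A made-up starting stack pointer, for illustration only.
      uintptr_t sp = SetUpOsrFrame(0x7fff1234u, 64);
      std::printf("new sp = %#lx\n", static_cast<unsigned long>(sp));
      return 0;
    }
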
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index e38a68482..fcfa457bf 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -36,8 +36,6 @@
 namespace art {
 namespace jit {
 
-static constexpr bool kEnableOnStackReplacement = false;
-
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
   jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT);
@@ -164,6 +162,7 @@ bool Jit::LoadCompiler(std::string* error_msg) {
 
 bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) {
   DCHECK(!method->IsRuntimeMethod());
+  // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
     return false;
@@ -177,12 +176,15 @@ bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) {
     return false;
   }
 
-  if (!code_cache_->NotifyCompilationOf(method, self, osr)) {
+  // If we get a request to compile a proxy method, we pass the actual Java method
+  // of that proxy method, as the compiler does not expect a proxy method.
+  ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
+  if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to code cache";
     return false;
   }
-  bool success = jit_compile_method_(jit_compiler_handle_, method, self, osr);
-  code_cache_->DoneCompiling(method, self);
+  bool success = jit_compile_method_(jit_compiler_handle_, method_to_compile, self, osr);
+  code_cache_->DoneCompiling(method_to_compile, self);
   return success;
 }
 
@@ -276,10 +278,6 @@ bool Jit::MaybeDoOnStackReplacement(Thread* thread,
                                     uint32_t dex_pc,
                                     int32_t dex_pc_offset,
                                     JValue* result) {
-  if (!kEnableOnStackReplacement) {
-    return false;
-  }
-
   Jit* jit = Runtime::Current()->GetJit();
   if (jit == nullptr) {
     return false;
@@ -326,7 +324,13 @@ bool Jit::MaybeDoOnStackReplacement(Thread* thread,
   ShadowFrame* shadow_frame = thread->PopShadowFrame();
 
   size_t frame_size = osr_method->GetFrameSizeInBytes();
+
+  // Allocate memory to put the shadow frame values. The OSR stub will copy that
+  // memory to the stack.
+  // Note that we could pass the shadow frame to the stub, and let it copy the values there,
+  // but that is engineering complexity not worth the effort for something like OSR.
   void** memory = reinterpret_cast<void**>(malloc(frame_size));
+  CHECK(memory != nullptr);
   memset(memory, 0, frame_size);
 
   // Art ABI: ArtMethod is at the bottom of the stack.
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 464c441e8..9111ddf9f 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -578,7 +578,7 @@ void JitCodeCache::GarbageCollectCache(Thread* self) {
       }
     }
 
-    // Empty osr method map, as osr compile code will be deleted (except the ones
+    // Empty osr method map, as osr compiled code will be deleted (except the ones
     // on thread stacks).
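    osr_code_map_.clear();
   }

The malloc/memset/CHECK sequence added to Jit::MaybeDoOnStackReplacement
stages the interpreter's shadow-frame values in a heap buffer that
art_quick_osr_stub later memcpys onto the stack. A hedged sketch of that
flow; ShadowFrame here is a stand-in struct, and placing the vregs at the
start of the buffer is purely illustrative (the real code follows the stack
map's offsets):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    struct ShadowFrame { uint32_t vregs[4]; };  // stand-in for ART's class

    int main() {
      ShadowFrame frame{{1, 2, 3, 4}};
      size_t frame_size = 64;  // from the compiled method's header in ART

      // The patch's malloc + CHECK + memset: unfilled slots must read as 0.
      void** memory = reinterpret_cast<void**>(malloc(frame_size));
      assert(memory != nullptr);
      memset(memory, 0, frame_size);

      // Stage the dex register values into the buffer.
      memcpy(memory, frame.vregs, sizeof(frame.vregs));

      // The stub would now memcpy this buffer below its own frame and jump
      // to the OSR entry point; the buffer is freed once consumed.
      std::printf("staged vreg[2] = %u\n",
                  reinterpret_cast<uint32_t*>(memory)[2]);
      free(memory);
      return 0;
    }
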
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index fb846872e..c1774f3d0 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -38,7 +38,8 @@ class OsrVisitor : public StackVisitor {
         (m_name.compare("$noinline$returnFloat") == 0) ||
         (m_name.compare("$noinline$returnDouble") == 0) ||
         (m_name.compare("$noinline$returnLong") == 0) ||
-        (m_name.compare("$noinline$deopt") == 0)) {
+        (m_name.compare("$noinline$deopt") == 0) ||
+        (m_name.compare("$noinline$inlineCache") == 0)) {
       const OatQuickMethodHeader* header =
           Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
       if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 748516331..4397b9104 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -16,6 +16,7 @@
 
 public class Main {
   public static void main(String[] args) {
+    new SubMain();
     System.loadLibrary(args[0]);
     if ($noinline$returnInt() != 53) {
       throw new Error("Unexpected return value");
@@ -33,6 +34,12 @@ public class Main {
     try {
       $noinline$deopt();
     } catch (Exception e) {}
+    DeoptimizationController.stopDeoptimization();
+
+    $noinline$inlineCache(new Main(), 0);
+    if ($noinline$inlineCache(new SubMain(), 1) != SubMain.class) {
+      throw new Error("Unexpected return value");
+    }
   }
 
   public static int $noinline$returnInt() {
@@ -84,9 +91,57 @@ public class Main {
     DeoptimizationController.startDeoptimization();
   }
 
+  public static Class $noinline$inlineCache(Main m, int count) {
+    for (int i = 0; i < 500; ++i) {
+      // Warm me up.
+    }
+    if (count == 1) {
+      // Lots of back edges to trigger OSR compilation.
+      for (int i = 0; i < 1000; ++i) {
+      }
+      // Wait for OSR compilation.
+      try {
+        Thread.sleep(10);
+      } catch (Exception e) {}
+    }
+
+    // This call will be optimized in the OSR compiled code
+    // to check and deoptimize if m is not of type 'Main'.
+    Main other = m.inlineCache();
+
+    if (count == 1) {
+      // Jump to OSR compiled code. The second run
+      // of this method will have 'm' as a SubMain, and the compiled
+      // code we are jumping to will have wrongly optimized 'other' as being a
+      // 'Main'.
+      while (!ensureInOsrCode()) {}
+    }
+
+    // We used to wrongly optimize this call and assume 'other' was a 'Main'.
+    return other.returnClass();
+  }
+
+  public Main inlineCache() {
+    return new Main();
+  }
+
+  public Class returnClass() {
+    return Main.class;
+  }
+
   public static int[] array = new int[4];
 
   public static native boolean ensureInOsrCode();
 
   public static boolean doThrow = false;
 }
+
+class SubMain extends Main {
+  public Class returnClass() {
+    return SubMain.class;
+  }
+
+  public Main inlineCache() {
+    return new SubMain();
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 9dcd4dc7b..b3560b634 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -446,9 +446,7 @@ TEST_ART_BROKEN_INTERPRETER_RUN_TESTS :=
 # Known broken tests for the JIT.
 # CFI unwinding expects managed frames, and the test does not iterate enough to even compile. JIT
 # also uses Generic JNI instead of the JNI compiler.
-# 570 is disabled while investigating osr flakiness.
 TEST_ART_BROKEN_JIT_RUN_TESTS := \
-  570-checker-osr \
   137-cfi
 
 ifneq (,$(filter jit,$(COMPILER_TYPES)))
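
The new Java regression test can be read as a virtual-dispatch hazard: code
OSR-compiled while only Main receivers were ever observed must not keep
assuming the receiver is exactly Main once a SubMain flows in. The same shape
in C++, purely as an illustration of what the deoptimization guard and the
loop-phi fix protect against:

    #include <iostream>
    #include <memory>

    struct Main {
      virtual ~Main() = default;
      virtual Main* inlineCache() { return new Main(); }
      virtual const char* returnClass() { return "Main"; }
    };

    struct SubMain : Main {
      Main* inlineCache() override { return new SubMain(); }
      const char* returnClass() override { return "SubMain"; }
    };

    int main() {
      // First run: a profile that only ever saw Main would let compiled
      // code speculate that 'other' is exactly Main. Second run: the
      // receiver is a SubMain, so that speculation must be guarded.
      SubMain sub;
      std::unique_ptr<Main> other(sub.inlineCache());
      std::cout << other->returnClass() << "\n";  // must print "SubMain"
      return 0;
    }
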