Fix compilation statistics collection in inliner.
author     Vladimir Marko <vmarko@google.com>
           Thu, 23 Feb 2017 18:56:13 +0000 (18:56 +0000)
committer  Vladimir Marko <vmarko@google.com>
           Thu, 23 Feb 2017 19:06:46 +0000 (19:06 +0000)
Stats from the callee graph builder were not merged into the main
stats, and stats for callee graph optimizations were counted even
when the callee graph was eventually rejected.

Allocate the callee graph statistics on the arena.
Measured compilation of a big app using heaptrack:
  bytes allocated in total (ignoring deallocations): 3.77GB -> 3.37GB
  calls to allocation functions: 10650510 -> 8203129

Test: testrunner.py --host
Test: Stats change in the expected direction for an app.
Bug: 34053922
Change-Id: I605280d262b86af14b847acf3bb6dc077b749cc0
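
For context, the pattern this change adopts can be sketched in isolation:
per-callee optimization stats go into a scratch object and are folded into the
caller's stats only if the callee is actually inlined. The sketch below is
illustrative only; Stats, TryInline and the stat names are stand-ins, not
ART's OptimizingCompilerStats, HInliner or MethodCompilationStat.

// Illustrative stand-ins for OptimizingCompilerStats and the inliner; not ART code.
#include <cstddef>
#include <cstdint>
#include <cstdio>

enum Stat { kInstructionSimplifications, kIntrinsicsRecognized, kLastStat };

struct Stats {
  uint32_t counts[kLastStat] = {};

  void RecordStat(Stat stat, uint32_t count = 1) { counts[stat] += count; }

  // Fold these counters into another Stats object, mirroring AddTo() in the diff.
  void AddTo(Stats* other) const {
    for (size_t i = 0; i != kLastStat; ++i) {
      if (counts[i] != 0u) {
        other->RecordStat(static_cast<Stat>(i), counts[i]);
      }
    }
  }

  void Reset() {
    for (size_t i = 0; i != kLastStat; ++i) {
      counts[i] = 0u;
    }
  }
};

// Record callee optimization stats into a scratch object; merge them into the
// caller's stats only when the callee graph is accepted for inlining.
bool TryInline(Stats* caller_stats, Stats* scratch, bool callee_accepted) {
  scratch->Reset();                                     // reuse one scratch object per caller
  scratch->RecordStat(kInstructionSimplifications, 3);  // pretend callee optimizations ran
  if (!callee_accepted) {
    return false;  // rejected callee: its stats are dropped instead of polluting the totals
  }
  scratch->AddTo(caller_stats);
  return true;
}

int main() {
  Stats caller, scratch;
  TryInline(&caller, &scratch, /*callee_accepted=*/false);  // contributes nothing
  TryInline(&caller, &scratch, /*callee_accepted=*/true);   // contributes 3
  std::printf("simplifications: %u\n", caller.counts[kInstructionSimplifications]);
  return 0;
}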

compiler/optimizing/inliner.cc
compiler/optimizing/inliner.h
compiler/optimizing/optimizing_compiler_stats.h

diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 8c73f1d..3e34090 100644
@@ -1272,12 +1272,19 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
       caller_instruction_counter);
   callee_graph->SetArtMethod(resolved_method);
 
-  // When they are needed, allocate `inline_stats` on the heap instead
+  // When they are needed, allocate `inline_stats_` on the Arena instead
   // of on the stack, as Clang might produce a stack frame too large
   // for this function, that would not fit the requirements of the
   // `-Wframe-larger-than` option.
-  std::unique_ptr<OptimizingCompilerStats> inline_stats =
-      (stats_ == nullptr) ? nullptr : MakeUnique<OptimizingCompilerStats>();
+  if (stats_ != nullptr) {
+    // Reuse one object for all inline attempts from this caller to keep Arena memory usage low.
+    if (inline_stats_ == nullptr) {
+      void* storage = graph_->GetArena()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc);
+      inline_stats_ = new (storage) OptimizingCompilerStats;
+    } else {
+      inline_stats_->Reset();
+    }
+  }
   HGraphBuilder builder(callee_graph,
                         &dex_compilation_unit,
                         &outer_compilation_unit_,
@@ -1285,7 +1292,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
                         *code_item,
                         compiler_driver_,
                         codegen_,
-                        inline_stats.get(),
+                        inline_stats_,
                         resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()),
                         dex_cache,
                         handles_);
@@ -1468,6 +1475,11 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
   DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
       << "No instructions can be added to the inner graph during inlining into the outer graph";
 
+  if (stats_ != nullptr) {
+    DCHECK(inline_stats_ != nullptr);
+    inline_stats_->AddTo(stats_);
+  }
+
   return true;
 }
 
@@ -1476,11 +1488,11 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph,
                                   const DexCompilationUnit& dex_compilation_unit) {
   // Note: if the outermost_graph_ is being compiled OSR, we should not run any
   // optimization that could lead to a HDeoptimize. The following optimizations do not.
-  HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner");
+  HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
   HConstantFolding fold(callee_graph, "constant_folding$inliner");
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
-  InstructionSimplifier simplify(callee_graph, stats_);
-  IntrinsicsRecognizer intrinsics(callee_graph, stats_);
+  InstructionSimplifier simplify(callee_graph, inline_stats_);
+  IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
 
   HOptimization* optimizations[] = {
     &intrinsics,
@@ -1504,7 +1516,7 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph,
                      dex_compilation_unit,
                      compiler_driver_,
                      handles_,
-                     stats_,
+                     inline_stats_,
                      total_number_of_dex_registers_ + code_item->registers_size_,
                      depth_ + 1);
     inliner.Run();
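
The allocation in the first hunk above follows the usual arena idiom: take raw
storage from a bump allocator, construct the object in place with placement
new, and reuse it via Reset() instead of allocating again for every inline
attempt. Below is a self-contained sketch under simplified assumptions;
BumpArena and Counters are illustrative stand-ins, not ART's ArenaAllocator or
OptimizingCompilerStats.

// Minimal bump-allocator sketch to illustrate the placement-new pattern; not ART code.
#include <cstddef>
#include <cstdint>
#include <new>
#include <vector>

class BumpArena {
 public:
  explicit BumpArena(size_t capacity) : buffer_(capacity), pos_(0) {}

  // Hand out aligned raw storage; memory is reclaimed only when the arena dies.
  void* Alloc(size_t size, size_t align) {
    uintptr_t base = reinterpret_cast<uintptr_t>(buffer_.data());
    uintptr_t aligned = (base + pos_ + align - 1) & ~(static_cast<uintptr_t>(align) - 1);
    size_t offset = static_cast<size_t>(aligned - base);
    if (offset + size > buffer_.size()) {
      return nullptr;  // out of arena space
    }
    pos_ = offset + size;
    return buffer_.data() + offset;
  }

 private:
  std::vector<uint8_t> buffer_;
  size_t pos_;
};

struct Counters {
  uint32_t values[8] = {};
  void Reset() {
    for (uint32_t& value : values) {
      value = 0u;
    }
  }
};

int main() {
  BumpArena arena(1024);
  // Get raw storage from the arena and construct in place, like the diff does
  // with graph_->GetArena()->Alloc<...>() followed by `new (storage) ...`.
  void* storage = arena.Alloc(sizeof(Counters), alignof(Counters));
  if (storage == nullptr) {
    return 1;
  }
  Counters* counters = new (storage) Counters;
  counters->values[0] = 42u;
  counters->Reset();  // later attempts reuse the same object instead of allocating again
  // No delete: trivially destructible, arena-owned objects are released
  // wholesale when the arena goes away.
  return 0;
}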
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 11aacab..75d025a 100644
@@ -51,7 +51,8 @@ class HInliner : public HOptimization {
         total_number_of_dex_registers_(total_number_of_dex_registers),
         depth_(depth),
         number_of_inlined_instructions_(0),
-        handles_(handles) {}
+        handles_(handles),
+        inline_stats_(nullptr) {}
 
   void Run() OVERRIDE;
 
@@ -218,6 +219,10 @@ class HInliner : public HOptimization {
   size_t number_of_inlined_instructions_;
   VariableSizedHandleScope* const handles_;
 
+  // Used to record stats about optimizations on the inlined graph.
+  // If the inlining is successful, these stats are merged to the caller graph's stats.
+  OptimizingCompilerStats* inline_stats_;
+
   DISALLOW_COPY_AND_ASSIGN(HInliner);
 };
 
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 203b1ec..f7f6a14 100644
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 
+#include <atomic>
 #include <iomanip>
 #include <string>
 #include <type_traits>
@@ -74,7 +75,7 @@ class OptimizingCompilerStats {
  public:
   OptimizingCompilerStats() {}
 
-  void RecordStat(MethodCompilationStat stat, size_t count = 1) {
+  void RecordStat(MethodCompilationStat stat, uint32_t count = 1) {
     compile_stats_[stat] += count;
   }
 
@@ -93,7 +94,7 @@ class OptimizingCompilerStats {
           << " methods: " << std::fixed << std::setprecision(2)
           << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
 
-      for (int i = 0; i < kLastStat; i++) {
+      for (size_t i = 0; i < kLastStat; i++) {
         if (compile_stats_[i] != 0) {
           LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": "
               << compile_stats_[i];
@@ -102,6 +103,21 @@ class OptimizingCompilerStats {
     }
   }
 
+  void AddTo(OptimizingCompilerStats* other_stats) {
+    for (size_t i = 0; i != kLastStat; ++i) {
+      uint32_t count = compile_stats_[i];
+      if (count != 0) {
+        other_stats->RecordStat(static_cast<MethodCompilationStat>(i), count);
+      }
+    }
+  }
+
+  void Reset() {
+    for (size_t i = 0; i != kLastStat; ++i) {
+      compile_stats_[i] = 0u;
+    }
+  }
+
  private:
   std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
     std::string name;
@@ -156,7 +172,7 @@ class OptimizingCompilerStats {
     return "OptStat#" + name;
   }
 
-  AtomicInteger compile_stats_[kLastStat];
+  std::atomic<uint32_t> compile_stats_[kLastStat];
 
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats);
 };
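
With the switch from AtomicInteger to std::atomic<uint32_t>, the counters stay
safe to bump from multiple compiler threads: operator+= on std::atomic is an
atomic read-modify-write, and reading an element (as AddTo() does) is an atomic
load. A minimal sketch of that pattern follows; Stat, Record and counters are
illustrative names, not the real MethodCompilationStat API.

// Minimal sketch of the std::atomic counter pattern; names are illustrative.
#include <atomic>
#include <cstdint>
#include <cstdio>

enum Stat { kCompiledMethods, kInlinedInvokes, kLastStat };

// Static storage duration zero-initializes these counters.
std::atomic<uint32_t> counters[kLastStat];

void Record(Stat stat, uint32_t count = 1) {
  counters[stat] += count;  // operator+= is an atomic fetch_add (sequentially consistent)
}

int main() {
  Record(kCompiledMethods);
  Record(kInlinedInvokes, 3u);
  // Assigning to a plain uint32_t performs an atomic load, just as
  // `uint32_t count = compile_stats_[i];` does in AddTo() above.
  uint32_t inlined = counters[kInlinedInvokes];
  std::printf("compiled=%u inlined=%u\n", counters[kCompiledMethods].load(), inlined);
  return 0;
}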