From cc23481b66fd1f2b459d82da4852073e32f033aa Mon Sep 17 00:00:00 2001
From: Vladimir Marko
Date: Tue, 7 Apr 2015 09:36:09 +0100
Subject: [PATCH] Promote pointer to dex cache arrays on arm.

Do the use-count analysis on temps (ArtMethod* and the new
PC-relative temp) in Mir2Lir, rather than MIRGraph. MIRGraph
isn't really supposed to know how the ArtMethod* is used by
the backend.

Change-Id: Iaf56a46ae203eca86281b02b54f39a80fe5cc2dd
---
 compiler/dex/mir_dataflow.cc          |  28 ++----
 compiler/dex/mir_graph.h              |  12 +++
 compiler/dex/mir_optimization.cc      |  44 ++++++++--
 compiler/dex/quick/arm/call_arm.cc    |   8 ++
 compiler/dex/quick/arm/codegen_arm.h  |   9 ++
 compiler/dex/quick/arm/int_arm.cc     |  13 ++-
 compiler/dex/quick/arm/target_arm.cc  |   3 +-
 compiler/dex/quick/arm/utility_arm.cc |  35 ++++++++
 compiler/dex/quick/codegen_util.cc    |   2 +
 compiler/dex/quick/mir_to_lir.h       |  18 +++-
 compiler/dex/quick/ralloc_util.cc     | 158 ++++++++++++++++++++++++++++++++++
 11 files changed, 297 insertions(+), 33 deletions(-)

diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index f638b0bf4..2a920a4e2 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1396,6 +1396,13 @@ void MIRGraph::CompilerInitializeSSAConversion() {
   InitializeBasicBlockDataFlow();
 }
 
+uint32_t MIRGraph::GetUseCountWeight(BasicBlock* bb) const {
+  // Each level of nesting adds *100 to count, up to 3 levels deep.
+  uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
+  uint32_t weight = std::max(1U, depth * 100);
+  return weight;
+}
+
 /*
  * Count uses, weighting by loop nesting depth. This code only
  * counts explicitly used s_regs. A later phase will add implicit
@@ -1405,9 +1412,7 @@ void MIRGraph::CountUses(BasicBlock* bb) {
   if (bb->block_type != kDalvikByteCode) {
     return;
   }
-  // Each level of nesting adds *100 to count, up to 3 levels deep.
-  uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
-  uint32_t weight = std::max(1U, depth * 100);
+  uint32_t weight = GetUseCountWeight(bb);
   for (MIR* mir = bb->first_mir_insn; (mir != NULL); mir = mir->next) {
     if (mir->ssa_rep == NULL) {
       continue;
@@ -1417,23 +1422,6 @@ void MIRGraph::CountUses(BasicBlock* bb) {
       raw_use_counts_[s_reg] += 1u;
       use_counts_[s_reg] += weight;
     }
-    if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) {
-      uint64_t df_attributes = GetDataFlowAttributes(mir);
-      // Implicit use of Method* ? */
-      if (df_attributes & DF_UMS) {
-        /*
-         * Some invokes will not use Method* - need to perform test similar
-         * to that found in GenInvoke() to decide whether to count refs
-         * for Method* on invoke-class opcodes. This is a relatively expensive
-         * operation, so should only be done once.
-         * TODO: refactor InvokeUsesMethodStar() to perform check at parse time,
-         * and save results for both here and GenInvoke. For now, go ahead
-         * and assume all invokes use method*.
-         */
-        raw_use_counts_[method_sreg_] += 1u;
-        use_counts_[method_sreg_] += weight;
-      }
-    }
   }
 }
 
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 3298af116..d4a9eb938 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -960,6 +960,12 @@ class MIRGraph {
    */
   CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide);
 
+  /**
+   * @brief Used to remove last created compiler temporary when it's not needed.
+   * @param temp the temporary to remove.
+   */
+  void RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp);
+
   bool MethodIsLeaf() {
     return attributes_ & METHOD_IS_LEAF;
   }
@@ -1185,6 +1191,12 @@ class MIRGraph {
   void DoConstantPropagation(BasicBlock* bb);
 
   /**
+   * @brief Get use count weight for a given block.
+   * @param bb the BasicBlock.
+   */
+  uint32_t GetUseCountWeight(BasicBlock* bb) const;
+
+  /**
    * @brief Count the uses in the BasicBlock
    * @param bb the BasicBlock
    */
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index c85c3b6f2..5dcc903cf 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -318,9 +318,11 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide)
     // Since VR temps cannot be requested once the BE temps are requested, we
     // allow reservation of VR temps as well for BE. We
     size_t available_temps = reserved_temps_for_backend_ + GetNumAvailableVRTemps();
-    if (available_temps <= 0 || (available_temps <= 1 && wide)) {
+    size_t needed_temps = wide ? 2u : 1u;
+    if (available_temps < needed_temps) {
       if (verbose) {
-        LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str << " are available.";
+        LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str
+                  << " are available.";
       }
       return nullptr;
     }
@@ -328,12 +330,8 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide)
     // Update the remaining reserved temps since we have now used them.
     // Note that the code below is actually subtracting to remove them from reserve
     // once they have been claimed. It is careful to not go below zero.
-    if (reserved_temps_for_backend_ >= 1) {
-      reserved_temps_for_backend_--;
-    }
-    if (wide && reserved_temps_for_backend_ >= 1) {
-      reserved_temps_for_backend_--;
-    }
+    reserved_temps_for_backend_ =
+        std::max(reserved_temps_for_backend_, needed_temps) - needed_temps;
 
     // The new non-special compiler temp must receive a unique v_reg.
     compiler_temp->v_reg = GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_;
@@ -407,6 +405,36 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide)
   return compiler_temp;
 }
 
+void MIRGraph::RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp) {
+  // Once the compiler temps have been committed, it's too late for any modifications.
+  DCHECK_EQ(compiler_temps_committed_, false);
+
+  size_t used_temps = wide ? 2u : 1u;
+
+  if (ct_type == kCompilerTempBackend) {
+    DCHECK(requested_backend_temp_);
+
+    // Make the temps available to backend again.
+    reserved_temps_for_backend_ += used_temps;
+  } else if (ct_type == kCompilerTempVR) {
+    DCHECK(!requested_backend_temp_);
+  } else {
+    UNIMPLEMENTED(FATAL) << "No handling for compiler temp type " << static_cast<int>(ct_type);
+  }
+
+  // Reduce the number of non-special compiler temps.
+  DCHECK_LE(used_temps, num_non_special_compiler_temps_);
+  num_non_special_compiler_temps_ -= used_temps;
+
+  // Check that this was really the last temp.
+  DCHECK_EQ(static_cast<size_t>(temp->v_reg),
+            GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_);
+
+  if (cu_->verbose) {
+    LOG(INFO) << "Last temporary has been removed.";
+  }
+}
+
 static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) {
   bool is_taken;
   switch (opcode) {
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 3f4f1fef9..518e3ea3c 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -491,6 +491,14 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
 
   FlushIns(ArgLocs, rl_method);
 
+  // We can promote a PC-relative reference to dex cache arrays to a register
+  // if it's used at least twice. Without investigating where we should lazily
+  // load the reference, we conveniently load it after flushing inputs.
+  if (dex_cache_arrays_base_reg_.Valid()) {
+    OpPcRelDexCacheArrayAddr(cu_->dex_file, dex_cache_arrays_min_offset_,
+                             dex_cache_arrays_base_reg_);
+  }
+
   FreeTemp(rs_r0);
   FreeTemp(rs_r1);
   FreeTemp(rs_r2);
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 619c11fb9..83b27df93 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -260,6 +260,9 @@ class ArmMir2Lir FINAL : public Mir2Lir {
    */
   LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
 
+  void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE;
+  void DoPromotion() OVERRIDE;
+
   /*
    * @brief Handle ARM specific literals.
    */
@@ -306,6 +309,10 @@ class ArmMir2Lir FINAL : public Mir2Lir {
   // Instructions needing patching with PC relative code addresses.
   ArenaVector<LIR*> dex_cache_access_insns_;
 
+  // Register with a reference to the dex cache arrays at dex_cache_arrays_min_offset_,
+  // if promoted.
+  RegStorage dex_cache_arrays_base_reg_;
+
   /**
    * @brief Given float register pair, returns Solo64 float register.
    * @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3).
@@ -341,6 +348,8 @@ class ArmMir2Lir FINAL : public Mir2Lir {
                        uint32_t unused_idx ATTRIBUTE_UNUSED,
                        uintptr_t direct_code, uintptr_t direct_method,
                        InvokeType type);
+
+  void OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index c788401cf..47669db97 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -1091,7 +1091,7 @@ bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
   return dex_cache_arrays_layout_.Valid();
 }
 
-void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) {
+void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) {
   LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0);
   LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0);
   ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH;
@@ -1105,7 +1105,16 @@ void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, R
   movt->operands[4] = movw->operands[4];
   dex_cache_access_insns_.push_back(movw);
   dex_cache_access_insns_.push_back(movt);
-  LoadRefDisp(r_dest, 0, r_dest, kNotVolatile);
+}
+
+void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) {
+  if (dex_cache_arrays_base_reg_.Valid()) {
+    LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_,
+                r_dest, kNotVolatile);
+  } else {
+    OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest);
+    LoadRefDisp(r_dest, 0, r_dest, kNotVolatile);
+  }
 }
 
 LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 580dcb789..5f27338e6 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -576,7 +576,8 @@ RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil
 ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
     : Mir2Lir(cu, mir_graph, arena),
       call_method_insns_(arena->Adapter()),
-      dex_cache_access_insns_(arena->Adapter()) {
+      dex_cache_access_insns_(arena->Adapter()),
+      dex_cache_arrays_base_reg_(RegStorage::InvalidReg()) {
   call_method_insns_.reserve(100);
   // Sanity check - make sure encoding map lines up.
   for (int i = 0; i < kArmLast; i++) {
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index e4bd2a33a..c3371cf32 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -19,6 +19,7 @@
 #include "arch/arm/instruction_set_features_arm.h"
 #include "arm_lir.h"
 #include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 #include "driver/compiler_driver.h"
@@ -1266,4 +1267,38 @@ size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) {
   return offset;
 }
 
+void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
+  // Start with the default counts.
+  Mir2Lir::CountRefs(core_counts, fp_counts, num_regs);
+
+  if (pc_rel_temp_ != nullptr) {
+    // Now, if the dex cache array base temp is used only once outside any loops (weight = 1),
+    // avoid the promotion, otherwise boost the weight by factor 4 because the full PC-relative
+    // load sequence is 4 instructions long.
+    int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low);
+    if (core_counts[p_map_idx].count == 1) {
+      core_counts[p_map_idx].count = 0;
+    } else {
+      core_counts[p_map_idx].count *= 4;
+    }
+  }
+}
+
+void ArmMir2Lir::DoPromotion() {
+  if (CanUseOpPcRelDexCacheArrayLoad()) {
+    pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
+  }
+
+  Mir2Lir::DoPromotion();
+
+  if (pc_rel_temp_ != nullptr) {
+    // Now, if the dex cache array base temp is promoted, remember the register but
+    // always remove the temp's stack location to avoid unnecessarily bloating the stack.
+    dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg;
+    DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat());
+    mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_);
+    pc_rel_temp_ = nullptr;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index f944c1193..c51046eed 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1070,6 +1070,8 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena
       mask_cache_(arena),
       safepoints_(arena->Adapter()),
       dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)),
+      pc_rel_temp_(nullptr),
+      dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()),
       in_to_reg_storage_mapping_(arena) {
   switch_tables_.reserve(4);
   fill_array_data_.reserve(4);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index f9b58b17a..45a585553 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -135,6 +135,7 @@ class BasicBlock;
 class BitVector;
 struct CallInfo;
 struct CompilationUnit;
+struct CompilerTemp;
 struct InlineMethod;
 class MIR;
 struct LIR;
@@ -775,9 +776,10 @@ class Mir2Lir {
    */
   virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
 
-  void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
+  void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight);
+  virtual void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
   void DumpCounts(const RefCounts* arr, int size, const char* msg);
-  void DoPromotion();
+  virtual void DoPromotion();
   int VRegOffset(int v_reg);
   int SRegOffset(int s_reg);
   RegLocation GetReturnWide(RegisterClass reg_class);
@@ -1849,6 +1851,18 @@ class Mir2Lir {
   // The layout of the cu_->dex_file's dex cache arrays for PC-relative addressing.
   const DexCacheArraysLayout dex_cache_arrays_layout_;
 
+  // For architectures that don't have true PC-relative addressing, we can promote
+  // a PC of an instruction (or another PC-relative address such as a pointer to
+  // the dex cache arrays if supported) to a register. This is indicated to the
+  // register promotion by allocating a backend temp.
+  CompilerTemp* pc_rel_temp_;
+
+  // For architectures that don't have true PC-relative addressing (see pc_rel_temp_
+  // above) and also have a limited range of offsets for loads, it's useful to
+  // know the minimum offset into the dex cache arrays, so we calculate that as well
+  // if pc_rel_temp_ isn't nullptr.
+  uint32_t dex_cache_arrays_min_offset_;
+
   // ABI support
   class ShortyArg {
    public:
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 741657bc6..487d31c9e 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -19,9 +19,11 @@
 #include "mir_to_lir-inl.h"
 
 #include "dex/compiler_ir.h"
+#include "dex/dataflow_iterator-inl.h"
 #include "dex/mir_graph.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 
@@ -1128,6 +1130,146 @@ RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) {
   return loc;
 }
 
+void Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) {
+  // NOTE: This should be in sync with functions that actually generate code for
+  // the opcodes below. However, if we get this wrong, the generated code will
+  // still be correct even if it may be sub-optimal.
+  int opcode = mir->dalvikInsn.opcode;
+  bool uses_method = false;
+  bool uses_pc_rel_load = false;
+  uint32_t dex_cache_array_offset = std::numeric_limits<uint32_t>::max();
+  switch (opcode) {
+    case Instruction::CHECK_CAST:
+    case Instruction::INSTANCE_OF: {
+      if ((opcode == Instruction::CHECK_CAST) &&
+          (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) != 0) {
+        break;  // No code generated.
+      }
+      uint32_t type_idx =
+          (opcode == Instruction::CHECK_CAST) ? mir->dalvikInsn.vB : mir->dalvikInsn.vC;
+      bool type_known_final, type_known_abstract, use_declaring_class;
+      bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(
+          cu_->method_idx, *cu_->dex_file, type_idx,
+          &type_known_final, &type_known_abstract, &use_declaring_class);
+      if (opcode == Instruction::CHECK_CAST && !needs_access_check &&
+          cu_->compiler_driver->IsSafeCast(
+              mir_graph_->GetCurrentDexCompilationUnit(), mir->offset)) {
+        break;  // No code generated.
+      }
+      if (!needs_access_check && !use_declaring_class && pc_rel_temp_ != nullptr) {
+        uses_pc_rel_load = true;  // And ignore method use in slow path.
+        dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
+      } else {
+        uses_method = true;
+      }
+      break;
+    }
+
+    case Instruction::CONST_CLASS:
+      if (pc_rel_temp_ != nullptr &&
+          cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file,
+                                                           mir->dalvikInsn.vB)) {
+        uses_pc_rel_load = true;  // And ignore method use in slow path.
+        dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(mir->dalvikInsn.vB);
+      } else {
+        uses_method = true;
+      }
+      break;
+
+    case Instruction::CONST_STRING:
+    case Instruction::CONST_STRING_JUMBO:
+      if (pc_rel_temp_ != nullptr) {
+        uses_pc_rel_load = true;  // And ignore method use in slow path.
+        dex_cache_array_offset = dex_cache_arrays_layout_.StringOffset(mir->dalvikInsn.vB);
+      } else {
+        uses_method = true;
+      }
+      break;
+
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_SUPER:
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_STATIC_RANGE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      const MirMethodLoweringInfo& info = mir_graph_->GetMethodLoweringInfo(mir);
+      InvokeType sharp_type = info.GetSharpType();
+      if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) {
+        // Nothing to do, the generated code or entrypoint uses method from the stack.
+      } else if (info.DirectCode() != 0 && info.DirectMethod() != 0) {
+        // Nothing to do, the generated code uses method from the stack.
+      } else if (pc_rel_temp_ != nullptr) {
+        uses_pc_rel_load = true;
+        dex_cache_array_offset = dex_cache_arrays_layout_.MethodOffset(mir->dalvikInsn.vB);
+      } else {
+        uses_method = true;
+      }
+      break;
+    }
+
+    case Instruction::NEW_INSTANCE:
+    case Instruction::NEW_ARRAY:
+    case Instruction::FILLED_NEW_ARRAY:
+    case Instruction::FILLED_NEW_ARRAY_RANGE:
+      uses_method = true;
+      break;
+    case Instruction::FILL_ARRAY_DATA:
+      // Nothing to do, the entrypoint uses method from the stack.
+      break;
+    case Instruction::THROW:
+      // Nothing to do, the entrypoint uses method from the stack.
+      break;
+
+    case Instruction::SGET:
+    case Instruction::SGET_WIDE:
+    case Instruction::SGET_OBJECT:
+    case Instruction::SGET_BOOLEAN:
+    case Instruction::SGET_BYTE:
+    case Instruction::SGET_CHAR:
+    case Instruction::SGET_SHORT:
+    case Instruction::SPUT:
+    case Instruction::SPUT_WIDE:
+    case Instruction::SPUT_OBJECT:
+    case Instruction::SPUT_BOOLEAN:
+    case Instruction::SPUT_BYTE:
+    case Instruction::SPUT_CHAR:
+    case Instruction::SPUT_SHORT: {
+      const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
+      bool fast = IsInstructionSGet(static_cast<Instruction::Code>(opcode))
+          ? field_info.FastGet()
+          : field_info.FastPut();
+      if (fast && (cu_->enable_debug & (1 << kDebugSlowFieldPath)) == 0) {
+        if (!field_info.IsReferrersClass() && pc_rel_temp_ != nullptr) {
+          uses_pc_rel_load = true;  // And ignore method use in slow path.
+          dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex());
+        } else {
+          uses_method = true;
+        }
+      } else {
+        // Nothing to do, the entrypoint uses method from the stack.
+      }
+      break;
+    }
+
+    default:
+      break;
+  }
+  if (uses_method) {
+    core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count += weight;
+  }
+  if (uses_pc_rel_load) {
+    core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight;
+    DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max());
+    dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset);
+  }
+}
+
 /* USE SSA names to count references of base Dalvik v_regs. */
 void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
   for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
@@ -1157,6 +1299,22 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num
       }
     }
   }
+
+  // Now analyze the ArtMethod* and pc_rel_temp_ uses.
+  DCHECK_EQ(core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count, 0);
+  if (pc_rel_temp_ != nullptr) {
+    DCHECK_EQ(core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count, 0);
+  }
+  PreOrderDfsIterator iter(mir_graph_);
+  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
+    if (bb->block_type == kDead) {
+      continue;
+    }
+    uint32_t weight = mir_graph_->GetUseCountWeight(bb);
+    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+      AnalyzeMIR(core_counts, mir, weight);
+    }
+  }
 }
 
 /* qsort callback function, sort descending */
-- 
2.11.0
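
Illustrative note (not part of the patch): the promotion heuristic above combines MIRGraph::GetUseCountWeight() with the ArmMir2Lir::CountRefs() adjustment. A use outside any loop has weight 1; inside loops the weight is min(depth, 3) * 100. The summed count for the dex cache arrays base is then dropped to 0 when it is exactly 1, or multiplied by 4 (the length of the movw/movt/add-pc/load sequence) otherwise. The standalone C++ sketch below mirrors that arithmetic only; the sketch namespace and the AdjustDexCacheBaseCount name are illustrative stand-ins, not ART code.

// Standalone sketch of the use-count weighting behind the promotion decision.
// Simplified stand-ins for MIRGraph/Mir2Lir state; not ART code.
#include <algorithm>
#include <cstdint>
#include <iostream>

namespace sketch {

// Mirrors MIRGraph::GetUseCountWeight(): weight = min(depth, 3) * 100,
// with a minimum weight of 1 for straight-line code.
std::uint32_t GetUseCountWeight(std::uint32_t nesting_depth) {
  std::uint32_t depth = std::min<std::uint32_t>(3u, nesting_depth);
  return std::max<std::uint32_t>(1u, depth * 100u);
}

// Mirrors the ArmMir2Lir::CountRefs() adjustment for the dex cache arrays base:
// a single use outside any loop is not worth promoting (the materialization
// sequence is itself 4 instructions), otherwise the count is boosted by 4.
std::uint32_t AdjustDexCacheBaseCount(std::uint32_t summed_weight) {
  return (summed_weight == 1u) ? 0u : summed_weight * 4u;
}

}  // namespace sketch

int main() {
  // One use in straight-line code: weight 1, adjusted to 0, so no promotion benefit.
  std::cout << sketch::AdjustDexCacheBaseCount(sketch::GetUseCountWeight(0)) << "\n";  // 0
  // One use inside a loop (depth 1): weight 100, boosted to 400.
  std::cout << sketch::AdjustDexCacheBaseCount(sketch::GetUseCountWeight(1)) << "\n";  // 400
  return 0;
}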