From 6bbf0967d217ab2b7bdbb78bfd076b8fb07a44e8 Mon Sep 17 00:00:00 2001
From: Alexei Zavjalov <alexei.zavjalov@intel.com>
Date: Tue, 15 Jul 2014 02:19:41 +0700
Subject: [PATCH] ART: Implement the easy long division/remainder by a constant

Also optimize long/int divisions by power-of-two values.

Also do some clean-up.

Change-Id: Ie414e64aac251c81361ae107d157c14439e6dab5
Signed-off-by: Alexei Zavjalov <alexei.zavjalov@intel.com>
---
 compiler/dex/quick/gen_invoke.cc          |  10 -
 compiler/dex/quick/mir_to_lir.h           |   2 +-
 compiler/dex/quick/x86/codegen_x86.h      | 328 ++++++++++++++++--------------
 compiler/dex/quick/x86/int_x86.cc         | 295 ++++++++++++++++++++++-----
 compiler/dex/quick/x86/target_x86.cc      |  20 ++
 test/003-omnibus-opcodes/src/IntMath.java |   2 +-
 6 files changed, 442 insertions(+), 215 deletions(-)

diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index a0a2ed044..5fc699669 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1568,16 +1568,6 @@ bool Mir2Lir::GenInlinedCurrentThread(CallInfo* info) {
                   kNotVolatile);
       break;
 
-    case kX86:
-      reinterpret_cast<X86Mir2Lir*>(this)->OpRegThreadMem(kOpMov, rl_result.reg,
-                                                          Thread::PeerOffset<4>());
-      break;
-
-    case kX86_64:
-      reinterpret_cast<X86Mir2Lir*>(this)->OpRegThreadMem(kOpMov, rl_result.reg,
-                                                          Thread::PeerOffset<8>());
-      break;
-
     default:
       LOG(FATAL) << "Unexpected isa " << cu_->instruction_set;
   }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index d03b85989..b19942db8 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -968,7 +968,7 @@ class Mir2Lir : public Backend {
     virtual bool GenInlinedArrayCopyCharArray(CallInfo* info);
     virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
     bool GenInlinedStringCompareTo(CallInfo* info);
-    bool GenInlinedCurrentThread(CallInfo* info);
+    virtual bool GenInlinedCurrentThread(CallInfo* info);
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 0a46f2e54..40621b15d 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -65,7 +65,7 @@ class X86Mir2Lir : public Mir2Lir {
 
   // Required for target - codegen helpers.
   bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
-                          RegLocation rl_dest, int lit);
+                          RegLocation rl_dest, int lit) OVERRIDE;
   bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
   LIR* CheckSuspendUsingLoad() OVERRIDE;
   RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
@@ -73,22 +73,17 @@ class X86Mir2Lir : public Mir2Lir {
                     OpSize size, VolatileKind is_volatile) OVERRIDE;
   LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
                        OpSize size) OVERRIDE;
-  LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                           RegStorage r_dest, OpSize size);
   LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
   LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
   LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                      OpSize size, VolatileKind is_volatile) OVERRIDE;
   LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
                         OpSize size) OVERRIDE;
-  LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
-                            RegStorage r_src, OpSize size);
-  void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
-  void GenImplicitNullCheck(RegStorage reg, int opt_flags);
+  void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE;
+  void GenImplicitNullCheck(RegStorage reg, int opt_flags) OVERRIDE;
 
   // Required for target - register utilities.
   RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
-  RegStorage TargetReg32(SpecialTargetRegister reg);
   RegStorage TargetReg(SpecialTargetRegister symbolic_reg, WideKind wide_kind) OVERRIDE {
     if (wide_kind == kWide) {
       if (cu_->target64) {
@@ -110,111 +105,92 @@ class X86Mir2Lir : public Mir2Lir {
   RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
     return TargetReg(symbolic_reg, cu_->target64 ? kWide : kNotWide);
   }
-  RegStorage GetArgMappingToPhysicalReg(int arg_num);
-  RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
-  RegLocation GetReturnAlt();
-  RegLocation GetReturnWideAlt();
-  RegLocation LocCReturn();
-  RegLocation LocCReturnRef();
-  RegLocation LocCReturnDouble();
-  RegLocation LocCReturnFloat();
-  RegLocation LocCReturnWide();
+
+  RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE;
+
+  RegLocation GetReturnAlt() OVERRIDE;
+  RegLocation GetReturnWideAlt() OVERRIDE;
+  RegLocation LocCReturn() OVERRIDE;
+  RegLocation LocCReturnRef() OVERRIDE;
+  RegLocation LocCReturnDouble() OVERRIDE;
+  RegLocation LocCReturnFloat() OVERRIDE;
+  RegLocation LocCReturnWide() OVERRIDE;
+
   ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
-  void AdjustSpillMask();
-  void ClobberCallerSave();
-  void FreeCallTemps();
-  void LockCallTemps();
-  void CompilerInitializeRegAlloc();
-  int VectorRegisterSize();
-  int NumReservableVectorRegisters(bool fp_used);
+  void AdjustSpillMask() OVERRIDE;
+  void ClobberCallerSave() OVERRIDE;
+  void FreeCallTemps() OVERRIDE;
+  void LockCallTemps() OVERRIDE;
+
+  void CompilerInitializeRegAlloc() OVERRIDE;
+  int VectorRegisterSize() OVERRIDE;
+  int NumReservableVectorRegisters(bool fp_used) OVERRIDE;
 
   // Required for target - miscellaneous.
-  void AssembleLIR();
-  int AssignInsnOffsets();
-  void AssignOffsets();
-  AssemblerStatus AssembleInstructions(CodeOffset start_addr);
+  void AssembleLIR() OVERRIDE;
   void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE;
   void SetupTargetResourceMasks(LIR* lir, uint64_t flags,
                                 ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE;
-  const char* GetTargetInstFmt(int opcode);
-  const char* GetTargetInstName(int opcode);
-  std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
+  const char* GetTargetInstFmt(int opcode) OVERRIDE;
+  const char* GetTargetInstName(int opcode) OVERRIDE;
+  std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) OVERRIDE;
   ResourceMask GetPCUseDefEncoding() const OVERRIDE;
-  uint64_t GetTargetInstFlags(int opcode);
+  uint64_t GetTargetInstFlags(int opcode) OVERRIDE;
   size_t GetInsnSize(LIR* lir) OVERRIDE;
-  bool IsUnconditionalBranch(LIR* lir);
+  bool IsUnconditionalBranch(LIR* lir) OVERRIDE;
 
   // Get the register class for load/store of a field.
   RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
 
   // Required for target - Dalvik-level generators.
-  void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2);
   void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
-                   RegLocation rl_dest, int scale);
+                   RegLocation rl_dest, int scale) OVERRIDE;
   void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                   RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
-  void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                         RegLocation rl_src1, RegLocation rl_shift);
-  void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                  RegLocation rl_src2);
-  void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                  RegLocation rl_src2);
-  void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                  RegLocation rl_src2);
+                   RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) OVERRIDE;
+
   void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                        RegLocation rl_src2);
+                        RegLocation rl_src2) OVERRIDE;
   void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                       RegLocation rl_src2);
-  void GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double);
+                       RegLocation rl_src2) OVERRIDE;
   void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                RegLocation rl_src2);
-  void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-  bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-  bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
-  bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double);
-  bool GenInlinedSqrt(CallInfo* info);
+                RegLocation rl_src2) OVERRIDE;
+  void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
+
+  bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object) OVERRIDE;
+  bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) OVERRIDE;
+  bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) OVERRIDE;
+  bool GenInlinedSqrt(CallInfo* info) OVERRIDE;
   bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
   bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
-  bool GenInlinedPeek(CallInfo* info, OpSize size);
-  bool GenInlinedPoke(CallInfo* info, OpSize size);
+  bool GenInlinedPeek(CallInfo* info, OpSize size) OVERRIDE;
+  bool GenInlinedPoke(CallInfo* info, OpSize size) OVERRIDE;
   bool GenInlinedCharAt(CallInfo* info) OVERRIDE;
-  void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
-  void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
+
+  // Long instructions.
+  void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                         RegLocation rl_src2) OVERRIDE;
+  void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                         RegLocation rl_src1, RegLocation rl_shift) OVERRIDE;
+  void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2) OVERRIDE;
+  void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2) OVERRIDE;
+  void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                  RegLocation rl_src2) OVERRIDE;
+  void GenNotLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
+  void GenNegLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
   void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                 RegLocation rl_src2);
+                 RegLocation rl_src2) OVERRIDE;
   void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                  RegLocation rl_src2);
+                  RegLocation rl_src2) OVERRIDE;
   void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                  RegLocation rl_src2);
+                  RegLocation rl_src2) OVERRIDE;
   void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                     RegLocation rl_src2, bool is_div);
-  // TODO: collapse reg_lo, reg_hi
-  RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
-  RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
-  void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-  void GenDivZeroCheckWide(RegStorage reg);
-  void GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int32_t len_offset);
-  void GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset);
-  void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
-  void GenExitSequence();
-  void GenSpecialExitSequence();
-  void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
-  void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
-  void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
-  void GenSelect(BasicBlock* bb, MIR* mir);
-  void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
-                        int32_t true_val, int32_t false_val, RegStorage rs_dest,
-                        int dest_reg_class) OVERRIDE;
-  bool GenMemBarrier(MemBarrierKind barrier_kind);
-  void GenMoveException(RegLocation rl_dest);
-  void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
-                                     int first_bit, int second_bit);
-  void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
-  void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
-  void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-  void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-  void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
+                     RegLocation rl_src2, bool is_div) OVERRIDE;
+  void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) OVERRIDE;
+  void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
+  void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                      RegLocation rl_src1, RegLocation rl_shift) OVERRIDE;
 
   /*
    * @brief Generate a two address long operation with a constant value
@@ -224,6 +200,7 @@ class X86Mir2Lir : public Mir2Lir {
    * @return success or not
    */
   bool GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+
   /*
    * @brief Generate a three address long operation with a constant value
    * @param rl_dest location of result
@@ -234,7 +211,6 @@ class X86Mir2Lir : public Mir2Lir {
    */
   bool GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                       Instruction::Code op);
-
   /**
    * @brief Generate a long arithmetic operation.
    * @param rl_dest The destination.
@@ -262,6 +238,31 @@ class X86Mir2Lir : public Mir2Lir {
    */
   virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
+
+  // TODO: collapse reg_lo, reg_hi
+  RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div)
+      OVERRIDE;
+  RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) OVERRIDE;
+  void GenDivZeroCheckWide(RegStorage reg) OVERRIDE;
+  void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
+  void GenExitSequence() OVERRIDE;
+  void GenSpecialExitSequence() OVERRIDE;
+  void GenFillArrayData(DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+  void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE;
+  void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE;
+  void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE;
+  void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
+                        int32_t true_val, int32_t false_val, RegStorage rs_dest,
+                        int dest_reg_class) OVERRIDE;
+  bool GenMemBarrier(MemBarrierKind barrier_kind) OVERRIDE;
+  void GenMoveException(RegLocation rl_dest) OVERRIDE;
+  void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
+                                     int first_bit, int second_bit) OVERRIDE;
+  void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
+  void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
+  void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+  void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+
   /**
    * @brief Implement instanceof a final class with x86 specific code.
    * @param use_declaring_class 'true' if we can use the class itself.
@@ -270,56 +271,39 @@ class X86Mir2Lir : public Mir2Lir {
    * @param rl_src Object to be tested.
    */
   void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
-                          RegLocation rl_src);
-
-  void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                      RegLocation rl_src1, RegLocation rl_shift);
+                          RegLocation rl_src) OVERRIDE;
 
   // Single operation generators.
-  LIR* OpUnconditionalBranch(LIR* target);
-  LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
-  LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target);
-  LIR* OpCondBranch(ConditionCode cc, LIR* target);
-  LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
-  LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
-  LIR* OpIT(ConditionCode cond, const char* guide);
-  void OpEndIT(LIR* it);
-  LIR* OpMem(OpKind op, RegStorage r_base, int disp);
-  LIR* OpPcRelLoad(RegStorage reg, LIR* target);
-  LIR* OpReg(OpKind op, RegStorage r_dest_src);
-  void OpRegCopy(RegStorage r_dest, RegStorage r_src);
-  LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
-  LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
-  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
-  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegLocation value);
-  LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
-  LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
-  LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
-  LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
-  LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
-  LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
-  LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
-  LIR* OpTestSuspend(LIR* target);
-  LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset);
-  LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset);
-  LIR* OpVldm(RegStorage r_base, int count);
-  LIR* OpVstm(RegStorage r_base, int count);
-  void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
-  void OpRegCopyWide(RegStorage dest, RegStorage src);
-  void OpTlsCmp(ThreadOffset<4> offset, int val);
-  void OpTlsCmp(ThreadOffset<8> offset, int val);
-
-  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset);
-  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
-  void SpillCoreRegs();
-  void UnSpillCoreRegs();
-  void UnSpillFPRegs();
-  void SpillFPRegs();
-  static const X86EncodingMap EncodingMap[kX86Last];
-  bool InexpensiveConstantInt(int32_t value);
-  bool InexpensiveConstantFloat(int32_t value);
-  bool InexpensiveConstantLong(int64_t value);
-  bool InexpensiveConstantDouble(int64_t value);
+  LIR* OpUnconditionalBranch(LIR* target) OVERRIDE;
+  LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) OVERRIDE;
+  LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) OVERRIDE;
+  LIR* OpCondBranch(ConditionCode cc, LIR* target) OVERRIDE;
+  LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) OVERRIDE;
+  LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
+  LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE;
+  void OpEndIT(LIR* it) OVERRIDE;
+  LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE;
+  LIR* OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE;
+  LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE;
+  void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE;
+  LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE;
+  LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value) OVERRIDE;
+  LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) OVERRIDE;
+  LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) OVERRIDE;
+  LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) OVERRIDE;
+  LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) OVERRIDE;
+  LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) OVERRIDE;
+  LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) OVERRIDE;
+  LIR* OpTestSuspend(LIR* target) OVERRIDE;
+  LIR* OpVldm(RegStorage r_base, int count) OVERRIDE;
+  LIR* OpVstm(RegStorage r_base, int count) OVERRIDE;
+  void OpRegCopyWide(RegStorage dest, RegStorage src) OVERRIDE;
+  bool GenInlinedCurrentThread(CallInfo* info) OVERRIDE;
+
+  bool InexpensiveConstantInt(int32_t value) OVERRIDE;
+  bool InexpensiveConstantFloat(int32_t value) OVERRIDE;
+  bool InexpensiveConstantLong(int64_t value) OVERRIDE;
+  bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
 
   /*
    * @brief Should try to optimize for two address instructions?
@@ -335,13 +319,7 @@ class X86Mir2Lir : public Mir2Lir {
    * @param rl_rhs Right hand operand.
    */
   void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs,
-                     RegLocation rl_rhs);
-
-  /*
-   * @brief Dump a RegLocation using printf
-   * @param loc Register location to dump
-   */
-  static void DumpRegLocation(RegLocation loc);
+                     RegLocation rl_rhs) OVERRIDE;
 
   /*
    * @brief Load the Method* of a dex method into the register.
@@ -351,7 +329,7 @@ class X86Mir2Lir : public Mir2Lir {
    * @note register will be passed to TargetReg to get physical register.
    */
   void LoadMethodAddress(const MethodReference& target_method, InvokeType type,
-                         SpecialTargetRegister symbolic_reg);
+                         SpecialTargetRegister symbolic_reg) OVERRIDE;
 
   /*
    * @brief Load the Class* of a Dex Class type into the register.
@@ -359,23 +337,23 @@ class X86Mir2Lir : public Mir2Lir {
    * @param register that will contain the code address.
    * @note register will be passed to TargetReg to get physical register.
    */
-  void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+  void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) OVERRIDE;
 
-  void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+  void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
 
   int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
                            NextCallInsn next_call_insn,
                            const MethodReference& target_method,
                            uint32_t vtable_idx,
                            uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                           bool skip_this);
+                           bool skip_this) OVERRIDE;
 
   int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
                          NextCallInsn next_call_insn,
                          const MethodReference& target_method,
                          uint32_t vtable_idx,
                          uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                         bool skip_this);
+                         bool skip_this) OVERRIDE;
 
   /*
    * @brief Generate a relative call to the method that will be patched at link time.
@@ -388,7 +366,7 @@ class X86Mir2Lir : public Mir2Lir {
   /*
    * @brief Handle x86 specific literals
    */
-  void InstallLiteralPools();
+  void InstallLiteralPools() OVERRIDE;
 
   /*
    * @brief Generate the debug_frame CFI information.
@@ -400,11 +378,12 @@ class X86Mir2Lir : public Mir2Lir {
    * @brief Generate the debug_frame FDE information.
    * @returns pointer to vector containing CFE information
    */
-  std::vector<uint8_t>* ReturnCallFrameInformation();
+  std::vector<uint8_t>* ReturnCallFrameInformation() OVERRIDE;
 
   LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
 
  protected:
+  RegStorage TargetReg32(SpecialTargetRegister reg);
   // Casting of RegStorage
   RegStorage As32BitReg(RegStorage reg) {
     DCHECK(!reg.IsPair());
@@ -442,6 +421,17 @@ class X86Mir2Lir : public Mir2Lir {
     return ret_val;
   }
 
+  LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                           RegStorage r_dest, OpSize size);
+  LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
+                            RegStorage r_src, OpSize size);
+
+  RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
+
+  int AssignInsnOffsets();
+  void AssignOffsets();
+  AssemblerStatus AssembleInstructions(CodeOffset start_addr);
+
   size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
                      int32_t raw_base, int32_t displacement);
   void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg);
@@ -528,6 +518,9 @@ class X86Mir2Lir : public Mir2Lir {
    * @returns true if a register is byte addressable.
    */
   bool IsByteRegister(RegStorage reg);
+
+  void GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src, int64_t imm, bool is_div);
+
   bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE;
 
   /*
@@ -736,8 +729,9 @@ class X86Mir2Lir : public Mir2Lir {
    * @param divisor divisor number for calculation
    * @param magic hold calculated magic number
    * @param shift hold calculated shift
+   * @param is_long 'true' if divisor is jlong, 'false' for jint.
    */
-  void CalculateMagicAndShift(int divisor, int& magic, int& shift);
+  void CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long);
 
   /*
    * @brief Generate an integer div or rem operation.
@@ -800,6 +794,8 @@ class X86Mir2Lir : public Mir2Lir {
   LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
                          int offset, int check_value, LIR* target, LIR** compare);
 
+  void GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double);
+
   /*
    * Can this operation be using core registers without temporaries?
    * @param rl_lhs Left hand operand.
@@ -816,6 +812,26 @@ class X86Mir2Lir : public Mir2Lir {
    */
   virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
 
+  void GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int32_t len_offset);
+  void GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset);
+
+  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
+  LIR* OpRegMem(OpKind op, RegStorage r_dest, RegLocation value);
+  LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
+  LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset);
+  LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset);
+  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset);
+  void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
+  void OpTlsCmp(ThreadOffset<4> offset, int val);
+  void OpTlsCmp(ThreadOffset<8> offset, int val);
+
+  void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
+
+  void SpillCoreRegs();
+  void UnSpillCoreRegs();
+  void UnSpillFPRegs();
+  void SpillFPRegs();
+
   /*
    * @brief Perform MIR analysis before compiling method.
    * @note Invokes Mir2LiR::Materialize after analysis.
@@ -938,6 +954,14 @@ class X86Mir2Lir : public Mir2Lir {
     return true;  // xmm registers have 64b views even on x86.
   }
 
+  /*
+   * @brief Dump a RegLocation using printf
+   * @param loc Register location to dump
+   */
+  static void DumpRegLocation(RegLocation loc);
+
+  static const X86EncodingMap EncodingMap[kX86Last];
+
  private:
   // The number of vector registers [0..N] reserved by a call to ReserveVectorRegisters
   int num_reserved_vector_regs_;
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index b9abdbffb..057639c25 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -513,7 +513,7 @@ void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
   OpCondBranch(ccode, taken);
 }
 
-void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
+void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
   // It does not make sense to calculate magic and shift for zero divisor.
   DCHECK_NE(divisor, 0);
 
@@ -525,8 +525,8 @@ void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
    * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
    * where divisor(d) <= -2.
    * Thus nc can be calculated like:
-   * nc = 2^31 + 2^31 % d - 1, where d >= 2
-   * nc = -2^31 + (2^31 + 1) % d, where d >= 2.
+   * nc = exp - exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
+   * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
    *
    * So the shift p is the smallest p satisfying
    * 2^p > nc * (d - 2^p % d), where d >= 2
@@ -536,27 +536,28 @@ void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
    * M = (2^p + d - 2^p % d) / d, where d >= 2
    * M = (2^p - d - 2^p % d) / d, where d <= -2.
    *
-   * Notice that p is always bigger than or equal to 32, so we just return 32-p as
+   * Notice that p is always bigger than or equal to 32/64, so we just return p-32/p-64 as
    * the shift number S.
    */
 
-  int32_t p = 31;
-  const uint32_t two31 = 0x80000000U;
+  int64_t p = (is_long) ? 63 : 31;
+  const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;
 
   // Initialize the computations.
-  uint32_t abs_d = (divisor >= 0) ? divisor : -divisor;
-  uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31);
-  uint32_t abs_nc = tmp - 1 - tmp % abs_d;
-  uint32_t quotient1 = two31 / abs_nc;
-  uint32_t remainder1 = two31 % abs_nc;
-  uint32_t quotient2 = two31 / abs_d;
-  uint32_t remainder2 = two31 % abs_d;
+  uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
+  uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
+                                    static_cast<uint32_t>(divisor) >> 31);
+  uint64_t abs_nc = tmp - 1 - tmp % abs_d;
+  uint64_t quotient1 = exp / abs_nc;
+  uint64_t remainder1 = exp % abs_nc;
+  uint64_t quotient2 = exp / abs_d;
+  uint64_t remainder2 = exp % abs_d;
 
   /*
    * To avoid handling both positive and negative divisor, Hacker's Delight
    * introduces a method to handle these 2 cases together to avoid duplication.
    */
-  uint32_t delta;
+  uint64_t delta;
   do {
     p++;
     quotient1 = 2 * quotient1;
@@ -575,7 +576,12 @@ void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
   } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
 
   magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
-  shift = p - 32;
+
+  if (!is_long) {
+    magic = static_cast<int>(magic);
+  }
+
+  shift = (is_long) ? p - 64 : p - 32;
 }
 
 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
@@ -586,52 +592,57 @@ RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int
 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
                                      int imm, bool is_div) {
   // Use a multiply (and fixup) to perform an int div/rem by a constant.
+  RegLocation rl_result;
 
-  // We have to use fixed registers, so flush all the temps.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage.
-
-  // Assume that the result will be in EDX.
-  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG};
-
-  // handle div/rem by 1 special case.
   if (imm == 1) {
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
     if (is_div) {
       // x / 1 == x.
-      StoreValue(rl_result, rl_src);
+      LoadValueDirectFixed(rl_src, rl_result.reg);
     } else {
       // x % 1 == 0.
-      LoadConstantNoClobber(rs_r0, 0);
-      // For this case, return the result in EAX.
-      rl_result.reg.SetReg(r0);
+      LoadConstantNoClobber(rl_result.reg, 0);
     }
   } else if (imm == -1) {  // handle 0x80000000 / -1 special case.
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
     if (is_div) {
-      LIR *minint_branch = 0;
-      LoadValueDirectFixed(rl_src, rs_r0);
-      OpRegImm(kOpCmp, rs_r0, 0x80000000);
-      minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
+      LoadValueDirectFixed(rl_src, rl_result.reg);
+      OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
+      LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
 
       // for x != MIN_INT, x / -1 == -x.
-      NewLIR1(kX86Neg32R, r0);
+      NewLIR1(kX86Neg32R, rl_result.reg.GetReg());
 
-      LIR* branch_around = NewLIR1(kX86Jmp8, 0);
-      // The target for cmp/jmp above.
-      minint_branch->target = NewLIR0(kPseudoTargetLabel);
       // EAX already contains the right value (0x80000000),
-      branch_around->target = NewLIR0(kPseudoTargetLabel);
+      minint_branch->target = NewLIR0(kPseudoTargetLabel);
     } else {
       // x % -1 == 0.
-      LoadConstantNoClobber(rs_r0, 0);
+      LoadConstantNoClobber(rl_result.reg, 0);
+    }
+  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+    // Division using shifting.
+    rl_src = LoadValue(rl_src, kCoreReg);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    if (IsSameReg(rl_result.reg, rl_src.reg)) {
+      RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
+      rl_result.reg.SetReg(rs_temp.GetReg());
+    }
+    NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
+    NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
+    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
+    int shift_amount = LowestSetBit(imm);
+    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
+    if (imm < 0) {
+      OpReg(kOpNeg, rl_result.reg);
     }
-    // For this case, return the result in EAX.
-    rl_result.reg.SetReg(r0);
   } else {
     CHECK(imm <= -2 || imm >= 2);
+
     // Use H.S.Warren's Hacker's Delight Chapter 10 and
     // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
-    int magic, shift;
-    CalculateMagicAndShift(imm, magic, shift);
+    int64_t magic;
+    int shift;
+    CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);
 
     /*
      * For imm >= 2,
@@ -649,18 +660,22 @@ RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
      * 5. Thus, EDX is the quotient
      */
 
+    FlushReg(rs_r0);
+    Clobber(rs_r0);
+    LockTemp(rs_r0);
+    FlushReg(rs_r2);
+    Clobber(rs_r2);
+    LockTemp(rs_r2);
+
+    // Assume that the result will be in EDX.
+    rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG};
+
     // Numerator into EAX.
     RegStorage numerator_reg;
     if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
       // We will need the value later.
-      if (rl_src.location == kLocPhysReg) {
-        // We can use it directly.
-        DCHECK(rl_src.reg.GetReg() != rs_r0.GetReg() && rl_src.reg.GetReg() != rs_r2.GetReg());
-        numerator_reg = rl_src.reg;
-      } else {
-        numerator_reg = rs_r1;
-        LoadValueDirectFixed(rl_src, numerator_reg);
-      }
+      rl_src = LoadValue(rl_src, kCoreReg);
+      numerator_reg = rl_src.reg;
       OpRegCopy(rs_r0, numerator_reg);
     } else {
       // Only need this once.  Just put it into EAX.
@@ -1704,13 +1719,191 @@ void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
   }
 }
 
+void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
+                                  int64_t imm, bool is_div) {
+  if (imm == 0) {
+    GenDivZeroException();
+  } else if (imm == 1) {
+    if (is_div) {
+      // x / 1 == x.
+      StoreValueWide(rl_dest, rl_src);
+    } else {
+      // x % 1 == 0.
+      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+      LoadConstantWide(rl_result.reg, 0);
+      StoreValueWide(rl_dest, rl_result);
+    }
+  } else if (imm == -1) {  // handle 0x8000000000000000 / -1 special case.
+    if (is_div) {
+      rl_src = LoadValueWide(rl_src, kCoreReg);
+      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+      RegStorage rs_temp = AllocTempWide();
+
+      OpRegCopy(rl_result.reg, rl_src.reg);
+      LoadConstantWide(rs_temp, 0x8000000000000000);
+
+      // If x == MIN_LONG, return MIN_LONG.
+      OpRegReg(kOpCmp, rl_src.reg, rs_temp);
+      LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
+
+      // For x != MIN_LONG, x / -1 == -x.
+      OpReg(kOpNeg, rl_result.reg);
+
+      minint_branch->target = NewLIR0(kPseudoTargetLabel);
+      FreeTemp(rs_temp);
+      StoreValueWide(rl_dest, rl_result);
+    } else {
+      // x % -1 == 0.
+      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+      LoadConstantWide(rl_result.reg, 0);
+      StoreValueWide(rl_dest, rl_result);
+    }
+  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+    // Division using shifting.
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    if (IsSameReg(rl_result.reg, rl_src.reg)) {
+      RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
+      rl_result.reg.SetReg(rs_temp.GetReg());
+    }
+    LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
+    OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
+    NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
+    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
+    int shift_amount = LowestSetBit(imm);
+    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
+    if (imm < 0) {
+      OpReg(kOpNeg, rl_result.reg);
+    }
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    CHECK(imm <= -2 || imm >= 2);
+
+    FlushReg(rs_r0q);
+    Clobber(rs_r0q);
+    LockTemp(rs_r0q);
+    FlushReg(rs_r2q);
+    Clobber(rs_r2q);
+    LockTemp(rs_r2q);
+
+    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r2q, INVALID_SREG, INVALID_SREG};
+
+    // Use H.S.Warren's Hacker's Delight Chapter 10 and
+    // T. Granlund, P.L. Montgomery's Division by invariant integers using multiplication.
+    int64_t magic;
+    int shift;
+    CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
+
+    /*
+     * For imm >= 2,
+     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
+     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
+     * For imm <= -2,
+     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
+     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
+     * We implement this algorithm in the following way:
+     * 1. multiply magic number m and numerator n, get the higher 64bit result in RDX
+     * 2. if imm > 0 and magic < 0, add numerator to RDX
+     *    if imm < 0 and magic > 0, sub numerator from RDX
+     * 3. if S !=0, SAR S bits for RDX
+     * 4. add 1 to RDX if RDX < 0
+     * 5. Thus, RDX is the quotient
+     */
+
+    // Numerator into RAX.
+    RegStorage numerator_reg;
+    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
+      // We will need the value later.
+      rl_src = LoadValueWide(rl_src, kCoreReg);
+      numerator_reg = rl_src.reg;
+      OpRegCopyWide(rs_r0q, numerator_reg);
+    } else {
+      // Only need this once.  Just put it into RAX.
+      LoadValueDirectWideFixed(rl_src, rs_r0q);
+    }
+
+    // RDX = magic.
+    LoadConstantWide(rs_r2q, magic);
+
+    // RDX:RAX = magic * dividend.
+    NewLIR1(kX86Imul64DaR, rs_r2q.GetReg());
+
+    if (imm > 0 && magic < 0) {
+      // Add numerator to RDX.
+      DCHECK(numerator_reg.Valid());
+      OpRegReg(kOpAdd, rs_r2q, numerator_reg);
+    } else if (imm < 0 && magic > 0) {
+      DCHECK(numerator_reg.Valid());
+      OpRegReg(kOpSub, rs_r2q, numerator_reg);
+    }
+
+    // Do we need the shift?
+    if (shift != 0) {
+      // Shift RDX by 'shift' bits.
+      OpRegImm(kOpAsr, rs_r2q, shift);
+    }
+
+    // Move RDX to RAX.
+    OpRegCopyWide(rs_r0q, rs_r2q);
+
+    // Move sign bit to bit 0, zeroing the rest.
+    OpRegImm(kOpLsr, rs_r2q, 63);
+
+    // RDX = RDX + RAX.
+    OpRegReg(kOpAdd, rs_r2q, rs_r0q);
+
+    // Quotient is in RDX.
+    if (!is_div) {
+      // We need to compute the remainder.
+      // Remainder is numerator - (quotient * imm).
+      DCHECK(numerator_reg.Valid());
+      OpRegCopyWide(rs_r0q, numerator_reg);
+
+      // Imul doesn't support 64-bit imms.
+      if (imm > std::numeric_limits<int32_t>::max() ||
+          imm < std::numeric_limits<int32_t>::min()) {
+        RegStorage rs_temp = AllocTempWide();
+        LoadConstantWide(rs_temp, imm);
+
+        // RDX = quotient * imm.
+        NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
+
+        FreeTemp(rs_temp);
+      } else {
+        // RDX = quotient * imm.
+        int short_imm = static_cast<int>(imm);
+        NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
+      }
+
+      // RAX -= RDX.
+      OpRegReg(kOpSub, rs_r0q, rs_r2q);
+
+      // Store result.
+      OpRegCopyWide(rl_result.reg, rs_r0q);
+    } else {
+      // Store result.
+      OpRegCopyWide(rl_result.reg, rs_r2q);
+    }
+    StoreValueWide(rl_dest, rl_result);
+    FreeTemp(rs_r0q);
+    FreeTemp(rs_r2q);
+  }
+}
+
 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                           RegLocation rl_src2, bool is_div) {
+                               RegLocation rl_src2, bool is_div) {
   if (!cu_->target64) {
     LOG(FATAL) << "Unexpected use GenDivRemLong()";
     return;
   }
 
+  if (rl_src2.is_const) {
+    DCHECK(rl_src2.wide);
+    int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
+    GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
+    return;
+  }
+
   // We have to use fixed registers, so flush all the temps.
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage.
@@ -1734,7 +1927,7 @@ void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocati
   // RHS is -1.
   LoadConstantWide(rs_r6q, 0x8000000000000000);
   NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
-  LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+  LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
 
   // In 0x8000000000000000/-1 case.
   if (!is_div) {
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 1bda73890..a72d94a79 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -2868,4 +2868,24 @@ bool X86Mir2Lir::GenInlinedCharAt(CallInfo* info) {
   return true;
 }
 
+bool X86Mir2Lir::GenInlinedCurrentThread(CallInfo* info) {
+  RegLocation rl_dest = InlineTarget(info);
+
+  // Early exit if the result is unused.
+  if (rl_dest.orig_sreg < 0) {
+    return true;
+  }
+
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+
+  if (cu_->target64) {
+    OpRegThreadMem(kOpMov, rl_result.reg, Thread::PeerOffset<8>());
+  } else {
+    OpRegThreadMem(kOpMov, rl_result.reg, Thread::PeerOffset<4>());
+  }
+
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
 }  // namespace art
diff --git a/test/003-omnibus-opcodes/src/IntMath.java b/test/003-omnibus-opcodes/src/IntMath.java
index 2e2962a4a..ad540fdd8 100644
--- a/test/003-omnibus-opcodes/src/IntMath.java
+++ b/test/003-omnibus-opcodes/src/IntMath.java
@@ -335,8 +335,8 @@ public class IntMath {
                        special = (start+i) / 15;
                        break;
                }
+               Main.assertTrue(normal == special);
            }
-           Main.assertTrue(normal == special);
        }
     }
 
-- 
2.11.0