From: Bill Buzbee <buzbee@google.com>
Date: Wed, 8 Jul 2009 20:08:04 +0000 (-0700)
Subject: Inline-execute for Java.Lang.Math routines, jit codegen restructure, various bug... 
X-Git-Tag: android-x86-2.2~819
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=50a6bf2f01efba0acbff9bb03e7ee09688553e08;p=android-x86%2Fdalvik.git

Inline-execute for Java.Lang.Math routines, jit codegen restructure, various bug fixes.
---

diff --git a/libdex/InstrUtils.c b/libdex/InstrUtils.c
index b58e647e3..42aa3ec62 100644
--- a/libdex/InstrUtils.c
+++ b/libdex/InstrUtils.c
@@ -657,7 +657,6 @@ InstructionFlags* dexCreateInstrFlagsTable(void)
         case OP_UNUSED_FD:
         case OP_UNUSED_FE:
         case OP_UNUSED_FF:
-            flags = kInstrNoJit;
             break;
 
         /*
diff --git a/libdex/InstrUtils.h b/libdex/InstrUtils.h
index 5ca175e68..9728cd453 100644
--- a/libdex/InstrUtils.h
+++ b/libdex/InstrUtils.h
@@ -101,7 +101,6 @@ enum InstructionFlags {
     kInstrCanReturn     = 1 << 4,   // returns, no additional statements
     kInstrInvoke        = 1 << 5,   // a flavor of invoke
     kInstrUnconditional = 1 << 6,   // unconditional branch
-    kInstrNoJit         = 1 << 7,   // don't jit trace containing this
 };
 
 
diff --git a/vm/Android.mk b/vm/Android.mk
index b3f02fac6..cea83a1fb 100644
--- a/vm/Android.mk
+++ b/vm/Android.mk
@@ -264,13 +264,12 @@ ifeq ($(TARGET_ARCH),arm)
 
   ifeq ($(WITH_JIT),true)
     LOCAL_SRC_FILES += \
-		compiler/codegen/armv5te/Codegen.c \
+		compiler/codegen/armv5te/Codegen-$(TARGET_ARCH_VARIANT).c \
 		compiler/codegen/armv5te/Assemble.c \
 		compiler/codegen/armv5te/ArchUtility.c \
-		compiler/codegen/armv5te/FpCodegen-$(TARGET_ARCH_VARIANT).c \
 		compiler/codegen/armv5te/LocalOptimizations.c \
 		compiler/codegen/armv5te/GlobalOptimizations.c \
-		compiler/template/out/CompilerTemplateAsm-armv5te.S
+		compiler/template/out/CompilerTemplateAsm-$(TARGET_ARCH_VARIANT).S
   endif
 endif
 
diff --git a/vm/InlineNative.c b/vm/InlineNative.c
index 6364e9493..f8293608f 100644
--- a/vm/InlineNative.c
+++ b/vm/InlineNative.c
@@ -441,7 +441,7 @@ static bool javaLangMath_abs_float(u4 arg0, u4 arg1, u4 arg2, u4 arg3,
 }
 
 /*
- * public static float abs(float)
+ * public static double abs(double)
  */
 static bool javaLangMath_abs_double(u4 arg0, u4 arg1, u4 arg2, u4 arg3,
     JValue* pResult)
@@ -550,7 +550,8 @@ static bool javaLangMath_sin(u4 arg0, u4 arg1, u4 arg2, u4 arg3,
  * pointer field.
  *
  * IMPORTANT: you must update DALVIK_VM_BUILD in DalvikVersion.h if you make
- * changes to this table.
+ * changes to this table.  Must also be kept in sync with NativeInlineOps
+ * enum in InlineNative.h.
  */
 const InlineOperation gDvmInlineOpsTable[] = {
     { org_apache_harmony_dalvik_NativeTestTarget_emptyInlineMethod,
@@ -695,4 +696,3 @@ skip_prof:
 #endif
     return (*gDvmInlineOpsTable[opIndex].func)(arg0, arg1, arg2, arg3, pResult);
 }
-
diff --git a/vm/InlineNative.h b/vm/InlineNative.h
index a6177a6a1..98398da28 100644
--- a/vm/InlineNative.h
+++ b/vm/InlineNative.h
@@ -48,6 +48,24 @@ typedef struct InlineOperation {
     const char*     methodSignature;
 } InlineOperation;
 
+/* Symbolic indices into gDvmInlineOpsTable (InlineNative.c); the enum values must be kept in sync with that table */
+typedef enum NativeInlineOps {
+    INLINE_EMPTYINLINEMETHOD = 0,
+    INLINE_STRING_CHARAT = 1,
+    INLINE_STRING_COMPARETO = 2,
+    INLINE_STRING_EQUALS = 3,
+    INLINE_STRING_LENGTH = 4,
+    INLINE_MATH_ABS_INT = 5,
+    INLINE_MATH_ABS_LONG = 6,
+    INLINE_MATH_ABS_FLOAT = 7,
+    INLINE_MATH_ABS_DOUBLE = 8,
+    INLINE_MATH_MIN_INT = 9,
+    INLINE_MATH_MAX_INT = 10,
+    INLINE_MATH_SQRT = 11,
+    INLINE_MATH_COS = 12,
+    INLINE_MATH_SIN = 13,
+} NativeInlineOps;
+
 /*
  * Get the inlineops table.
  */
diff --git a/vm/compiler/codegen/armv5te/Armv5teLIR.h b/vm/compiler/codegen/armv5te/Armv5teLIR.h
index c1dbf33db..709b95fe3 100644
--- a/vm/compiler/codegen/armv5te/Armv5teLIR.h
+++ b/vm/compiler/codegen/armv5te/Armv5teLIR.h
@@ -228,18 +228,4 @@ typedef struct PredictedChainingCell {
 
 #define CHAIN_CELL_OFFSET_TAG   0xcdab
 
-/* Create the TemplateOpcode enum */
-#define JIT_TEMPLATE(X) TEMPLATE_##X,
-typedef enum {
-#include "../../template/armv5te/TemplateOpList.h"
-/*
- * For example,
- *     TEMPLATE_CMP_LONG,
- *     TEMPLATE_RETURN,
- *     ...
- */
-    TEMPLATE_LAST_MARK,
-} TemplateOpCode;
-#undef JIT_TEMPLATE
-
 #endif /* _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_H */
diff --git a/vm/compiler/codegen/armv5te/FpCodegen.h b/vm/compiler/codegen/armv5te/Codegen-armv5te-vfp.c
similarity index 50%
rename from vm/compiler/codegen/armv5te/FpCodegen.h
rename to vm/compiler/codegen/armv5te/Codegen-armv5te-vfp.c
index 72625b53f..4e376ba26 100644
--- a/vm/compiler/codegen/armv5te/FpCodegen.h
+++ b/vm/compiler/codegen/armv5te/Codegen-armv5te-vfp.c
@@ -15,18 +15,14 @@
  */
 
 #include "Dalvik.h"
-#include "compiler/CompilerInternals.h"
+#include "interp/InterpDefs.h"
+#include "libdex/OpCode.h"
+#include "dexdump/OpCodeNames.h"
+#include "vm/compiler/CompilerInternals.h"
+#include "Armv5teLIR.h"
+#include "vm/mterp/common/FindInterface.h"
 
-#ifndef _DALVIK_VM_COMPILER_CODEGEN_FPCODEGEN_H
-#define _DALVIK_VM_COMPILER_CODEGEN_FPCODEGEN_H
+#include "armv5te-vfp/ArchVariant.h"
 
-bool dvmCompilerGenConversion(CompilationUnit *cUnit, MIR *mir);
-bool dvmCompilerGenArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
-                                int vSrc1, int vSrc2);
-bool dvmCompilerGenArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest,
-                                 int vSrc1, int vSrc2);
-bool dvmCompilerGenCmpX(CompilationUnit *cUnit, MIR *mir, int vDest,
-                        int vSrc1, int vSrc2);
-
-
-#endif /* _DALVIK_VM_COMPILER_CODEGEN_FPCODEGEN_H */
+#include "Codegen.c"
+#include "armv5te-vfp/ArchVariant.c"
diff --git a/vm/compiler/codegen/armv5te/Codegen-armv5te.c b/vm/compiler/codegen/armv5te/Codegen-armv5te.c
new file mode 100644
index 000000000..155c1adbb
--- /dev/null
+++ b/vm/compiler/codegen/armv5te/Codegen-armv5te.c
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "interp/InterpDefs.h"
+#include "libdex/OpCode.h"
+#include "dexdump/OpCodeNames.h"
+#include "vm/compiler/CompilerInternals.h"
+#include "Armv5teLIR.h"
+#include "vm/mterp/common/FindInterface.h"
+
+#include "armv5te/ArchVariant.h"
+
+#include "Codegen.c"
+#include "armv5te/ArchVariant.c"
diff --git a/vm/compiler/codegen/armv5te/Codegen.c b/vm/compiler/codegen/armv5te/Codegen.c
index 3ba3cc6ab..6d194725c 100644
--- a/vm/compiler/codegen/armv5te/Codegen.c
+++ b/vm/compiler/codegen/armv5te/Codegen.c
@@ -14,14 +14,29 @@
  * limitations under the License.
  */
 
-#include "Dalvik.h"
-#include "interp/InterpDefs.h"
-#include "libdex/OpCode.h"
-#include "dexdump/OpCodeNames.h"
-#include "vm/compiler/CompilerInternals.h"
-#include "FpCodegen.h"
-#include "Armv5teLIR.h"
-#include "vm/mterp/common/FindInterface.h"
+/*
+ * This file contains codegen and support common to all supported
+ * ARM variants.  It is included by:
+ *
+ *        Codegen-$(TARGET_ARCH_VARIANT).c
+ *
+ * which combines this common code with specific support found in the
+ * applicable directory below this one.
+ */
+
+/* Routines which must be supplied by the variant-specific code */
+static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode);
+bool dvmCompilerArchInit(void);
+static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir);
+static bool genInlineCos(CompilationUnit *cUnit, MIR *mir);
+static bool genInlineSin(CompilationUnit *cUnit, MIR *mir);
+static bool genConversion(CompilationUnit *cUnit, MIR *mir);
+static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
+                            int vSrc1, int vSrc2);
+static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest,
+                             int vSrc1, int vSrc2);
+static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1,
+                    int vSrc2);
 
 /* Array holding the entry offset of each template relative to the first one */
 static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK];
@@ -275,51 +290,6 @@ static void genUnconditionalBranch(CompilationUnit *cUnit, Armv5teLIR *target)
     branch->generic.target = (LIR *) target;
 }
 
-#define USE_IN_CACHE_HANDLER 1
-
-/*
- * Jump to the out-of-line handler in ARM mode to finish executing the
- * remaining of more complex instructions.
- */
-static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode)
-{
-#if USE_IN_CACHE_HANDLER
-    /*
-     * NOTE - In practice BLX only needs one operand, but since the assembler
-     * may abort itself and retry due to other out-of-range conditions we
-     * cannot really use operand[0] to store the absolute target address since
-     * it may get clobbered by the final relative offset. Therefore,
-     * we fake BLX_1 is a two operand instruction and the absolute target
-     * address is stored in operand[1].
-     */
-    newLIR2(cUnit, ARMV5TE_BLX_1,
-            (int) gDvmJit.codeCache + templateEntryOffsets[opCode],
-            (int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
-    newLIR2(cUnit, ARMV5TE_BLX_2,
-            (int) gDvmJit.codeCache + templateEntryOffsets[opCode],
-            (int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
-#else
-    /*
-     * In case we want to access the statically compiled handlers for
-     * debugging purposes, define USE_IN_CACHE_HANDLER to 0
-     */
-    void *templatePtr;
-
-#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
-#include "../../template/armv5te/TemplateOpList.h"
-#undef JIT_TEMPLATE
-    switch (opCode) {
-#define JIT_TEMPLATE(X) \
-        case TEMPLATE_##X: { templatePtr = dvmCompiler_TEMPLATE_##X; break; }
-#include "../../template/armv5te/TemplateOpList.h"
-#undef JIT_TEMPLATE
-        default: templatePtr = NULL;
-    }
-    loadConstant(cUnit, r7, (int) templatePtr);
-    newLIR1(cUnit, ARMV5TE_BLX_R, r7);
-#endif
-}
-
 /* Perform the actual operation for OP_RETURN_* */
 static void genReturnCommon(CompilationUnit *cUnit, MIR *mir)
 {
@@ -424,6 +394,20 @@ static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest)
     }
 }
 
+/* Load the word at rBase + displacement into rDest.  Displacement must be a word multiple */
+static void loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement,
+                         int rDest)
+{
+    assert((displacement & 0x3) == 0);
+    /* RRI5 takes a non-negative word offset (displacement >> 2), so only 0..124 fit */
+    if (displacement >= 0 && displacement < 128) {
+        newLIR3(cUnit, ARMV5TE_LDR_RRI5, rDest, rBase, displacement >> 2);
+    } else {
+        loadConstant(cUnit, rDest, displacement); /* clobbers rDest: it must not alias rBase on this path */
+        newLIR3(cUnit, ARMV5TE_LDR_RRR, rDest, rBase, rDest);
+    }
+}
+
 /* Store a value from rSrc to vDest */
 static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest,
                        int rScratch)
@@ -796,8 +780,8 @@ static bool genShiftOpLong(CompilationUnit *cUnit, MIR *mir, int vDest,
     storeValuePair(cUnit, r0, r1, vDest, r2);
     return false;
 }
-bool dvmCompilerGenArithOpFloatPortable(CompilationUnit *cUnit, MIR *mir,
-                                        int vDest, int vSrc1, int vSrc2)
+bool genArithOpFloatPortable(CompilationUnit *cUnit, MIR *mir,
+                             int vDest, int vSrc1, int vSrc2)
 {
     /*
      * Don't optimize the regsiter usage here as they are governed by the EABI
@@ -855,8 +839,8 @@ bool dvmCompilerGenArithOpFloatPortable(CompilationUnit *cUnit, MIR *mir,
     return false;
 }
 
-bool dvmCompilerGenArithOpDoublePortable(CompilationUnit *cUnit, MIR *mir,
-                                         int vDest, int vSrc1, int vSrc2)
+bool genArithOpDoublePortable(CompilationUnit *cUnit, MIR *mir,
+                              int vDest, int vSrc1, int vSrc2)
 {
     void* funct;
     int reg0, reg1, reg2;
@@ -1169,16 +1153,16 @@ static bool genArithOp(CompilationUnit *cUnit, MIR *mir)
         return genArithOpInt(cUnit,mir, vA, vB, vC);
     }
     if ((opCode >= OP_ADD_FLOAT_2ADDR) && (opCode <= OP_REM_FLOAT_2ADDR)) {
-        return dvmCompilerGenArithOpFloat(cUnit,mir, vA, vA, vB);
+        return genArithOpFloat(cUnit,mir, vA, vA, vB);
     }
     if ((opCode >= OP_ADD_FLOAT) && (opCode <= OP_REM_FLOAT)) {
-        return dvmCompilerGenArithOpFloat(cUnit, mir, vA, vB, vC);
+        return genArithOpFloat(cUnit, mir, vA, vB, vC);
     }
     if ((opCode >= OP_ADD_DOUBLE_2ADDR) && (opCode <= OP_REM_DOUBLE_2ADDR)) {
-        return dvmCompilerGenArithOpDouble(cUnit,mir, vA, vA, vB);
+        return genArithOpDouble(cUnit,mir, vA, vA, vB);
     }
     if ((opCode >= OP_ADD_DOUBLE) && (opCode <= OP_REM_DOUBLE)) {
-        return dvmCompilerGenArithOpDouble(cUnit,mir, vA, vB, vC);
+        return genArithOpDouble(cUnit,mir, vA, vB, vC);
     }
     return true;
 }
@@ -1205,18 +1189,133 @@ static bool genConversionCall(CompilationUnit *cUnit, MIR *mir, void *funct,
     return false;
 }
 
-/* Experimental example of completely inlining a native replacement */
 static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir)
 {
-    /* Don't optimize the register usage */
-    int offset = (int) &((InterpState *) NULL)->retval;
     DecodedInstruction *dInsn = &mir->dalvikInsn;
-    assert(dInsn->vA == 1);
-    loadValue(cUnit, dInsn->arg[0], r0);
-    loadConstant(cUnit, r1, gDvm.offJavaLangString_count);
-    genNullCheck(cUnit, dInsn->arg[0], r0, mir->offset, NULL);
-    newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r0, r1);
-    newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rGLUE, offset >> 2);
+    int offset = offsetof(InterpState, retval);
+    int regObj = selectFirstRegister(cUnit, dInsn->arg[0], false);
+    int reg1 = NEXT_REG(regObj);
+    loadValue(cUnit, dInsn->arg[0], regObj);
+    genNullCheck(cUnit, dInsn->arg[0], regObj, mir->offset, NULL);
+    loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_count, reg1);
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, reg1, rGLUE, offset >> 2);
+    return false;
+}
+
+/*
+ * Emit inline code for String.charAt: load the halfword at (index + string
+ * offset) in the string's value array and store it to InterpState.retval.
+ * NOTE: The amount of code suggests this ought to be a template (and could
+ * be coded more efficiently in ARM).  However, it depends on the internal
+ * layout of string objects, which is most safely known at run time.
+ * TUNING: One possibility (also usable for StringCompareTo and StringEquals)
+ * is to generate string access helper subroutines on Jit startup, and then
+ * call them from the translated inline-executes.
+ */
+static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    int offset = offsetof(InterpState, retval);
+    int contents = offsetof(ArrayObject, contents);
+    int regObj = selectFirstRegister(cUnit, dInsn->arg[0], false);
+    int regIdx = NEXT_REG(regObj);
+    int regMax = NEXT_REG(regIdx);
+    int regOff = NEXT_REG(regMax);
+    loadValue(cUnit, dInsn->arg[0], regObj);    /* arg[0]: the string object */
+    loadValue(cUnit, dInsn->arg[1], regIdx);    /* arg[1]: the character index */
+    Armv5teLIR * pcrLabel = genNullCheck(cUnit, dInsn->arg[0], regObj,
+                                         mir->offset, NULL);
+    loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_count, regMax);
+    loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_offset, regOff);
+    loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_value, regObj); /* regObj now holds the value field, not the string */
+    genBoundsCheck(cUnit, regIdx, regMax, mir->offset, pcrLabel); /* index vs. count, before the string offset is added */
+
+    newLIR2(cUnit, ARMV5TE_ADD_RI8, regObj, contents);        /* regObj += offsetof(ArrayObject, contents) */
+    newLIR3(cUnit, ARMV5TE_ADD_RRR, regIdx, regIdx, regOff);  /* index += string offset */
+    newLIR3(cUnit, ARMV5TE_ADD_RRR, regIdx, regIdx, regIdx);  /* index *= 2: elements are halfwords */
+    newLIR3(cUnit, ARMV5TE_LDRH_RRR, regMax, regObj, regIdx); /* regMax is free after the check; reuse it for the element */
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, regMax, rGLUE, offset >> 2); /* retval = element (RRI5 takes a word offset) */
+    return false;
+}
+/* Emit inline code for Math.abs(int); the result is stored to InterpState.retval.  Always returns false */
+static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir)
+{
+    int offset = offsetof(InterpState, retval);
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    int reg0 = selectFirstRegister(cUnit, dInsn->arg[0], false);
+    int sign = NEXT_REG(reg0);
+    /* abs(x): y = x >> 31 (0 or -1), result = (x + y) ^ y.  Shorter in ARM/THUMB2, no skip in THUMB */
+    loadValue(cUnit, dInsn->arg[0], reg0);
+    newLIR3(cUnit, ARMV5TE_ASR, sign, reg0, 31);        /* sign = x >> 31 (arithmetic) */
+    newLIR3(cUnit, ARMV5TE_ADD_RRR, reg0, reg0, sign);  /* x += sign */
+    newLIR2(cUnit, ARMV5TE_EOR, reg0, sign);            /* x ^= sign */
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, reg0, rGLUE, offset >> 2); /* retval = x (RRI5 takes a word offset) */
+    return false;
+}
+/* Emit inline code for Math.abs(float): clear bit 31 of the value and store it to InterpState.retval */
+static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
+{
+    int offset = offsetof(InterpState, retval);
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    int reg0 = selectFirstRegister(cUnit, dInsn->arg[0], false);
+    int signMask = NEXT_REG(reg0);
+    loadValue(cUnit, dInsn->arg[0], reg0);
+    loadConstant(cUnit, signMask, 0x7fffffff);       /* every bit except the sign bit */
+    newLIR2(cUnit, ARMV5TE_AND_RR, reg0, signMask);  /* drop the sign, keep exponent/mantissa */
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, reg0, rGLUE, offset >> 2); /* retval = reg0 (RRI5 takes a word offset) */
+    return false;
+}
+/* Emit inline code for Math.abs(double): clear bit 31 of the high word and store both words to InterpState.retval */
+static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir)
+{
+    int offset = offsetof(InterpState, retval);
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    int oplo = selectFirstRegister(cUnit, dInsn->arg[0], true);
+    int ophi = NEXT_REG(oplo);
+    int signMask = NEXT_REG(ophi);
+    loadValuePair(cUnit, dInsn->arg[0], oplo, ophi);
+    loadConstant(cUnit, signMask, 0x7fffffff);       /* every bit except the sign bit */
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, oplo, rGLUE, offset >> 2); /* low word is stored unchanged */
+    newLIR2(cUnit, ARMV5TE_AND_RR, ophi, signMask);  /* only the high word is masked */
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, ophi, rGLUE, (offset >> 2)+1); /* high word goes to the next word of retval */
+    return false;
+}
+
+ /* Emit inline Math.min (isMin) or Math.max (!isMin) of two ints.  No select in thumb, so we need to branch.  Thumb2 will do better */
+static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
+{
+    int offset = offsetof(InterpState, retval);
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    int reg0 = selectFirstRegister(cUnit, dInsn->arg[0], false);
+    int reg1 = NEXT_REG(reg0);
+    loadValue(cUnit, dInsn->arg[0], reg0);
+    loadValue(cUnit, dInsn->arg[1], reg1);
+    newLIR2(cUnit, ARMV5TE_CMP_RR, reg0, reg1);
+    Armv5teLIR *branch1 = newLIR2(cUnit, ARMV5TE_B_COND, 2, /* NOTE(review): the 2 looks like a placeholder; the target is set below */
+           isMin ? ARM_COND_LT : ARM_COND_GT);    /* skip the move when reg0 already wins */
+    newLIR2(cUnit, ARMV5TE_MOV_RR, reg0, reg1);   /* otherwise take arg[1] */
+    Armv5teLIR *target =
+        newLIR3(cUnit, ARMV5TE_STR_RRI5, reg0, rGLUE, offset >> 2); /* retval = reg0 */
+    branch1->generic.target = (LIR *)target;      /* the conditional branch lands on the store */
+    return false;
+}
+/* Emit inline code for Math.abs(long); both result words are stored to InterpState.retval */
+static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir)
+{
+    int offset = offsetof(InterpState, retval);
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    int oplo = selectFirstRegister(cUnit, dInsn->arg[0], true);
+    int ophi = NEXT_REG(oplo);
+    int sign = NEXT_REG(ophi);
+    /* abs(x): y = sign of x replicated into a word (hi >> 31), result = (x + y) ^ y over 64 bits.  Shorter in ARM/THUMB2, no skip in THUMB */
+    loadValuePair(cUnit, dInsn->arg[0], oplo, ophi);
+    newLIR3(cUnit, ARMV5TE_ASR, sign, ophi, 31);        /* sign = hi >> 31 (0 or -1) */
+    newLIR3(cUnit, ARMV5TE_ADD_RRR, oplo, oplo, sign);  /* lo += sign */
+    newLIR2(cUnit, ARMV5TE_ADC, ophi, sign);            /* hi += sign + carry; relies on the ADD above setting the carry flag */
+    newLIR2(cUnit, ARMV5TE_EOR, oplo, sign);            /* lo ^= sign */
+    newLIR2(cUnit, ARMV5TE_EOR, ophi, sign);            /* hi ^= sign */
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, oplo, rGLUE, offset >> 2);     /* low word of retval */
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, ophi, rGLUE, (offset >> 2)+1); /* high word of retval */
     return false;
 }
 
@@ -1724,7 +1823,7 @@ static bool handleFmt21c_Fmt31c(CompilationUnit *cUnit, MIR *mir)
         case OP_SGET_BYTE:
         case OP_SGET_SHORT:
         case OP_SGET: {
-            int valOffset = (int)&((struct StaticField*)NULL)->value;
+            int valOffset = offsetof(StaticField, value);
             void *fieldPtr = (void*)
               (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
             assert(fieldPtr != NULL);
@@ -1734,7 +1833,7 @@ static bool handleFmt21c_Fmt31c(CompilationUnit *cUnit, MIR *mir)
             break;
         }
         case OP_SGET_WIDE: {
-            int valOffset = (int)&((struct StaticField*)NULL)->value;
+            int valOffset = offsetof(StaticField, value);
             void *fieldPtr = (void*)
               (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
             int reg0, reg1, reg2;
@@ -1754,7 +1853,7 @@ static bool handleFmt21c_Fmt31c(CompilationUnit *cUnit, MIR *mir)
         case OP_SPUT_BYTE:
         case OP_SPUT_SHORT:
         case OP_SPUT: {
-            int valOffset = (int)&((struct StaticField*)NULL)->value;
+            int valOffset = offsetof(StaticField, value);
             void *fieldPtr = (void*)
               (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
 
@@ -1767,7 +1866,7 @@ static bool handleFmt21c_Fmt31c(CompilationUnit *cUnit, MIR *mir)
         }
         case OP_SPUT_WIDE: {
             int reg0, reg1, reg2;
-            int valOffset = (int)&((struct StaticField*)NULL)->value;
+            int valOffset = offsetof(StaticField, value);
             void *fieldPtr = (void*)
               (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
 
@@ -1926,7 +2025,7 @@ static bool handleFmt11x(CompilationUnit *cUnit, MIR *mir)
     return false;
 }
 
-bool dvmCompilerGenConversionPortable(CompilationUnit *cUnit, MIR *mir)
+static bool genConversionPortable(CompilationUnit *cUnit, MIR *mir)
 {
     OpCode opCode = mir->dalvikInsn.opCode;
 
@@ -1936,9 +2035,7 @@ bool dvmCompilerGenConversionPortable(CompilationUnit *cUnit, MIR *mir)
     double __aeabi_f2d(  float op1 );
     double __aeabi_i2d(  int op1 );
     int    __aeabi_d2iz( double op1 );
-    long   __aeabi_f2lz( float op1 );
     float  __aeabi_l2f(  long op1 );
-    long   __aeabi_d2lz( double op1 );
     double __aeabi_l2d(  long op1 );
 
     switch (opCode) {
@@ -1955,11 +2052,11 @@ bool dvmCompilerGenConversionPortable(CompilationUnit *cUnit, MIR *mir)
         case OP_DOUBLE_TO_INT:
             return genConversionCall(cUnit, mir, (void*)__aeabi_d2iz, 2, 1);
         case OP_FLOAT_TO_LONG:
-            return genConversionCall(cUnit, mir, (void*)__aeabi_f2lz, 1, 2);
+            return genConversionCall(cUnit, mir, (void*)dvmJitf2l, 1, 2);
         case OP_LONG_TO_FLOAT:
             return genConversionCall(cUnit, mir, (void*)__aeabi_l2f, 2, 1);
         case OP_DOUBLE_TO_LONG:
-            return genConversionCall(cUnit, mir, (void*)__aeabi_d2lz, 2, 2);
+            return genConversionCall(cUnit, mir, (void*)dvmJitd2l, 2, 2);
         case OP_LONG_TO_DOUBLE:
             return genConversionCall(cUnit, mir, (void*)__aeabi_l2d, 2, 2);
         default:
@@ -2000,7 +2097,7 @@ static bool handleFmt12x(CompilationUnit *cUnit, MIR *mir)
         case OP_LONG_TO_FLOAT:
         case OP_DOUBLE_TO_LONG:
         case OP_LONG_TO_DOUBLE:
-            return dvmCompilerGenConversion(cUnit, mir);
+            return genConversion(cUnit, mir);
         case OP_NEG_INT:
         case OP_NOT_INT:
             return genArithOpInt(cUnit, mir, vSrc1Dest, vSrc1Dest, vSrc2);
@@ -2008,11 +2105,9 @@ static bool handleFmt12x(CompilationUnit *cUnit, MIR *mir)
         case OP_NOT_LONG:
             return genArithOpLong(cUnit,mir, vSrc1Dest, vSrc1Dest, vSrc2);
         case OP_NEG_FLOAT:
-            return dvmCompilerGenArithOpFloat(cUnit, mir, vSrc1Dest,
-                                              vSrc1Dest, vSrc2);
+            return genArithOpFloat(cUnit, mir, vSrc1Dest, vSrc1Dest, vSrc2);
         case OP_NEG_DOUBLE:
-            return dvmCompilerGenArithOpDouble(cUnit, mir, vSrc1Dest,
-                                               vSrc1Dest, vSrc2);
+            return genArithOpDouble(cUnit, mir, vSrc1Dest, vSrc1Dest, vSrc2);
         case OP_MOVE_WIDE: {
             reg0 = selectFirstRegister(cUnit, vSrc2, true);
             reg1 = NEXT_REG(reg0);
@@ -2027,7 +2122,7 @@ static bool handleFmt12x(CompilationUnit *cUnit, MIR *mir)
             reg1 = NEXT_REG(reg0);
             reg2 = NEXT_REG(reg1);
 
-            loadValue(cUnit, mir->dalvikInsn.vB, reg0);
+            loadValue(cUnit, vSrc2, reg0);
             newLIR3(cUnit, ARMV5TE_ASR, reg1, reg0, 31);
             storeValuePair(cUnit, reg0, reg1, vSrc1Dest, reg2);
             break;
@@ -2085,10 +2180,7 @@ static bool handleFmt21s(CompilationUnit *cUnit, MIR *mir)
         reg2 = NEXT_REG(reg1);
 
         loadConstant(cUnit, reg0, BBBB);
-        loadConstant(cUnit, reg1, 0);
-        if (BBBB < 0) {
-            newLIR2(cUnit, ARMV5TE_SUB_RI8, reg1, -1);
-        }
+        newLIR3(cUnit, ARMV5TE_ASR, reg1, reg0, 31);
 
         /* Save the long values to the specified Dalvik register pair */
         storeValuePair(cUnit, reg0, reg1, vDest, reg2);
@@ -2526,7 +2618,7 @@ static bool handleFmt23x(CompilationUnit *cUnit, MIR *mir)
         case OP_CMPG_FLOAT:
         case OP_CMPL_DOUBLE:
         case OP_CMPG_DOUBLE:
-            return dvmCompilerGenCmpX(cUnit, mir, vA, vB, vC);
+            return genCmpX(cUnit, mir, vA, vB, vC);
         case OP_CMP_LONG:
             loadValuePair(cUnit,vB, r0, r1);
             loadValuePair(cUnit, vC, r2, r3);
@@ -2982,16 +3074,48 @@ static bool handleFmt3inline(CompilationUnit *cUnit, MIR *mir)
         case OP_EXECUTE_INLINE: {
             unsigned int i;
             const InlineOperation* inLineTable = dvmGetInlineOpsTable();
-            int offset = (int) &((InterpState *) NULL)->retval;
+            int offset = offsetof(InterpState, retval);
             int operation = dInsn->vB;
 
-            if (!strcmp(inLineTable[operation].classDescriptor,
-                        "Ljava/lang/String;") &&
-                !strcmp(inLineTable[operation].methodName,
-                        "length") &&
-                !strcmp(inLineTable[operation].methodSignature,
-                        "()I")) {
-                return genInlinedStringLength(cUnit,mir);
+            switch (operation) {
+                case INLINE_EMPTYINLINEMETHOD:
+                    return false;  /* Nop */
+                case INLINE_STRING_LENGTH:
+                    return genInlinedStringLength(cUnit, mir);
+                case INLINE_MATH_ABS_INT:
+                    return genInlinedAbsInt(cUnit, mir);
+                case INLINE_MATH_ABS_LONG:
+                    return genInlinedAbsLong(cUnit, mir);
+                case INLINE_MATH_MIN_INT:
+                    return genInlinedMinMaxInt(cUnit, mir, true);
+                case INLINE_MATH_MAX_INT:
+                    return genInlinedMinMaxInt(cUnit, mir, false);
+                case INLINE_STRING_CHARAT:
+                    return genInlinedStringCharAt(cUnit, mir);
+                case INLINE_MATH_SQRT:
+                    if (genInlineSqrt(cUnit, mir))
+                        return true;
+                    else
+                        break;   /* Handle with C routine */
+                case INLINE_MATH_COS:
+                    if (genInlineCos(cUnit, mir))
+                        return true;
+                    else
+                        break;   /* Handle with C routine */
+                case INLINE_MATH_SIN:
+                    if (genInlineSin(cUnit, mir))
+                        return true;
+                    else
+                        break;   /* Handle with C routine */
+                case INLINE_MATH_ABS_FLOAT:
+                    return genInlinedAbsFloat(cUnit, mir);
+                case INLINE_MATH_ABS_DOUBLE:
+                    return genInlinedAbsDouble(cUnit, mir);
+                case INLINE_STRING_COMPARETO:
+                case INLINE_STRING_EQUALS:
+                    break;
+                default:
+                    dvmAbort();
             }
 
             /* Materialize pointer to retval & push */
@@ -3355,7 +3479,6 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
                 break;
             }
         }
-
         /* Eliminate redundant loads/stores and delay stores into later slots */
         dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
                                            cUnit->lastLIRInsn);
@@ -3444,45 +3567,6 @@ void *dvmCompilerDoWork(CompilerWorkOrder *work)
    return res;
 }
 
-/* Architecture-specific initializations and checks go here */
-bool dvmCompilerArchInit(void)
-{
-    /* First, declare dvmCompiler_TEMPLATE_XXX for each template */
-#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
-#include "../../template/armv5te/TemplateOpList.h"
-#undef JIT_TEMPLATE
-
-    int i = 0;
-    extern void dvmCompilerTemplateStart(void);
-
-    /*
-     * Then, populate the templateEntryOffsets array with the offsets from the
-     * the dvmCompilerTemplateStart symbol for each template.
-     */
-#define JIT_TEMPLATE(X) templateEntryOffsets[i++] = \
-    (intptr_t) dvmCompiler_TEMPLATE_##X - (intptr_t) dvmCompilerTemplateStart;
-#include "../../template/armv5te/TemplateOpList.h"
-#undef JIT_TEMPLATE
-
-    /* Codegen-specific assumptions */
-    assert(offsetof(ClassObject, vtable) < 128 &&
-           (offsetof(ClassObject, vtable) & 0x3) == 0);
-    assert(offsetof(ArrayObject, length) < 128 &&
-           (offsetof(ArrayObject, length) & 0x3) == 0);
-    assert(offsetof(ArrayObject, contents) < 256);
-
-    /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */
-    assert(sizeof(StackSaveArea) < 236);
-
-    /*
-     * EA is calculated by doing "Rn + imm5 << 2", and there are 5 entry points
-     * that codegen may access, make sure that the offset from the top of the
-     * struct is less than 108.
-     */
-    assert(offsetof(InterpState, jitToInterpEntries) < 108);
-    return true;
-}
-
 /* Architectural-specific debugging helpers go here */
 void dvmCompilerArchDump(void)
 {
@@ -3528,41 +3612,3 @@ void dvmCompilerArchDump(void)
         LOGD("dalvik.vm.jit.op = %s", buf);
     }
 }
-
-/*
- * Exported version of loadValueAddress
- * TODO: revisit source file structure
- */
-void dvmCompilerLoadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest)
-{
-    loadValueAddress(cUnit, vSrc, rDest);
-}
-
-/*
- * Exported version of genDispatchToHandler
- * TODO: revisit source file structure
- */
-void dvmCompilerGenDispatchToHandler(CompilationUnit *cUnit,
-                                     TemplateOpCode opCode)
-{
-    genDispatchToHandler(cUnit, opCode);
-}
-
-/*
- * Exported version of loadValue
- * TODO: revisit source file structure
- */
-void dvmCompilerLoadValue(CompilationUnit *cUnit, int vSrc, int rDest)
-{
-    loadValue(cUnit, vSrc, rDest);
-}
-
-/*
- * Exported version of storeValue
- * TODO: revisit source file structure
- */
-void dvmCompilerStoreValue(CompilationUnit *cUnit, int rSrc, int vDest,
-                       int rScratch)
-{
-    storeValue(cUnit, rSrc, vDest, rScratch);
-}
diff --git a/vm/compiler/codegen/armv5te/Codegen.h b/vm/compiler/codegen/armv5te/Codegen.h
deleted file mode 100644
index f156e600e..000000000
--- a/vm/compiler/codegen/armv5te/Codegen.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Dalvik.h"
-#include "compiler/CompilerInternals.h"
-#include "Armv5teLIR.h"
-
-#ifndef _DALVIK_VM_COMPILER_CODEGEN_CODEGEN_H
-#define _DALVIK_VM_COMPILER_CODEGEN_CODEGEN_H
-
-bool dvmCompilerGenConversionPortable(CompilationUnit *cUnit, MIR *mir);
-bool dvmCompilerGenArithOpFloatPortable(CompilationUnit *cUnit, MIR *mir,
-                                        int vDest, int vSrc1, int vSrc2);
-bool dvmCompilerGenArithOpDoublePortable(CompilationUnit *cUnit, MIR *mir,
-                                         int vDest, int vSrc1, int vSrc2);
-void dvmCompilerLoadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest);
-void dvmCompilerGenDispatchToHandler(CompilationUnit *cUnit,
-                                     TemplateOpCode opCode);
-void dvmCompilerLoadValue(CompilationUnit *cUnit, int vSrc, int rDest);
-void dvmCompilerStoreValue(CompilationUnit *cUnit, int rSrc, int vDest,
-                           int rScratch);
-
-#endif /* _DALVIK_VM_COMPILER_CODEGEN_CODEGEN_H */
diff --git a/vm/compiler/codegen/armv5te/FpCodegen-armv5te-vfp.c b/vm/compiler/codegen/armv5te/FpCodegen-armv5te-vfp.c
deleted file mode 100644
index a2dc3bfaf..000000000
--- a/vm/compiler/codegen/armv5te/FpCodegen-armv5te-vfp.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Dalvik.h"
-#include "Armv5teLIR.h"
-#include "Codegen.h"
-
-bool dvmCompilerGenArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
-                                int vSrc1, int vSrc2)
-{
-    TemplateOpCode opCode;
-
-    /*
-     * Don't attempt to optimize register usage since these opcodes call out to
-     * the handlers.
-     */
-    switch (mir->dalvikInsn.opCode) {
-        case OP_ADD_FLOAT_2ADDR:
-        case OP_ADD_FLOAT:
-            opCode = TEMPLATE_ADD_FLOAT_VFP;
-            break;
-        case OP_SUB_FLOAT_2ADDR:
-        case OP_SUB_FLOAT:
-            opCode = TEMPLATE_SUB_FLOAT_VFP;
-        case OP_DIV_FLOAT_2ADDR:
-        case OP_DIV_FLOAT:
-            opCode = TEMPLATE_DIV_FLOAT_VFP;
-            break;
-        case OP_MUL_FLOAT_2ADDR:
-        case OP_MUL_FLOAT:
-            opCode = TEMPLATE_MUL_FLOAT_VFP;
-            break;
-        case OP_REM_FLOAT_2ADDR:
-        case OP_REM_FLOAT:
-        case OP_NEG_FLOAT: {
-            return dvmCompilerGenArithOpFloatPortable(cUnit, mir, vDest,
-                                                      vSrc1, vSrc2);
-        }
-        default:
-            return true;
-    }
-    dvmCompilerLoadValueAddress(cUnit, vDest, r0);
-    dvmCompilerLoadValueAddress(cUnit, vSrc1, r1);
-    dvmCompilerLoadValueAddress(cUnit, vSrc2, r2);
-    dvmCompilerGenDispatchToHandler(cUnit, opCode);
-    return false;
-}
-
-bool dvmCompilerGenArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest,
-                                 int vSrc1, int vSrc2)
-{
-    TemplateOpCode opCode;
-
-    /*
-     * Don't attempt to optimize register usage since these opcodes call out to
-     * the handlers.
-     */
-    switch (mir->dalvikInsn.opCode) {
-        case OP_ADD_DOUBLE_2ADDR:
-        case OP_ADD_DOUBLE:
-            opCode = TEMPLATE_ADD_DOUBLE_VFP;
-            break;
-        case OP_SUB_DOUBLE_2ADDR:
-        case OP_SUB_DOUBLE:
-            opCode = TEMPLATE_SUB_DOUBLE_VFP;
-            break;
-        case OP_DIV_DOUBLE_2ADDR:
-        case OP_DIV_DOUBLE:
-            opCode = TEMPLATE_DIV_DOUBLE_VFP;
-            break;
-        case OP_MUL_DOUBLE_2ADDR:
-        case OP_MUL_DOUBLE:
-            opCode = TEMPLATE_MUL_DOUBLE_VFP;
-            break;
-        case OP_REM_DOUBLE_2ADDR:
-        case OP_REM_DOUBLE:
-        case OP_NEG_DOUBLE: {
-            return dvmCompilerGenArithOpDoublePortable(cUnit, mir, vDest,
-                                                       vSrc1, vSrc2);
-        }
-        default:
-            return true;
-    }
-    dvmCompilerLoadValueAddress(cUnit, vDest, r0);
-    dvmCompilerLoadValueAddress(cUnit, vSrc1, r1);
-    dvmCompilerLoadValueAddress(cUnit, vSrc2, r2);
-    dvmCompilerGenDispatchToHandler(cUnit, opCode);
-    return false;
-}
-
-bool dvmCompilerGenConversion(CompilationUnit *cUnit, MIR *mir)
-{
-    OpCode opCode = mir->dalvikInsn.opCode;
-    int vSrc1Dest = mir->dalvikInsn.vA;
-    int vSrc2 = mir->dalvikInsn.vB;
-    TemplateOpCode template;
-
-    switch (opCode) {
-        case OP_INT_TO_FLOAT:
-            template = TEMPLATE_INT_TO_FLOAT_VFP;
-            break;
-        case OP_FLOAT_TO_INT:
-            template = TEMPLATE_FLOAT_TO_INT_VFP;
-            break;
-        case OP_DOUBLE_TO_FLOAT:
-            template = TEMPLATE_DOUBLE_TO_FLOAT_VFP;
-            break;
-        case OP_FLOAT_TO_DOUBLE:
-            template = TEMPLATE_FLOAT_TO_DOUBLE_VFP;
-            break;
-        case OP_INT_TO_DOUBLE:
-            template = TEMPLATE_INT_TO_DOUBLE_VFP;
-            break;
-        case OP_DOUBLE_TO_INT:
-            template = TEMPLATE_DOUBLE_TO_INT_VFP;
-            break;
-        case OP_FLOAT_TO_LONG:
-        case OP_LONG_TO_FLOAT:
-        case OP_DOUBLE_TO_LONG:
-        case OP_LONG_TO_DOUBLE:
-            return dvmCompilerGenConversionPortable(cUnit, mir);
-        default:
-            return true;
-    }
-    dvmCompilerLoadValueAddress(cUnit, vSrc1Dest, r0);
-    dvmCompilerLoadValueAddress(cUnit, vSrc2, r1);
-    dvmCompilerGenDispatchToHandler(cUnit, template);
-    return false;
-}
-
-bool dvmCompilerGenCmpX(CompilationUnit *cUnit, MIR *mir, int vDest,
-                                int vSrc1, int vSrc2)
-{
-    TemplateOpCode template;
-
-    /*
-     * Don't attempt to optimize register usage since these opcodes call out to
-     * the handlers.
-     */
-    switch(mir->dalvikInsn.opCode) {
-        case OP_CMPL_FLOAT:
-            template = TEMPLATE_CMPL_FLOAT_VFP;
-            break;
-        case OP_CMPG_FLOAT:
-            template = TEMPLATE_CMPG_FLOAT_VFP;
-            break;
-        case OP_CMPL_DOUBLE:
-            template = TEMPLATE_CMPL_DOUBLE_VFP;
-            break;
-        case OP_CMPG_DOUBLE:
-            template = TEMPLATE_CMPG_DOUBLE_VFP;
-            break;
-        default:
-            return true;
-    }
-    dvmCompilerLoadValueAddress(cUnit, vSrc1, r0);
-    dvmCompilerLoadValueAddress(cUnit, vSrc2, r1);
-    dvmCompilerGenDispatchToHandler(cUnit, template);
-    dvmCompilerStoreValue(cUnit, r0, vDest, r1);
-    return false;
-}
diff --git a/vm/compiler/codegen/armv5te/FpCodegen-armv5te.c b/vm/compiler/codegen/armv5te/FpCodegen-armv5te.c
deleted file mode 100644
index b69824d34..000000000
--- a/vm/compiler/codegen/armv5te/FpCodegen-armv5te.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Dalvik.h"
-#include "Armv5teLIR.h"
-#include "Codegen.h"
-
-bool dvmCompilerGenConversion(CompilationUnit *cUnit, MIR *mir)
-{
-    return dvmCompilerGenConversionPortable(cUnit, mir);
-}
-
-bool dvmCompilerGenArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
-                        int vSrc1, int vSrc2)
-{
-    return dvmCompilerGenArithOpFloatPortable(cUnit, mir, vDest, vSrc1, vSrc2);
-}
-
-bool dvmCompilerGenArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest,
-                         int vSrc1, int vSrc2)
-{
-    return dvmCompilerGenArithOpDoublePortable(cUnit, mir, vDest, vSrc1, vSrc2);
-}
-
-bool dvmCompilerGenCmpX(CompilationUnit *cUnit, MIR *mir, int vDest,
-                                int vSrc1, int vSrc2)
-{
-    /*
-     * Don't attempt to optimize register usage since these opcodes call out to
-     * the handlers.
-     */
-    switch (mir->dalvikInsn.opCode) {
-        case OP_CMPL_FLOAT:
-            dvmCompilerLoadValue(cUnit, vSrc1, r0);
-            dvmCompilerLoadValue(cUnit, vSrc2, r1);
-            dvmCompilerGenDispatchToHandler(cUnit, TEMPLATE_CMPL_FLOAT);
-            dvmCompilerStoreValue(cUnit, r0, vDest, r1);
-            break;
-        case OP_CMPG_FLOAT:
-            dvmCompilerLoadValue(cUnit, vSrc1, r0);
-            dvmCompilerLoadValue(cUnit, vSrc2, r1);
-            dvmCompilerGenDispatchToHandler(cUnit, TEMPLATE_CMPG_FLOAT);
-            dvmCompilerStoreValue(cUnit, r0, vDest, r1);
-            break;
-        case OP_CMPL_DOUBLE:
-            dvmCompilerLoadValueAddress(cUnit, vSrc1, r0);
-            dvmCompilerLoadValueAddress(cUnit, vSrc2, r1);
-            dvmCompilerGenDispatchToHandler(cUnit, TEMPLATE_CMPL_DOUBLE);
-            dvmCompilerStoreValue(cUnit, r0, vDest, r1);
-            break;
-        case OP_CMPG_DOUBLE:
-            dvmCompilerLoadValueAddress(cUnit, vSrc1, r0);
-            dvmCompilerLoadValueAddress(cUnit, vSrc2, r1);
-            dvmCompilerGenDispatchToHandler(cUnit, TEMPLATE_CMPG_DOUBLE);
-            dvmCompilerStoreValue(cUnit, r0, vDest, r1);
-            break;
-        default:
-            return true;
-    }
-    return false;
-}
diff --git a/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.c
new file mode 100644
index 000000000..4c978979e
--- /dev/null
+++ b/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file is included by Codegen-armv5te-vfp.c, and implements architecture
+ * variant-specific code.
+ */
+
+#define USE_IN_CACHE_HANDLER 1
+
+/*
+ * Jump to the out-of-line handler in ARM mode to finish executing the
+ * remainder of more complex instructions.
+ */
+static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode)
+{
+#if USE_IN_CACHE_HANDLER
+    /*
+     * NOTE - In practice BLX only needs one operand, but since the assembler
+     * may abort itself and retry due to other out-of-range conditions we
+     * cannot really use operand[0] to store the absolute target address since
+     * it may get clobbered by the final relative offset. Therefore,
+     * we pretend BLX_1 is a two-operand instruction and the absolute target
+     * address is stored in operand[1].
+     */
+    newLIR2(cUnit, ARMV5TE_BLX_1,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
+    newLIR2(cUnit, ARMV5TE_BLX_2,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
+#else
+    /*
+     * In case we want to access the statically compiled handlers for
+     * debugging purposes, define USE_IN_CACHE_HANDLER to 0.
+     */
+    void *templatePtr;
+
+#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
+#include "../../../template/armv5te-vfp/TemplateOpList.h"
+#undef JIT_TEMPLATE
+    switch (opCode) {
+#define JIT_TEMPLATE(X) \
+        case TEMPLATE_##X: { templatePtr = dvmCompiler_TEMPLATE_##X; break; }
+#include "../../../template/armv5te-vfp/TemplateOpList.h"
+#undef JIT_TEMPLATE
+        default: templatePtr = NULL;
+    }
+    loadConstant(cUnit, r7, (int) templatePtr);
+    newLIR1(cUnit, ARMV5TE_BLX_R, r7);
+#endif
+}
+
+/* Architecture-specific initializations and checks go here; returns true */
+bool dvmCompilerArchInit(void)
+{
+    /* First, declare dvmCompiler_TEMPLATE_XXX for each template */
+#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
+#include "../../../template/armv5te-vfp/TemplateOpList.h"
+#undef JIT_TEMPLATE
+
+    int i = 0;
+    extern void dvmCompilerTemplateStart(void);
+
+    /*
+     * Then, populate the templateEntryOffsets array with the offset of each
+     * template from the dvmCompilerTemplateStart symbol.
+     */
+#define JIT_TEMPLATE(X) templateEntryOffsets[i++] = \
+    (intptr_t) dvmCompiler_TEMPLATE_##X - (intptr_t) dvmCompilerTemplateStart;
+#include "../../../template/armv5te-vfp/TemplateOpList.h"
+#undef JIT_TEMPLATE
+
+    /* Codegen-specific assumptions */
+    assert(offsetof(ClassObject, vtable) < 128 &&
+           (offsetof(ClassObject, vtable) & 0x3) == 0);
+    assert(offsetof(ArrayObject, length) < 128 &&
+           (offsetof(ArrayObject, length) & 0x3) == 0);
+    assert(offsetof(ArrayObject, contents) < 256);
+
+    /* Up to 5 args are pushed on top of FP - sizeof(StackSaveArea) */
+    assert(sizeof(StackSaveArea) < 236);
+
+    /*
+     * EA is calculated by doing "Rn + imm5 << 2", and there are 5 entry points
+     * that codegen may access; make sure that the offset from the top of the
+     * struct is less than 108.
+     */
+    assert(offsetof(InterpState, jitToInterpEntries) < 108);
+    return true;
+}
+
+/* Generate an inline sqrt via the TEMPLATE_SQRT_DOUBLE_VFP handler. */
+static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir)
+{
+    int offset = offsetof(InterpState, retval);
+    loadValueAddress(cUnit, mir->dalvikInsn.vA, r2);
+    genDispatchToHandler(cUnit, TEMPLATE_SQRT_DOUBLE_VFP);
+    /* Store r0/r1 to the two words at rGLUE + offsetof(InterpState, retval) */
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rGLUE, offset >> 2);
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, r1, rGLUE, (offset >> 2) + 1);
+    return false;
+}
+
+static bool genInlineCos(CompilationUnit *cUnit, MIR *mir)
+{
+    return false;   /* no code emitted; presumably punts to the C handler */
+}
+
+static bool genInlineSin(CompilationUnit *cUnit, MIR *mir)
+{
+    return false;   /* no code emitted; presumably punts to the C handler */
+}
+
+static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
+                                int vSrc1, int vSrc2)
+{
+    TemplateOpCode opCode;
+
+    /*
+     * Don't attempt to optimize register usage since these opcodes call out to
+     * the handlers.
+     */
+    switch (mir->dalvikInsn.opCode) {
+        case OP_ADD_FLOAT_2ADDR:
+        case OP_ADD_FLOAT:
+            opCode = TEMPLATE_ADD_FLOAT_VFP;
+            break;
+        case OP_SUB_FLOAT_2ADDR:
+        case OP_SUB_FLOAT:
+            opCode = TEMPLATE_SUB_FLOAT_VFP;
+            break;
+        case OP_DIV_FLOAT_2ADDR:
+        case OP_DIV_FLOAT:
+            opCode = TEMPLATE_DIV_FLOAT_VFP;
+            break;
+        case OP_MUL_FLOAT_2ADDR:
+        case OP_MUL_FLOAT:
+            opCode = TEMPLATE_MUL_FLOAT_VFP;
+            break;
+        case OP_REM_FLOAT_2ADDR:
+        case OP_REM_FLOAT:
+        case OP_NEG_FLOAT: {
+            return genArithOpFloatPortable(cUnit, mir, vDest,
+                                                      vSrc1, vSrc2);
+        }
+        default:
+            return true;
+    }
+    loadValueAddress(cUnit, vDest, r0);
+    loadValueAddress(cUnit, vSrc1, r1);
+    loadValueAddress(cUnit, vSrc2, r2);
+    genDispatchToHandler(cUnit, opCode);
+    return false;
+}
+
+static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest,
+                             int vSrc1, int vSrc2)
+{
+    TemplateOpCode opCode;
+
+    /*
+     * Don't attempt to optimize register usage since these opcodes call out to
+     * the handlers.
+     */
+    switch (mir->dalvikInsn.opCode) {
+        case OP_ADD_DOUBLE_2ADDR:
+        case OP_ADD_DOUBLE:
+            opCode = TEMPLATE_ADD_DOUBLE_VFP;
+            break;
+        case OP_SUB_DOUBLE_2ADDR:
+        case OP_SUB_DOUBLE:
+            opCode = TEMPLATE_SUB_DOUBLE_VFP;
+            break;
+        case OP_DIV_DOUBLE_2ADDR:
+        case OP_DIV_DOUBLE:
+            opCode = TEMPLATE_DIV_DOUBLE_VFP;
+            break;
+        case OP_MUL_DOUBLE_2ADDR:
+        case OP_MUL_DOUBLE:
+            opCode = TEMPLATE_MUL_DOUBLE_VFP;
+            break;
+        case OP_REM_DOUBLE_2ADDR:
+        case OP_REM_DOUBLE:
+        case OP_NEG_DOUBLE: {
+            return genArithOpDoublePortable(cUnit, mir, vDest,
+                                                       vSrc1, vSrc2);
+        }
+        default:
+            return true;
+    }
+    loadValueAddress(cUnit, vDest, r0);
+    loadValueAddress(cUnit, vSrc1, r1);
+    loadValueAddress(cUnit, vSrc2, r2);
+    genDispatchToHandler(cUnit, opCode);
+    return false;
+}
+
+static bool genConversion(CompilationUnit *cUnit, MIR *mir)
+{
+    OpCode opCode = mir->dalvikInsn.opCode;
+    int vSrc1Dest = mir->dalvikInsn.vA;
+    int vSrc2 = mir->dalvikInsn.vB;
+    TemplateOpCode template;
+
+    switch (opCode) {
+        case OP_INT_TO_FLOAT:
+            template = TEMPLATE_INT_TO_FLOAT_VFP;
+            break;
+        case OP_FLOAT_TO_INT:
+            template = TEMPLATE_FLOAT_TO_INT_VFP;
+            break;
+        case OP_DOUBLE_TO_FLOAT:
+            template = TEMPLATE_DOUBLE_TO_FLOAT_VFP;
+            break;
+        case OP_FLOAT_TO_DOUBLE:
+            template = TEMPLATE_FLOAT_TO_DOUBLE_VFP;
+            break;
+        case OP_INT_TO_DOUBLE:
+            template = TEMPLATE_INT_TO_DOUBLE_VFP;
+            break;
+        case OP_DOUBLE_TO_INT:
+            template = TEMPLATE_DOUBLE_TO_INT_VFP;
+            break;
+        case OP_FLOAT_TO_LONG:
+        case OP_LONG_TO_FLOAT:
+        case OP_DOUBLE_TO_LONG:
+        case OP_LONG_TO_DOUBLE:
+            return genConversionPortable(cUnit, mir);
+        default:
+            return true;
+    }
+    loadValueAddress(cUnit, vSrc1Dest, r0);
+    loadValueAddress(cUnit, vSrc2, r1);
+    genDispatchToHandler(cUnit, template);
+    return false;
+}
+
+static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1,
+                    int vSrc2)
+{
+    TemplateOpCode template;
+
+    /*
+     * Don't attempt to optimize register usage since these opcodes call out to
+     * the handlers.
+     */
+    switch(mir->dalvikInsn.opCode) {
+        case OP_CMPL_FLOAT:
+            template = TEMPLATE_CMPL_FLOAT_VFP;
+            break;
+        case OP_CMPG_FLOAT:
+            template = TEMPLATE_CMPG_FLOAT_VFP;
+            break;
+        case OP_CMPL_DOUBLE:
+            template = TEMPLATE_CMPL_DOUBLE_VFP;
+            break;
+        case OP_CMPG_DOUBLE:
+            template = TEMPLATE_CMPG_DOUBLE_VFP;
+            break;
+        default:
+            return true;
+    }
+    loadValueAddress(cUnit, vSrc1, r0);
+    loadValueAddress(cUnit, vSrc2, r1);
+    genDispatchToHandler(cUnit, template);
+    storeValue(cUnit, r0, vDest, r1);
+    return false;
+}
diff --git a/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.h b/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.h
new file mode 100644
index 000000000..c444bc989
--- /dev/null
+++ b/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_ARMV5TE_VFP_ARCHVARIANT_H
+#define _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_ARMV5TE_VFP_ARCHVARIANT_H
+
+/* Create the TemplateOpCode enum: one TEMPLATE_X per JIT_TEMPLATE(X) entry */
+#define JIT_TEMPLATE(X) TEMPLATE_##X,
+typedef enum {
+#include "../../../template/armv5te-vfp/TemplateOpList.h"
+/*
+ * For example,
+ *     TEMPLATE_CMP_LONG,
+ *     TEMPLATE_RETURN,
+ *     ...
+ */
+    TEMPLATE_LAST_MARK,     /* sentinel following the last listed template */
+} TemplateOpCode;
+#undef JIT_TEMPLATE
+
+#endif /* _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_ARMV5TE_VFP_ARCHVARIANT_H */
diff --git a/vm/compiler/codegen/armv5te/armv5te/ArchVariant.c b/vm/compiler/codegen/armv5te/armv5te/ArchVariant.c
new file mode 100644
index 000000000..3d9d0141c
--- /dev/null
+++ b/vm/compiler/codegen/armv5te/armv5te/ArchVariant.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file is included by Codegen-armv5te.c, and implements architecture
+ * variant-specific code.
+ */
+
+#define USE_IN_CACHE_HANDLER 1
+
+/*
+ * Jump to the out-of-line handler in ARM mode to finish executing the
+ * remainder of more complex instructions.
+ */
+static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode)
+{
+#if USE_IN_CACHE_HANDLER
+    /*
+     * NOTE - In practice BLX only needs one operand, but since the assembler
+     * may abort itself and retry due to other out-of-range conditions we
+     * cannot really use operand[0] to store the absolute target address since
+     * it may get clobbered by the final relative offset. Therefore,
+     * we pretend BLX_1 is a two-operand instruction and the absolute target
+     * address is stored in operand[1].
+     */
+    newLIR2(cUnit, ARMV5TE_BLX_1,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
+    newLIR2(cUnit, ARMV5TE_BLX_2,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
+#else
+    /*
+     * In case we want to access the statically compiled handlers for
+     * debugging purposes, define USE_IN_CACHE_HANDLER to 0.
+     */
+    void *templatePtr;
+
+#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
+#include "../../../template/armv5te/TemplateOpList.h"
+#undef JIT_TEMPLATE
+    switch (opCode) {
+#define JIT_TEMPLATE(X) \
+        case TEMPLATE_##X: { templatePtr = dvmCompiler_TEMPLATE_##X; break; }
+#include "../../../template/armv5te/TemplateOpList.h"
+#undef JIT_TEMPLATE
+        default: templatePtr = NULL;
+    }
+    loadConstant(cUnit, r7, (int) templatePtr);
+    newLIR1(cUnit, ARMV5TE_BLX_R, r7);
+#endif
+}
+
+/* Architecture-specific initializations and checks go here; returns true */
+bool dvmCompilerArchInit(void)
+{
+    /* First, declare dvmCompiler_TEMPLATE_XXX for each template */
+#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
+#include "../../../template/armv5te/TemplateOpList.h"
+#undef JIT_TEMPLATE
+
+    int i = 0;
+    extern void dvmCompilerTemplateStart(void);
+
+    /*
+     * Then, populate the templateEntryOffsets array with the offset of each
+     * template from the dvmCompilerTemplateStart symbol.
+     */
+#define JIT_TEMPLATE(X) templateEntryOffsets[i++] = \
+    (intptr_t) dvmCompiler_TEMPLATE_##X - (intptr_t) dvmCompilerTemplateStart;
+#include "../../../template/armv5te/TemplateOpList.h"
+#undef JIT_TEMPLATE
+
+    /* Codegen-specific assumptions */
+    assert(offsetof(ClassObject, vtable) < 128 &&
+           (offsetof(ClassObject, vtable) & 0x3) == 0);
+    assert(offsetof(ArrayObject, length) < 128 &&
+           (offsetof(ArrayObject, length) & 0x3) == 0);
+    assert(offsetof(ArrayObject, contents) < 256);
+
+    /* Up to 5 args are pushed on top of FP - sizeof(StackSaveArea) */
+    assert(sizeof(StackSaveArea) < 236);
+
+    /*
+     * EA is calculated by doing "Rn + imm5 << 2", and there are 5 entry points
+     * that codegen may access; make sure that the offset from the top of the
+     * struct is less than 108.
+     */
+    assert(offsetof(InterpState, jitToInterpEntries) < 108);
+    return true;
+}
+
+static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir)
+{
+    return false;   /* punt to C handler */
+}
+
+static bool genInlineCos(CompilationUnit *cUnit, MIR *mir)
+{
+    return false;   /* punt to C handler */
+}
+
+static bool genInlineSin(CompilationUnit *cUnit, MIR *mir)
+{
+    return false;   /* punt to C handler */
+}
+
+static bool genConversion(CompilationUnit *cUnit, MIR *mir)
+{
+    return genConversionPortable(cUnit, mir);
+}
+
+static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
+                        int vSrc1, int vSrc2)
+{
+    return genArithOpFloatPortable(cUnit, mir, vDest, vSrc1, vSrc2);
+}
+
+static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest,
+                      int vSrc1, int vSrc2)
+{
+    return genArithOpDoublePortable(cUnit, mir, vDest, vSrc1, vSrc2);
+}
+
+static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1,
+                    int vSrc2)
+{
+    /*
+     * Don't attempt to optimize register usage since these opcodes call out to
+     * the handlers.
+     */
+    switch (mir->dalvikInsn.opCode) {
+        case OP_CMPL_FLOAT:
+            loadValue(cUnit, vSrc1, r0);
+            loadValue(cUnit, vSrc2, r1);
+            genDispatchToHandler(cUnit, TEMPLATE_CMPL_FLOAT);
+            storeValue(cUnit, r0, vDest, r1);
+            break;
+        case OP_CMPG_FLOAT:
+            loadValue(cUnit, vSrc1, r0);
+            loadValue(cUnit, vSrc2, r1);
+            genDispatchToHandler(cUnit, TEMPLATE_CMPG_FLOAT);
+            storeValue(cUnit, r0, vDest, r1);
+            break;
+        case OP_CMPL_DOUBLE:
+            loadValueAddress(cUnit, vSrc1, r0);
+            loadValueAddress(cUnit, vSrc2, r1);
+            genDispatchToHandler(cUnit, TEMPLATE_CMPL_DOUBLE);
+            storeValue(cUnit, r0, vDest, r1);
+            break;
+        case OP_CMPG_DOUBLE:
+            loadValueAddress(cUnit, vSrc1, r0);
+            loadValueAddress(cUnit, vSrc2, r1);
+            genDispatchToHandler(cUnit, TEMPLATE_CMPG_DOUBLE);
+            storeValue(cUnit, r0, vDest, r1);
+            break;
+        default:
+            return true;
+    }
+    return false;
+}
diff --git a/vm/compiler/codegen/armv5te/armv5te/ArchVariant.h b/vm/compiler/codegen/armv5te/armv5te/ArchVariant.h
new file mode 100644
index 000000000..23f248385
--- /dev/null
+++ b/vm/compiler/codegen/armv5te/armv5te/ArchVariant.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_ARMV5TE_ARCHVARIANT_H
+#define _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_ARMV5TE_ARCHVARIANT_H
+
+/* Create the TemplateOpCode enum: one TEMPLATE_X per JIT_TEMPLATE(X) entry */
+#define JIT_TEMPLATE(X) TEMPLATE_##X,
+typedef enum {
+#include "../../../template/armv5te/TemplateOpList.h"
+/*
+ * For example,
+ *     TEMPLATE_CMP_LONG,
+ *     TEMPLATE_RETURN,
+ *     ...
+ */
+    TEMPLATE_LAST_MARK,     /* sentinel following the last listed template */
+} TemplateOpCode;
+#undef JIT_TEMPLATE
+
+#endif /* _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_ARMV5TE_ARCHVARIANT_H */
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S
new file mode 100644
index 000000000..51693fa0e
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinopWide.S" {"instr":"faddd   d2, d0, d1"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S
new file mode 100644
index 000000000..ad1e12211
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinop.S" {"instr":"fadds   s2, s0, s1"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPG_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S
similarity index 100%
rename from vm/compiler/template/armv5te/TEMPLATE_CMPG_DOUBLE_VFP.S
rename to vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S
diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S
similarity index 100%
rename from vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT_VFP.S
rename to vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S
diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPL_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S
similarity index 100%
rename from vm/compiler/template/armv5te/TEMPLATE_CMPL_DOUBLE_VFP.S
rename to vm/compiler/template/armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S
diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S
similarity index 100%
rename from vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT_VFP.S
rename to vm/compiler/template/armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S
new file mode 100644
index 000000000..8fa58b86a
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinopWide.S" {"instr":"fdivd   d2, d0, d1"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S
new file mode 100644
index 000000000..fc125ce6a
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinop.S" {"instr":"fdivs   s2, s0, s1"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S
new file mode 100644
index 000000000..dba3b082f
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funopNarrower.S" {"instr":"fcvtsd  s0, d0"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S
new file mode 100644
index 000000000..4d910aadd
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funopNarrower.S" {"instr":"ftosizd  s0, d0"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S
new file mode 100644
index 000000000..a5157dd8b
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funopWider.S" {"instr":"fcvtds  d0, s0"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S
new file mode 100644
index 000000000..90900aa1b
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funop.S" {"instr":"ftosizs s1, s0"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S
new file mode 100644
index 000000000..c9f4fd688
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funopWider.S" {"instr":"fsitod  d0, s0"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S
new file mode 100644
index 000000000..a8f57b505
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funop.S" {"instr":"fsitos  s1, s0"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S
new file mode 100644
index 000000000..459e7960e
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinopWide.S" {"instr":"fmuld   d2, d0, d1"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S
new file mode 100644
index 000000000..301fa8436
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinop.S" {"instr":"fmuls   s2, s0, s1"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S
new file mode 100644
index 000000000..1c6bb467b
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S
@@ -0,0 +1,23 @@
+%verify "executed"
+    /*
+     * 64-bit floating point vfp sqrt operation.
+     * If the result is a NaN, bail out to library code to do
+     * the right thing.
+     *
+     * On entry:
+     *     r2 = address of the source operand (op1)
+     * On exit:
+     *     r0,r1 = result
+     */
+    fldd    d0, [r2]   @ d0<- operand at [r2]
+    fsqrtd  d1, d0   @ d1<- sqrt(d0)
+    fcmpd   d1, d1   @ result vs itself: not equal only for NaN
+    fmstat   @ copy the VFP compare flags to the ARM flags
+    fmrrd   r0, r1, d1   @ r0,r1<- result
+    bxeq    lr   @ Result OK - return
+    ldr     r2, .Lsqrt   @ r2<- address of sqrt
+    fmrrd   r0, r1, d0   @ reload orig operand
+    bx      r2   @ tail call to sqrt library routine
+
+.Lsqrt:
+    .word   sqrt
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S
new file mode 100644
index 000000000..8fa20a028
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinopWide.S" {"instr":"fsubd   d2, d0, d1"}
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S
new file mode 100644
index 000000000..5e17e514c
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinop.S" {"instr":"fsubs   s2, s0, s1"}
diff --git a/vm/compiler/template/armv5te-vfp/TemplateOpList.h b/vm/compiler/template/armv5te-vfp/TemplateOpList.h
new file mode 100644
index 000000000..c95163c3c
--- /dev/null
+++ b/vm/compiler/template/armv5te-vfp/TemplateOpList.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * List of code templates the JIT uses to complete Dalvik opcode execution.
+ */
+#ifndef JIT_TEMPLATE
+#define JIT_TEMPLATE(X) /* default: every entry below expands to nothing */
+#endif
+
+JIT_TEMPLATE(CMP_LONG)
+JIT_TEMPLATE(RETURN)
+JIT_TEMPLATE(INVOKE_METHOD_NO_OPT)
+JIT_TEMPLATE(INVOKE_METHOD_CHAIN)
+JIT_TEMPLATE(INVOKE_METHOD_PREDICTED_CHAIN)
+JIT_TEMPLATE(INVOKE_METHOD_NATIVE)
+JIT_TEMPLATE(MUL_LONG)
+JIT_TEMPLATE(SHL_LONG)
+JIT_TEMPLATE(SHR_LONG)
+JIT_TEMPLATE(USHR_LONG)
+JIT_TEMPLATE(ADD_FLOAT_VFP)
+JIT_TEMPLATE(SUB_FLOAT_VFP)
+JIT_TEMPLATE(MUL_FLOAT_VFP)
+JIT_TEMPLATE(DIV_FLOAT_VFP)
+JIT_TEMPLATE(ADD_DOUBLE_VFP)
+JIT_TEMPLATE(SUB_DOUBLE_VFP)
+JIT_TEMPLATE(MUL_DOUBLE_VFP)
+JIT_TEMPLATE(DIV_DOUBLE_VFP)
+JIT_TEMPLATE(DOUBLE_TO_FLOAT_VFP)
+JIT_TEMPLATE(DOUBLE_TO_INT_VFP)
+JIT_TEMPLATE(FLOAT_TO_DOUBLE_VFP)
+JIT_TEMPLATE(FLOAT_TO_INT_VFP)
+JIT_TEMPLATE(INT_TO_DOUBLE_VFP)
+JIT_TEMPLATE(INT_TO_FLOAT_VFP)
+JIT_TEMPLATE(CMPG_DOUBLE_VFP)
+JIT_TEMPLATE(CMPL_DOUBLE_VFP)
+JIT_TEMPLATE(CMPG_FLOAT_VFP)
+JIT_TEMPLATE(CMPL_FLOAT_VFP)
+JIT_TEMPLATE(SQRT_DOUBLE_VFP)
diff --git a/vm/compiler/template/armv5te/fbinop.S b/vm/compiler/template/armv5te-vfp/fbinop.S
similarity index 100%
rename from vm/compiler/template/armv5te/fbinop.S
rename to vm/compiler/template/armv5te-vfp/fbinop.S
diff --git a/vm/compiler/template/armv5te/fbinopWide.S b/vm/compiler/template/armv5te-vfp/fbinopWide.S
similarity index 100%
rename from vm/compiler/template/armv5te/fbinopWide.S
rename to vm/compiler/template/armv5te-vfp/fbinopWide.S
diff --git a/vm/compiler/template/armv5te/funop.S b/vm/compiler/template/armv5te-vfp/funop.S
similarity index 100%
rename from vm/compiler/template/armv5te/funop.S
rename to vm/compiler/template/armv5te-vfp/funop.S
diff --git a/vm/compiler/template/armv5te/funopNarrower.S b/vm/compiler/template/armv5te-vfp/funopNarrower.S
similarity index 100%
rename from vm/compiler/template/armv5te/funopNarrower.S
rename to vm/compiler/template/armv5te-vfp/funopNarrower.S
diff --git a/vm/compiler/template/armv5te/funopWider.S b/vm/compiler/template/armv5te-vfp/funopWider.S
similarity index 100%
rename from vm/compiler/template/armv5te/funopWider.S
rename to vm/compiler/template/armv5te-vfp/funopWider.S
diff --git a/vm/compiler/template/armv5te/TEMPLATE_ADD_DOUBLE_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_ADD_DOUBLE_VFP.S
deleted file mode 100644
index 7b4fa0153..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_ADD_DOUBLE_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/fbinopWide.S" {"instr":"faddd   d2, d0, d1"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_ADD_FLOAT_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_ADD_FLOAT_VFP.S
deleted file mode 100644
index 6e8077c24..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_ADD_FLOAT_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/fbinop.S" {"instr":"fadds   s2, s0, s1"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_DIV_DOUBLE_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_DIV_DOUBLE_VFP.S
deleted file mode 100644
index 796275a0a..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_DIV_DOUBLE_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/fbinopWide.S" {"instr":"fdivd   d2, d0, d1"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_DIV_FLOAT_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_DIV_FLOAT_VFP.S
deleted file mode 100644
index 5895b93b7..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_DIV_FLOAT_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/fbinop.S" {"instr":"fdivs   s2, s0, s1"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S
deleted file mode 100644
index 96f50c7fc..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/funopNarrower.S" {"instr":"fcvtsd  s0, d0"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_DOUBLE_TO_INT_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_DOUBLE_TO_INT_VFP.S
deleted file mode 100644
index f6353836c..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_DOUBLE_TO_INT_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/funopNarrower.S" {"instr":"ftosizd  s0, d0"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S
deleted file mode 100644
index a2d68bde5..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/funopWider.S" {"instr":"fcvtds  d0, s0"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_FLOAT_TO_INT_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_FLOAT_TO_INT_VFP.S
deleted file mode 100644
index bebff4389..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_FLOAT_TO_INT_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/funop.S" {"instr":"ftosizs s1, s0"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INT_TO_DOUBLE_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_INT_TO_DOUBLE_VFP.S
deleted file mode 100644
index 0a987ac5e..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_INT_TO_DOUBLE_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/funopWider.S" {"instr":"fsitod  d0, s0"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INT_TO_FLOAT_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_INT_TO_FLOAT_VFP.S
deleted file mode 100644
index 105a4a42d..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_INT_TO_FLOAT_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/funop.S" {"instr":"fsitos  s1, s0"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_MUL_DOUBLE_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_MUL_DOUBLE_VFP.S
deleted file mode 100644
index f9afa213c..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_MUL_DOUBLE_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/fbinopWide.S" {"instr":"fmuld   d2, d0, d1"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_MUL_FLOAT_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_MUL_FLOAT_VFP.S
deleted file mode 100644
index 066680328..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_MUL_FLOAT_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/fbinop.S" {"instr":"fmuls   s2, s0, s1"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_SUB_DOUBLE_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_SUB_DOUBLE_VFP.S
deleted file mode 100644
index 0c3dd4e0e..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_SUB_DOUBLE_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/fbinopWide.S" {"instr":"fsubd   d2, d0, d1"}
diff --git a/vm/compiler/template/armv5te/TEMPLATE_SUB_FLOAT_VFP.S b/vm/compiler/template/armv5te/TEMPLATE_SUB_FLOAT_VFP.S
deleted file mode 100644
index b7bb5b875..000000000
--- a/vm/compiler/template/armv5te/TEMPLATE_SUB_FLOAT_VFP.S
+++ /dev/null
@@ -1,2 +0,0 @@
-%verify "executed"
-%include "armv5te/fbinop.S" {"instr":"fsubs   s2, s0, s1"}
diff --git a/vm/compiler/template/armv5te/TemplateOpList.h b/vm/compiler/template/armv5te/TemplateOpList.h
index 3201dfd60..39cd07a98 100644
--- a/vm/compiler/template/armv5te/TemplateOpList.h
+++ b/vm/compiler/template/armv5te/TemplateOpList.h
@@ -35,21 +35,3 @@ JIT_TEMPLATE(MUL_LONG)
 JIT_TEMPLATE(SHL_LONG)
 JIT_TEMPLATE(SHR_LONG)
 JIT_TEMPLATE(USHR_LONG)
-JIT_TEMPLATE(ADD_FLOAT_VFP)
-JIT_TEMPLATE(SUB_FLOAT_VFP)
-JIT_TEMPLATE(MUL_FLOAT_VFP)
-JIT_TEMPLATE(DIV_FLOAT_VFP)
-JIT_TEMPLATE(ADD_DOUBLE_VFP)
-JIT_TEMPLATE(SUB_DOUBLE_VFP)
-JIT_TEMPLATE(MUL_DOUBLE_VFP)
-JIT_TEMPLATE(DIV_DOUBLE_VFP)
-JIT_TEMPLATE(DOUBLE_TO_FLOAT_VFP)
-JIT_TEMPLATE(DOUBLE_TO_INT_VFP)
-JIT_TEMPLATE(FLOAT_TO_DOUBLE_VFP)
-JIT_TEMPLATE(FLOAT_TO_INT_VFP)
-JIT_TEMPLATE(INT_TO_DOUBLE_VFP)
-JIT_TEMPLATE(INT_TO_FLOAT_VFP)
-JIT_TEMPLATE(CMPG_DOUBLE_VFP)
-JIT_TEMPLATE(CMPL_DOUBLE_VFP)
-JIT_TEMPLATE(CMPG_FLOAT_VFP)
-JIT_TEMPLATE(CMPL_FLOAT_VFP)
diff --git a/vm/compiler/template/config-armv5te-vfp b/vm/compiler/template/config-armv5te-vfp
new file mode 100644
index 000000000..628e75fa3
--- /dev/null
+++ b/vm/compiler/template/config-armv5te-vfp
@@ -0,0 +1,56 @@
+
+# Copyright (C) 2009 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for ARMv5TE architecture targets with VFP.
+#
+
+# file header and basic definitions
+#import c/header.c
+import armv5te/header.S
+
+# C pre-processor defines for stub C instructions
+#import cstubs/stubdefs.c
+
+# highly-platform-specific defs
+import armv5te/platform.S
+
+# common defs for the C helpers; include this before the instruction handlers
+#import c/opcommon.c
+
+# opcode list; argument to op-start is default directory
+op-start armv5te-vfp
+    op TEMPLATE_CMP_LONG armv5te
+    op TEMPLATE_INVOKE_METHOD_CHAIN armv5te
+    op TEMPLATE_INVOKE_METHOD_NATIVE armv5te
+    op TEMPLATE_INVOKE_METHOD_NO_OPT armv5te
+    op TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN armv5te
+    op TEMPLATE_MUL_LONG armv5te
+    op TEMPLATE_RETURN armv5te
+    op TEMPLATE_SHL_LONG armv5te
+    op TEMPLATE_SHR_LONG armv5te
+    op TEMPLATE_USHR_LONG armv5te
+
+op-end
+
+# "helper" code for C; include if you use any of the C stubs (this generates
+# object code, so it's normally excluded)
+##import c/gotoTargets.c
+
+# end of defs; include this when cstubs/stubdefs.c is included
+#import cstubs/enddefs.c
+
+# common subroutines for asm
+import armv5te/footer.S
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
new file mode 100644
index 000000000..22d6d78fb
--- /dev/null
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
@@ -0,0 +1,1064 @@
+/*
+ * This file was generated automatically by gen-template.py for 'armv5te-vfp'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: armv5te/header.S */
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(WITH_JIT)
+
+/*
+ * ARMv5 definitions and declarations.
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+JIT and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rGLUE     MterpGlue pointer
+
+The following registers have fixed assignments in mterp but are scratch
+registers in compiled code
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r7  rINST     first 16-bit code unit of current instruction
+  r8  rIBASE    interpreted instruction base pointer, used for computed goto
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/* single-purpose registers, given names for clarity */
+#define rPC     r4
+#define rFP     r5
+#define rGLUE   r6
+#define rINST   r7
+#define rIBASE  r8
+
+/*
+ * Given a frame pointer, find the stack save area.
+ *
+ * In C this is "((StackSaveArea*)(_fp) -1)".
+ */
+#define SAVEAREA_FROM_FP(_reg, _fpreg) \
+    sub     _reg, _fpreg, #sizeofStackSaveArea
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "../../../mterp/common/asm-constants.h"
+
+
+/* File: armv5te/platform.S */
+/*
+ * ===========================================================================
+ *  CPU-version-specific defines
+ * ===========================================================================
+ */
+
+/*
+ * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
+ * Jump to subroutine.
+ *
+ * Modifies LR (and PC); this two-instruction form does not write IP.
+ */
+.macro  LDR_PC_LR source
+    mov     lr, pc                      /* lr<- address following the ldr */
+    ldr     pc, \source                 /* jump to the address loaded from source */
+.endm
+
+
+    .global dvmCompilerTemplateStart
+    .type   dvmCompilerTemplateStart, %function
+    .text
+
+dvmCompilerTemplateStart:
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMP_LONG
+dvmCompiler_TEMPLATE_CMP_LONG:
+/* File: armv5te/TEMPLATE_CMP_LONG.S */
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     *
+     * The operands are expected in registers on entry: vBB in r0 (low) and
+     * r1 (high), vCC in r2 (low) and r3 (high).  The result is left in r0
+     * and the template returns through lr.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
+     */
+    /* cmp-long vAA, vBB, vCC */
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
+    bgt     .LTEMPLATE_CMP_LONG_greater
+    subs    r0, r0, r2                  @ r0<- r0 - r2
+    bxeq     lr                         @ low words equal too: r0 is already 0
+    bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
+.LTEMPLATE_CMP_LONG_less:
+    mvn     r0, #0                      @ r0<- -1
+    bx      lr
+.LTEMPLATE_CMP_LONG_greater:
+    mov     r0, #1                      @ r0<- 1
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_RETURN
+dvmCompiler_TEMPLATE_RETURN:
+/* File: armv5te/TEMPLATE_RETURN.S */
+    /*
+     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
+     * If saveArea->returnAddr is non-zero and no suspend is pending, the
+     * caller was compiled by the JIT, so return into the code cache via that
+     * address.  Otherwise go to dvmJitToInterpNoChain.  A break frame (null
+     * method in the previous save area) bails out through dvmMterpStdBail.
+     */
+    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
+    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
+    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
+    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
+                                        @ r2<- method we're returning to
+    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
+    cmp     r2, #0                      @ break frame?
+    beq     1f                          @ bail to interpreter
+    ldr     r0, .LdvmJitToInterpNoChain @ defined in footer.S
+    mov     rFP, r10                    @ publish new FP
+    ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz (ne holds here)
+    ldr     r8, [r8]                    @ r8<- suspendCount
+
+    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
+    ldr     r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex
+    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
+    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
+    str     r1, [rGLUE, #offGlue_methodClassDex]
+    cmp     r8, #0                      @ check the suspendCount
+    movne   r9, #0                      @ suspend pending: clear chaining cell addr
+    cmp     r9, #0                      @ chaining cell exists?
+    blxne   r9                          @ jump to the chaining cell
+    mov     pc, r0                      @ callsite is interpreted (r0 = NoChain)
+1:
+    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
+    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
+    mov     r1, #0                      @ changeInterp = false
+    mov     r0, rGLUE                   @ Expecting rGLUE in r0
+    blx     r2                          @ exit the interpreter
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
+dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
+    /*
+     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
+     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
+     * runtime-resolved callee.
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    lr                          @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0
+    bxne    lr                          @ bail to the interpreter
+    tst     r10, #ACC_NATIVE            @ is methodToCall native?
+    bne     .LinvokeNative              @ yes: take the native invoke path
+
+    ldr     r10, .LdvmJitToInterpNoChain    @ r10<- dispatch entry point
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
+
+    @ Update "glue" values for the new method
+    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
+    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
+
+    @ Start executing the callee
+    mov     pc, r10                         @ dvmJitToInterpNoChain
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
+dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
+    /*
+     * For monomorphic callsite, setup the Dalvik frame and return to the
+     * Thumb code through the link register to transfer control to the callee
+     * method through a dedicated chaining cell.
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    @ methodToCall is guaranteed to be non-native
+.LinvokeChain:
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    add     r12, lr, #2                 @ setup the punt-to-interp address
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    r12                         @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0
+    bxne    r12                         @ bail to the interpreter
+
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
+
+    @ Update "glue" values for the new method
+    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
+    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
+
+    bx      lr                              @ return to the callee-chaining cell
+
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
+dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
+    /*
+     * For polymorphic callsite, check whether the cached class pointer matches
+     * the current one. If so setup the Dalvik frame and return to the
+     * Thumb code through the link register to transfer control to the callee
+     * method through a dedicated chaining cell.
+     *
+     * The predicted chaining cell is declared in Armv5teLIR.h with the
+     * following layout:
+     *
+     *  typedef struct PredictedChainingCell {
+     *      u4 branch;
+     *      const ClassObject *clazz;
+     *      const Method *method;
+     *      u4 counter;
+     *  } PredictedChainingCell;
+     *
+     * Upon returning to the callsite:
+     *    - lr  : to branch to the chaining cell
+     *    - lr+2: to punt to the interpreter
+     *    - lr+4: to fully resolve the callee (and possibly rechain).
+     *            r3 <- class
+     *            r9 <- counter
+     */
+    @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
+    ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
+    ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
+    ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
+    ldr     r9, [r2, #12]   @ r9 <- predictedChainCell->counter
+    cmp     r3, r8          @ predicted class == actual class?
+    beq     .LinvokeChain   @ predicted chain is valid
+    ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
+    sub     r1, r9, #1      @ count--
+    str     r1, [r2, #12]   @ write back to PredictedChainingCell->counter
+    add     lr, lr, #4      @ return to fully-resolve landing pad
+    /*
+     * r1 <- count - 1 (already stored back to the cell)
+     * r2 <- &predictedChainCell
+     * r3 <- this->class
+     * r4 <- dPC
+     * r7 <- this->class->vtable
+     */
+    bx      lr              @ return to the lr+4 landing pad
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
+dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    lr                          @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0?
+    ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
+    bxne    lr                          @ yes: bail to the interpreter
+
+    @ go ahead and transfer control to the native code
+    ldr     r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
+    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
+    str     r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
+                                        @ newFp->localRefTop=refNext
+    mov     r9, r3                      @ r9<- glue->self (preserve)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
+
+    mov     r2, r0                      @ r2<- methodToCall
+    mov     r0, r1                      @ r0<- newFP
+    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
+
+    blx     r8                          @ off to the native code
+
+    @ native return; r9=self, r10=newSaveArea
+    @ equivalent to dvmPopJniLocals
+    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+    ldr     r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
+    ldr     r1, [r9, #offThread_exception] @ check for exception
+    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
+    cmp     r1, #0                      @ null?
+    str     r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0
+    bne     .LhandleException             @ no, handle exception
+    bx      r2                          @ return through the saved returnAddr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MUL_LONG
+dvmCompiler_TEMPLATE_MUL_LONG:
+/* File: armv5te/TEMPLATE_MUL_LONG.S */
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
+    /* mul-long vAA, vBB, vCC */
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    mov     r0,r9                       @  r0<- low word of the product
+    mov     r1,r10                      @  r1<- high word of the product
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SHL_LONG
+dvmCompiler_TEMPLATE_SHL_LONG:
+/* File: armv5te/TEMPLATE_SHL_LONG.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     */
+    /* shl-long vAA, vBB, vCC: value in r0/r1, shift in r2, result in r0/r1 */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SHR_LONG
+dvmCompiler_TEMPLATE_SHR_LONG:
+/* File: armv5te/TEMPLATE_SHR_LONG.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     */
+    /* shr-long vAA, vBB, vCC: value in r0/r1, shift in r2, result in r0/r1 */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_USHR_LONG
+dvmCompiler_TEMPLATE_USHR_LONG:
+/* File: armv5te/TEMPLATE_USHR_LONG.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     */
+    /* ushr-long vAA, vBB, vCC: value in r0/r1, shift in r2, result in r0/r1 */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
+dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
+/* File: armv5te-vfp/fbinop.S */
+    /*
+     * Generic 32-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs s2 = s0 op s1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     flds    s0,[r1]                    @ s0<- op1
+     flds    s1,[r2]                    @ s1<- op2
+     fadds   s2, s0, s1                 @ s2<- op1 + op2
+     fsts    s2,[r0]                    @ store the result to the target
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
+dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
+/* File: armv5te-vfp/fbinop.S */
+    /*
+     * Generic 32-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs s2 = s0 op s1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     flds    s0,[r1]                    @ s0<- op1
+     flds    s1,[r2]                    @ s1<- op2
+     fsubs   s2, s0, s1                 @ s2<- op1 - op2
+     fsts    s2,[r0]                    @ store the result to the target
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
+dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
+/* File: armv5te-vfp/fbinop.S */
+    /*
+     * Generic 32-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs s2 = s0 op s1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     flds    s0,[r1]                    @ s0<- op1
+     flds    s1,[r2]                    @ s1<- op2
+     fmuls   s2, s0, s1                 @ s2<- op1 * op2
+     fsts    s2,[r0]                    @ store the result to the target
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
+dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
+/* File: armv5te-vfp/fbinop.S */
+    /*
+     * Generic 32-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs s2 = s0 op s1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     flds    s0,[r1]                    @ s0<- op1
+     flds    s1,[r2]                    @ s1<- op2
+     fdivs   s2, s0, s1                 @ s2<- op1 / op2
+     fsts    s2,[r0]                    @ store the result to the target
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
+dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
+/* File: armv5te-vfp/fbinopWide.S */
+    /*
+     * Generic 64-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs d2 = d0 op d1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     fldd    d0,[r1]                    @ d0<- op1
+     fldd    d1,[r2]                    @ d1<- op2
+     faddd   d2, d0, d1                 @ d2<- op1 + op2
+     fstd    d2,[r0]                    @ store the result to the target
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
+dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
+/* File: armv5te-vfp/fbinopWide.S */
+    /*
+     * Generic 64-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs d2 = d0 op d1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     fldd    d0,[r1]                    @ d0<- op1
+     fldd    d1,[r2]                    @ d1<- op2
+     fsubd   d2, d0, d1                 @ d2<- op1 - op2
+     fstd    d2,[r0]                    @ store the result to the target
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
+dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
+/* File: armv5te-vfp/fbinopWide.S */
+    /*
+     * Generic 64-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs d2 = d0 op d1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     fldd    d0,[r1]                    @ d0<- op1
+     fldd    d1,[r2]                    @ d1<- op2
+     fmuld   d2, d0, d1                 @ d2<- op1 * op2
+     fstd    d2,[r0]                    @ store the result to the target
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
+dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
+/* File: armv5te-vfp/fbinopWide.S */
+    /*
+     * Generic 64-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs d2 = d0 op d1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     fldd    d0,[r1]                    @ d0<- op1
+     fldd    d1,[r2]                    @ d1<- op2
+     fdivd   d2, d0, d1                 @ d2<- op1 / op2
+     fstd    d2,[r0]                    @ store the result to the target
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
+dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
+/* File: armv5te-vfp/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    fldd    d0, [r1]                    @ d0<- vB
+    fcvtsd  s0, d0                              @ s0<- (float) d0
+    fsts    s0, [r0]                    @ vA<- s0
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
+dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
+/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
+/* File: armv5te-vfp/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    fldd    d0, [r1]                    @ d0<- vB
+    ftosizd  s0, d0                              @ s0<- (int) d0, round to zero
+    fsts    s0, [r0]                    @ vA<- s0 (integer bit pattern)
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
+dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
+/* File: armv5te-vfp/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB
+    fcvtds  d0, s0                              @ d0<- (double) s0
+    fstd    d0, [r0]                    @ vA<- d0
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
+dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
+/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
+/* File: armv5te-vfp/funop.S */
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s1 = op s0".
+     *
+     * For: float-to-int, int-to-float
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB
+    ftosizs s1, s0                              @ s1<- (int) s0, round to zero
+    fsts    s1, [r0]                    @ vA<- s1 (integer bit pattern)
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
+dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
+/* File: armv5te-vfp/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB (integer bit pattern)
+    fsitod  d0, s0                              @ d0<- (double) signed int s0
+    fstd    d0, [r0]                    @ vA<- d0
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
+dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
+/* File: armv5te-vfp/funop.S */
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s1 = op s0".
+     *
+     * For: float-to-int, int-to-float
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB (integer bit pattern)
+    fsitos  s1, s0                              @ s1<- (float) signed int s0
+    fsts    s1, [r0]                    @ vA<- s1
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
+dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
+    /*
+     * Compare two double-precision values.  Leaves 0, 1, or -1 in r0
+     * based on the result of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {            (none of the above: unordered)
+     *         return 1;
+     *     }
+     * }
+     *
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    fldd    d0, [r0]                    @ d0<- vBB
+    fldd    d1, [r1]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    mov     r0, #1                      @ r0<- 1 (default)
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
+dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
+    /*
+     * Compare two double-precision values.  Leaves 0, 1, or -1 in r0
+     * based on the result of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {            (none of the above: unordered)
+     *         return -1;
+     *     }
+     * }
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    fldd    d0, [r0]                    @ d0<- vBB
+    fldd    d1, [r1]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    mvn     r0, #0                      @ r0<- -1 (default)
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
+dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
+    /*
+     * Compare two single-precision values.  Leaves 0, 1, or -1 in r0
+     * based on the result of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {            (none of the above: unordered)
+     *         return 1;
+     *     }
+     * }
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    flds    s0, [r0]                    @ s0<- vBB
+    flds    s1, [r1]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    mov     r0, #1                      @ r0<- 1 (default)
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
+dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
+    /*
+     * Compare two single-precision values.  Leaves 0, 1, or -1 in r0
+     * based on the result of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {            (none of the above: unordered)
+     *         return -1;
+     *     }
+     * }
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    flds    s0, [r0]                    @ s0<- vBB
+    flds    s1, [r1]                    @ s1<- vCC
+    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    mvn     r0, #0                      @ r0<- -1 (default)
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
+dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
+    /*
+     * 64-bit floating point vfp sqrt operation.
+     * If the result is a NaN, bail out to library code to do
+     * the right thing.
+     *
+     * On entry:
+     *     r2 src addr of op1
+     * On exit:
+     *     r0,r1 = res
+     */
+    fldd    d0, [r2]     @ d0<- operand
+    fsqrtd  d1, d0       @ d1<- sqrt(d0)
+    fcmpd   d1, d1       @ compare result with itself (unequal only for NaN)
+    fmstat               @ export status flags
+    fmrrd   r0, r1, d1   @ r0/r1<- result
+    bxeq    lr   @ Result OK - return
+    ldr     r2, .Lsqrt   @ r2<- address of the sqrt library routine
+    fmrrd   r0, r1, d0   @ reload orig operand
+    bx      r2   @ tail call to sqrt library routine
+
+.Lsqrt:
+    .word   sqrt         @ literal: address of sqrt, loaded above
+
+    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
+/* File: armv5te/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+.LinvokeNative:
+    @ Prep for the native call
+    @ r1 = newFP, r0 = methodToCall
+    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
+    ldr     r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
+    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
+    str     r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
+                                        @ newFp->localRefTop=refNext
+    mov     r9, r3                      @ r9<- glue->self (preserve)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
+
+    mov     r2, r0                      @ r2<- methodToCall
+    mov     r0, r1                      @ r0<- newFP
+    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
+
+    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+
+    @ native return; r9=self, r10=newSaveArea
+    @ equivalent to dvmPopJniLocals
+    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+    ldr     r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
+    ldr     r1, [r9, #offThread_exception] @ r1<- self->exception
+    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
+    cmp     r1, #0                      @ null?
+    str     r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0
+    bne     .LhandleException             @ no, handle exception
+    bx      r2                          @ return through the saved returnAddr
+
+/* NOTE - this path can be exercised if the JIT threshold is set to 5 */
+.LhandleException:
+    ldr     r0, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
+    ldr     rIBASE, .LdvmAsmInstructionStart    @ rIBASE<- &dvmAsmInstructionStart
+    ldr     rPC, [r10, #offStackSaveArea_savedPc] @ reload rPC from save area in r10
+    mov     pc, r0                  @ branch to dvmMterpCommonExceptionThrown
+
+    .align  2
+.LdvmAsmInstructionStart:
+    .word   dvmAsmInstructionStart
+.LdvmJitToInterpNoChain:
+    .word   dvmJitToInterpNoChain
+.LdvmMterpStdBail:
+    .word   dvmMterpStdBail
+.LdvmMterpCommonExceptionThrown:
+    .word   dvmMterpCommonExceptionThrown
+.L__aeabi_cdcmple:
+    .word   __aeabi_cdcmple
+.L__aeabi_cfcmple:
+    .word   __aeabi_cfcmple
+
+    .global dmvCompilerTemplateEnd  @ NOTE(review): spelled dmv, not dvm; presumably matched by its users - confirm before renaming
+dmvCompilerTemplateEnd:
+
+#endif /* WITH_JIT */
+
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
index ff0a953e7..a9cf2d3eb 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
@@ -732,458 +732,6 @@ dvmCompiler_TEMPLATE_USHR_LONG:
     bx      lr
 
 
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
-dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
-/* File: armv5te/TEMPLATE_ADD_FLOAT_VFP.S */
-/* File: armv5te/fbinop.S */
-    /*
-     * Generic 32-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     flds    s0,[r1]
-     flds    s1,[r2]
-     fadds   s2, s0, s1
-     fsts    s2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
-dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
-/* File: armv5te/TEMPLATE_SUB_FLOAT_VFP.S */
-/* File: armv5te/fbinop.S */
-    /*
-     * Generic 32-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     flds    s0,[r1]
-     flds    s1,[r2]
-     fsubs   s2, s0, s1
-     fsts    s2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
-dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
-/* File: armv5te/TEMPLATE_MUL_FLOAT_VFP.S */
-/* File: armv5te/fbinop.S */
-    /*
-     * Generic 32-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     flds    s0,[r1]
-     flds    s1,[r2]
-     fmuls   s2, s0, s1
-     fsts    s2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
-dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
-/* File: armv5te/TEMPLATE_DIV_FLOAT_VFP.S */
-/* File: armv5te/fbinop.S */
-    /*
-     * Generic 32-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     flds    s0,[r1]
-     flds    s1,[r2]
-     fdivs   s2, s0, s1
-     fsts    s2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
-dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
-/* File: armv5te/TEMPLATE_ADD_DOUBLE_VFP.S */
-/* File: armv5te/fbinopWide.S */
-    /*
-     * Generic 64-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     fldd    d0,[r1]
-     fldd    d1,[r2]
-     faddd   d2, d0, d1
-     fstd    d2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
-dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
-/* File: armv5te/TEMPLATE_SUB_DOUBLE_VFP.S */
-/* File: armv5te/fbinopWide.S */
-    /*
-     * Generic 64-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     fldd    d0,[r1]
-     fldd    d1,[r2]
-     fsubd   d2, d0, d1
-     fstd    d2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
-dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
-/* File: armv5te/TEMPLATE_MUL_DOUBLE_VFP.S */
-/* File: armv5te/fbinopWide.S */
-    /*
-     * Generic 64-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     fldd    d0,[r1]
-     fldd    d1,[r2]
-     fmuld   d2, d0, d1
-     fstd    d2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
-dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
-/* File: armv5te/TEMPLATE_DIV_DOUBLE_VFP.S */
-/* File: armv5te/fbinopWide.S */
-    /*
-     * Generic 64-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     fldd    d0,[r1]
-     fldd    d1,[r2]
-     fdivd   d2, d0, d1
-     fstd    d2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
-dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
-/* File: armv5te/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
-/* File: armv5te/funopNarrower.S */
-    /*
-     * Generic 64bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "s0 = op d0".
-     *
-     * For: double-to-int, double-to-float
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    fldd    d0, [r1]                    @ d0<- vB
-    fcvtsd  s0, d0                              @ s0<- op d0
-    fsts    s0, [r0]                    @ vA<- s0
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
-dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
-/* File: armv5te/TEMPLATE_DOUBLE_TO_INT_VFP.S */
-/* File: armv5te/funopNarrower.S */
-    /*
-     * Generic 64bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "s0 = op d0".
-     *
-     * For: double-to-int, double-to-float
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    fldd    d0, [r1]                    @ d0<- vB
-    ftosizd  s0, d0                              @ s0<- op d0
-    fsts    s0, [r0]                    @ vA<- s0
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
-dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
-/* File: armv5te/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
-/* File: armv5te/funopWider.S */
-    /*
-     * Generic 32bit-to-64bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "d0 = op s0".
-     *
-     * For: int-to-double, float-to-double
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    flds    s0, [r1]                    @ s0<- vB
-    fcvtds  d0, s0                              @ d0<- op s0
-    fstd    d0, [r0]                    @ vA<- d0
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
-dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
-/* File: armv5te/TEMPLATE_FLOAT_TO_INT_VFP.S */
-/* File: armv5te/funop.S */
-    /*
-     * Generic 32bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "s1 = op s0".
-     *
-     * For: float-to-int, int-to-float
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    flds    s0, [r1]                    @ s0<- vB
-    ftosizs s1, s0                              @ s1<- op s0
-    fsts    s1, [r0]                    @ vA<- s1
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
-dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
-/* File: armv5te/TEMPLATE_INT_TO_DOUBLE_VFP.S */
-/* File: armv5te/funopWider.S */
-    /*
-     * Generic 32bit-to-64bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "d0 = op s0".
-     *
-     * For: int-to-double, float-to-double
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    flds    s0, [r1]                    @ s0<- vB
-    fsitod  d0, s0                              @ d0<- op s0
-    fstd    d0, [r0]                    @ vA<- d0
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
-dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
-/* File: armv5te/TEMPLATE_INT_TO_FLOAT_VFP.S */
-/* File: armv5te/funop.S */
-    /*
-     * Generic 32bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "s1 = op s0".
-     *
-     * For: float-to-int, int-to-float
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    flds    s0, [r1]                    @ s0<- vB
-    fsitos  s1, s0                              @ s1<- op s0
-    fsts    s1, [r0]                    @ vA<- s1
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
-dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
-/* File: armv5te/TEMPLATE_CMPG_DOUBLE_VFP.S */
-    /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register based on the results of the comparison.
-     *
-     * int compare(x, y) {
-     *     if (x == y) {
-     *         return 0;
-     *     } else if (x < y) {
-     *         return -1;
-     *     } else if (x > y) {
-     *         return 1;
-     *     } else {
-     *         return 1;
-     *     }
-     * }
-     *
-     * On entry:
-     *    r0 = &op1 [vBB]
-     *    r1 = &op2 [vCC]
-     */
-    /* op vAA, vBB, vCC */
-    fldd    d0, [r0]                    @ d0<- vBB
-    fldd    d1, [r1]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
-    mov     r0, #1                      @ r0<- 1 (default)
-    fmstat                              @ export status flags
-    mvnmi   r0, #0                      @ (less than) r0<- -1
-    moveq   r0, #0                      @ (equal) r0<- 0
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
-dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
-/* File: armv5te/TEMPLATE_CMPL_DOUBLE_VFP.S */
-    /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register based on the results of the comparison.
-     *
-     * int compare(x, y) {
-     *     if (x == y) {
-     *         return 0;
-     *     } else if (x > y) {
-     *         return 1;
-     *     } else if (x < y) {
-     *         return -1;
-     *     } else {
-     *         return -1;
-     *     }
-     * }
-     * On entry:
-     *    r0 = &op1 [vBB]
-     *    r1 = &op2 [vCC]
-     */
-    /* op vAA, vBB, vCC */
-    fldd    d0, [r0]                    @ d0<- vBB
-    fldd    d1, [r1]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
-    mvn     r0, #0                      @ r0<- -1 (default)
-    fmstat                              @ export status flags
-    movgt   r0, #1                      @ (greater than) r0<- 1
-    moveq   r0, #0                      @ (equal) r0<- 0
-    bx      lr
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
-dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
-/* File: armv5te/TEMPLATE_CMPG_FLOAT_VFP.S */
-    /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register based on the results of the comparison.
-     *
-     * int compare(x, y) {
-     *     if (x == y) {
-     *         return 0;
-     *     } else if (x < y) {
-     *         return -1;
-     *     } else if (x > y) {
-     *         return 1;
-     *     } else {
-     *         return 1;
-     *     }
-     * }
-     * On entry:
-     *    r0 = &op1 [vBB]
-     *    r1 = &op2 [vCC]
-     */
-    /* op vAA, vBB, vCC */
-    flds    s0, [r0]                    @ d0<- vBB
-    flds    s1, [r1]                    @ d1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
-    mov     r0, #1                      @ r0<- 1 (default)
-    fmstat                              @ export status flags
-    mvnmi   r0, #0                      @ (less than) r0<- -1
-    moveq   r0, #0                      @ (equal) r0<- 0
-    bx      lr
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
-dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
-/* File: armv5te/TEMPLATE_CMPL_FLOAT_VFP.S */
-    /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register based on the results of the comparison.
-     *
-     * int compare(x, y) {
-     *     if (x == y) {
-     *         return 0;
-     *     } else if (x > y) {
-     *         return 1;
-     *     } else if (x < y) {
-     *         return -1;
-     *     } else {
-     *         return -1;
-     *     }
-     * }
-     * On entry:
-     *    r0 = &op1 [vBB]
-     *    r1 = &op2 [vCC]
-     */
-    /* op vAA, vBB, vCC */
-    flds    s0, [r0]                    @ d0<- vBB
-    flds    s1, [r1]                    @ d1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
-    mvn     r0, #0                      @ r0<- -1 (default)
-    fmstat                              @ export status flags
-    movgt   r0, #1                      @ (greater than) r0<- 1
-    moveq   r0, #0                      @ (equal) r0<- 0
-    bx      lr
-
     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
 /* File: armv5te/footer.S */
 /*
diff --git a/vm/compiler/template/rebuild.sh b/vm/compiler/template/rebuild.sh
index 07d951650..2717d232e 100755
--- a/vm/compiler/template/rebuild.sh
+++ b/vm/compiler/template/rebuild.sh
@@ -19,5 +19,5 @@
 # generated as part of the build.
 #
 set -e
-for arch in armv5te; do TARGET_ARCH_EXT=$arch make -f Makefile-template; done
+for arch in armv5te armv5te-vfp; do TARGET_ARCH_EXT=$arch make -f Makefile-template; done
 
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index acd470469..ffdb283e7 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -275,49 +275,44 @@ int dvmCheckJit(const u2* pc, Thread* self, InterpState* interpState)
             flags = dexGetInstrFlags(gDvm.instrFlags, decInsn.opCode);
             len = dexGetInstrOrTableWidthAbs(gDvm.instrWidth, pc);
             offset = pc - interpState->method->insns;
-            if ((flags & kInstrNoJit) == kInstrNoJit) {
-                interpState->jitState = kJitTSelectEnd;
-                break;
-            } else {
-                if (pc != interpState->currRunHead + interpState->currRunLen) {
-                    int currTraceRun;
-                    /* We need to start a new trace run */
-                    currTraceRun = ++interpState->currTraceRun;
-                    interpState->currRunLen = 0;
-                    interpState->currRunHead = (u2*)pc;
-                    interpState->trace[currTraceRun].frag.startOffset = offset;
-                    interpState->trace[currTraceRun].frag.numInsts = 0;
-                    interpState->trace[currTraceRun].frag.runEnd = false;
-                    interpState->trace[currTraceRun].frag.hint = kJitHintNone;
-                }
-                interpState->trace[interpState->currTraceRun].frag.numInsts++;
-                interpState->totalTraceLen++;
-                interpState->currRunLen += len;
-                if (  ((flags & kInstrUnconditional) == 0) &&
-                      ((flags & (kInstrCanBranch |
-                                 kInstrCanSwitch |
-                                 kInstrCanReturn |
-                                 kInstrInvoke)) != 0)) {
-                        interpState->jitState = kJitTSelectEnd;
+            if (pc != interpState->currRunHead + interpState->currRunLen) {
+                int currTraceRun;
+                /* We need to start a new trace run */
+                currTraceRun = ++interpState->currTraceRun;
+                interpState->currRunLen = 0;
+                interpState->currRunHead = (u2*)pc;
+                interpState->trace[currTraceRun].frag.startOffset = offset;
+                interpState->trace[currTraceRun].frag.numInsts = 0;
+                interpState->trace[currTraceRun].frag.runEnd = false;
+                interpState->trace[currTraceRun].frag.hint = kJitHintNone;
+            }
+            interpState->trace[interpState->currTraceRun].frag.numInsts++;
+            interpState->totalTraceLen++;
+            interpState->currRunLen += len;
+            if (  ((flags & kInstrUnconditional) == 0) &&
+                  ((flags & (kInstrCanBranch |
+                             kInstrCanSwitch |
+                             kInstrCanReturn |
+                             kInstrInvoke)) != 0)) {
+                    interpState->jitState = kJitTSelectEnd;
 #if defined(SHOW_TRACE)
-                LOGD("TraceGen: ending on %s, basic block end",
-                     getOpcodeName(decInsn.opCode));
+            LOGD("TraceGen: ending on %s, basic block end",
+                 getOpcodeName(decInsn.opCode));
 #endif
-                }
-                if (decInsn.opCode == OP_THROW) {
-                    interpState->jitState = kJitTSelectEnd;
-                }
-                if (interpState->totalTraceLen >= JIT_MAX_TRACE_LEN) {
-                    interpState->jitState = kJitTSelectEnd;
-                }
-                if (debugOrProfile) {
-                    interpState->jitState = kJitTSelectAbort;
-                    switchInterp = !debugOrProfile;
-                    break;
-                }
-                if ((flags & kInstrCanReturn) != kInstrCanReturn) {
-                    break;
-                }
+            }
+            if (decInsn.opCode == OP_THROW) {
+                interpState->jitState = kJitTSelectEnd;
+            }
+            if (interpState->totalTraceLen >= JIT_MAX_TRACE_LEN) {
+                interpState->jitState = kJitTSelectEnd;
+            }
+            if (debugOrProfile) {
+                interpState->jitState = kJitTSelectAbort;
+                switchInterp = !debugOrProfile;
+                break;
+            }
+            if ((flags & kInstrCanReturn) != kInstrCanReturn) {
+                break;
             }
             /* NOTE: intentional fallthrough for returns */
         case kJitTSelectEnd:
@@ -693,5 +688,37 @@ bool dvmJitResizeJitTable( unsigned int size )
     return false;
 }
 
+/*
+ * Float/double to long conversion must clamp to the min and max long values
+ * and map NaN to 0.  Use these if the target doesn't do so natively.
+ * kMaxLong rounds up to 2^63; kMinLong must be negated to yield -2^63.
+ */
+s8 dvmJitd2l(double d)
+{
+    static const double kMaxLong = (double)0x7fffffffffffffffULL;
+    static const double kMinLong = -(double)0x8000000000000000ULL;
+    if (d >= kMaxLong)
+        return 0x7fffffffffffffffLL;
+    if (d <= kMinLong)
+        return -0x7fffffffffffffffLL - 1;
+    if (d != d) // NaN case
+        return 0;
+    return (s8)d;
+}
+
+/* float -> long: clamp to [min long, max long], NaN -> 0 (limits are +/-2^63) */
+s8 dvmJitf2l(float f)
+{
+    static const float kMaxLong = (float)0x7fffffffffffffffULL;
+    static const float kMinLong = -(float)0x8000000000000000ULL;
+    if (f >= kMaxLong)
+        return 0x7fffffffffffffffLL;
+    if (f <= kMinLong)
+        return -0x7fffffffffffffffLL - 1;
+    if (f != f) // NaN case
+        return 0;
+    return (s8)f;
+}
+
 
 #endif /* WITH_JIT */
diff --git a/vm/interp/Jit.h b/vm/interp/Jit.h
index 5d748d503..31454e449 100644
--- a/vm/interp/Jit.h
+++ b/vm/interp/Jit.h
@@ -63,6 +63,8 @@ void dvmJitStopTranslationRequests(void);
 void dvmJitStats(void);
 bool dvmJitResizeJitTable(unsigned int size);
 struct JitEntry *dvmFindJitEntry(const u2* pc);
+s8 dvmJitd2l(double d);    /* double -> s8 conversion helper; NaN converts to 0 */
+s8 dvmJitf2l(float f);     /* float -> s8 conversion helper; NaN converts to 0 */
 
 
 #endif /*_DALVIK_INTERP_JIT*/