From ba4fc8bfc1bccae048403bd1cea3b869dca61dd7 Mon Sep 17 00:00:00 2001
From: Ben Cheng
Date: Mon, 1 Jun 2009 13:00:29 -0700
Subject: [PATCH] Initial port of the Dalvik JIT engine to the internal repository.

Fixed files with trailing spaces.
Addressed review comments from Dan.
Addressed review comments from fadden.
Addressed review comments from Dan x 2.
Addressed review comments from Dan x 3.
---
 libdex/InstrUtils.c | 32 +-
 libdex/InstrUtils.h | 3 +
 vm/Android.mk | 23 +
 vm/Dalvik.h | 3 +
 vm/Globals.h | 101 +
 vm/Init.c | 159 ++
 vm/SignalCatcher.c | 9 +-
 vm/compiler/Compiler.c | 242 ++
 vm/compiler/Compiler.h | 99 +
 vm/compiler/CompilerIR.h | 87 +
 vm/compiler/CompilerInternals.h | 26 +
 vm/compiler/CompilerUtility.h | 44 +
 vm/compiler/Frontend.c | 603 ++++
 vm/compiler/IntermediateRep.c | 59 +
 vm/compiler/Utility.c | 169 ++
 vm/compiler/codegen/CompilerCodegen.h | 37 +
 vm/compiler/codegen/armv5te/ArchUtility.c | 223 ++
 vm/compiler/codegen/armv5te/Armv5teLIR.h | 181 ++
 vm/compiler/codegen/armv5te/Assemble.c | 499 ++++
 vm/compiler/codegen/armv5te/Codegen.c | 2892 ++++++++++++++++++++
 vm/compiler/template/Makefile-template | 49 +
 vm/compiler/template/README.txt | 1 +
 .../template/armv5te/TEMPLATE_CMPG_DOUBLE.S | 1 +
 vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT.S | 1 +
 .../template/armv5te/TEMPLATE_CMPL_DOUBLE.S | 39 +
 vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT.S | 56 +
 vm/compiler/template/armv5te/TEMPLATE_CMP_LONG.S | 34 +
 .../armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S | 54 +
 .../armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S | 53 +
 vm/compiler/template/armv5te/TEMPLATE_MUL_LONG.S | 28 +
 vm/compiler/template/armv5te/TEMPLATE_RETURN.S | 38 +
 vm/compiler/template/armv5te/TEMPLATE_SHL_LONG.S | 15 +
 vm/compiler/template/armv5te/TEMPLATE_SHR_LONG.S | 16 +
 vm/compiler/template/armv5te/TEMPLATE_USHR_LONG.S | 16 +
 vm/compiler/template/armv5te/TemplateOpList.h | 35 +
 vm/compiler/template/armv5te/footer.S | 58 +
 vm/compiler/template/armv5te/header.S | 93 +
 vm/compiler/template/armv5te/platform.S | 16 +
 vm/compiler/template/config-armv5te | 45 +
 vm/compiler/template/gen-template.py | 422 +++
 .../template/out/CompilerTemplateAsm-armv5te.S | 703 +++++
 vm/compiler/template/rebuild.sh | 23 +
 vm/interp/Interp.c | 39 +-
 vm/interp/InterpDefs.h | 78 +-
 vm/interp/Jit.c | 567 ++++
 vm/interp/Jit.h | 59 +
 vm/interp/Stack.h | 2 +
 vm/mterp/Mterp.c | 5 +-
 vm/mterp/Mterp.h | 3 +
 vm/mterp/armv5te/OP_GOTO.S | 10 +-
 vm/mterp/armv5te/OP_GOTO_16.S | 9 +
 vm/mterp/armv5te/OP_GOTO_32.S | 10 +-
 vm/mterp/armv5te/OP_PACKED_SWITCH.S | 9 +
 vm/mterp/armv5te/bincmp.S | 9 +-
 vm/mterp/armv5te/entry.S | 27 +
 vm/mterp/armv5te/footer.S | 224 ++
 vm/mterp/armv5te/header.S | 10 +
 vm/mterp/armv5te/zcmp.S | 12 +-
 vm/mterp/c/gotoTargets.c | 4 +-
 vm/mterp/c/header.c | 24 +-
 vm/mterp/common/FindInterface.h | 37 +
 vm/mterp/common/asm-constants.h | 48 +-
 vm/mterp/out/InterpAsm-armv4t.S | 434 ++-
 vm/mterp/out/InterpAsm-armv5te-vfp.S | 434 ++-
 vm/mterp/out/InterpAsm-armv5te.S | 434 ++-
 vm/mterp/out/InterpC-allstubs.c | 28 +-
 vm/mterp/out/InterpC-armv4t.c | 24 +-
 vm/mterp/out/InterpC-armv5te-vfp.c | 24 +-
 vm/mterp/out/InterpC-armv5te.c | 24 +-
 vm/mterp/out/InterpC-portdbg.c | 58 +-
 vm/mterp/out/InterpC-portstd.c | 52 +-
 vm/mterp/out/InterpC-x86.c | 28 +-
 vm/mterp/portable/entry.c | 21 +
 vm/mterp/portable/portdbg.c | 8 +
 vm/mterp/portable/portstd.c | 2 +
 vm/mterp/portable/stubdefs.c | 1 +
 76 files changed, 9844 insertions(+), 201 deletions(-)
 create mode 100644 vm/compiler/Compiler.c
 create mode 100644 vm/compiler/Compiler.h
 create mode 100644
vm/compiler/CompilerIR.h create mode 100644 vm/compiler/CompilerInternals.h create mode 100644 vm/compiler/CompilerUtility.h create mode 100644 vm/compiler/Frontend.c create mode 100644 vm/compiler/IntermediateRep.c create mode 100644 vm/compiler/Utility.c create mode 100644 vm/compiler/codegen/CompilerCodegen.h create mode 100644 vm/compiler/codegen/armv5te/ArchUtility.c create mode 100644 vm/compiler/codegen/armv5te/Armv5teLIR.h create mode 100644 vm/compiler/codegen/armv5te/Assemble.c create mode 100644 vm/compiler/codegen/armv5te/Codegen.c create mode 100644 vm/compiler/template/Makefile-template create mode 100644 vm/compiler/template/README.txt create mode 100644 vm/compiler/template/armv5te/TEMPLATE_CMPG_DOUBLE.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_CMPL_DOUBLE.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_CMP_LONG.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_MUL_LONG.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_RETURN.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_SHL_LONG.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_SHR_LONG.S create mode 100644 vm/compiler/template/armv5te/TEMPLATE_USHR_LONG.S create mode 100644 vm/compiler/template/armv5te/TemplateOpList.h create mode 100644 vm/compiler/template/armv5te/footer.S create mode 100644 vm/compiler/template/armv5te/header.S create mode 100644 vm/compiler/template/armv5te/platform.S create mode 100644 vm/compiler/template/config-armv5te create mode 100755 vm/compiler/template/gen-template.py create mode 100644 vm/compiler/template/out/CompilerTemplateAsm-armv5te.S create mode 100755 vm/compiler/template/rebuild.sh create mode 100644 vm/interp/Jit.c create mode 100644 vm/interp/Jit.h create mode 100644 vm/mterp/common/FindInterface.h diff --git a/libdex/InstrUtils.c b/libdex/InstrUtils.c index 33c7e7ddc..b58e647e3 100644 --- a/libdex/InstrUtils.c +++ b/libdex/InstrUtils.c @@ -539,16 +539,6 @@ InstructionFlags* dexCreateInstrFlagsTable(void) case OP_SPUT_SHORT: case OP_SPUT_WIDE: case OP_SPUT_OBJECT: - case OP_INVOKE_VIRTUAL: - case OP_INVOKE_VIRTUAL_RANGE: - case OP_INVOKE_SUPER: - case OP_INVOKE_SUPER_RANGE: - case OP_INVOKE_DIRECT: - case OP_INVOKE_DIRECT_RANGE: - case OP_INVOKE_STATIC: - case OP_INVOKE_STATIC_RANGE: - case OP_INVOKE_INTERFACE: - case OP_INVOKE_INTERFACE_RANGE: case OP_DIV_INT: case OP_REM_INT: case OP_DIV_LONG: @@ -564,6 +554,19 @@ InstructionFlags* dexCreateInstrFlagsTable(void) flags = kInstrCanContinue | kInstrCanThrow; break; + case OP_INVOKE_VIRTUAL: + case OP_INVOKE_VIRTUAL_RANGE: + case OP_INVOKE_SUPER: + case OP_INVOKE_SUPER_RANGE: + case OP_INVOKE_DIRECT: + case OP_INVOKE_DIRECT_RANGE: + case OP_INVOKE_STATIC: + case OP_INVOKE_STATIC_RANGE: + case OP_INVOKE_INTERFACE: + case OP_INVOKE_INTERFACE_RANGE: + flags = kInstrCanContinue | kInstrCanThrow | kInstrInvoke; + break; + case OP_RETURN_VOID: case OP_RETURN: case OP_RETURN_WIDE: @@ -579,7 +582,7 @@ InstructionFlags* dexCreateInstrFlagsTable(void) case OP_GOTO: case OP_GOTO_16: case OP_GOTO_32: - flags = kInstrCanBranch; + flags = kInstrCanBranch | kInstrUnconditional; break; /* conditional branches */ @@ -617,12 +620,15 @@ InstructionFlags* dexCreateInstrFlagsTable(void) case 
OP_IPUT_QUICK: case OP_IPUT_WIDE_QUICK: case OP_IPUT_OBJECT_QUICK: + flags = kInstrCanContinue | kInstrCanThrow; + break; + case OP_INVOKE_VIRTUAL_QUICK: case OP_INVOKE_VIRTUAL_QUICK_RANGE: case OP_INVOKE_SUPER_QUICK: case OP_INVOKE_SUPER_QUICK_RANGE: case OP_INVOKE_DIRECT_EMPTY: - flags = kInstrCanContinue | kInstrCanThrow; + flags = kInstrCanContinue | kInstrCanThrow | kInstrInvoke; break; /* these should never appear */ @@ -651,6 +657,7 @@ InstructionFlags* dexCreateInstrFlagsTable(void) case OP_UNUSED_FD: case OP_UNUSED_FE: case OP_UNUSED_FF: + flags = kInstrNoJit; break; /* @@ -1238,4 +1245,3 @@ int dexGetInstrOrTableWidthAbs(const InstructionWidth* widths, const u2* insns) } return width; } - diff --git a/libdex/InstrUtils.h b/libdex/InstrUtils.h index 7621b8e38..5ca175e68 100644 --- a/libdex/InstrUtils.h +++ b/libdex/InstrUtils.h @@ -99,6 +99,9 @@ enum InstructionFlags { kInstrCanSwitch = 1 << 2, // switch statement kInstrCanThrow = 1 << 3, // could cause an exception to be thrown kInstrCanReturn = 1 << 4, // returns, no additional statements + kInstrInvoke = 1 << 5, // a flavor of invoke + kInstrUnconditional = 1 << 6, // unconditional branch + kInstrNoJit = 1 << 7, // don't jit trace containing this }; diff --git a/vm/Android.mk b/vm/Android.mk index 359291081..a2c2d574a 100644 --- a/vm/Android.mk +++ b/vm/Android.mk @@ -184,6 +184,21 @@ LOCAL_SRC_FILES := \ test/AtomicSpeed.c \ test/TestHash.c +ifeq ($(WITH_JIT_TUNING),true) + LOCAL_CFLAGS += -DWITH_JIT_TUNING +endif + +ifeq ($(WITH_JIT),true) + LOCAL_CFLAGS += -DWITH_JIT + LOCAL_SRC_FILES += \ + ../dexdump/OpCodeNames.c \ + compiler/Compiler.c \ + compiler/Frontend.c \ + compiler/Utility.c \ + compiler/IntermediateRep.c \ + interp/Jit.c +endif + WITH_HPROF := $(strip $(WITH_HPROF)) ifeq ($(WITH_HPROF),) WITH_HPROF := true @@ -242,6 +257,14 @@ ifeq ($(TARGET_ARCH),arm) mterp/out/InterpC-$(TARGET_ARCH_VARIANT).c.arm \ mterp/out/InterpAsm-$(TARGET_ARCH_VARIANT).S LOCAL_SHARED_LIBRARIES += libdl + # TODO - may become TARGET_ARCH_VARIANT specific + ifeq ($(WITH_JIT),true) + LOCAL_SRC_FILES += \ + compiler/codegen/armv5te/Codegen.c \ + compiler/codegen/armv5te/Assemble.c \ + compiler/codegen/armv5te/ArchUtility.c \ + compiler/template/out/CompilerTemplateAsm-armv5te.S + endif else ifeq ($(TARGET_ARCH),x86) LOCAL_SRC_FILES += \ diff --git a/vm/Dalvik.h b/vm/Dalvik.h index 29abc2c31..618d51ae9 100644 --- a/vm/Dalvik.h +++ b/vm/Dalvik.h @@ -73,6 +73,9 @@ #include "libdex/InstrUtils.h" #include "AllocTracker.h" #include "PointerSet.h" +#if defined(WITH_JIT) +#include "compiler/Compiler.h" +#endif #include "Globals.h" #include "reflect/Reflect.h" #include "oo/TypeCheck.h" diff --git a/vm/Globals.h b/vm/Globals.h index 1a81b93e2..b9c73fe75 100644 --- a/vm/Globals.h +++ b/vm/Globals.h @@ -54,6 +54,9 @@ typedef enum ExecutionMode { kExecutionModeUnknown = 0, kExecutionModeInterpPortable, kExecutionModeInterpFast, +#if defined(WITH_JIT) + kExecutionModeJit, +#endif } ExecutionMode; /* @@ -328,6 +331,8 @@ struct DvmGlobals { * (3) a thread has hit a breakpoint or exception that the debugger * has marked as a "suspend all" event; * (4) the SignalCatcher caught a signal that requires suspension. + * (5) (if implemented) the JIT needs to perform a heavyweight + * rearrangement of the translation cache or JitTable. 
* * Because we use "safe point" self-suspension, it is never safe to * do a blocking "lock" call on this mutex -- if it has been acquired, @@ -612,4 +617,100 @@ struct DvmGlobals { extern struct DvmGlobals gDvm; +#if defined(WITH_JIT) +/* + * JIT-specific global state + */ +struct DvmJitGlobals { + /* + * Guards writes to Dalvik PC (dPC), translated code address (codeAddr) and + * chain fields within the JIT hash table. Note carefully the access + * mechanism. + * Only writes are guarded, and the guarded fields must be updated in a + * specific order using atomic operations. Further, once a field is + * written it cannot be changed without halting all threads. + * + * The write order is: + * 1) codeAddr + * 2) dPC + * 3) chain [if necessary] + * + * This mutex also guards both read and write of curJitTableEntries. + */ + pthread_mutex_t tableLock; + + /* The JIT hash table. Note that for access speed, copies of this pointer + * are stored in each thread. */ + struct JitEntry *pJitEntryTable; + + /* Array of profile threshold counters */ + unsigned char *pProfTable; + unsigned char *pProfTableCopy; + + /* Size of JIT hash table in entries. Must be a power of 2 */ + unsigned int maxTableEntries; + + /* Trigger for trace selection */ + unsigned short threshold; + + /* JIT Compiler Control */ + bool haltCompilerThread; + bool blockingMode; + pthread_t compilerHandle; + pthread_mutex_t compilerLock; + pthread_cond_t compilerQueueActivity; + pthread_cond_t compilerQueueEmpty; + int compilerQueueLength; + int compilerHighWater; + int compilerWorkEnqueueIndex; + int compilerWorkDequeueIndex; + CompilerWorkOrder compilerWorkQueue[COMPILER_WORK_QUEUE_SIZE]; + + /* JIT internal stats */ + int compilerMaxQueued; + int addrLookupsFound; + int addrLookupsNotFound; + int noChainExit; + int normalExit; + int puntExit; + int translationChains; + int invokeNoOpt; + int InvokeChain; + int returnOp; + + /* Compiled code cache */ + void* codeCache; + + /* Bytes already used in the code cache */ + unsigned int codeCacheByteUsed; + + /* Number of installed compilations in the cache */ + unsigned int numCompilations; + + /* Flag to indicate that the code cache is full */ + bool codeCacheFull; + + /* true/false: compile/reject opcodes specified in the -Xjitop list */ + bool includeSelectedOp; + + /* true/false: compile/reject methods specified in the -Xjitmethod list */ + bool includeSelectedMethod; + + /* Disable JIT for selected opcodes - one bit for each opcode */ + char opList[32]; + + /* Disable JIT for selected methods */ + HashTable *methodTable; + + /* Record how many times an opcode has been JIT'ed */ + int opHistogram[256]; + + /* Flag to dump all compiled code */ + bool printMe; +}; + +extern struct DvmJitGlobals gDvmJit; + +#endif + #endif /*_DALVIK_GLOBALS*/ diff --git a/vm/Init.c b/vm/Init.c index 4ba10b736..5295f4921 100644 --- a/vm/Init.c +++ b/vm/Init.c @@ -20,6 +20,7 @@ #include "Dalvik.h" #include "test/Test.h" #include "mterp/Mterp.h" +#include "Hash.h" #include #include @@ -49,6 +50,11 @@ static bool dvmInitZygote(void); /* global state */ struct DvmGlobals gDvm; +/* JIT-specific global state */ +#if defined(WITH_JIT) +struct DvmJitGlobals gDvmJit; +#endif + /* * Show usage. 
* @@ -83,8 +89,13 @@ static void dvmUsage(const char* progName) kMinStackSize / 1024, kMaxStackSize / 1024); dvmFprintf(stderr, " -Xverify:{none,remote,all}\n"); dvmFprintf(stderr, " -Xrs\n"); +#if defined(WITH_JIT) + dvmFprintf(stderr, + " -Xint (extended to accept ':portable', ':fast' and ':jit')\n"); +#else dvmFprintf(stderr, " -Xint (extended to accept ':portable' and ':fast')\n"); +#endif dvmFprintf(stderr, "\n"); dvmFprintf(stderr, "These are unique to Dalvik:\n"); dvmFprintf(stderr, " -Xzygote\n"); @@ -98,6 +109,17 @@ static void dvmUsage(const char* progName) dvmFprintf(stderr, " -Xgc:[no]precise\n"); dvmFprintf(stderr, " -Xgenregmap\n"); dvmFprintf(stderr, " -Xcheckdexsum\n"); +#if defined(WITH_JIT) + dvmFprintf(stderr, " -Xincludeselectedop\n"); + dvmFprintf(stderr, " -Xjitop:hexopvalue[-endvalue]" + "[,hexopvalue[-endvalue]]*\n"); + dvmFprintf(stderr, " -Xincludeselectedmethod\n"); + dvmFprintf(stderr, " -Xthreshold:decimalvalue\n"); + dvmFprintf(stderr, " -Xblocking\n"); + dvmFprintf(stderr, " -Xjitmethod:signture[,signature]* " + "(eg Ljava/lang/String\\;replace)\n"); + dvmFprintf(stderr, " -Xjitverbose\n"); +#endif dvmFprintf(stderr, "\n"); dvmFprintf(stderr, "Configured with:" #ifdef WITH_DEBUGGER @@ -161,6 +183,9 @@ static void dvmUsage(const char* progName) #elif DVM_RESOLVER_CACHE == DVM_RC_NO_CACHE " resolver_cache_disabled" #endif +#if defined(WITH_JIT) + " with_jit" +#endif ); #ifdef DVM_SHOW_EXCEPTION dvmFprintf(stderr, " show_exception=%d", DVM_SHOW_EXCEPTION); @@ -531,6 +556,97 @@ static void freeAssertionCtrl(void) free(gDvm.assertionCtrl); } +#if defined(WITH_JIT) +/* Parse -Xjitop to selectively turn on/off certain opcodes for JIT */ +static void processXjitop(const char *opt) +{ + if (opt[7] == ':') { + const char *startPtr = &opt[8]; + char *endPtr = NULL; + + do { + long startValue, endValue; + + startValue = strtol(startPtr, &endPtr, 16); + if (startPtr != endPtr) { + /* Just in case value is out of range */ + startValue &= 0xff; + + if (*endPtr == '-') { + endValue = strtol(endPtr+1, &endPtr, 16); + endValue &= 0xff; + } else { + endValue = startValue; + } + + for (; startValue <= endValue; startValue++) { + LOGW("Dalvik opcode %x is selected for debugging", + (unsigned int) startValue); + /* Mark the corresponding bit to 1 */ + gDvmJit.opList[startValue >> 3] |= + 1 << (startValue & 0x7); + } + + if (*endPtr == 0) { + break; + } + + startPtr = endPtr + 1; + + continue; + } else { + if (*endPtr != 0) { + dvmFprintf(stderr, + "Warning: Unrecognized opcode value substring " + "%s\n", endPtr); + } + break; + } + } while (1); + } else { + int i; + for (i = 0; i < 32; i++) { + gDvmJit.opList[i] = 0xff; + } + dvmFprintf(stderr, "Warning: select all opcodes\n"); + } +} + +/* Parse -Xjitmethod to selectively turn on/off certain methods for JIT */ +static void processXjitmethod(const char *opt) +{ + char *buf = strdup(&opt[12]); + char *start, *end; + + gDvmJit.methodTable = dvmHashTableCreate(8, NULL); + + start = buf; + /* + * Break comma-separated method signatures and enter them into the hash + * table individually. + */ + do { + int hashValue; + + end = strchr(start, ','); + if (end) { + *end = 0; + } + + hashValue = dvmComputeUtf8Hash(start); + + dvmHashTableLookup(gDvmJit.methodTable, hashValue, + strdup(start), + (HashCompareFunc) strcmp, true); + if (end) { + start = end + 1; + } else { + break; + } + } while (1); + free(buf); +} +#endif /* * Process an argument vector full of options. 
Unlike standard C programs, @@ -760,6 +876,10 @@ static int dvmProcessOptions(int argc, const char* const argv[], gDvm.executionMode = kExecutionModeInterpPortable; else if (strcmp(argv[i] + 6, "fast") == 0) gDvm.executionMode = kExecutionModeInterpFast; +#ifdef WITH_JIT + else if (strcmp(argv[i] + 6, "jit") == 0) + gDvm.executionMode = kExecutionModeJit; +#endif else { dvmFprintf(stderr, "Warning: Unrecognized interpreter mode %s\n",argv[i]); @@ -769,6 +889,23 @@ static int dvmProcessOptions(int argc, const char* const argv[], /* disable JIT -- nothing to do here for now */ } +#ifdef WITH_JIT + } else if (strncmp(argv[i], "-Xjitop", 7) == 0) { + processXjitop(argv[i]); + } else if (strncmp(argv[i], "-Xjitmethod", 11) == 0) { + processXjitmethod(argv[i]); + } else if (strncmp(argv[i], "-Xblocking", 10) == 0) { + gDvmJit.blockingMode = true; + } else if (strncmp(argv[i], "-Xthreshold:", 12) == 0) { + gDvmJit.threshold = atoi(argv[i] + 12); + } else if (strncmp(argv[i], "-Xincludeselectedop", 19) == 0) { + gDvmJit.includeSelectedOp = true; + } else if (strncmp(argv[i], "-Xincludeselectedmethod", 23) == 0) { + gDvmJit.includeSelectedMethod = true; + } else if (strncmp(argv[i], "-Xjitverbose", 12) == 0) { + gDvmJit.printMe = true; +#endif + } else if (strncmp(argv[i], "-Xdeadlockpredict:", 18) == 0) { #ifdef WITH_DEADLOCK_PREDICTION if (strcmp(argv[i] + 18, "off") == 0) @@ -867,7 +1004,18 @@ static void setCommandLineDefaults() * we know we're using the "desktop" build we should probably be * using "portable" rather than "fast". */ +#if defined(WITH_JIT) + gDvm.executionMode = kExecutionModeJit; + /* + * TODO - check system property and insert command-line options in + * frameworks/base/core/jni/AndroidRuntime.cpp + */ + gDvmJit.blockingMode = false; + gDvmJit.maxTableEntries = 2048; + gDvmJit.threshold = 200; +#else gDvm.executionMode = kExecutionModeInterpFast; +#endif } @@ -904,6 +1052,9 @@ static void blockSignals() sigemptyset(&mask); sigaddset(&mask, SIGQUIT); sigaddset(&mask, SIGUSR1); // used to initiate heap dump +#if defined(WITH_JIT) && defined(WITH_JIT_TUNING) + sigaddset(&mask, SIGUSR2); // used to investigate JIT internals +#endif //sigaddset(&mask, SIGPIPE); cc = sigprocmask(SIG_BLOCK, &mask, NULL); assert(cc == 0); @@ -1195,6 +1346,11 @@ bool dvmInitAfterZygote(void) (int)(endHeap-startHeap), (int)(endQuit-startQuit), (int)(endJdwp-startJdwp), (int)(endJdwp-startHeap)); +#ifdef WITH_JIT + if (!dvmJitStartup()) + return false; +#endif + return true; } @@ -1389,6 +1545,9 @@ void dvmShutdown(void) LOGD("VM cleaning up\n"); +#ifdef WITH_JIT + dvmJitShutdown(); +#endif dvmDebuggerShutdown(); dvmReflectShutdown(); #ifdef WITH_PROFILER diff --git a/vm/SignalCatcher.c b/vm/SignalCatcher.c index 550f77796..adcff9f34 100644 --- a/vm/SignalCatcher.c +++ b/vm/SignalCatcher.c @@ -192,6 +192,9 @@ static void* signalCatcherThreadStart(void* arg) sigemptyset(&mask); sigaddset(&mask, SIGQUIT); sigaddset(&mask, SIGUSR1); +#if defined(WITH_JIT) && defined(WITH_JIT_TUNING) + sigaddset(&mask, SIGUSR2); +#endif while (true) { int rcvd; @@ -253,6 +256,11 @@ loop: LOGI("SIGUSR1 forcing GC (no HPROF)\n"); dvmCollectGarbage(false); #endif +#if defined(WITH_JIT) && defined(WITH_JIT_TUNING) + } else if (rcvd == SIGUSR2) { + gDvmJit.printMe ^= true; + dvmCompilerDumpStats(); +#endif } else { LOGE("unexpected signal %d\n", rcvd); } @@ -260,4 +268,3 @@ loop: return NULL; } - diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c new file mode 100644 index 000000000..dc2497793 --- /dev/null +++ 
b/vm/compiler/Compiler.c @@ -0,0 +1,242 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "Dalvik.h" +#include "interp/Jit.h" +#include "CompilerInternals.h" + + +static inline bool workQueueLength(void) +{ + return gDvmJit.compilerQueueLength; +} + +static CompilerWorkOrder workDequeue(void) +{ + assert(gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkDequeueIndex].kind + != kWorkOrderInvalid); + CompilerWorkOrder work = + gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkDequeueIndex]; + gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkDequeueIndex++].kind = + kWorkOrderInvalid; + if (gDvmJit.compilerWorkDequeueIndex == COMPILER_WORK_QUEUE_SIZE) { + gDvmJit.compilerWorkDequeueIndex = 0; + } + gDvmJit.compilerQueueLength--; + + /* Remember the high water mark of the queue length */ + if (gDvmJit.compilerQueueLength > gDvmJit.compilerMaxQueued) + gDvmJit.compilerMaxQueued = gDvmJit.compilerQueueLength; + + return work; +} + +bool dvmCompilerWorkEnqueue(const u2 *pc, WorkOrderKind kind, void* info) +{ + int cc; + int i; + int numWork; + + dvmLockMutex(&gDvmJit.compilerLock); + + /* Queue full */ + if (gDvmJit.compilerQueueLength == COMPILER_WORK_QUEUE_SIZE || + gDvmJit.codeCacheFull == true) { + dvmUnlockMutex(&gDvmJit.compilerLock); + return false; + } + + for (numWork = gDvmJit.compilerQueueLength, + i = gDvmJit.compilerWorkDequeueIndex; + numWork > 0; + numWork--) { + /* Already enqueued */ + if (gDvmJit.compilerWorkQueue[i++].pc == pc) + goto done; + /* Wrap around */ + if (i == COMPILER_WORK_QUEUE_SIZE) + i = 0; + } + + gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkEnqueueIndex].pc = pc; + gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkEnqueueIndex].kind = kind; + gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkEnqueueIndex].info = info; + gDvmJit.compilerWorkEnqueueIndex++; + if (gDvmJit.compilerWorkEnqueueIndex == COMPILER_WORK_QUEUE_SIZE) + gDvmJit.compilerWorkEnqueueIndex = 0; + gDvmJit.compilerQueueLength++; + cc = pthread_cond_signal(&gDvmJit.compilerQueueActivity); + assert(cc == 0); + +done: + dvmUnlockMutex(&gDvmJit.compilerLock); + return true; +} + +/* Block until queue length is 0 */ +void dvmCompilerDrainQueue(void) +{ + dvmLockMutex(&gDvmJit.compilerLock); + while (workQueueLength() != 0 && !gDvmJit.haltCompilerThread) { + pthread_cond_wait(&gDvmJit.compilerQueueEmpty, &gDvmJit.compilerLock); + } + dvmUnlockMutex(&gDvmJit.compilerLock); +} + +static void *compilerThreadStart(void *arg) +{ + dvmLockMutex(&gDvmJit.compilerLock); + /* + * Since the compiler thread will not touch any objects on the heap once + * being created, we just fake its state as VMWAIT so that it can be a + * bit late when there is suspend request pending. 
+ */ + dvmChangeStatus(NULL, THREAD_VMWAIT); + while (!gDvmJit.haltCompilerThread) { + if (workQueueLength() == 0) { + int cc; + cc = pthread_cond_signal(&gDvmJit.compilerQueueEmpty); + assert(cc == 0); + pthread_cond_wait(&gDvmJit.compilerQueueActivity, + &gDvmJit.compilerLock); + continue; + } else { + do { + void *compiledCodePtr; + CompilerWorkOrder work = workDequeue(); + dvmUnlockMutex(&gDvmJit.compilerLock); + /* Check whether there is a suspend request on me */ + dvmCheckSuspendPending(NULL); + if (gDvmJit.haltCompilerThread) { + LOGD("Compiler shutdown in progress - discarding request"); + } else { + compiledCodePtr = dvmCompilerDoWork(&work); + /* Compilation is successful */ + if (compiledCodePtr) { + dvmJitSetCodeAddr(work.pc, compiledCodePtr); + } + } + free(work.info); + dvmLockMutex(&gDvmJit.compilerLock); + } while (workQueueLength() != 0); + } + } + pthread_cond_signal(&gDvmJit.compilerQueueEmpty); + dvmUnlockMutex(&gDvmJit.compilerLock); + return NULL; +} + +bool dvmCompilerSetupCodeCache(void) +{ + extern void dvmCompilerTemplateStart(void); + extern void dmvCompilerTemplateEnd(void); + + /* Allocate the code cache */ + gDvmJit.codeCache = mmap(0, CODE_CACHE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (gDvmJit.codeCache == MAP_FAILED) { + LOGE("Failed to create the code cache: %s\n", strerror(errno)); + return false; + } + + /* Copy the template code into the beginning of the code cache */ + int templateSize = (intptr_t) dmvCompilerTemplateEnd - + (intptr_t) dvmCompilerTemplateStart; + memcpy((void *) gDvmJit.codeCache, + (void *) dvmCompilerTemplateStart, + templateSize); + gDvmJit.codeCacheByteUsed = templateSize; + + /* Flush dcache and invalidate the icache to maintain coherence */ + cacheflush((intptr_t) gDvmJit.codeCache, + (intptr_t) gDvmJit.codeCache + CODE_CACHE_SIZE, 0); + return true; +} + +bool dvmCompilerStartup(void) +{ + /* Make sure the BBType enum is in sane state */ + assert(CHAINING_CELL_GENERIC == 0); + + /* Architecture-specific chores to initialize */ + if (!dvmCompilerArchInit()) + goto fail; + + /* + * Setup the code cache if it is not done so already. For apps it should be + * done by the Zygote already, but for command-line dalvikvm invocation we + * need to do it here. 
+ */ + if (gDvmJit.codeCache == NULL) { + if (!dvmCompilerSetupCodeCache()) + goto fail; + } + + /* Allocate the initial arena block */ + if (dvmCompilerHeapInit() == false) { + goto fail; + } + + dvmInitMutex(&gDvmJit.compilerLock); + pthread_cond_init(&gDvmJit.compilerQueueActivity, NULL); + pthread_cond_init(&gDvmJit.compilerQueueEmpty, NULL); + + dvmLockMutex(&gDvmJit.compilerLock); + + gDvmJit.haltCompilerThread = false; + + /* Reset the work queue */ + memset(gDvmJit.compilerWorkQueue, 0, + sizeof(CompilerWorkOrder) * COMPILER_WORK_QUEUE_SIZE); + gDvmJit.compilerWorkEnqueueIndex = gDvmJit.compilerWorkDequeueIndex = 0; + gDvmJit.compilerQueueLength = 0; + gDvmJit.compilerHighWater = + COMPILER_WORK_QUEUE_SIZE - (COMPILER_WORK_QUEUE_SIZE/4); + + assert(gDvmJit.compilerHighWater < COMPILER_WORK_QUEUE_SIZE); + if (!dvmCreateInternalThread(&gDvmJit.compilerHandle, "Compiler", + compilerThreadStart, NULL)) { + dvmUnlockMutex(&gDvmJit.compilerLock); + goto fail; + } + + dvmUnlockMutex(&gDvmJit.compilerLock); + + return true; + +fail: + return false; +} + +void dvmCompilerShutdown(void) +{ + void *threadReturn; + + if (gDvmJit.compilerHandle) { + + gDvmJit.haltCompilerThread = true; + + dvmLockMutex(&gDvmJit.compilerLock); + pthread_cond_signal(&gDvmJit.compilerQueueActivity); + dvmUnlockMutex(&gDvmJit.compilerLock); + + pthread_join(gDvmJit.compilerHandle, &threadReturn); + } +} diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h new file mode 100644 index 000000000..720970124 --- /dev/null +++ b/vm/compiler/Compiler.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _DALVIK_VM_COMPILER +#define _DALVIK_VM_COMPILER + +#define CODE_CACHE_SIZE 1024*1024 +#define MAX_JIT_RUN_LEN 64 +#define COMPILER_WORK_QUEUE_SIZE 100 + +#define COMPILER_TRACED(X) +#define COMPILER_TRACEE(X) +#define COMPILER_TRACE_CHAINING(X) + +typedef enum WorkOrderKind { + kWorkOrderInvalid = 0, // Should never see by the backend + kWorkOrderMethod = 1, // Work is to compile a whole method + kWorkOrderTrace = 2, // Work is to compile code fragment(s) +} WorkOrderKind; + +typedef struct CompilerWorkOrder { + const u2* pc; + WorkOrderKind kind; + void* info; +} CompilerWorkOrder; + +typedef enum JitState { + kJitOff = 0, + kJitNormal = 1, // Profiling in mterp or running native + kJitTSelectRequest = 2, // Transition state - start trace selection + kJitTSelect = 3, // Actively selecting trace in dbg interp + kJitTSelectAbort = 4, // Something threw during selection - abort + kJitTSelectEnd = 5, // Done with the trace - wrap it up + kJitSingleStep = 6, // Single step interpretation + kJitSingleStepEnd = 7, // Done with single step, return to mterp +} JitState; + +typedef enum JitHint { + kJitHintNone = 0, + kJitHintTaken = 1, // Last inst in run was taken branch + kJitHintNotTaken = 2, // Last inst in run was not taken branch + kJitHintNoBias = 3, // Last inst in run was unbiased branch +} jitHint; + +/* + * Element of a Jit trace description. Describes a contiguous + * sequence of Dalvik byte codes, the last of which can be + * associated with a hint. + * Dalvik byte code + */ +typedef struct { + u2 startOffset; // Starting offset for trace run + unsigned numInsts:8; // Number of Byte codes in run + unsigned runEnd:1; // Run ends with last byte code + jitHint hint:7; // Hint to apply to final code of run +} JitCodeDesc; + +typedef union { + JitCodeDesc frag; + void* hint; +} JitTraceRun; + +/* + * Trace description as will appear in the translation cache. Note + * flexible array at end, as these will be of variable size. To + * conserve space in the translation cache, total length of JitTraceRun + * array must be recomputed via seqential scan if needed. + */ +typedef struct { + const Method* method; + JitTraceRun trace[]; +} JitTraceDescription; + +bool dvmCompilerSetupCodeCache(void); +bool dvmCompilerArchInit(void); +void dvmCompilerArchDump(void); +bool dvmCompilerStartup(void); +void dvmCompilerShutdown(void); +bool dvmCompilerWorkEnqueue(const u2* pc, WorkOrderKind kind, void* info); +void *dvmCheckCodeCache(void *method); +void *dvmCompileMethod(Method *method); +void *dvmCompileTrace(JitTraceDescription *trace); +void dvmCompilerDumpStats(void); +void dvmCompilerDrainQueue(void); + +#endif /* _DALVIK_VM_COMPILER */ diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h new file mode 100644 index 000000000..6ffdf446f --- /dev/null +++ b/vm/compiler/CompilerIR.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _DALVIK_VM_COMPILER_IR +#define _DALVIK_VM_COMPILER_IR + +typedef enum BBType { + /* For coding convenience reasons chaining cell types should appear first */ + CHAINING_CELL_GENERIC = 0, + CHAINING_CELL_POST_INVOKE, + CHAINING_CELL_INVOKE, + CHAINING_CELL_LAST, + DALVIK_BYTECODE, + PC_RECONSTRUCTION, + EXCEPTION_HANDLING, +} BBType; + +typedef struct LIR { + int offset; + struct LIR *next; + struct LIR *prev; + struct LIR *target; +} LIR; + +typedef struct MIR { + DecodedInstruction dalvikInsn; + unsigned int width; + unsigned int offset; + struct MIR *prev; + struct MIR *next; +} MIR; + +typedef struct BasicBlock { + int id; + int visited; + unsigned int startOffset; + const Method *containingMethod; // For blocks from the callee + BBType blockType; + MIR *firstMIRInsn; + MIR *lastMIRInsn; + struct BasicBlock *fallThrough; + struct BasicBlock *taken; + struct BasicBlock *next; // Serial link for book keeping purposes +} BasicBlock; + +typedef struct CompilationUnit { + int numBlocks; + BasicBlock **blockList; + const Method *method; + const JitTraceDescription *traceDesc; + LIR *firstLIRInsn; + LIR *lastLIRInsn; + LIR *wordList; + GrowableList pcReconstructionList; + int dataOffset; + int totalSize; + unsigned char *codeBuffer; + void *baseAddr; + bool printMe; + bool allSingleStep; + int numChainingCells[CHAINING_CELL_LAST]; + LIR *firstChainingLIR[CHAINING_CELL_LAST]; +} CompilationUnit; + +BasicBlock *dvmCompilerNewBB(BBType blockType); + +void dvmCompilerAppendMIR(BasicBlock *bb, MIR *mir); + +void dvmCompilerAppendLIR(CompilationUnit *cUnit, LIR *lir); + +/* Debug Utilities */ +void dvmCompilerDumpCompilationUnit(CompilationUnit *cUnit); + +#endif /* _DALVIK_VM_COMPILER_IR */ diff --git a/vm/compiler/CompilerInternals.h b/vm/compiler/CompilerInternals.h new file mode 100644 index 000000000..410213aac --- /dev/null +++ b/vm/compiler/CompilerInternals.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _DALVIK_VM_COMPILER_INTERNAL +#define _DALVIK_VM_COMPILER_INTERNAL + +#include "Dalvik.h" +#include "CompilerUtility.h" +#include "CompilerIR.h" +#include "codegen/CompilerCodegen.h" +#include "interp/Jit.h" + +#endif /* _DALVIK_VM_COMPILER_INTERNAL */ diff --git a/vm/compiler/CompilerUtility.h b/vm/compiler/CompilerUtility.h new file mode 100644 index 000000000..7b4de1199 --- /dev/null +++ b/vm/compiler/CompilerUtility.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _DALVIK_VM_COMPILER_UTILITY +#define _DALVIK_VM_COMPILER_UTILITY + +#define ARENA_DEFAULT_SIZE 4096 + +/* Allocate the initial memory block for arena-based allocation */ +bool dvmCompilerHeapInit(void); + +typedef struct ArenaMemBlock { + size_t bytesAllocated; + struct ArenaMemBlock *next; + char ptr[0]; +} ArenaMemBlock; + +void *dvmCompilerNew(size_t size, bool zero); + +void dvmCompilerArenaReset(void); + +typedef struct GrowableList { + size_t numAllocated; + size_t numUsed; + void **elemList; +} GrowableList; + +void dvmInitGrowableList(GrowableList *gList, size_t initLength); +void dvmInsertGrowableList(GrowableList *gList, void *elem); + +#endif /* _DALVIK_COMPILER_UTILITY */ diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c new file mode 100644 index 000000000..59a745584 --- /dev/null +++ b/vm/compiler/Frontend.c @@ -0,0 +1,603 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dalvik.h" +#include "libdex/OpCode.h" +#include "dexdump/OpCodeNames.h" +#include "interp/Jit.h" +#include "CompilerInternals.h" + +/* + * Parse an instruction, return the length of the instruction + */ +static inline int parseInsn(const u2 *codePtr, DecodedInstruction *decInsn, + bool printMe) +{ + u2 instr = *codePtr; + OpCode opcode = instr & 0xff; + int insnWidth; + + // Need to check if this is a real NOP or a pseudo opcode + if (opcode == OP_NOP && instr != 0) { + if (instr == kPackedSwitchSignature) { + insnWidth = 4 + codePtr[1] * 2; + } else if (instr == kSparseSwitchSignature) { + insnWidth = 2 + codePtr[1] * 4; + } else if (instr == kArrayDataSignature) { + int width = codePtr[1]; + int size = codePtr[2] | (codePtr[3] << 16); + // The plus 1 is to round up for odd size and width + insnWidth = 4 + ((size * width) + 1) / 2; + } + insnWidth = 0; + } else { + insnWidth = gDvm.instrWidth[opcode]; + if (insnWidth < 0) { + insnWidth = -insnWidth; + } + } + + dexDecodeInstruction(gDvm.instrFormat, codePtr, decInsn); + if (printMe) { + LOGD("%p: %#06x %s\n", codePtr, opcode, getOpcodeName(opcode)); + } + return insnWidth; +} + +/* + * Identify block-ending instructions and collect supplemental information + * regarding the following instructions. 
+ */ +static inline bool findBlockBoundary(const Method *caller, MIR *insn, + unsigned int curOffset, + unsigned int *target, bool *isInvoke, + const Method **callee) +{ + switch (insn->dalvikInsn.opCode) { + /* Target is not compile-time constant */ + case OP_RETURN_VOID: + case OP_RETURN: + case OP_RETURN_WIDE: + case OP_RETURN_OBJECT: + case OP_THROW: + case OP_INVOKE_VIRTUAL: + case OP_INVOKE_VIRTUAL_RANGE: + case OP_INVOKE_INTERFACE: + case OP_INVOKE_INTERFACE_RANGE: + case OP_INVOKE_VIRTUAL_QUICK: + case OP_INVOKE_VIRTUAL_QUICK_RANGE: + *isInvoke = true; + break; + case OP_INVOKE_SUPER: + case OP_INVOKE_SUPER_RANGE: { + int mIndex = caller->clazz->pDvmDex-> + pResMethods[insn->dalvikInsn.vB]->methodIndex; + const Method *calleeMethod = + caller->clazz->super->vtable[mIndex]; + + if (!dvmIsNativeMethod(calleeMethod)) { + *target = (unsigned int) calleeMethod->insns; + } + *isInvoke = true; + *callee = calleeMethod; + break; + } + case OP_INVOKE_STATIC: + case OP_INVOKE_STATIC_RANGE: { + const Method *calleeMethod = + caller->clazz->pDvmDex->pResMethods[insn->dalvikInsn.vB]; + + if (!dvmIsNativeMethod(calleeMethod)) { + *target = (unsigned int) calleeMethod->insns; + } + *isInvoke = true; + *callee = calleeMethod; + break; + } + case OP_INVOKE_SUPER_QUICK: + case OP_INVOKE_SUPER_QUICK_RANGE: { + const Method *calleeMethod = + caller->clazz->super->vtable[insn->dalvikInsn.vB]; + + if (!dvmIsNativeMethod(calleeMethod)) { + *target = (unsigned int) calleeMethod->insns; + } + *isInvoke = true; + *callee = calleeMethod; + break; + } + case OP_INVOKE_DIRECT: + case OP_INVOKE_DIRECT_RANGE: { + const Method *calleeMethod = + caller->clazz->pDvmDex->pResMethods[insn->dalvikInsn.vB]; + if (!dvmIsNativeMethod(calleeMethod)) { + *target = (unsigned int) calleeMethod->insns; + } + *isInvoke = true; + *callee = calleeMethod; + break; + } + case OP_GOTO: + case OP_GOTO_16: + case OP_GOTO_32: + *target = curOffset + (int) insn->dalvikInsn.vA; + break; + + case OP_IF_EQ: + case OP_IF_NE: + case OP_IF_LT: + case OP_IF_GE: + case OP_IF_GT: + case OP_IF_LE: + *target = curOffset + (int) insn->dalvikInsn.vC; + break; + + case OP_IF_EQZ: + case OP_IF_NEZ: + case OP_IF_LTZ: + case OP_IF_GEZ: + case OP_IF_GTZ: + case OP_IF_LEZ: + *target = curOffset + (int) insn->dalvikInsn.vB; + break; + + default: + return false; + } return true; +} + +/* + * Identify conditional branch instructions + */ +static inline bool isUnconditionalBranch(MIR *insn) +{ + switch (insn->dalvikInsn.opCode) { + case OP_RETURN_VOID: + case OP_RETURN: + case OP_RETURN_WIDE: + case OP_RETURN_OBJECT: + case OP_GOTO: + case OP_GOTO_16: + case OP_GOTO_32: + return true; + default: + return false; + } +} + +/* + * Main entry point to start trace compilation. Basic blocks are constructed + * first and they will be passed to the codegen routines to convert Dalvik + * bytecode into machine code. 
+ */ +void *dvmCompileTrace(JitTraceDescription *desc) +{ + const DexCode *dexCode = dvmGetMethodCode(desc->method); + const JitTraceRun* currRun = &desc->trace[0]; + bool done = false; + unsigned int curOffset = currRun->frag.startOffset; + unsigned int numInsts = currRun->frag.numInsts; + const u2 *codePtr = dexCode->insns + curOffset; + int traceSize = 0; + const u2 *startCodePtr = codePtr; + BasicBlock *startBB, *curBB, *lastBB; + int numBlocks = 0; + static int compilationId; + CompilationUnit cUnit; + memset(&cUnit, 0, sizeof(CompilationUnit)); + + /* Initialize the printMe flag */ + cUnit.printMe = gDvmJit.printMe; + + /* Identify traces that we don't want to compile */ + if (gDvmJit.methodTable) { + int len = strlen(desc->method->clazz->descriptor) + + strlen(desc->method->name) + 1; + char *fullSignature = dvmCompilerNew(len, true); + strcpy(fullSignature, desc->method->clazz->descriptor); + strcat(fullSignature, desc->method->name); + + int hashValue = dvmComputeUtf8Hash(fullSignature); + + /* + * Doing three levels of screening to see whether we want to skip + * compiling this method + */ + + /* First, check the full "class;method" signature */ + bool methodFound = + dvmHashTableLookup(gDvmJit.methodTable, hashValue, + fullSignature, (HashCompareFunc) strcmp, + false) != + NULL; + + /* Full signature not found - check the enclosing class */ + if (methodFound == false) { + int hashValue = dvmComputeUtf8Hash(desc->method->clazz->descriptor); + methodFound = + dvmHashTableLookup(gDvmJit.methodTable, hashValue, + (char *) desc->method->clazz->descriptor, + (HashCompareFunc) strcmp, false) != + NULL; + /* Enclosing class not found - check the method name */ + if (methodFound == false) { + int hashValue = dvmComputeUtf8Hash(desc->method->name); + methodFound = + dvmHashTableLookup(gDvmJit.methodTable, hashValue, + (char *) desc->method->name, + (HashCompareFunc) strcmp, false) != + NULL; + } + } + + /* + * Under the following conditions, the trace will be *conservatively* + * compiled by only containing single-step instructions to and from the + * interpreter. + * 1) If includeSelectedMethod == false, the method matches the full or + * partial signature stored in the hash table. + * + * 2) If includeSelectedMethod == true, the method does not match the + * full and partial signature stored in the hash table. 
+ */ + if (gDvmJit.includeSelectedMethod != methodFound) { + cUnit.allSingleStep = true; + } else { + /* Compile the trace as normal */ + + /* Print the method we cherry picked */ + if (gDvmJit.includeSelectedMethod == true) { + cUnit.printMe = true; + } + } + } + + /* Allocate the first basic block */ + lastBB = startBB = curBB = dvmCompilerNewBB(DALVIK_BYTECODE); + curBB->startOffset = curOffset; + curBB->id = numBlocks++; + + if (cUnit.printMe) { + LOGD("--------\nCompiler: Building trace for %s, offset 0x%x\n", + desc->method->name, curOffset); + } + + while (!done) { + MIR *insn; + int width; + insn = dvmCompilerNew(sizeof(MIR),false); + insn->offset = curOffset; + width = parseInsn(codePtr, &insn->dalvikInsn, cUnit.printMe); + insn->width = width; + traceSize += width; + dvmCompilerAppendMIR(curBB, insn); + if (--numInsts==0) { + if (currRun->frag.runEnd) { + done = true; + } else { + curBB = dvmCompilerNewBB(DALVIK_BYTECODE); + lastBB->next = curBB; + lastBB = curBB; + curBB->id = numBlocks++; + currRun++; + curOffset = currRun->frag.startOffset; + numInsts = currRun->frag.numInsts; + curBB->startOffset = curOffset; + codePtr = dexCode->insns + curOffset; + } + } else { + curOffset += width; + codePtr += width; + } + } + + /* + * Now scan basic blocks containing real code to connect the + * taken/fallthrough links. Also create chaining cells for code not included + * in the trace. + */ + for (curBB = startBB; curBB; curBB = curBB->next) { + MIR *lastInsn = curBB->lastMIRInsn; + /* Hit a pseudo block - exit the search now */ + if (lastInsn == NULL) { + break; + } + curOffset = lastInsn->offset; + unsigned int targetOffset = curOffset; + unsigned int fallThroughOffset = curOffset + lastInsn->width; + bool isInvoke = false; + const Method *callee = NULL; + + findBlockBoundary(desc->method, curBB->lastMIRInsn, curOffset, + &targetOffset, &isInvoke, &callee); + + /* Link the taken and fallthrough blocks */ + BasicBlock *searchBB; + + /* No backward branch in the trace - start searching the next BB */ + for (searchBB = curBB->next; searchBB; searchBB = searchBB->next) { + if (targetOffset == searchBB->startOffset) { + curBB->taken = searchBB; + } + if (fallThroughOffset == searchBB->startOffset) { + curBB->fallThrough = searchBB; + } + } + + /* Target block not included in the trace */ + if (targetOffset != curOffset && curBB->taken == NULL) { + lastBB->next = dvmCompilerNewBB( + isInvoke ? CHAINING_CELL_INVOKE : CHAINING_CELL_GENERIC); + lastBB = lastBB->next; + lastBB->id = numBlocks++; + if (isInvoke) { + lastBB->startOffset = 0; + lastBB->containingMethod = callee; + } else { + lastBB->startOffset = targetOffset; + } + curBB->taken = lastBB; + } + + /* Fallthrough block not included in the trace */ + if (!isUnconditionalBranch(lastInsn) && curBB->fallThrough == NULL) { + lastBB->next = dvmCompilerNewBB( + isInvoke ? 
CHAINING_CELL_POST_INVOKE : CHAINING_CELL_GENERIC); + lastBB = lastBB->next; + lastBB->id = numBlocks++; + lastBB->startOffset = fallThroughOffset; + curBB->fallThrough = lastBB; + } + } + + /* Now create a special block to host PC reconstruction code */ + lastBB->next = dvmCompilerNewBB(PC_RECONSTRUCTION); + lastBB = lastBB->next; + lastBB->id = numBlocks++; + + /* And one final block that publishes the PC and raise the exception */ + lastBB->next = dvmCompilerNewBB(EXCEPTION_HANDLING); + lastBB = lastBB->next; + lastBB->id = numBlocks++; + + if (cUnit.printMe) { + LOGD("TRACEINFO (%d): 0x%08x %s%s 0x%x %d of %d, %d blocks", + compilationId++, + (intptr_t) desc->method->insns, + desc->method->clazz->descriptor, + desc->method->name, + desc->trace[0].frag.startOffset, + traceSize, + dexCode->insnsSize, + numBlocks); + } + + BasicBlock **blockList; + + cUnit.method = desc->method; + cUnit.traceDesc = desc; + cUnit.numBlocks = numBlocks; + dvmInitGrowableList(&cUnit.pcReconstructionList, 8); + blockList = cUnit.blockList = + dvmCompilerNew(sizeof(BasicBlock *) * numBlocks, true); + + int i; + + for (i = 0, curBB = startBB; i < numBlocks; i++) { + blockList[i] = curBB; + curBB = curBB->next; + } + /* Make sure all blocks are added to the cUnit */ + assert(curBB == NULL); + + if (cUnit.printMe) { + dvmCompilerDumpCompilationUnit(&cUnit); + } + + /* Convert MIR to LIR, etc. */ + dvmCompilerMIR2LIR(&cUnit); + + /* Convert LIR into machine code */ + dvmCompilerAssembleLIR(&cUnit); + + if (cUnit.printMe) { + dvmCompilerCodegenDump(&cUnit); + LOGD("End %s%s", desc->method->clazz->descriptor, desc->method->name); + } + + /* Reset the compiler resource pool */ + dvmCompilerArenaReset(); + + return cUnit.baseAddr; +} + +/* + * Similar to dvmCompileTrace, but the entity processed here is the whole + * method. + * + * TODO: implementation will be revisited when the trace builder can provide + * whole-method traces. + */ +void *dvmCompileMethod(Method *method) +{ + const DexCode *dexCode = dvmGetMethodCode(method); + const u2 *codePtr = dexCode->insns; + const u2 *codeEnd = dexCode->insns + dexCode->insnsSize; + int blockID = 0; + unsigned int curOffset = 0; + + BasicBlock *firstBlock = dvmCompilerNewBB(DALVIK_BYTECODE); + firstBlock->id = blockID++; + + /* Allocate the bit-vector to track the beginning of basic blocks */ + BitVector *bbStartAddr = dvmAllocBitVector(dexCode->insnsSize+1, false); + dvmSetBit(bbStartAddr, 0); + + /* + * Sequentially go through every instruction first and put them in a single + * basic block. Identify block boundaries at the mean time. + */ + while (codePtr < codeEnd) { + MIR *insn = dvmCompilerNew(sizeof(MIR), false); + insn->offset = curOffset; + int width = parseInsn(codePtr, &insn->dalvikInsn, false); + bool isInvoke = false; + const Method *callee; + insn->width = width; + + dvmCompilerAppendMIR(firstBlock, insn); + /* + * Check whether this is a block ending instruction and whether it + * suggests the start of a new block + */ + unsigned int target = curOffset; + + /* + * If findBlockBoundary returns true, it means the current instruction + * is terminating the current block. If it is a branch, the target + * address will be recorded in target. 
+ */ + if (findBlockBoundary(method, insn, curOffset, &target, &isInvoke, + &callee)) { + dvmSetBit(bbStartAddr, curOffset + width); + if (target != curOffset) { + dvmSetBit(bbStartAddr, target); + } + } + + codePtr += width; + /* each bit represents 16-bit quantity */ + curOffset += width; + } + + /* + * The number of blocks will be equal to the number of bits set to 1 in the + * bit vector minus 1, because the bit representing the location after the + * last instruction is set to one. + */ + int numBlocks = dvmCountSetBits(bbStartAddr); + if (dvmIsBitSet(bbStartAddr, dexCode->insnsSize)) { + numBlocks--; + } + + CompilationUnit cUnit; + BasicBlock **blockList; + + memset(&cUnit, 0, sizeof(CompilationUnit)); + cUnit.method = method; + blockList = cUnit.blockList = + dvmCompilerNew(sizeof(BasicBlock *) * numBlocks, true); + + /* + * Register the first block onto the list and start split it into block + * boundaries from there. + */ + blockList[0] = firstBlock; + cUnit.numBlocks = 1; + + int i; + for (i = 0; i < numBlocks; i++) { + MIR *insn; + BasicBlock *curBB = blockList[i]; + curOffset = curBB->lastMIRInsn->offset; + + for (insn = curBB->firstMIRInsn->next; insn; insn = insn->next) { + /* Found the beginning of a new block, see if it is created yet */ + if (dvmIsBitSet(bbStartAddr, insn->offset)) { + int j; + for (j = 0; j < cUnit.numBlocks; j++) { + if (blockList[j]->firstMIRInsn->offset == insn->offset) + break; + } + + /* Block not split yet - do it now */ + if (j == cUnit.numBlocks) { + BasicBlock *newBB = dvmCompilerNewBB(DALVIK_BYTECODE); + newBB->id = blockID++; + newBB->firstMIRInsn = insn; + newBB->lastMIRInsn = curBB->lastMIRInsn; + curBB->lastMIRInsn = insn->prev; + insn->prev->next = NULL; + insn->prev = NULL; + + /* + * If the insn is not an unconditional branch, set up the + * fallthrough link. + */ + if (!isUnconditionalBranch(curBB->lastMIRInsn)) { + curBB->fallThrough = newBB; + } + + /* enqueue the new block */ + blockList[cUnit.numBlocks++] = newBB; + break; + } + } + } + } + + if (numBlocks != cUnit.numBlocks) { + LOGE("Expect %d vs %d basic blocks\n", numBlocks, cUnit.numBlocks); + dvmAbort(); + } + + dvmFreeBitVector(bbStartAddr); + + /* Connect the basic blocks through the taken links */ + for (i = 0; i < numBlocks; i++) { + BasicBlock *curBB = blockList[i]; + MIR *insn = curBB->lastMIRInsn; + unsigned int target = insn->offset; + bool isInvoke; + const Method *callee; + + findBlockBoundary(method, insn, target, &target, &isInvoke, &callee); + + /* Found a block ended on a branch */ + if (target != insn->offset) { + int j; + /* Forward branch */ + if (target > insn->offset) { + j = i + 1; + } else { + /* Backward branch */ + j = 0; + } + for (; j < numBlocks; j++) { + if (blockList[j]->firstMIRInsn->offset == target) { + curBB->taken = blockList[j]; + break; + } + } + + if (j == numBlocks) { + LOGE("Target not found for insn %x: expect target %x\n", + curBB->lastMIRInsn->offset, target); + dvmAbort(); + } + } + } + + dvmCompilerMIR2LIR(&cUnit); + + dvmCompilerAssembleLIR(&cUnit); + + dvmCompilerDumpCompilationUnit(&cUnit); + + dvmCompilerArenaReset(); + + return cUnit.baseAddr; +} diff --git a/vm/compiler/IntermediateRep.c b/vm/compiler/IntermediateRep.c new file mode 100644 index 000000000..2596aab92 --- /dev/null +++ b/vm/compiler/IntermediateRep.c @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dalvik.h" +#include "CompilerInternals.h" + +/* Allocate a new basic block */ +BasicBlock *dvmCompilerNewBB(BBType blockType) +{ + BasicBlock *bb = dvmCompilerNew(sizeof(BasicBlock), true); + bb->blockType = blockType; + return bb; +} + +/* Insert an MIR instruction to the end of a basic block */ +void dvmCompilerAppendMIR(BasicBlock *bb, MIR *mir) +{ + if (bb->firstMIRInsn == NULL) { + assert(bb->firstMIRInsn == NULL); + bb->lastMIRInsn = bb->firstMIRInsn = mir; + mir->prev = mir->next = NULL; + } else { + bb->lastMIRInsn->next = mir; + mir->prev = bb->lastMIRInsn; + mir->next = NULL; + bb->lastMIRInsn = mir; + } +} + +/* + * Append an LIR instruction to the LIR list maintained by a compilation + * unit + */ +void dvmCompilerAppendLIR(CompilationUnit *cUnit, LIR *lir) +{ + if (cUnit->firstLIRInsn == NULL) { + assert(cUnit->lastLIRInsn == NULL); + cUnit->lastLIRInsn = cUnit->firstLIRInsn = lir; + lir->prev = lir->next = NULL; + } else { + cUnit->lastLIRInsn->next = lir; + lir->prev = cUnit->lastLIRInsn; + lir->next = NULL; + cUnit->lastLIRInsn = lir; + } +} diff --git a/vm/compiler/Utility.c b/vm/compiler/Utility.c new file mode 100644 index 000000000..9d8e08861 --- /dev/null +++ b/vm/compiler/Utility.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Dalvik.h" +#include "CompilerInternals.h" + +static ArenaMemBlock *arenaHead, *currentArena; +static int numArenaBlocks; + +/* Allocate the initial memory block for arena-based allocation */ +bool dvmCompilerHeapInit(void) +{ + assert(arenaHead == NULL); + arenaHead = + (ArenaMemBlock *) malloc(sizeof(ArenaMemBlock) + ARENA_DEFAULT_SIZE); + if (arenaHead == NULL) { + LOGE("No memory left to create compiler heap memory\n"); + return false; + } + currentArena = arenaHead; + currentArena->bytesAllocated = 0; + currentArena->next = NULL; + numArenaBlocks = 1; + + return true; +} + +/* Arena-based malloc for compilation tasks */ +void * dvmCompilerNew(size_t size, bool zero) +{ + size = (size + 3) & ~3; +retry: + /* Normal case - space is available in the current page */ + if (size + currentArena->bytesAllocated <= ARENA_DEFAULT_SIZE) { + void *ptr; + ptr = ¤tArena->ptr[currentArena->bytesAllocated]; + currentArena->bytesAllocated += size; + if (zero) { + memset(ptr, 0, size); + } + return ptr; + } else { + /* + * See if there are previously allocated arena blocks before the last + * reset + */ + if (currentArena->next) { + currentArena = currentArena->next; + goto retry; + } + /* + * If we allocate really large variable-sized data structures that + * could go above the limit we need to enhance the allocation + * mechanism. + */ + if (size > ARENA_DEFAULT_SIZE) { + LOGE("Requesting %d bytes which exceed the maximal size allowed\n", + size); + return NULL; + } + /* Time to allocate a new arena */ + ArenaMemBlock *newArena = (ArenaMemBlock *) + malloc(sizeof(ArenaMemBlock) + ARENA_DEFAULT_SIZE); + newArena->bytesAllocated = 0; + newArena->next = NULL; + currentArena->next = newArena; + currentArena = newArena; + numArenaBlocks++; + goto retry; + } + return NULL; +} + +/* Reclaim all the arena blocks allocated so far */ +void dvmCompilerArenaReset(void) +{ + ArenaMemBlock *block; + + for (block = arenaHead; block; block = block->next) { + block->bytesAllocated = 0; + } + currentArena = arenaHead; +} + +/* Growable List initialization */ +void dvmInitGrowableList(GrowableList *gList, size_t initLength) +{ + gList->numAllocated = initLength; + gList->numUsed = 0; + gList->elemList = (void **) dvmCompilerNew(sizeof(void *) * initLength, + true); +} + +/* Expand the capacity of a growable list */ +static void expandGrowableList(GrowableList *gList) +{ + int newLength = gList->numAllocated; + if (newLength < 128) { + newLength <<= 1; + } else { + newLength += 128; + } + void *newArray = dvmCompilerNew(sizeof(void *) * newLength, true); + memcpy(newArray, gList->elemList, sizeof(void *) * gList->numAllocated); + gList->numAllocated = newLength; + gList->elemList = newArray; +} + +/* Insert a new element into the growable list */ +void dvmInsertGrowableList(GrowableList *gList, void *elem) +{ + if (gList->numUsed == gList->numAllocated) { + expandGrowableList(gList); + } + gList->elemList[gList->numUsed++] = elem; +} + +/* Debug Utility - dump a compilation unit */ +void dvmCompilerDumpCompilationUnit(CompilationUnit *cUnit) +{ + int i; + BasicBlock *bb; + LOGD("%d blocks in total\n", cUnit->numBlocks); + + for (i = 0; i < cUnit->numBlocks; i++) { + bb = cUnit->blockList[i]; + LOGD("Block %d (insn %04x - %04x%s)\n", + bb->id, bb->startOffset, + bb->lastMIRInsn ? bb->lastMIRInsn->offset : bb->startOffset, + bb->lastMIRInsn ? 
"" : " empty"); + if (bb->taken) { + LOGD(" Taken branch: block %d (%04x)\n", + bb->taken->id, bb->taken->startOffset); + } + if (bb->fallThrough) { + LOGD(" Fallthrough : block %d (%04x)\n", + bb->fallThrough->id, bb->fallThrough->startOffset); + } + } +} + +/* + * Dump the current stats of the compiler, including number of bytes used in + * the code cache, arena size, and work queue length, and various JIT stats. + */ +void dvmCompilerDumpStats(void) +{ + LOGD("%d compilations using %d bytes", + gDvmJit.numCompilations, gDvmJit.codeCacheByteUsed); + LOGD("Compiler arena uses %d blocks (%d bytes each)", + numArenaBlocks, ARENA_DEFAULT_SIZE); + LOGD("Compiler work queue length is %d/%d", gDvmJit.compilerQueueLength, + gDvmJit.compilerMaxQueued); + dvmJitStats(); + dvmCompilerArchDump(); +} diff --git a/vm/compiler/codegen/CompilerCodegen.h b/vm/compiler/codegen/CompilerCodegen.h new file mode 100644 index 000000000..97077b415 --- /dev/null +++ b/vm/compiler/codegen/CompilerCodegen.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../CompilerIR.h" + +#ifndef _DALVIK_VM_COMPILERCODEGEN_H_ +#define _DALVIK_VM_COMPILERCODEGEN_H_ + +/* Work unit is architecture dependent */ +void *dvmCompilerDoWork(CompilerWorkOrder *work); + +/* Lower middle-level IR to low-level IR */ +void dvmCompilerMIR2LIR(CompilationUnit *cUnit); + +/* Assemble LIR into machine code */ +void dvmCompilerAssembleLIR(CompilationUnit *cUnit); + +/* Implemented in the codegen//ArchUtility.c */ +void dvmCompilerCodegenDump(CompilationUnit *cUnit); + +/* Implemented in the codegen//Assembler.c */ +void* dvmJitChain(void *tgtAddr, u4* branchAddr); + +#endif /* _DALVIK_VM_COMPILERCODEGEN_H_ */ diff --git a/vm/compiler/codegen/armv5te/ArchUtility.c b/vm/compiler/codegen/armv5te/ArchUtility.c new file mode 100644 index 000000000..58b181b7e --- /dev/null +++ b/vm/compiler/codegen/armv5te/ArchUtility.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../../CompilerInternals.h" +#include "dexdump/OpCodeNames.h" +#include "Armv5teLIR.h" + +/* Decode and print a ARM register name */ +static char * decodeRegList(int vector, char *buf) +{ + int i; + bool printed = false; + buf[0] = 0; + for (i = 0; i < 8; i++, vector >>= 1) { + if (vector & 0x1) { + if (printed) { + sprintf(buf + strlen(buf), ", r%d", i); + } else { + printed = true; + sprintf(buf, "r%d", i); + } + } + } + return buf; +} + +/* + * Interpret a format string and build a string no longer than size + * See format key in Assemble.c. + */ +static void buildInsnString(char *fmt, Armv5teLIR *lir, char* buf, + unsigned char *baseAddr, int size) +{ + int i; + char *bufEnd = &buf[size-1]; + char *fmtEnd = &fmt[strlen(fmt)]; + char tbuf[256]; + char nc; + while (fmt < fmtEnd) { + int operand; + if (*fmt == '!') { + fmt++; + assert(fmt < fmtEnd); + nc = *fmt++; + if (nc=='!') { + strcpy(tbuf, "!"); + } else { + assert(fmt < fmtEnd); + assert((unsigned)(nc-'0') < 3); + operand = lir->operands[nc-'0']; + switch(*fmt++) { + case 'h': + sprintf(tbuf,"%04x", operand); + break; + case 'd': + sprintf(tbuf,"%d", operand); + break; + case 'D': + sprintf(tbuf,"%d", operand+8); + break; + case 'E': + sprintf(tbuf,"%d", operand*4); + break; + case 'F': + sprintf(tbuf,"%d", operand*2); + break; + case 'c': + switch (operand) { + case ARM_COND_EQ: + strcpy(tbuf, "beq"); + break; + case ARM_COND_NE: + strcpy(tbuf, "bne"); + break; + case ARM_COND_LT: + strcpy(tbuf, "blt"); + break; + case ARM_COND_GE: + strcpy(tbuf, "bge"); + break; + case ARM_COND_GT: + strcpy(tbuf, "bgt"); + break; + case ARM_COND_LE: + strcpy(tbuf, "ble"); + break; + case ARM_COND_CS: + strcpy(tbuf, "bcs"); + break; + default: + strcpy(tbuf, ""); + break; + } + break; + case 't': + sprintf(tbuf,"0x%08x", + (int) baseAddr + lir->generic.offset + 4 + + (operand << 1)); + break; + case 'u': { + int offset_1 = lir->operands[0]; + int offset_2 = NEXT_LIR(lir)->operands[0]; + intptr_t target = + ((((intptr_t) baseAddr + lir->generic.offset + 4) & + ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) & + 0xfffffffc; + sprintf(tbuf, "%p", (void *) target); + break; + } + + /* Nothing to print for BLX_2 */ + case 'v': + strcpy(tbuf, "see above"); + break; + case 'R': + decodeRegList(operand, tbuf); + break; + default: + strcpy(tbuf,"DecodeError"); + break; + } + if (buf+strlen(tbuf) <= bufEnd) { + strcpy(buf, tbuf); + buf += strlen(tbuf); + } else { + break; + } + } + } else { + *buf++ = *fmt++; + } + if (buf == bufEnd) + break; + } + *buf = 0; +} + +/* Pretty-print a LIR instruction */ +static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr) +{ + Armv5teLIR *lir = (Armv5teLIR *) arg; + char buf[256]; + char opName[256]; + int offset = lir->generic.offset; + int dest = lir->operands[0]; + u2 *cPtr = (u2*)baseAddr; + /* Handle pseudo-ops individually, and all regular insns as a group */ + switch(lir->opCode) { + case ARMV5TE_PSEUDO_TARGET_LABEL: + break; + case ARMV5TE_PSEUDO_CHAINING_CELL_GENERIC: + LOGD("-------- chaining cell (generic): 0x%04x\n", dest); + break; + case ARMV5TE_PSEUDO_CHAINING_CELL_POST_INVOKE: + LOGD("-------- chaining cell (post-invoke): 0x%04x\n", dest); + break; + case ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE: + LOGD("-------- chaining cell (invoke): %s/%p\n", + ((Method *)dest)->name, + ((Method *)dest)->insns); + break; + case ARMV5TE_PSEUDO_DALVIK_BYTECODE_BOUNDARY: + LOGD("-------- dalvik offset: 0x%04x @ %s\n", dest, + getOpcodeName(lir->operands[1])); + break; + case ARMV5TE_PSEUDO_ALIGN4: + LOGD("%p 
(%04x): .align4\n", baseAddr + offset, offset); + break; + case ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL: + LOGD("-------- reconstruct dalvik PC : 0x%04x @ +0x%04x\n", dest, + lir->operands[1]); + break; + case ARMV5TE_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL: + /* Do nothing */ + break; + case ARMV5TE_PSEUDO_EH_BLOCK_LABEL: + LOGD("Exception_Handling:\n"); + break; + case ARMV5TE_PSEUDO_NORMAL_BLOCK_LABEL: + LOGD("L%#06x:\n", dest); + break; + default: + buildInsnString(EncodingMap[lir->opCode].name, lir, opName, + baseAddr, 256); + buildInsnString(EncodingMap[lir->opCode].fmt, lir, buf, baseAddr, + 256); + LOGD("%p (%04x): %-8s%s\n", baseAddr + offset, offset, opName, buf); + break; + } +} + +/* Dump instructions and constant pool contents */ +void dvmCompilerCodegenDump(CompilationUnit *cUnit) +{ + LOGD("Dumping LIR insns\n"); + LIR *lirInsn; + Armv5teLIR *armLIR; + + LOGD("installed code is at %p\n", cUnit->baseAddr); + LOGD("total size is %d bytes\n", cUnit->totalSize); + for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) { + dumpLIRInsn(lirInsn, cUnit->baseAddr); + } + for (lirInsn = cUnit->wordList; lirInsn; lirInsn = lirInsn->next) { + armLIR = (Armv5teLIR *) lirInsn; + LOGD("%p (%04x): .word (0x%x)\n", + cUnit->baseAddr + armLIR->generic.offset, armLIR->generic.offset, + armLIR->operands[0]); + } +} diff --git a/vm/compiler/codegen/armv5te/Armv5teLIR.h b/vm/compiler/codegen/armv5te/Armv5teLIR.h new file mode 100644 index 000000000..208e6c0eb --- /dev/null +++ b/vm/compiler/codegen/armv5te/Armv5teLIR.h @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dalvik.h" +#include "compiler/CompilerInternals.h" + +#ifndef _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_H +#define _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_H + +/* + * r0, r1, r2, r3, and r7 are always scratch + * r4PC is scratch if used solely in the compiled land. Otherwise it holds the + * Dalvik PC. + * rFP holds the current frame pointer + * rGLUE holds &InterpState + */ +typedef enum NativeRegisterPool { + r0 = 0, + r1 = 1, + r2 = 2, + r3 = 3, + r4PC = 4, + rFP = 5, + rGLUE = 6, + r7 = 7, +} NativeRegisterPool; + +/* Thumb condition encodings */ +typedef enum Armv5teConditionCode { + ARM_COND_EQ = 0x0, /* 0000 */ + ARM_COND_NE = 0x1, /* 0001 */ + ARM_COND_LT = 0xb, /* 1011 */ + ARM_COND_GE = 0xa, /* 1010 */ + ARM_COND_GT = 0xc, /* 1100 */ + ARM_COND_LE = 0xd, /* 1101 */ + ARM_COND_CS = 0x2, /* 0010 */ + ARM_COND_MI = 0x4, /* 0100 */ +} Armv5teConditionCode; + +#define isPseudoOpCode(opCode) ((int)(opCode) < 0) + +/* + * The following enum defines the list of supported Thumb instructions by the + * assembler. Their corresponding snippet positions will be defined in + * Assemble.c. 
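The 'R' format key handled by buildInsnString above is expanded by decodeRegList, which turns the 8-bit register mask of an ldmia/stmia/push/pop into readable text. A standalone sketch of that decoding under the same convention (the helper name is hypothetical, and it uses snprintf/strncat instead of the original's sprintf-into-the-same-buffer pattern):

#include <stdio.h>
#include <string.h>

/* Decode an 8-bit Thumb register mask into "r0, r3, r7"-style text,
 * mirroring decodeRegList in ArchUtility.c */
static char *decodeMask(int vector, char *buf, size_t bufLen)
{
    buf[0] = '\0';
    for (int i = 0; i < 8; i++, vector >>= 1) {
        if (vector & 0x1) {
            char reg[8];
            snprintf(reg, sizeof(reg), "%sr%d", buf[0] ? ", " : "", i);
            strncat(buf, reg, bufLen - strlen(buf) - 1);
        }
    }
    return buf;
}

int main(void)
{
    char buf[64];
    /* 0x8B has bits 0, 1, 3, 7 set, so this prints "r0, r1, r3, r7" */
    printf("%s\n", decodeMask(0x8B, buf, sizeof(buf)));
    return 0;
}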
+ */ +typedef enum Armv5teOpCode { + ARMV5TE_PSEUDO_TARGET_LABEL = -10, + ARMV5TE_PSEUDO_CHAINING_CELL_POST_INVOKE = -9, + ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE = -8, + ARMV5TE_PSEUDO_CHAINING_CELL_GENERIC = -7, + ARMV5TE_PSEUDO_DALVIK_BYTECODE_BOUNDARY = -6, + ARMV5TE_PSEUDO_ALIGN4 = -5, + ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL = -4, + ARMV5TE_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL = -3, + ARMV5TE_PSEUDO_EH_BLOCK_LABEL = -2, + ARMV5TE_PSEUDO_NORMAL_BLOCK_LABEL = -1, + /************************************************************************/ + ARMV5TE_16BIT_DATA, /* DATA [0] rd[15..0] */ + ARMV5TE_ADC, /* adc [0100000101] rm[5..3] rd[2..0] */ + ARMV5TE_ADD_RRI3, /* add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0]*/ + ARMV5TE_ADD_RI8, /* add(2) [00110] rd[10..8] imm_8[7..0] */ + ARMV5TE_ADD_RRR, /* add(3) [0001100] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_ADD_RR_LH, /* add(4) [01000100] H12[01] rm[5..3] rd[2..0] */ + ARMV5TE_ADD_RR_HL, /* add(4) [01001000] H12[10] rm[5..3] rd[2..0] */ + ARMV5TE_ADD_RR_HH, /* add(4) [01001100] H12[11] rm[5..3] rd[2..0] */ + ARMV5TE_ADD_PC_REL, /* add(5) [10100] rd[10..8] imm_8[7..0] */ + ARMV5TE_ADD_SP_REL, /* add(6) [10101] rd[10..8] imm_8[7..0] */ + ARMV5TE_ADD_SPI7, /* add(7) [101100000] imm_7[6..0] */ + ARMV5TE_AND_RR, /* and [0100000000] rm[5..3] rd[2..0] */ + ARMV5TE_ASR, /* asr(1) [00010] imm_5[10..6] rm[5..3] rd[2..0] */ + ARMV5TE_ASRV, /* asr(2) [0100000100] rs[5..3] rd[2..0] */ + ARMV5TE_B_COND, /* b(1) [1101] cond[11..8] offset_8[7..0] */ + ARMV5TE_B_UNCOND, /* b(2) [11100] offset_11[10..0] */ + ARMV5TE_BIC, /* bic [0100001110] rm[5..3] rd[2..0] */ + ARMV5TE_BKPT, /* bkpt [10111110] imm_8[7..0] */ + ARMV5TE_BLX_1, /* blx(1) [111] H[10] offset_11[10..0] */ + ARMV5TE_BLX_2, /* blx(1) [111] H[01] offset_11[10..0] */ + ARMV5TE_BL_1, /* blx(1) [111] H[10] offset_11[10..0] */ + ARMV5TE_BL_2, /* blx(1) [111] H[11] offset_11[10..0] */ + ARMV5TE_BLX_R, /* blx(2) [010001111] H2[6..6] rm[5..3] SBZ[000] */ + ARMV5TE_BX, /* bx [010001110] H2[6..6] rm[5..3] SBZ[000] */ + ARMV5TE_CMN, /* cmn [0100001011] rm[5..3] rd[2..0] */ + ARMV5TE_CMP_RI8, /* cmp(1) [00101] rn[10..8] imm_8[7..0] */ + ARMV5TE_CMP_RR, /* cmp(2) [0100001010] rm[5..3] rd[2..0] */ + ARMV5TE_CMP_LH, /* cmp(3) [01000101] H12[01] rm[5..3] rd[2..0] */ + ARMV5TE_CMP_HL, /* cmp(3) [01000110] H12[10] rm[5..3] rd[2..0] */ + ARMV5TE_CMP_HH, /* cmp(3) [01000111] H12[11] rm[5..3] rd[2..0] */ + ARMV5TE_EOR, /* eor [0100000001] rm[5..3] rd[2..0] */ + ARMV5TE_LDMIA, /* ldmia [11001] rn[10..8] reglist [7..0] */ + ARMV5TE_LDR_RRI5, /* ldr(1) [01101] imm_5[10..6] rn[5..3] rd[2..0] */ + ARMV5TE_LDR_RRR, /* ldr(2) [0101100] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_LDR_PC_REL, /* ldr(3) [01001] rd[10..8] imm_8[7..0] */ + ARMV5TE_LDR_SP_REL, /* ldr(4) [10011] rd[10..8] imm_8[7..0] */ + ARMV5TE_LDRB_RRI5, /* ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0] */ + ARMV5TE_LDRB_RRR, /* ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_LDRH_RRI5, /* ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0] */ + ARMV5TE_LDRH_RRR, /* ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_LDRSB_RRR, /* ldrsb [0101011] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_LDRSH_RRR, /* ldrsh [0101111] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_LSL, /* lsl(1) [00000] imm_5[10..6] rm[5..3] rd[2..0] */ + ARMV5TE_LSLV, /* lsl(2) [0100000010] rs[5..3] rd[2..0] */ + ARMV5TE_LSR, /* lsr(1) [00001] imm_5[10..6] rm[5..3] rd[2..0] */ + ARMV5TE_LSRV, /* lsr(2) [0100000011] rs[5..3] rd[2..0] */ + ARMV5TE_MOV_IMM, /* mov(1) [00100] rd[10..8] imm_8[7..0] */ + 
ARMV5TE_MOV_RR, /* mov(2) [0001110000] rn[5..3] rd[2..0] */ + ARMV5TE_MOV_RR_HL, /* mov(3) [01000110] H12[10] rm[5..3] rd[2..0] */ + ARMV5TE_MOV_RR_LH, /* mov(3) [01000101] H12[01] rm[5..3] rd[2..0] */ + ARMV5TE_MOV_RR_HH, /* mov(3) [01000111] H12[11] rm[5..3] rd[2..0] */ + ARMV5TE_MUL, /* mul [0100001101] rm[5..3] rd[2..0] */ + ARMV5TE_MVN, /* mvn [0100001111] rm[5..3] rd[2..0] */ + ARMV5TE_NEG, /* neg [0100001001] rm[5..3] rd[2..0] */ + ARMV5TE_ORR, /* orr [0100001100] rm[5..3] rd[2..0] */ + ARMV5TE_POP, /* pop [1011110] r[8..8] rl[7..0] */ + ARMV5TE_PUSH, /* push [1011010] r[8..8] rl[7..0] */ + ARMV5TE_ROR, /* ror [0100000111] rs[5..3] rd[2..0] */ + ARMV5TE_SBC, /* sbc [0100000110] rm[5..3] rd[2..0] */ + ARMV5TE_STMIA, /* stmia [11000] rn[10..8] reglist [7.. 0] */ + ARMV5TE_STR_RRI5, /* str(1) [01100] imm_5[10..6] rn[5..3] rd[2..0] */ + ARMV5TE_STR_RRR, /* str(2) [0101000] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_STR_SP_REL, /* str(3) [10010] rd[10..8] imm_8[7..0] */ + ARMV5TE_STRB_RRI5, /* strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0] */ + ARMV5TE_STRB_RRR, /* strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_STRH_RRI5, /* strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0] */ + ARMV5TE_STRH_RRR, /* strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_SUB_RRI3, /* sub(1) [0001111] imm_3[8..6] rn[5..3] rd[2..0]*/ + ARMV5TE_SUB_RI8, /* sub(2) [00111] rd[10..8] imm_8[7..0] */ + ARMV5TE_SUB_RRR, /* sub(3) [0001101] rm[8..6] rn[5..3] rd[2..0] */ + ARMV5TE_SUB_SPI7, /* sub(4) [101100001] imm_7[6..0] */ + ARMV5TE_SWI, /* swi [11011111] imm_8[7..0] */ + ARMV5TE_TST, /* tst [0100001000] rm[5..3] rn[2..0] */ + ARMV5TE_LAST, +} Armv5teOpCode; + +/* Struct used to define the snippet posotions for each Thumb opcode */ +typedef struct Armv5teEncodingMap { + short skeleton; + struct { + int end; + int start; + } fieldLoc[3]; + Armv5teOpCode opCode; + int operands; + char *name; + char* fmt; +} Armv5teEncodingMap; + +extern Armv5teEncodingMap EncodingMap[ARMV5TE_LAST]; + +/* + * Each instance of this struct holds a pseudo or real LIR instruction: + * - pesudo ones (eg labels and marks) and will be discarded by the assembler. + * - real ones will e assembled into Thumb instructions. + */ +typedef struct Armv5teLIR { + LIR generic; + Armv5teOpCode opCode; + int operands[3]; /* dest, src1, src2 */ +} Armv5teLIR; + +/* Utility macros to traverse the LIR/Armv5teLIR list */ +#define NEXT_LIR(lir) ((Armv5teLIR *) lir->generic.next) +#define PREV_LIR(lir) ((Armv5teLIR *) lir->generic.prev) + +#define NEXT_LIR_LVALUE(lir) (lir)->generic.next +#define PREV_LIR_LVALUE(lir) (lir)->generic.prev + +#endif /* _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_H */ diff --git a/vm/compiler/codegen/armv5te/Assemble.c b/vm/compiler/codegen/armv5te/Assemble.c new file mode 100644 index 000000000..14355cb28 --- /dev/null +++ b/vm/compiler/codegen/armv5te/Assemble.c @@ -0,0 +1,499 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
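Each Armv5teEncodingMap entry above pairs a 16-bit skeleton with up to three {end, start} operand field locations; the assembler later shifts each operand to its start bit, masks it at the field's end bit, and ORs it into the skeleton. A minimal sketch of that packing, using values matching the Thumb add(3) register-register form (skeleton 0x1800, rd in bits 2..0, rn in 5..3, rm in 8..6); the struct and function names here are illustrative:

#include <stdio.h>
#include <stdint.h>

typedef struct { int end, start; } FieldLoc;

typedef struct {
    uint16_t skeleton;
    FieldLoc fieldLoc[3];       /* dest, src1, src2 bit positions */
} Encoding;

/* Pack operands into a Thumb instruction the same way assembleInstructions
 * does: shift into place, mask up to the field's end bit, OR into skeleton */
static uint16_t assembleOne(const Encoding *enc, const int operands[3])
{
    uint16_t bits = enc->skeleton;
    for (int i = 0; i < 3; i++) {
        if (enc->fieldLoc[i].end != -1) {
            uint16_t value = (operands[i] << enc->fieldLoc[i].start) &
                             ((1 << (enc->fieldLoc[i].end + 1)) - 1);
            bits |= value;
        }
    }
    return bits;
}

int main(void)
{
    /* add r0, r1, r2: rd=0 in [2..0], rn=1 in [5..3], rm=2 in [8..6] -> 0x1888 */
    Encoding addRRR = { 0x1800, { {2, 0}, {5, 3}, {8, 6} } };
    int ops[3] = { 0, 1, 2 };
    printf("0x%04x\n", assembleOne(&addRRR, ops));
    return 0;
}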
+ */ + +#include "Dalvik.h" +#include "libdex/OpCode.h" +#include "dexdump/OpCodeNames.h" + +#include "../../CompilerInternals.h" +#include "Armv5teLIR.h" +#include /* for cacheflush */ + +/* + * opcode: Armv5teOpCode enum + * skeleton: pre-designated bit-pattern for this opcode + * ds: dest start bit position + * de: dest end bit position + * s1s: src1 start bit position + * s1e: src1 end bit position + * s2s: src2 start bit position + * s2e: src2 end bit position + * operands: number of operands (for sanity check purposes) + * name: mnemonic name + * fmt: for pretty-prining + */ +#define ENCODING_MAP(opcode, skeleton, ds, de, s1s, s1e, s2s, s2e, operands, \ + name, fmt) \ + {skeleton, {{ds, de}, {s1s, s1e}, {s2s, s2e}}, opcode, operands, name, \ + fmt} + +/* Instruction dump string format keys: !pf, where "!" is the start + * of the key, "p" is which numeric operand to use and "f" is the + * print format. + * + * [p]ositions: + * 0 -> operands[0] (dest) + * 1 -> operands[1] (src1) + * 2 -> operands[2] (src2) + * + * [f]ormats: + * h -> 4-digit hex + * d -> decimal + * D -> decimal+8 (used to convert 3-bit regnum field to high reg) + * E -> decimal*4 + * F -> decimal*2 + * c -> branch condition (beq, bne, etc.) + * t -> pc-relative target + * u -> 1st half of bl[x] target + * v -> 2nd half ob bl[x] target + * R -> register list + * + * [!] escape. To insert "!", use "!!" + */ +/* NOTE: must be kept in sync with enum Armv5teOpcode from Armv5teLIR.h */ +Armv5teEncodingMap EncodingMap[ARMV5TE_LAST] = { + ENCODING_MAP(ARMV5TE_16BIT_DATA, 0x0000, 15, 0, -1, -1, -1, -1, + 1, "data", "0x!0h(!0d)"), + ENCODING_MAP(ARMV5TE_ADC, 0x4140, 2, 0, 5, 3, -1, -1, + 2, "adc", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_ADD_RRI3, 0x1c00, 2, 0, 5, 3, 8, 6, + 3, "add", "r!0d, r!1d, #!2d"), + ENCODING_MAP(ARMV5TE_ADD_RI8, 0x3000, 10, 8, 7, 0, -1, -1, + 2, "add", "r!0d, r!0d, #!1d"), + ENCODING_MAP(ARMV5TE_ADD_RRR, 0x1800, 2, 0, 5, 3, 8, 6, + 3, "add", "r!0d, r!1d, r!2d"), + ENCODING_MAP(ARMV5TE_ADD_RR_LH, 0x4440, 2, 0, 5, 3, -1, -1, + 2, "add", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_ADD_RR_HL, 0x4480, 2, 0, 5, 3, -1, -1, + 2, "add", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_ADD_RR_HH, 0x44c0, 2, 0, 5, 3, -1, -1, + 2, "add", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_ADD_PC_REL, 0xa000, 10, 8, 7, 0, -1, -1, + 2, "add", "r!0d, pc, #!1E"), + ENCODING_MAP(ARMV5TE_ADD_SP_REL, 0xa800, 10, 8, 7, 0, -1, -1, + 2, "add", "r!0d, sp, #!1E"), + ENCODING_MAP(ARMV5TE_ADD_SPI7, 0xb000, 6, 0, -1, -1, -1, -1, + 1, "add", "sp, #!0d*4"), + ENCODING_MAP(ARMV5TE_AND_RR, 0x4000, 2, 0, 5, 3, -1, -1, + 2, "and", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_ASR, 0x1000, 2, 0, 5, 3, 10, 6, + 3, "asr", "r!0d, r!1d, #!2d"), + ENCODING_MAP(ARMV5TE_ASRV, 0x4100, 2, 0, 5, 3, -1, -1, + 2, "asr", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_B_COND, 0xd000, 7, 0, 11, 8, -1, -1, + 2, "!1c", "!0t"), + ENCODING_MAP(ARMV5TE_B_UNCOND, 0xe000, 10, 0, -1, -1, -1, -1, + 0, "b", "!0t"), + ENCODING_MAP(ARMV5TE_BIC, 0x4380, 2, 0, 5, 3, -1, -1, + 2, "bic", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_BKPT, 0xbe00, 7, 0, -1, -1, -1, -1, + 1, "bkpt", "!0d"), + ENCODING_MAP(ARMV5TE_BLX_1, 0xf000, 10, 0, -1, -1, -1, -1, + 2, "blx_1", "!0u"), + ENCODING_MAP(ARMV5TE_BLX_2, 0xe800, 10, 0, -1, -1, -1, -1, + 2, "blx_2", "!0v"), + ENCODING_MAP(ARMV5TE_BL_1, 0xf000, 10, 0, -1, -1, -1, -1, + 1, "bl_1", "!0u"), + ENCODING_MAP(ARMV5TE_BL_2, 0xf800, 10, 0, -1, -1, -1, -1, + 1, "bl_2", "!0v"), + ENCODING_MAP(ARMV5TE_BLX_R, 0x4780, 6, 3, -1, -1, -1, -1, + 1, "blx", "r!0d"), + ENCODING_MAP(ARMV5TE_BX, 0x4700, 
6, 3, -1, -1, -1, -1, + 1, "bx", "r!0d"), + ENCODING_MAP(ARMV5TE_CMN, 0x42c0, 2, 0, 5, 3, -1, -1, + 2, "cmn", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_CMP_RI8, 0x2800, 10, 8, 7, 0, -1, -1, + 2, "cmp", "r!0d, #!1d"), + ENCODING_MAP(ARMV5TE_CMP_RR, 0x4280, 2, 0, 5, 3, -1, -1, + 2, "cmp", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_CMP_LH, 0x4540, 2, 0, 5, 3, -1, -1, + 2, "cmp", "r!0d, r!1D"), + ENCODING_MAP(ARMV5TE_CMP_HL, 0x4580, 2, 0, 5, 3, -1, -1, + 2, "cmp", "r!0D, r!1d"), + ENCODING_MAP(ARMV5TE_CMP_HH, 0x45c0, 2, 0, 5, 3, -1, -1, + 2, "cmp", "r!0D, r!1D"), + ENCODING_MAP(ARMV5TE_EOR, 0x4040, 2, 0, 5, 3, -1, -1, + 2, "eor", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_LDMIA, 0xc800, 10, 8, 7, 0, -1, -1, + 2, "ldmia", "r!0d!!, "), + ENCODING_MAP(ARMV5TE_LDR_RRI5, 0x6800, 2, 0, 5, 3, 10, 6, + 3, "ldr", "r!0d, [r!1d, #!2E]"), + ENCODING_MAP(ARMV5TE_LDR_RRR, 0x5800, 2, 0, 5, 3, 8, 6, + 3, "ldr", "r!0d, [r!1d, r!2d]"), + ENCODING_MAP(ARMV5TE_LDR_PC_REL, 0x4800, 10, 8, 7, 0, -1, -1, + 2, "ldr", "r!0d, [pc, #!1E]"), + ENCODING_MAP(ARMV5TE_LDR_SP_REL, 0x9800, 10, 8, 7, 0, -1, -1, + 2, "ldr", "r!0d, [sp, #!1E]"), + ENCODING_MAP(ARMV5TE_LDRB_RRI5, 0x7800, 2, 0, 5, 3, 10, 6, + 3, "ldrb", "r!0d, [r!1d, #2d]"), + ENCODING_MAP(ARMV5TE_LDRB_RRR, 0x5c00, 2, 0, 5, 3, 8, 6, + 3, "ldrb", "r!0d, [r!1d, r!2d]"), + ENCODING_MAP(ARMV5TE_LDRH_RRI5, 0x8800, 2, 0, 5, 3, 10, 6, + 3, "ldrh", "r!0d, [r!1d, #!2F]"), + ENCODING_MAP(ARMV5TE_LDRH_RRR, 0x5a00, 2, 0, 5, 3, 8, 6, + 3, "ldrh", "r!0d, [r!1d, r!2d]"), + ENCODING_MAP(ARMV5TE_LDRSB_RRR, 0x5600, 2, 0, 5, 3, 8, 6, + 3, "ldrsb", "r!0d, [r!1d, r!2d]"), + ENCODING_MAP(ARMV5TE_LDRSH_RRR, 0x5e00, 2, 0, 5, 3, 8, 6, + 3, "ldrsh", "r!0d, [r!1d, r!2d]"), + ENCODING_MAP(ARMV5TE_LSL, 0x0000, 2, 0, 5, 3, 10, 6, + 3, "lsl", "r!0d, r!1d, #!2d"), + ENCODING_MAP(ARMV5TE_LSLV, 0x4080, 2, 0, 5, 3, -1, -1, + 2, "lsl", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_LSR, 0x0800, 2, 0, 5, 3, 10, 6, + 3, "lsr", "r!0d, r!1d, #!2d"), + ENCODING_MAP(ARMV5TE_LSRV, 0x40c0, 2, 0, 5, 3, -1, -1, + 2, "lsr", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_MOV_IMM, 0x2000, 10, 8, 7, 0, -1, -1, + 2, "mov", "r!0d, #!1d"), + ENCODING_MAP(ARMV5TE_MOV_RR, 0x1c00, 2, 0, 5, 3, -1, -1, + 2, "mov", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_MOV_RR_LH, 0x4640, 2, 0, 5, 3, -1, -1, + 2, "mov", "r!0D, r!1d"), + ENCODING_MAP(ARMV5TE_MOV_RR_HL, 0x4680, 2, 0, 5, 3, -1, -1, + 2, "mov", "r!0d, r!1D"), + ENCODING_MAP(ARMV5TE_MOV_RR_HH, 0x46c0, 2, 0, 5, 3, -1, -1, + 2, "mov", "r!0D, r!1D"), + ENCODING_MAP(ARMV5TE_MUL, 0x4340, 2, 0, 5, 3, -1, -1, + 2, "mul", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_MVN, 0x43c0, 2, 0, 5, 3, -1, -1, + 2, "mvn", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_NEG, 0x4240, 2, 0, 5, 3, -1, -1, + 2, "neg", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_ORR, 0x4300, 2, 0, 5, 3, -1, -1, + 2, "orr", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_POP, 0xbc00, 8, 0, -1, -1, -1, -1, + 1, "pop", ""), + ENCODING_MAP(ARMV5TE_PUSH, 0xb400, 8, 0, -1, -1, -1, -1, + 1, "push", ""), + ENCODING_MAP(ARMV5TE_ROR, 0x41c0, 2, 0, 5, 3, -1, -1, + 2, "ror", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_SBC, 0x4180, 2, 0, 5, 3, -1, -1, + 2, "sbc", "r!0d, r!1d"), + ENCODING_MAP(ARMV5TE_STMIA, 0xc000, 10, 8, 7, 0, -1, -1, + 2, "stmia", "r!0d!!, "), + ENCODING_MAP(ARMV5TE_STR_RRI5, 0x6000, 2, 0, 5, 3, 10, 6, + 3, "str", "r!0d, [r!1d, #!2E]"), + ENCODING_MAP(ARMV5TE_STR_RRR, 0x5000, 2, 0, 5, 3, 8, 6, + 3, "str", "r!0d, [r!1d, r!2d]"), + ENCODING_MAP(ARMV5TE_STR_SP_REL, 0x9000, 10, 8, 7, 0, -1, -1, + 2, "str", "r!0d, [sp, #!1E]"), + ENCODING_MAP(ARMV5TE_STRB_RRI5, 0x7000, 2, 0, 5, 3, 10, 6, + 
3, "strb", "r!0d, [r!1d, #!2d]"), + ENCODING_MAP(ARMV5TE_STRB_RRR, 0x5400, 2, 0, 5, 3, 8, 6, + 3, "strb", "r!0d, [r!1d, r!2d]"), + ENCODING_MAP(ARMV5TE_STRH_RRI5, 0x8000, 2, 0, 5, 3, 10, 6, + 3, "strh", "r!0d, [r!1d, #!2F]"), + ENCODING_MAP(ARMV5TE_STRH_RRR, 0x5200, 2, 0, 5, 3, 8, 6, + 3, "strh", "r!0d, [r!1d, r!2d]"), + ENCODING_MAP(ARMV5TE_SUB_RRI3, 0x1e00, 2, 0, 5, 3, 8, 6, + 3, "sub", "r!0d, r!1d, #!2d]"), + ENCODING_MAP(ARMV5TE_SUB_RI8, 0x3800, 10, 8, 7, 0, -1, -1, + 2, "sub", "r!0d, #!1d"), + ENCODING_MAP(ARMV5TE_SUB_RRR, 0x1a00, 2, 0, 5, 3, 8, 6, + 3, "sub", "r!0d, r!1d, r!2d"), + ENCODING_MAP(ARMV5TE_SUB_SPI7, 0xb080, 6, 0, -1, -1, -1, -1, + 1, "sub", "sp, #!0d"), + ENCODING_MAP(ARMV5TE_SWI, 0xdf00, 7, 0, -1, -1, -1, -1, + 1, "swi", "!0d"), + ENCODING_MAP(ARMV5TE_TST, 0x4200, 2, 0, 5, 3, -1, -1, + 1, "tst", "r!0d, r!1d"), +}; + +#define PADDING_MOV_R0_R0 0x1C00 + +/* Write the numbers in the literal pool to the codegen stream */ +static void writeDataContent(CompilationUnit *cUnit) +{ + int *dataPtr = (int *) (cUnit->codeBuffer + cUnit->dataOffset); + Armv5teLIR *dataLIR = (Armv5teLIR *) cUnit->wordList; + while (dataLIR) { + *dataPtr++ = dataLIR->operands[0]; + dataLIR = NEXT_LIR(dataLIR); + } +} + +/* Return TRUE if error happens */ +static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr) +{ + short *bufferAddr = (short *) cUnit->codeBuffer; + Armv5teLIR *lir; + bool retry = false; + + for (lir = (Armv5teLIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) { + if (lir->opCode < 0) { + if ((lir->opCode == ARMV5TE_PSEUDO_ALIGN4) && + (lir->operands[0] == 1) && + !retry) { + *bufferAddr++ = PADDING_MOV_R0_R0; + } + continue; + } + + if (lir->opCode == ARMV5TE_LDR_PC_REL || + lir->opCode == ARMV5TE_ADD_PC_REL) { + Armv5teLIR *lirTarget = (Armv5teLIR *) lir->generic.target; + intptr_t pc = (lir->generic.offset + 4) & ~3; + intptr_t target = lirTarget->generic.offset; + int delta = target - pc; + if (delta & 0x3) { + LOGE("PC-rel distance is not multiples of 4: %d\n", delta); + dvmAbort(); + } + lir->operands[1] = delta >> 2; + } else if (lir->opCode == ARMV5TE_B_COND) { + Armv5teLIR *targetLIR = (Armv5teLIR *) lir->generic.target; + intptr_t pc = lir->generic.offset + 4; + intptr_t target = targetLIR->generic.offset; + int delta = target - pc; + if (delta > 254 || delta < -256) { + /* Pull in the PC reconstruction code inline */ + if (targetLIR->opCode == ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL){ + /* + * The original code is: + * + * bxx targetLIR + * origNextLir + * : + * : + * targetLIR (a PC reconstruction cell) + * : + * lastLIR (should be a unconditional branch) + * + * The distance from bxx to targetLIR is too far, so we want + * to rearrange the code to be: + * + * bxx targetLIR + * branchoverLIR to origNextLir + * targetLIR (a PC reconstruction cell) + * : + * lastLIR (should be a unconditional branch) + * origNextLir + * + * Although doing so adds a unconditional branchover + * instruction, it can be predicted for free by ARM so + * the penalty should be minimal. 
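The rearrangement described above is needed because a Thumb B<cond> carries only a signed 8-bit halfword offset, giving it a reach of roughly -256..+254 bytes from the instruction address plus 4, which is exactly the window the range check in assembleInstructions tests. A small sketch of the reach test and of the offset field written when the branch does fit (the encoding follows the ARMv5 Thumb reference; the helper names are illustrative):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* A Thumb B<cond> stores a signed 8-bit offset counted in halfwords,
 * relative to the branch's own address + 4 */
static bool bCondReaches(int branchOffset, int targetOffset)
{
    int delta = targetOffset - (branchOffset + 4);
    return delta >= -256 && delta <= 254;   /* same window Assemble.c checks */
}

static uint16_t encodeBCond(int cond, int branchOffset, int targetOffset)
{
    int delta = targetOffset - (branchOffset + 4);
    return (uint16_t)(0xd000 | (cond << 8) | ((delta >> 1) & 0xff));
}

int main(void)
{
    /* beq (cond 0x0) at offset 0x10 jumping to 0x20: delta = 12 -> 0xd006 */
    printf("reaches: %d, insn: 0x%04x\n",
           bCondReaches(0x10, 0x20), encodeBCond(0x0, 0x10, 0x20));
    /* a target 400 bytes away is out of range and forces the rearrangement */
    printf("reaches: %d\n", bCondReaches(0x10, 0x10 + 400));
    return 0;
}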
+ */ + Armv5teLIR *pcrLIR = targetLIR; + Armv5teLIR *lastLIR = pcrLIR; + Armv5teLIR *origNextLIR = NEXT_LIR(lir); + + /* + * Find out the last instruction in the PC reconstruction + * cell + */ + while (lastLIR->opCode != ARMV5TE_B_UNCOND) { + lastLIR = NEXT_LIR(lastLIR); + } + + /* Yank out the PCR code */ + PREV_LIR_LVALUE(NEXT_LIR(lastLIR)) = + (LIR *) PREV_LIR(targetLIR); + NEXT_LIR_LVALUE(PREV_LIR(targetLIR)) = + (LIR *) NEXT_LIR(lastLIR); + + /* Create the branch over instruction */ + Armv5teLIR *branchoverLIR = + dvmCompilerNew(sizeof(Armv5teLIR), true); + branchoverLIR->opCode = ARMV5TE_B_UNCOND; + branchoverLIR->generic.target = (LIR *) origNextLIR; + + /* Reconnect the instructions */ + NEXT_LIR_LVALUE(lir) = (LIR *) branchoverLIR; + PREV_LIR_LVALUE(branchoverLIR) = (LIR *) lir; + + NEXT_LIR_LVALUE(branchoverLIR) = (LIR *) targetLIR; + PREV_LIR_LVALUE(targetLIR) = (LIR *) branchoverLIR; + + NEXT_LIR_LVALUE(lastLIR) = (LIR *) origNextLIR; + PREV_LIR_LVALUE(origNextLIR) = (LIR *) lastLIR; + + retry = true; + continue; + } else { + LOGE("Conditional branch distance out of range: %d\n", + delta); + dvmAbort(); + } + } + lir->operands[0] = delta >> 1; + } else if (lir->opCode == ARMV5TE_B_UNCOND) { + Armv5teLIR *targetLIR = (Armv5teLIR *) lir->generic.target; + intptr_t pc = lir->generic.offset + 4; + intptr_t target = targetLIR->generic.offset; + int delta = target - pc; + if (delta > 2046 || delta < -2048) { + LOGE("Unconditional branch distance out of range: %d\n", delta); + dvmAbort(); + } + lir->operands[0] = delta >> 1; + } else if (lir->opCode == ARMV5TE_BLX_1) { + assert(NEXT_LIR(lir)->opCode == ARMV5TE_BLX_2); + /* curPC is Thumb */ + intptr_t curPC = (startAddr + lir->generic.offset + 4) & ~3; + intptr_t target = lir->operands[1]; + + /* Match bit[1] in target with base */ + if (curPC & 0x2) { + target |= 0x2; + } + int delta = target - curPC; + assert((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); + + lir->operands[0] = (delta >> 12) & 0x7ff; + NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + } + + /* + * The code offset will be recalculated, just continue to check if + * there are other places where code will be rescheduled and do not + * write to the output buffer + */ + if (retry) { + continue; + } + Armv5teEncodingMap *encoder = &EncodingMap[lir->opCode]; + short bits = encoder->skeleton; + int i; + for (i = 0; i < 3; i++) { + short value; + if (encoder->fieldLoc[i].end != -1) { + value = (lir->operands[i] << encoder->fieldLoc[i].start) & + ((1 << (encoder->fieldLoc[i].end + 1)) - 1); + bits |= value; + + } + } + *bufferAddr++ = bits; + } + return retry; +} + +/* + * Go over each instruction in the list and calculate the offset from the top + * before sending them off to the assembler. If out-of-range branch distance is + * seen rearrange the instructions a bit to correct it. 
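The fix-up shown above is plain doubly-linked-list surgery: the PC-reconstruction cell (targetLIR through lastLIR) is unlinked from its old position, spliced in right after the out-of-range branch, and a fresh unconditional branch-over is inserted so fall-through execution skips it. A simplified, self-contained sketch of the same splice on integer nodes (stand-in types, not the real LIR structures):

#include <stdio.h>

typedef struct Node {
    int id;
    struct Node *prev, *next;
} Node;

/* Unlink the sublist [first..last] and re-insert it right after 'where' */
static void moveRangeAfter(Node *first, Node *last, Node *where)
{
    /* detach the range from its old neighbors */
    if (first->prev) first->prev->next = last->next;
    if (last->next)  last->next->prev  = first->prev;
    /* splice it back in after 'where' */
    last->next = where->next;
    if (where->next) where->next->prev = last;
    where->next = first;
    first->prev = where;
}

int main(void)
{
    Node n[7];
    for (int i = 1; i <= 6; i++) {
        n[i].id = i;
        n[i].prev = (i > 1) ? &n[i - 1] : NULL;
        n[i].next = (i < 6) ? &n[i + 1] : NULL;
    }
    /* pull nodes 4..5 up to sit right after node 1, the way the PC
     * reconstruction cell is pulled inline after the conditional branch */
    moveRangeAfter(&n[4], &n[5], &n[1]);
    for (Node *p = &n[1]; p; p = p->next)
        printf("%d ", p->id);          /* prints 1 4 5 2 3 6 */
    printf("\n");
    return 0;
}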
+ */ +void dvmCompilerAssembleLIR(CompilationUnit *cUnit) +{ + LIR *lir; + Armv5teLIR *armLIR; + int offset; + int i; + +retry: + for (armLIR = (Armv5teLIR *) cUnit->firstLIRInsn, offset = 0; + armLIR; + armLIR = NEXT_LIR(armLIR)) { + armLIR->generic.offset = offset; + if (armLIR->opCode >= 0) { + offset += 2; + } else if (armLIR->opCode == ARMV5TE_PSEUDO_ALIGN4) { + if (offset & 0x2) { + offset += 2; + armLIR->operands[0] = 1; + } else { + armLIR->operands[0] = 0; + } + } + /* Pseudo opcodes don't consume space */ + } + + /* Const values have to be word aligned */ + offset = ((offset + 3) >> 2) << 2; + + cUnit->dataOffset = offset; + + for (lir = cUnit->wordList; lir; lir = lir->next) { + lir->offset = offset; + offset += 4; + } + + cUnit->totalSize = offset; + + if (gDvmJit.codeCacheByteUsed + offset > CODE_CACHE_SIZE) { + gDvmJit.codeCacheFull = true; + cUnit->baseAddr = NULL; + return; + } + cUnit->codeBuffer = dvmCompilerNew(offset, true); + if (cUnit->codeBuffer == NULL) { + LOGE("Code buffer allocation failure\n"); + cUnit->baseAddr = NULL; + return; + } + + bool needRetry = assembleInstructions( + cUnit, (intptr_t) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed); + + if (needRetry) + goto retry; + + writeDataContent(cUnit); + + cUnit->baseAddr = (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed; + gDvmJit.codeCacheByteUsed += offset; + + + /* Install the compilation */ + memcpy(cUnit->baseAddr, cUnit->codeBuffer, offset); + gDvmJit.numCompilations++; + + /* Flush dcache and invalidate the icache to maintain coherence */ + cacheflush((intptr_t) cUnit->baseAddr, + (intptr_t) (cUnit->baseAddr + offset), 0); +} + +/* + * Perform translation chain operation. + * For ARM, we'll use a pair of thumb instructions to generate + * an unconditional chaining branch of up to 4MB in distance. + * Use a BL, though we don't really need the link. The format is + * 111HHooooooooooo + * Where HH is 10 for the 1st inst, and 11 for the second and + * the "o" field is each instruction's 11-bit contribution to the + * 22-bit branch offset. + * TUNING: use a single-instruction variant if it reaches. + */ +void* dvmJitChain(void* tgtAddr, u4* branchAddr) +{ + int baseAddr = (u4) branchAddr + 4; + int branchOffset = (int) tgtAddr - baseAddr; + u4 thumb1; + u4 thumb2; + u4 newInst; + + assert((branchOffset >= -(1<<22)) && (branchOffset <= ((1<<22)-2))); + + gDvmJit.translationChains++; + + COMPILER_TRACE_CHAINING( + LOGD("Jit Runtime: chaining 0x%x to 0x%x\n", + (int) branchAddr, (int) tgtAddr & -2)); + if ((branchOffset < -2048) | (branchOffset > 2046)) { + thumb1 = (0xf000 | ((branchOffset>>12) & 0x7ff)); + thumb2 = (0xf800 | ((branchOffset>> 1) & 0x7ff)); + } else { + thumb1 = (0xe000 | ((branchOffset>> 1) & 0x7ff)); + thumb2 = 0x4300; /* nop -> or r0, r0 */ + } + + newInst = thumb2<<16 | thumb1; + *branchAddr = newInst; + cacheflush((intptr_t) branchAddr, (intptr_t) branchAddr + 4, 0); + + return tgtAddr; +} diff --git a/vm/compiler/codegen/armv5te/Codegen.c b/vm/compiler/codegen/armv5te/Codegen.c new file mode 100644 index 000000000..178e5368b --- /dev/null +++ b/vm/compiler/codegen/armv5te/Codegen.c @@ -0,0 +1,2892 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
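dvmJitChain above rewrites a chaining cell into a Thumb BL pair whose two 11-bit immediates together carry a signed 22-bit halfword offset, falling back to a single unconditional branch plus a nop when the target is close enough. A sketch of just the offset-splitting arithmetic for the far case, assuming the same roughly 4 MB reach (the helper name is illustrative):

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

/* Split a byte offset into the two halfwords of a Thumb BL pair:
 * the first carries bits 22..12 of the offset, the second bits 11..1 */
static void encodeBlPair(int branchOffset, uint16_t *first, uint16_t *second)
{
    assert(branchOffset >= -(1 << 22) && branchOffset <= (1 << 22) - 2);
    *first  = (uint16_t)(0xf000 | ((branchOffset >> 12) & 0x7ff));
    *second = (uint16_t)(0xf800 | ((branchOffset >> 1)  & 0x7ff));
}

int main(void)
{
    uint16_t hi, lo;
    /* chain to a target 0x12344 bytes past (branch address + 4) */
    encodeBlPair(0x12344, &hi, &lo);
    printf("bl pair: 0x%04x 0x%04x\n", hi, lo);   /* prints 0xf012 0xf9a2 */
    return 0;
}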
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dalvik.h" +#include "interp/InterpDefs.h" +#include "libdex/OpCode.h" +#include "dexdump/OpCodeNames.h" +#include "vm/compiler/CompilerInternals.h" +#include "Armv5teLIR.h" +#include "vm/mterp/common/FindInterface.h" + +/* Create the TemplateOpcode enum */ +#define JIT_TEMPLATE(X) TEMPLATE_##X, +typedef enum { +#include "../../template/armv5te/TemplateOpList.h" +/* + * For example, + * TEMPLATE_CMP_LONG, + * TEMPLATE_RETURN, + * ... + */ + TEMPLATE_LAST_MARK, +} TemplateOpCode; +#undef JIT_TEMPLATE + +/* Array holding the entry offset of each template relative to the first one */ +static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK]; + +/* Track exercised opcodes */ +static int opcodeCoverage[256]; + +/*****************************************************************************/ + +/* + * The following are building blocks to construct low-level IRs with 0 - 3 + * operands. + */ +static Armv5teLIR *newLIR0(CompilationUnit *cUnit, Armv5teOpCode opCode) +{ + Armv5teLIR *insn = dvmCompilerNew(sizeof(Armv5teLIR), true); + assert(isPseudoOpCode(opCode) || EncodingMap[opCode].operands == 0); + insn->opCode = opCode; + dvmCompilerAppendLIR(cUnit, (LIR *) insn); + return insn; +} + +static Armv5teLIR *newLIR1(CompilationUnit *cUnit, Armv5teOpCode opCode, + int dest) +{ + Armv5teLIR *insn = dvmCompilerNew(sizeof(Armv5teLIR), true); + assert(isPseudoOpCode(opCode) || EncodingMap[opCode].operands == 1); + insn->opCode = opCode; + insn->operands[0] = dest; + dvmCompilerAppendLIR(cUnit, (LIR *) insn); + return insn; +} + +static Armv5teLIR *newLIR2(CompilationUnit *cUnit, Armv5teOpCode opCode, + int dest, int src1) +{ + Armv5teLIR *insn = dvmCompilerNew(sizeof(Armv5teLIR), true); + assert(isPseudoOpCode(opCode) || EncodingMap[opCode].operands == 2); + insn->opCode = opCode; + insn->operands[0] = dest; + insn->operands[1] = src1; + dvmCompilerAppendLIR(cUnit, (LIR *) insn); + return insn; +} + +static Armv5teLIR *newLIR3(CompilationUnit *cUnit, Armv5teOpCode opCode, + int dest, int src1, int src2) +{ + Armv5teLIR *insn = dvmCompilerNew(sizeof(Armv5teLIR), true); + assert(isPseudoOpCode(opCode) || EncodingMap[opCode].operands == 3); + insn->opCode = opCode; + insn->operands[0] = dest; + insn->operands[1] = src1; + insn->operands[2] = src2; + dvmCompilerAppendLIR(cUnit, (LIR *) insn); + return insn; +} + +static Armv5teLIR *newLIR23(CompilationUnit *cUnit, Armv5teOpCode opCode, + int srcdest, int src2) +{ + assert(!isPseudoOpCode(opCode)); + if (EncodingMap[opCode].operands==2) + return newLIR2(cUnit, opCode, srcdest, src2); + else + return newLIR3(cUnit, opCode, srcdest, srcdest, src2); +} + +/*****************************************************************************/ + +/* + * The following are building blocks to insert constants into the pool or + * instruction streams. 
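The JIT_TEMPLATE trick above is the classic X-macro pattern: the same template list is expanded several times with different definitions of the macro, so the enum, the handler references, and anything else derived from the list cannot fall out of sync. A small self-contained illustration of the pattern; the real code re-includes TemplateOpList.h for each expansion, while this sketch folds a made-up list into a macro so it compiles on its own:

#include <stdio.h>

/* Stand-in for TemplateOpList.h, which in the real code is a separate file
 * that gets #included once per expansion */
#define TEMPLATE_LIST(X) \
    X(CMP_LONG)          \
    X(RETURN)            \
    X(MUL_LONG)

/* First expansion: build the enum */
#define JIT_TEMPLATE(name) TEMPLATE_##name,
typedef enum {
    TEMPLATE_LIST(JIT_TEMPLATE)
    TEMPLATE_LAST_MARK,
} TemplateOpCode;
#undef JIT_TEMPLATE

/* Second expansion: build a parallel name table */
#define JIT_TEMPLATE(name) #name,
static const char *templateNames[] = {
    TEMPLATE_LIST(JIT_TEMPLATE)
};
#undef JIT_TEMPLATE

int main(void)
{
    for (int i = 0; i < TEMPLATE_LAST_MARK; i++)
        printf("%d -> TEMPLATE_%s\n", i, templateNames[i]);
    return 0;
}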
+ */ + +/* Add a 32-bit constant either in the constant pool or mixed with code */ +static Armv5teLIR *addWordData(CompilationUnit *cUnit, int value, bool inPlace) +{ + /* Add the constant to the literal pool */ + if (!inPlace) { + Armv5teLIR *newValue = dvmCompilerNew(sizeof(Armv5teLIR), true); + newValue->operands[0] = value; + newValue->generic.next = cUnit->wordList; + cUnit->wordList = (LIR *) newValue; + return newValue; + } else { + /* Add the constant in the middle of code stream */ + newLIR1(cUnit, ARMV5TE_16BIT_DATA, (value & 0xffff)); + newLIR1(cUnit, ARMV5TE_16BIT_DATA, (value >> 16)); + } + return NULL; +} + +/* + * Search the existing constants in the literal pool for an exact or close match + * within specified delta (greater or equal to 0). + */ +static Armv5teLIR *scanLiteralPool(CompilationUnit *cUnit, int value, + unsigned int delta) +{ + LIR *dataTarget = cUnit->wordList; + while (dataTarget) { + if (((unsigned) (value - ((Armv5teLIR *) dataTarget)->operands[0])) <= + delta) + return (Armv5teLIR *) dataTarget; + dataTarget = dataTarget->next; + } + return NULL; +} + +/* + * Load a immediate using a shortcut if possible; otherwise + * grab from the per-translation literal pool + */ +void loadConstant(CompilationUnit *cUnit, int rDest, int value) +{ + /* See if the value can be constructed cheaply */ + if ((value >= 0) && (value <= 255)) { + newLIR2(cUnit, ARMV5TE_MOV_IMM, rDest, value); + return; + } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) { + newLIR2(cUnit, ARMV5TE_MOV_IMM, rDest, ~value); + newLIR2(cUnit, ARMV5TE_MVN, rDest, rDest); + return; + } + /* No shortcut - go ahead and use literal pool */ + Armv5teLIR *dataTarget = scanLiteralPool(cUnit, value, 255); + if (dataTarget == NULL) { + dataTarget = addWordData(cUnit, value, false); + } + Armv5teLIR *loadPcRel = dvmCompilerNew(sizeof(Armv5teLIR), true); + loadPcRel->opCode = ARMV5TE_LDR_PC_REL; + loadPcRel->generic.target = (LIR *) dataTarget; + loadPcRel->operands[0] = rDest; + dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); + + /* + * To save space in the constant pool, we use the ADD_RRI8 instruction to + * add up to 255 to an existing constant value. + */ + if (dataTarget->operands[0] != value) { + newLIR2(cUnit, ARMV5TE_ADD_RI8, rDest, value - dataTarget->operands[0]); + } +} + +/* Export the Dalvik PC assicated with an instruction to the StackSave area */ +static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr) +{ + int offset = offsetof(StackSaveArea, xtra.currentPc); + loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset)); + newLIR2(cUnit, ARMV5TE_MOV_RR, rAddr, rFP); + newLIR2(cUnit, ARMV5TE_SUB_RI8, rAddr, sizeof(StackSaveArea) - offset); + newLIR3(cUnit, ARMV5TE_STR_RRI5, rDPC, rAddr, 0); +} + +/* Generate conditional branch instructions */ +static void genConditionalBranch(CompilationUnit *cUnit, + Armv5teConditionCode cond, + Armv5teLIR *target) +{ + Armv5teLIR *branch = newLIR2(cUnit, ARMV5TE_B_COND, 0, cond); + branch->generic.target = (LIR *) target; +} + +/* Generate unconditional branch instructions */ +static void genUnconditionalBranch(CompilationUnit *cUnit, Armv5teLIR *target) +{ + Armv5teLIR *branch = newLIR0(cUnit, ARMV5TE_B_UNCOND); + branch->generic.target = (LIR *) target; +} + +#define USE_IN_CACHE_HANDLER 1 + +/* + * Jump to the out-of-line handler in ARM mode to finish executing the + * remaining of more complex instructions. 
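loadConstant above tries the cheapest encoding first: a single MOV immediate for 0..255, MOV plus MVN when the bitwise complement is small, and otherwise a PC-relative load from the literal pool, reusing any pooled constant within 255 of the wanted value and patching the difference with an ADD. A sketch of just that decision, returning which strategy would be chosen (the enum labels are illustrative):

#include <stdio.h>
#include <stdint.h>

typedef enum { USE_MOV, USE_MOV_MVN, USE_LITERAL_POOL } ConstStrategy;

/* Mirror the shortcut tests in loadConstant(): small positives fit a Thumb
 * MOV immediate; values whose top 24 bits are all set can be built as MVN
 * of a small immediate; everything else goes to the literal pool */
static ConstStrategy pickStrategy(int32_t value)
{
    if (value >= 0 && value <= 255)
        return USE_MOV;
    if (((uint32_t)value & 0xFFFFFF00u) == 0xFFFFFF00u)
        return USE_MOV_MVN;
    return USE_LITERAL_POOL;
}

int main(void)
{
    int32_t samples[] = { 7, 255, -1, -200, 0x12345678 };
    const char *names[] = { "mov", "mov+mvn", "literal pool" };
    for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
        printf("%11d -> %s\n", samples[i], names[pickStrategy(samples[i])]);
    return 0;
}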
+ */ +static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode) +{ +#if USE_IN_CACHE_HANDLER + /* + * NOTE - In practice BLX only needs one operand, but since the assembler + * may abort itself and retry due to other out-of-range conditions we + * cannot really use operand[0] to store the absolute target address since + * it may get clobbered by the final relative offset. Therefore, + * we fake BLX_1 is a two operand instruction and the absolute target + * address is stored in operand[1]. + */ + newLIR2(cUnit, ARMV5TE_BLX_1, + (int) gDvmJit.codeCache + templateEntryOffsets[opCode], + (int) gDvmJit.codeCache + templateEntryOffsets[opCode]); + newLIR2(cUnit, ARMV5TE_BLX_2, + (int) gDvmJit.codeCache + templateEntryOffsets[opCode], + (int) gDvmJit.codeCache + templateEntryOffsets[opCode]); +#else + /* + * In case we want to access the statically compiled handlers for + * debugging purposes, define USE_IN_CACHE_HANDLER to 0 + */ + void *templatePtr; + +#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X(); +#include "../../template/armv5te/TemplateOpList.h" +#undef JIT_TEMPLATE + switch (opCode) { +#define JIT_TEMPLATE(X) \ + case TEMPLATE_##X: { templatePtr = dvmCompiler_TEMPLATE_##X; break; } +#include "../../template/armv5te/TemplateOpList.h" +#undef JIT_TEMPLATE + default: templatePtr = NULL; + } + loadConstant(cUnit, r7, (int) templatePtr); + newLIR1(cUnit, ARMV5TE_BLX_R, r7); +#endif +} + +/* Perform the actual operation for OP_RETURN_* */ +static void genReturnCommon(CompilationUnit *cUnit, MIR *mir) +{ + genDispatchToHandler(cUnit, TEMPLATE_RETURN); +#if defined(INVOKE_STATS) + gDvmJit.jitReturn++; +#endif + int dPC = (int) (cUnit->method->insns + mir->offset); + Armv5teLIR *branch = newLIR0(cUnit, ARMV5TE_B_UNCOND); + /* Set up the place holder to reconstruct this Dalvik PC */ + Armv5teLIR *pcrLabel = dvmCompilerNew(sizeof(Armv5teLIR), true); + pcrLabel->opCode = ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL; + pcrLabel->operands[0] = dPC; + pcrLabel->operands[1] = mir->offset; + /* Insert the place holder to the growable list */ + dvmInsertGrowableList(&cUnit->pcReconstructionList, pcrLabel); + /* Branch to the PC reconstruction code */ + branch->generic.target = (LIR *) pcrLabel; +} + +/* + * Load a pair of values of rFP[src..src+1] and store them into rDestLo and + * rDestHi + */ +static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo, + int rDestHi) +{ + /* Use reg + imm5*4 to load the values if possible */ + if (vSrc <= 30) { + newLIR3(cUnit, ARMV5TE_LDR_RRI5, rDestLo, rFP, vSrc); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, rDestHi, rFP, vSrc+1); + } else { + if (vSrc <= 64) { + /* Sneak 4 into the base address first */ + newLIR3(cUnit, ARMV5TE_ADD_RRI3, rDestLo, rFP, 4); + newLIR2(cUnit, ARMV5TE_ADD_RI8, rDestHi, (vSrc-1)*4); + } else { + /* Offset too far from rFP */ + loadConstant(cUnit, rDestLo, vSrc*4); + newLIR3(cUnit, ARMV5TE_ADD_RRR, rDestLo, rFP, rDestLo); + } + assert(rDestLo != rDestHi); + newLIR2(cUnit, ARMV5TE_LDMIA, rDestLo, (1<method->insns + dOffset); + pcrLabel = dvmCompilerNew(sizeof(Armv5teLIR), true); + pcrLabel->opCode = ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL; + pcrLabel->operands[0] = dPC; + pcrLabel->operands[1] = dOffset; + /* Insert the place holder to the growable list */ + dvmInsertGrowableList(&cUnit->pcReconstructionList, pcrLabel); + } + /* Branch to the PC reconstruction code */ + branch->generic.target = (LIR *) pcrLabel; + return pcrLabel; +} + +/* + * Perform a "reg cmp imm" operation and jump to the PCR region if 
condition + * satisfies. + */ +static inline Armv5teLIR *genRegImmCheck(CompilationUnit *cUnit, + Armv5teConditionCode cond, int reg, + int checkValue, int dOffset, + Armv5teLIR *pcrLabel) +{ + newLIR2(cUnit, ARMV5TE_CMP_RI8, reg, checkValue); + Armv5teLIR *branch = newLIR2(cUnit, ARMV5TE_B_COND, 0, cond); + return genCheckCommon(cUnit, dOffset, branch, pcrLabel); +} + +/* + * Perform a "reg cmp reg" operation and jump to the PCR region if condition + * satisfies. + */ +static inline Armv5teLIR *inertRegRegCheck(CompilationUnit *cUnit, + Armv5teConditionCode cond, + int reg1, int reg2, int dOffset, + Armv5teLIR *pcrLabel) +{ + newLIR2(cUnit, ARMV5TE_CMP_RR, reg1, reg2); + Armv5teLIR *branch = newLIR2(cUnit, ARMV5TE_B_COND, 0, cond); + return genCheckCommon(cUnit, dOffset, branch, pcrLabel); +} + +/* Perform null-check on a register */ +static Armv5teLIR *genNullCheck(CompilationUnit *cUnit, int reg, int dOffset, + Armv5teLIR *pcrLabel) +{ + return genRegImmCheck(cUnit, ARM_COND_EQ, reg, 0, dOffset, pcrLabel); +} + +/* Perform bound check on two registers */ +static Armv5teLIR *genBoundsCheck(CompilationUnit *cUnit, int rIndex, + int rBound, int dOffset, Armv5teLIR *pcrLabel) +{ + return inertRegRegCheck(cUnit, ARM_COND_CS, rIndex, rBound, dOffset, + pcrLabel); +} + +/* Generate a unconditional branch to go to the interpreter */ +static inline Armv5teLIR *genTrap(CompilationUnit *cUnit, int dOffset, + Armv5teLIR *pcrLabel) +{ + Armv5teLIR *branch = newLIR0(cUnit, ARMV5TE_B_UNCOND); + return genCheckCommon(cUnit, dOffset, branch, pcrLabel); +} + +/* Load a wide field from an object instance */ +static void genIGetWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + + loadValue(cUnit, dInsn->vB, r2); + loadConstant(cUnit, r3, fieldOffset); + genNullCheck(cUnit, r2, mir->offset, NULL); /* null object? */ + newLIR3(cUnit, ARMV5TE_ADD_RRR, r2, r2, r3); + newLIR2(cUnit, ARMV5TE_LDMIA, r2, (1<vA, r3); +} + +/* Store a wide field to an object instance */ +static void genIPutWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + + loadValue(cUnit, dInsn->vB, r2); + loadValuePair(cUnit, dInsn->vA, r0, r1); + loadConstant(cUnit, r3, fieldOffset); + genNullCheck(cUnit, r2, mir->offset, NULL); /* null object? */ + newLIR3(cUnit, ARMV5TE_ADD_RRR, r2, r2, r3); + newLIR2(cUnit, ARMV5TE_STMIA, r2, (1<dalvikInsn; + + /* TUNING: write a utility routine to load via base + constant offset */ + loadValue(cUnit, dInsn->vB, r0); + loadConstant(cUnit, r1, fieldOffset); + genNullCheck(cUnit, r0, mir->offset, NULL); /* null object? */ + newLIR3(cUnit, inst, r0, r0, r1); + storeValue(cUnit, r0, dInsn->vA, r1); +} + +/* + * Store a field to an object instance + * + * Inst should be one of: + * ARMV5TE_STR_RRR + * ARMV5TE_STRB_RRR + * ARMV5TE_STRH_RRR + */ +static void genIPut(CompilationUnit *cUnit, MIR *mir, Armv5teOpCode inst, + int fieldOffset) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + + /* TUNING: write a utility routine to load via base + constant offset */ + loadValue(cUnit, dInsn->vB, r2); + loadConstant(cUnit, r1, fieldOffset); + loadValue(cUnit, dInsn->vA, r0); + genNullCheck(cUnit, r2, mir->offset, NULL); /* null object? */ + newLIR3(cUnit, inst, r0, r2, r1); +} + + +/* TODO: This should probably be done as an out-of-line instruction handler. 
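genNullCheck and genBoundsCheck above both reduce to the same shape: emit a compare, then a conditional branch whose target is a PC-reconstruction cell so the interpreter can resume at the faulting Dalvik PC. The generated guards for an array access are semantically equivalent to the following C, where an early return stands in for the branch to the reconstruction cell (a model of the behavior, not the emitted instructions):

#include <stdio.h>
#include <stdlib.h>

/* Null check on the array reference, then an unsigned compare of index
 * against length (ARM_COND_CS, i.e. index >= length viewed unsigned, which
 * also catches negative indices) */
static int arrayGetChecked(const int *array, unsigned length, int index,
                           int *out)
{
    if (array == NULL)
        return -1;                  /* branch to the PC reconstruction cell */
    if ((unsigned)index >= length)
        return -1;                  /* bounds check punts the same way */
    *out = array[index];
    return 0;
}

int main(void)
{
    int data[] = { 10, 20, 30 };
    int value;
    printf("in range: %d\n", arrayGetChecked(data, 3, 2, &value) == 0 ? value : -1);
    printf("out of range ok? %d\n", arrayGetChecked(data, 3, 5, &value) == 0);
    printf("negative ok? %d\n", arrayGetChecked(data, 3, -1, &value) == 0);
    return 0;
}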
*/ + +/* + * Generate array load + * + * Inst should be one of: + * ARMV5TE_LDR_RRR + * ARMV5TE_LDRB_RRR + * ARMV5TE_LDRH_RRR + * ARMV5TE_LDRSB_RRR + * ARMV5TE_LDRSH_RRR + */ +static void genArrayGet(CompilationUnit *cUnit, MIR *mir, Armv5teOpCode inst, + int vArray, int vIndex, int vDest, int scale) +{ + int lenOffset = offsetof(ArrayObject, length); + int dataOffset = offsetof(ArrayObject, contents); + + loadValue(cUnit, vArray, r2); + loadValue(cUnit, vIndex, r3); + + /* null object? */ + Armv5teLIR * pcrLabel = genNullCheck(cUnit, r2, mir->offset, NULL); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r2, lenOffset >> 2); /* Get len */ + newLIR2(cUnit, ARMV5TE_ADD_RI8, r2, dataOffset); /* r2 -> array data */ + genBoundsCheck(cUnit, r3, r0, mir->offset, pcrLabel); + if (scale) { + newLIR3(cUnit, ARMV5TE_LSL, r3, r3, scale); + } + if (scale==3) { + newLIR3(cUnit, inst, r0, r2, r3); + newLIR2(cUnit, ARMV5TE_ADD_RI8, r2, 4); + newLIR3(cUnit, inst, r1, r2, r3); + storeValuePair(cUnit, r0, r1, vDest, r3); + } else { + newLIR3(cUnit, inst, r0, r2, r3); + storeValue(cUnit, r0, vDest, r3); + } +} + +/* TODO: This should probably be done as an out-of-line instruction handler. */ + +/* + * Generate array store + * + * Inst should be one of: + * ARMV5TE_STR_RRR + * ARMV5TE_STRB_RRR + * ARMV5TE_STRH_RRR + */ +static void genArrayPut(CompilationUnit *cUnit, MIR *mir, Armv5teOpCode inst, + int vArray, int vIndex, int vSrc, int scale) +{ + int lenOffset = offsetof(ArrayObject, length); + int dataOffset = offsetof(ArrayObject, contents); + + loadValue(cUnit, vArray, r2); + loadValue(cUnit, vIndex, r3); + genNullCheck(cUnit, r2, mir->offset, NULL); /* null object? */ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r2, lenOffset >> 2); /* Get len */ + newLIR2(cUnit, ARMV5TE_ADD_RI8, r2, dataOffset); /* r2 -> array data */ + genBoundsCheck(cUnit, r3, r0, mir->offset, NULL); + /* at this point, r2 points to array, r3 is unscaled index */ + if (scale==3) { + loadValuePair(cUnit, vSrc, r0, r1); + } else { + loadValue(cUnit, vSrc, r0); + } + if (scale) { + newLIR3(cUnit, ARMV5TE_LSL, r3, r3, scale); + } + /* + * at this point, r2 points to array, r3 is scaled index, and r0[r1] is + * data + */ + if (scale==3) { + newLIR3(cUnit, inst, r0, r2, r3); + newLIR2(cUnit, ARMV5TE_ADD_RI8, r2, 4); + newLIR3(cUnit, inst, r1, r2, r3); + } else { + newLIR3(cUnit, inst, r0, r2, r3); + } +} + +static bool genShiftOpLong(CompilationUnit *cUnit, MIR *mir, int vDest, + int vSrc1, int vShift) +{ + loadValuePair(cUnit, vSrc1, r0, r1); + loadValue(cUnit, vShift, r2); + switch( mir->dalvikInsn.opCode) { + case OP_SHL_LONG: + case OP_SHL_LONG_2ADDR: + genDispatchToHandler(cUnit, TEMPLATE_SHL_LONG); + break; + case OP_SHR_LONG: + case OP_SHR_LONG_2ADDR: + genDispatchToHandler(cUnit, TEMPLATE_SHR_LONG); + break; + case OP_USHR_LONG: + case OP_USHR_LONG_2ADDR: + genDispatchToHandler(cUnit, TEMPLATE_USHR_LONG); + break; + default: + return true; + } + storeValuePair(cUnit, r0, r1, vDest, r2); + return false; +} + +static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest, + int vSrc1, int vSrc2) +{ + void* funct; + /* TODO: use a proper include file to define these */ + float __aeabi_fadd(float a, float b); + float __aeabi_fsub(float a, float b); + float __aeabi_fdiv(float a, float b); + float __aeabi_fmul(float a, float b); + float fmodf(float a, float b); + + switch (mir->dalvikInsn.opCode) { + case OP_ADD_FLOAT_2ADDR: + case OP_ADD_FLOAT: + funct = (void*) __aeabi_fadd; + break; + case OP_SUB_FLOAT_2ADDR: + case OP_SUB_FLOAT: + funct = 
(void*) __aeabi_fsub; + break; + case OP_DIV_FLOAT_2ADDR: + case OP_DIV_FLOAT: + funct = (void*) __aeabi_fdiv; + break; + case OP_MUL_FLOAT_2ADDR: + case OP_MUL_FLOAT: + funct = (void*) __aeabi_fmul; + break; + case OP_REM_FLOAT_2ADDR: + case OP_REM_FLOAT: + funct = (void*) fmodf; + break; + case OP_NEG_FLOAT: { + loadValue(cUnit, vSrc2, r0); + loadConstant(cUnit, r1, 0x80000000); + newLIR3(cUnit, ARMV5TE_ADD_RRR, r0, r0, r1); + storeValue(cUnit, r0, vDest, r1); + return false; + } + default: + return true; + } + loadConstant(cUnit, r2, (int)funct); + loadValue(cUnit, vSrc1, r0); + loadValue(cUnit, vSrc2, r1); + newLIR1(cUnit, ARMV5TE_BLX_R, r2); + storeValue(cUnit, r0, vDest, r1); + return false; +} + +static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest, + int vSrc1, int vSrc2) +{ + void* funct; + /* TODO: use a proper include file to define these */ + double __aeabi_dadd(double a, double b); + double __aeabi_dsub(double a, double b); + double __aeabi_ddiv(double a, double b); + double __aeabi_dmul(double a, double b); + double fmod(double a, double b); + + switch (mir->dalvikInsn.opCode) { + case OP_ADD_DOUBLE_2ADDR: + case OP_ADD_DOUBLE: + funct = (void*) __aeabi_dadd; + break; + case OP_SUB_DOUBLE_2ADDR: + case OP_SUB_DOUBLE: + funct = (void*) __aeabi_dsub; + break; + case OP_DIV_DOUBLE_2ADDR: + case OP_DIV_DOUBLE: + funct = (void*) __aeabi_ddiv; + break; + case OP_MUL_DOUBLE_2ADDR: + case OP_MUL_DOUBLE: + funct = (void*) __aeabi_dmul; + break; + case OP_REM_DOUBLE_2ADDR: + case OP_REM_DOUBLE: + funct = (void*) fmod; + break; + case OP_NEG_DOUBLE: { + loadValuePair(cUnit, vSrc2, r0, r1); + loadConstant(cUnit, r2, 0x80000000); + newLIR3(cUnit, ARMV5TE_ADD_RRR, r1, r1, r2); + storeValuePair(cUnit, r0, r1, vDest, r2); + return false; + } + default: + return true; + } + loadConstant(cUnit, r4PC, (int)funct); + loadValuePair(cUnit, vSrc1, r0, r1); + loadValuePair(cUnit, vSrc2, r2, r3); + newLIR1(cUnit, ARMV5TE_BLX_R, r4PC); + storeValuePair(cUnit, r0, r1, vDest, r2); + return false; +} + +static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, int vDest, + int vSrc1, int vSrc2) +{ + int firstOp = ARMV5TE_BKPT; + int secondOp = ARMV5TE_BKPT; + bool callOut = false; + void *callTgt; + int retReg = r0; + /* TODO - find proper .h file to declare these */ + long long __aeabi_ldivmod(long long op1, long long op2); + + switch (mir->dalvikInsn.opCode) { + case OP_NOT_LONG: + firstOp = ARMV5TE_MVN; + secondOp = ARMV5TE_MVN; + break; + case OP_ADD_LONG: + case OP_ADD_LONG_2ADDR: + firstOp = ARMV5TE_ADD_RRR; + secondOp = ARMV5TE_ADC; + break; + case OP_SUB_LONG: + case OP_SUB_LONG_2ADDR: + firstOp = ARMV5TE_SUB_RRR; + secondOp = ARMV5TE_SBC; + break; + case OP_MUL_LONG: + case OP_MUL_LONG_2ADDR: + loadValuePair(cUnit, vSrc1, r0, r1); + loadValuePair(cUnit, vSrc2, r2, r3); + genDispatchToHandler(cUnit, TEMPLATE_MUL_LONG); + storeValuePair(cUnit, r0, r1, vDest, r2); + return false; + break; + case OP_DIV_LONG: + case OP_DIV_LONG_2ADDR: + callOut = true; + retReg = r0; + callTgt = (void*)__aeabi_ldivmod; + break; + /* NOTE - result is in r2/r3 instead of r0/r1 */ + case OP_REM_LONG: + case OP_REM_LONG_2ADDR: + callOut = true; + callTgt = (void*)__aeabi_ldivmod; + retReg = r2; + break; + case OP_AND_LONG: + case OP_AND_LONG_2ADDR: + firstOp = ARMV5TE_AND_RR; + secondOp = ARMV5TE_AND_RR; + break; + case OP_OR_LONG: + case OP_OR_LONG_2ADDR: + firstOp = ARMV5TE_ORR; + secondOp = ARMV5TE_ORR; + break; + case OP_XOR_LONG: + case OP_XOR_LONG_2ADDR: + firstOp = ARMV5TE_EOR; + secondOp = 
ARMV5TE_EOR; + break; + case OP_NEG_LONG: + loadValuePair(cUnit, vSrc2, r2, r3); + loadConstant(cUnit, r1, 0); + newLIR3(cUnit, ARMV5TE_SUB_RRR, r0, r1, r2); + newLIR2(cUnit, ARMV5TE_SBC, r1, r3); + storeValuePair(cUnit, r0, r1, vDest, r2); + return false; + default: + LOGE("Invalid long arith op"); + dvmAbort(); + } + if (!callOut) { + loadValuePair(cUnit, vSrc1, r0, r1); + loadValuePair(cUnit, vSrc2, r2, r3); + genBinaryOpWide(cUnit, vDest, firstOp, secondOp); + } else { + loadValuePair(cUnit, vSrc2, r2, r3); + loadConstant(cUnit, r4PC, (int) callTgt); + loadValuePair(cUnit, vSrc1, r0, r1); + newLIR1(cUnit, ARMV5TE_BLX_R, r4PC); + storeValuePair(cUnit, retReg, retReg+1, vDest, r4PC); + } + return false; +} + +static bool genArithOpInt(CompilationUnit *cUnit, MIR *mir, int vDest, + int vSrc1, int vSrc2) +{ + int armOp = ARMV5TE_BKPT; + bool callOut = false; + bool checkZero = false; + int retReg = r0; + void *callTgt; + + /* TODO - find proper .h file to declare these */ + int __aeabi_idivmod(int op1, int op2); + int __aeabi_idiv(int op1, int op2); + + switch (mir->dalvikInsn.opCode) { + case OP_NEG_INT: + armOp = ARMV5TE_NEG; + break; + case OP_NOT_INT: + armOp = ARMV5TE_MVN; + break; + case OP_ADD_INT: + case OP_ADD_INT_2ADDR: + armOp = ARMV5TE_ADD_RRR; + break; + case OP_SUB_INT: + case OP_SUB_INT_2ADDR: + armOp = ARMV5TE_SUB_RRR; + break; + case OP_MUL_INT: + case OP_MUL_INT_2ADDR: + armOp = ARMV5TE_MUL; + break; + case OP_DIV_INT: + case OP_DIV_INT_2ADDR: + callOut = true; + checkZero = true; + callTgt = __aeabi_idiv; + retReg = r0; + break; + /* NOTE: returns in r1 */ + case OP_REM_INT: + case OP_REM_INT_2ADDR: + callOut = true; + checkZero = true; + callTgt = __aeabi_idivmod; + retReg = r1; + break; + case OP_AND_INT: + case OP_AND_INT_2ADDR: + armOp = ARMV5TE_AND_RR; + break; + case OP_OR_INT: + case OP_OR_INT_2ADDR: + armOp = ARMV5TE_ORR; + break; + case OP_XOR_INT: + case OP_XOR_INT_2ADDR: + armOp = ARMV5TE_EOR; + break; + case OP_SHL_INT: + case OP_SHL_INT_2ADDR: + armOp = ARMV5TE_LSLV; + break; + case OP_SHR_INT: + case OP_SHR_INT_2ADDR: + armOp = ARMV5TE_ASRV; + break; + case OP_USHR_INT: + case OP_USHR_INT_2ADDR: + armOp = ARMV5TE_LSRV; + break; + default: + LOGE("Invalid word arith op: 0x%x(%d)", + mir->dalvikInsn.opCode, mir->dalvikInsn.opCode); + dvmAbort(); + } + if (!callOut) { + loadValue(cUnit, vSrc1, r0); + loadValue(cUnit, vSrc2, r1); + genBinaryOp(cUnit, vDest, armOp); + } else { + loadValue(cUnit, vSrc2, r1); + loadConstant(cUnit, r2, (int) callTgt); + loadValue(cUnit, vSrc1, r0); + if (checkZero) { + genNullCheck(cUnit, r1, mir->offset, NULL); + } + newLIR1(cUnit, ARMV5TE_BLX_R, r2); + storeValue(cUnit, retReg, vDest, r2); + } + return false; +} + +static bool genArithOp(CompilationUnit *cUnit, MIR *mir) +{ + OpCode opCode = mir->dalvikInsn.opCode; + int vA = mir->dalvikInsn.vA; + int vB = mir->dalvikInsn.vB; + int vC = mir->dalvikInsn.vC; + + if ((opCode >= OP_ADD_LONG_2ADDR) && (opCode <= OP_XOR_LONG_2ADDR)) { + return genArithOpLong(cUnit,mir, vA, vA, vB); + } + if ((opCode >= OP_ADD_LONG) && (opCode <= OP_XOR_LONG)) { + return genArithOpLong(cUnit,mir, vA, vB, vC); + } + if ((opCode >= OP_SHL_LONG_2ADDR) && (opCode <= OP_USHR_LONG_2ADDR)) { + return genShiftOpLong(cUnit,mir, vA, vA, vB); + } + if ((opCode >= OP_SHL_LONG) && (opCode <= OP_USHR_LONG)) { + return genShiftOpLong(cUnit,mir, vA, vB, vC); + } + if ((opCode >= OP_ADD_INT_2ADDR) && (opCode <= OP_USHR_INT_2ADDR)) { + return genArithOpInt(cUnit,mir, vA, vA, vB); + } + if ((opCode >= OP_ADD_INT) && 
(opCode <= OP_USHR_INT)) { + return genArithOpInt(cUnit,mir, vA, vB, vC); + } + if ((opCode >= OP_ADD_FLOAT_2ADDR) && (opCode <= OP_REM_FLOAT_2ADDR)) { + return genArithOpFloat(cUnit,mir, vA, vA, vB); + } + if ((opCode >= OP_ADD_FLOAT) && (opCode <= OP_REM_FLOAT)) { + return genArithOpFloat(cUnit,mir, vA, vB, vC); + } + if ((opCode >= OP_ADD_DOUBLE_2ADDR) && (opCode <= OP_REM_DOUBLE_2ADDR)) { + return genArithOpDouble(cUnit,mir, vA, vA, vB); + } + if ((opCode >= OP_ADD_DOUBLE) && (opCode <= OP_REM_DOUBLE)) { + return genArithOpDouble(cUnit,mir, vA, vB, vC); + } + return true; +} + +static bool genConversion(CompilationUnit *cUnit, MIR *mir, void *funct, + int srcSize, int tgtSize) +{ + loadConstant(cUnit, r2, (int)funct); + if (srcSize == 1) { + loadValue(cUnit, mir->dalvikInsn.vB, r0); + } else { + loadValuePair(cUnit, mir->dalvikInsn.vB, r0, r1); + } + newLIR1(cUnit, ARMV5TE_BLX_R, r2); + if (tgtSize == 1) { + storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); + } else { + storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2); + } + return false; +} + +/* Experimental example of completely inlining a native replacement */ +static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir) +{ + int offset = (int) &((InterpState *) NULL)->retval; + DecodedInstruction *dInsn = &mir->dalvikInsn; + assert(dInsn->vA == 1); + loadValue(cUnit, dInsn->arg[0], r0); + loadConstant(cUnit, r1, gDvm.offJavaLangString_count); + genNullCheck(cUnit, r0, mir->offset, NULL); + newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r0, r1); + newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rGLUE, offset >> 2); + return false; +} + +static void genProcessArgsNoRange(CompilationUnit *cUnit, MIR *mir, + DecodedInstruction *dInsn, + Armv5teLIR **pcrLabel) +{ + unsigned int i; + unsigned int regMask = 0; + + /* Load arguments to r0..r4 */ + for (i = 0; i < dInsn->vA; i++) { + regMask |= 1 << i; + loadValue(cUnit, dInsn->arg[i], i); + } + if (regMask) { + /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */ + newLIR2(cUnit, ARMV5TE_MOV_RR, r7, rFP); + newLIR2(cUnit, ARMV5TE_SUB_RI8, r7, + sizeof(StackSaveArea) + (dInsn->vA << 2)); + /* generate null check */ + if (pcrLabel) { + *pcrLabel = genNullCheck(cUnit, r0, mir->offset, NULL); + } + newLIR2(cUnit, ARMV5TE_STMIA, r7, regMask); + } +} + +static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir, + DecodedInstruction *dInsn, + Armv5teLIR **pcrLabel) +{ + int srcOffset = dInsn->vC << 2; + int numArgs = dInsn->vA; + int regMask; + /* + * r4PC : &rFP[vC] + * r7: &newFP[0] + */ + if (srcOffset < 8) { + newLIR3(cUnit, ARMV5TE_ADD_RRI3, r4PC, rFP, srcOffset); + } else { + loadConstant(cUnit, r4PC, srcOffset); + newLIR3(cUnit, ARMV5TE_ADD_RRR, r4PC, rFP, r4PC); + } + /* load [r0 .. min(numArgs,4)] */ + regMask = (1 << ((numArgs < 4) ? numArgs : 4)) - 1; + newLIR2(cUnit, ARMV5TE_LDMIA, r4PC, regMask); + + if (sizeof(StackSaveArea) + (numArgs << 2) < 256) { + newLIR2(cUnit, ARMV5TE_MOV_RR, r7, rFP); + newLIR2(cUnit, ARMV5TE_SUB_RI8, r7, + sizeof(StackSaveArea) + (numArgs << 2)); + } else { + loadConstant(cUnit, r7, sizeof(StackSaveArea) + (numArgs << 2)); + newLIR3(cUnit, ARMV5TE_SUB_RRR, r7, rFP, r7); + } + + /* generate null check */ + if (pcrLabel) { + *pcrLabel = genNullCheck(cUnit, r0, mir->offset, NULL); + } + + /* + * Handle remaining 4n arguments: + * store previously loaded 4 values and load the next 4 values + */ + if (numArgs >= 8) { + Armv5teLIR *loopLabel = NULL; + /* + * r0 contains "this" and it will be used later, so push it to the stack + * first. 
Pushing r5 is just for stack alignment purposes. + */ + newLIR1(cUnit, ARMV5TE_PUSH, 1 << r0 | 1 << 5); + /* No need to generate the loop structure if numArgs <= 11 */ + if (numArgs > 11) { + loadConstant(cUnit, 5, ((numArgs - 4) >> 2) << 2); + loopLabel = newLIR0(cUnit, ARMV5TE_PSEUDO_TARGET_LABEL); + } + newLIR2(cUnit, ARMV5TE_STMIA, r7, regMask); + newLIR2(cUnit, ARMV5TE_LDMIA, r4PC, regMask); + /* No need to generate the loop structure if numArgs <= 11 */ + if (numArgs > 11) { + newLIR2(cUnit, ARMV5TE_SUB_RI8, 5, 4); + genConditionalBranch(cUnit, ARM_COND_NE, loopLabel); + } + } + + /* Save the last batch of loaded values */ + newLIR2(cUnit, ARMV5TE_STMIA, r7, regMask); + + /* Generate the loop epilogue - don't use r0 */ + if ((numArgs > 4) && (numArgs % 4)) { + regMask = ((1 << (numArgs & 0x3)) - 1) << 1; + newLIR2(cUnit, ARMV5TE_LDMIA, r4PC, regMask); + } + if (numArgs >= 8) + newLIR1(cUnit, ARMV5TE_POP, 1 << r0 | 1 << 5); + + /* Save the modulo 4 arguments */ + if ((numArgs > 4) && (numArgs % 4)) { + newLIR2(cUnit, ARMV5TE_STMIA, r7, regMask); + } +} + +static void genInvokeCommon(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, + Armv5teLIR *labelList, Armv5teLIR *pcrLabel, + const Method *calleeMethod) +{ + Armv5teLIR *retChainingCell = &labelList[bb->fallThrough->id]; + + /* r1 = &retChainingCell */ + Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL, + r1, 0); + /* r4PC = dalvikCallsite */ + loadConstant(cUnit, r4PC, + (int) (cUnit->method->insns + mir->offset)); + addrRetChain->generic.target = (LIR *) retChainingCell; + /* + * r0 = calleeMethod (loaded upon calling genInvokeCommon) + * r1 = &ChainingCell + * r4PC = callsiteDPC + */ + if (dvmIsNativeMethod(calleeMethod)) { + genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT); +#if defined(INVOKE_STATS) + gDvmJit.invokeNoOpt++; +#endif + } else { + genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_CHAIN); +#if defined(INVOKE_STATS) + gDvmJit.invokeChain++; +#endif + genUnconditionalBranch(cUnit, &labelList[bb->taken->id]); + } + /* Handle exceptions using the interpreter */ + genTrap(cUnit, mir->offset, pcrLabel); +} + +/* Geneate a branch to go back to the interpreter */ +static void genPuntToInterp(CompilationUnit *cUnit, unsigned int offset) +{ + /* r0 = dalvik pc */ + loadConstant(cUnit, r0, (int) (cUnit->method->insns + offset)); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, rGLUE, + offsetof(InterpState, jitToInterpEntries.dvmJitToInterpPunt) >> 2); + newLIR1(cUnit, ARMV5TE_BLX_R, r1); +} + +/* + * Attempt to single step one instruction using the interpreter and return + * to the compiled code for the next Dalvik instruction + */ +static void genInterpSingleStep(CompilationUnit *cUnit, MIR *mir) +{ + int flags = dexGetInstrFlags(gDvm.instrFlags, mir->dalvikInsn.opCode); + int flagsToCheck = kInstrCanBranch | kInstrCanSwitch | kInstrCanReturn | + kInstrCanThrow; + if ((mir->next == NULL) || (flags & flagsToCheck)) { + genPuntToInterp(cUnit, mir->offset); + return; + } + int entryAddr = offsetof(InterpState, + jitToInterpEntries.dvmJitToInterpSingleStep); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r2, rGLUE, entryAddr >> 2); + /* r0 = dalvik pc */ + loadConstant(cUnit, r0, (int) (cUnit->method->insns + mir->offset)); + /* r1 = dalvik pc of following instruction */ + loadConstant(cUnit, r1, (int) (cUnit->method->insns + mir->next->offset)); + newLIR1(cUnit, ARMV5TE_BLX_R, r2); +} + + +/*****************************************************************************/ +/* + * The following are the first-level codegen 
routines that analyze the format + * of each bytecode then either dispatch special purpose codegen routines + * or produce corresponding Thumb instructions directly. + */ + +static bool handleFmt10t_Fmt20t_Fmt30t(CompilationUnit *cUnit, MIR *mir, + BasicBlock *bb, Armv5teLIR *labelList) +{ + /* For OP_GOTO, OP_GOTO_16, and OP_GOTO_32 */ + genUnconditionalBranch(cUnit, &labelList[bb->taken->id]); + return false; +} + +static bool handleFmt10x(CompilationUnit *cUnit, MIR *mir) +{ + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + if (((dalvikOpCode >= OP_UNUSED_3E) && (dalvikOpCode <= OP_UNUSED_43)) || + ((dalvikOpCode >= OP_UNUSED_E3) && (dalvikOpCode <= OP_UNUSED_EC))) { + LOGE("Codegen: got unused opcode 0x%x\n",dalvikOpCode); + return true; + } + switch (dalvikOpCode) { + case OP_RETURN_VOID: + genReturnCommon(cUnit,mir); + break; + case OP_UNUSED_73: + case OP_UNUSED_79: + case OP_UNUSED_7A: + LOGE("Codegen: got unused opcode 0x%x\n",dalvikOpCode); + return true; + case OP_NOP: + break; + default: + return true; + } + return false; +} + +static bool handleFmt11n_Fmt31i(CompilationUnit *cUnit, MIR *mir) +{ + switch (mir->dalvikInsn.opCode) { + case OP_CONST: + case OP_CONST_4: + loadConstant(cUnit, r0, mir->dalvikInsn.vB); + storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); + break; + case OP_CONST_WIDE_32: + loadConstant(cUnit, r0, mir->dalvikInsn.vB); + newLIR3(cUnit, ARMV5TE_ASR, r1, r0, 31); + storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2); + break; + default: + return true; + } + return false; +} + +static bool handleFmt21h(CompilationUnit *cUnit, MIR *mir) +{ + switch (mir->dalvikInsn.opCode) { + case OP_CONST_HIGH16: + loadConstant(cUnit, r0, mir->dalvikInsn.vB << 16); + storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); + break; + case OP_CONST_WIDE_HIGH16: + loadConstant(cUnit, r1, mir->dalvikInsn.vB << 16); + loadConstant(cUnit, r0, 0); + storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2); + break; + default: + return true; + } + return false; +} + +static bool handleFmt20bc(CompilationUnit *cUnit, MIR *mir) +{ + /* For OP_THROW_VERIFICATION_ERROR */ + genInterpSingleStep(cUnit, mir); + return false; +} + +static bool handleFmt21c_Fmt31c(CompilationUnit *cUnit, MIR *mir) +{ + switch (mir->dalvikInsn.opCode) { + /* + * TODO: Verify that we can ignore the resolution check here because + * it will have already successfully been interpreted once + */ + case OP_CONST_STRING_JUMBO: + case OP_CONST_STRING: { + void *strPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResStrings[mir->dalvikInsn.vB]); + assert(strPtr != NULL); + loadConstant(cUnit, r0, (int) strPtr ); + storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); + break; + } + /* + * TODO: Verify that we can ignore the resolution check here because + * it will have already successfully been interpreted once + */ + case OP_CONST_CLASS: { + void *classPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]); + assert(classPtr != NULL); + loadConstant(cUnit, r0, (int) classPtr ); + storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); + break; + } + case OP_SGET_OBJECT: + case OP_SGET_BOOLEAN: + case OP_SGET_CHAR: + case OP_SGET_BYTE: + case OP_SGET_SHORT: + case OP_SGET: { + int valOffset = (int)&((struct StaticField*)NULL)->value; + void *fieldPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]); + assert(fieldPtr != NULL); + loadConstant(cUnit, r0, (int) fieldPtr + valOffset); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, 0); + storeValue(cUnit, r0, mir->dalvikInsn.vA, r2); + break; + } + 
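/* + * Reviewer note (editorial sketch, not part of the original change): the + * valOffset idiom used by the SGET/SPUT cases here, + * (int)&((struct StaticField*)NULL)->value, is simply a hand-rolled + * offsetof(struct StaticField, value); adding it to the resolved field + * pointer lets a single LDR/STR (or LDMIA/STMIA for the wide cases) read or + * write the static field's value slot directly. + */ +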
case OP_SGET_WIDE: { + int valOffset = (int)&((struct StaticField*)NULL)->value; + void *fieldPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]); + assert(fieldPtr != NULL); + loadConstant(cUnit, r2, (int) fieldPtr + valOffset); + newLIR2(cUnit, ARMV5TE_LDMIA, r2, (1<<r0 | 1<<r1)); + storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2); + break; + } + case OP_SPUT_OBJECT: + case OP_SPUT_BOOLEAN: + case OP_SPUT_CHAR: + case OP_SPUT_BYTE: + case OP_SPUT_SHORT: + case OP_SPUT: { + int valOffset = (int)&((struct StaticField*)NULL)->value; + void *fieldPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]); + assert(fieldPtr != NULL); + loadValue(cUnit, mir->dalvikInsn.vA, r0); + loadConstant(cUnit, r1, (int) fieldPtr + valOffset); + newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, r1, 0); + break; + } + case OP_SPUT_WIDE: { + int valOffset = (int)&((struct StaticField*)NULL)->value; + void *fieldPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]); + assert(fieldPtr != NULL); + loadValuePair(cUnit, mir->dalvikInsn.vA, r0, r1); + loadConstant(cUnit, r2, (int) fieldPtr + valOffset); + newLIR2(cUnit, ARMV5TE_STMIA, r2, (1<<r0 | 1<<r1)); + break; + } + case OP_NEW_INSTANCE: { + ClassObject *classPtr = + (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]); + assert(classPtr != NULL); + assert(classPtr->status & CLASS_INITIALIZED); + if ((classPtr->accessFlags & (ACC_INTERFACE|ACC_ABSTRACT)) != 0) { + /* It's going to throw, just let the interp. deal with it. */ + genInterpSingleStep(cUnit, mir); + return false; + } + loadConstant(cUnit, r0, (int) classPtr); + loadConstant(cUnit, r4PC, (int)dvmAllocObject); + genExportPC(cUnit, mir, r2, r3 ); + loadConstant(cUnit, r1, ALLOC_DONT_TRACK); + newLIR1(cUnit, ARMV5TE_BLX_R, r4PC); + /* + * TODO: As coded, we'll bail and reinterpret on alloc failure. + * Need a general mechanism to bail to thrown exception code. + */ + genNullCheck(cUnit, r0, mir->offset, NULL); + storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); + break; + } + case OP_CHECK_CAST: { + ClassObject *classPtr = + (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]); + loadConstant(cUnit, r1, (int) classPtr ); + loadValue(cUnit, mir->dalvikInsn.vA, r0); /* Ref */ + /* + * TODO - in theory classPtr should be resolved by the time this + * instruction is made into a trace, but we are seeing NULL at runtime + * so this check is temporarily used as a workaround. + */ + Armv5teLIR * pcrLabel = genNullCheck(cUnit, r1, mir->offset, NULL); + newLIR2(cUnit, ARMV5TE_CMP_RI8, r0, 0); /* Null? 
*/ + Armv5teLIR *branch1 = + newLIR2(cUnit, ARMV5TE_B_COND, 4, ARM_COND_EQ); + /* r0 now contains object->clazz */ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, + offsetof(Object, clazz) >> 2); + loadConstant(cUnit, r4PC, (int)dvmInstanceofNonTrivial); + newLIR2(cUnit, ARMV5TE_CMP_RR, r0, r1); + Armv5teLIR *branch2 = + newLIR2(cUnit, ARMV5TE_B_COND, 2, ARM_COND_EQ); + newLIR1(cUnit, ARMV5TE_BLX_R, r4PC); + /* check cast failed - punt to the interpreter */ + genNullCheck(cUnit, r0, mir->offset, pcrLabel); + /* check cast passed - branch target here */ + Armv5teLIR *target = newLIR0(cUnit, ARMV5TE_PSEUDO_TARGET_LABEL); + branch1->generic.target = (LIR *)target; + branch2->generic.target = (LIR *)target; + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt11x(CompilationUnit *cUnit, MIR *mir) +{ + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + switch (dalvikOpCode) { + case OP_MOVE_EXCEPTION: { + int offset = offsetof(InterpState, self); + int exOffset = offsetof(Thread, exception); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, offset >> 2); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, r0, exOffset >> 2); + storeValue(cUnit, r1, mir->dalvikInsn.vA, r0); + break; + } + case OP_MOVE_RESULT: + case OP_MOVE_RESULT_OBJECT: { + int offset = offsetof(InterpState, retval); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, offset >> 2); + storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); + break; + } + case OP_MOVE_RESULT_WIDE: { + int offset = offsetof(InterpState, retval); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, offset >> 2); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, rGLUE, (offset >> 2)+1); + storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2); + break; + } + case OP_RETURN_WIDE: { + loadValuePair(cUnit, mir->dalvikInsn.vA, r0, r1); + int offset = offsetof(InterpState, retval); + newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rGLUE, offset >> 2); + newLIR3(cUnit, ARMV5TE_STR_RRI5, r1, rGLUE, (offset >> 2)+1); + genReturnCommon(cUnit,mir); + break; + } + case OP_RETURN: + case OP_RETURN_OBJECT: { + loadValue(cUnit, mir->dalvikInsn.vA, r0); + int offset = offsetof(InterpState, retval); + newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rGLUE, offset >> 2); + genReturnCommon(cUnit,mir); + break; + } + /* + * TODO-VERIFY: May be playing a bit fast and loose here. As coded, + * a failure on lock/unlock will cause us to revert to the interpeter + * to try again. This means we essentially ignore the first failure on + * the assumption that the interpreter will correctly handle the 2nd. + */ + case OP_MONITOR_ENTER: + case OP_MONITOR_EXIT: { + int offset = offsetof(InterpState, self); + loadValue(cUnit, mir->dalvikInsn.vA, r1); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, offset >> 2); + if (dalvikOpCode == OP_MONITOR_ENTER) { + loadConstant(cUnit, r2, (int)dvmLockObject); + } else { + loadConstant(cUnit, r2, (int)dvmUnlockObject); + } + /* + * TODO-VERIFY: Note that we're not doing an EXPORT_PC, as + * Lock/unlock won't throw, and this code does not support + * DEADLOCK_PREDICTION or MONITOR_TRACKING. Should it? 
+ */ + genNullCheck(cUnit, r1, mir->offset, NULL); + /* Do the call */ + newLIR1(cUnit, ARMV5TE_BLX_R, r2); + break; + } + case OP_THROW: { + genInterpSingleStep(cUnit, mir); + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt12x(CompilationUnit *cUnit, MIR *mir) +{ + OpCode opCode = mir->dalvikInsn.opCode; + int vSrc1Dest = mir->dalvikInsn.vA; + int vSrc2 = mir->dalvikInsn.vB; + + /* TODO - find the proper include file to declare these */ + float __aeabi_i2f( int op1 ); + int __aeabi_f2iz( float op1 ); + float __aeabi_d2f( double op1 ); + double __aeabi_f2d( float op1 ); + double __aeabi_i2d( int op1 ); + int __aeabi_d2iz( double op1 ); + long __aeabi_f2lz( float op1 ); + float __aeabi_l2f( long op1 ); + long __aeabi_d2lz( double op1 ); + double __aeabi_l2d( long op1 ); + + if ( (opCode >= OP_ADD_INT_2ADDR) && (opCode <= OP_REM_DOUBLE_2ADDR)) { + return genArithOp( cUnit, mir ); + } + + switch (opCode) { + case OP_INT_TO_FLOAT: + return genConversion(cUnit, mir, (void*)__aeabi_i2f, 1, 1); + case OP_FLOAT_TO_INT: + return genConversion(cUnit, mir, (void*)__aeabi_f2iz, 1, 1); + case OP_DOUBLE_TO_FLOAT: + return genConversion(cUnit, mir, (void*)__aeabi_d2f, 2, 1); + case OP_FLOAT_TO_DOUBLE: + return genConversion(cUnit, mir, (void*)__aeabi_f2d, 1, 2); + case OP_INT_TO_DOUBLE: + return genConversion(cUnit, mir, (void*)__aeabi_i2d, 1, 2); + case OP_DOUBLE_TO_INT: + return genConversion(cUnit, mir, (void*)__aeabi_d2iz, 2, 1); + case OP_FLOAT_TO_LONG: + return genConversion(cUnit, mir, (void*)__aeabi_f2lz, 1, 2); + case OP_LONG_TO_FLOAT: + return genConversion(cUnit, mir, (void*)__aeabi_l2f, 2, 1); + case OP_DOUBLE_TO_LONG: + return genConversion(cUnit, mir, (void*)__aeabi_d2lz, 2, 2); + case OP_LONG_TO_DOUBLE: + return genConversion(cUnit, mir, (void*)__aeabi_l2d, 2, 2); + case OP_NEG_INT: + case OP_NOT_INT: + return genArithOpInt(cUnit, mir, vSrc1Dest, vSrc1Dest, vSrc2); + case OP_NEG_LONG: + case OP_NOT_LONG: + return genArithOpLong(cUnit,mir, vSrc1Dest, vSrc1Dest, vSrc2); + case OP_NEG_FLOAT: + return genArithOpFloat(cUnit,mir,vSrc1Dest,vSrc1Dest,vSrc2); + case OP_NEG_DOUBLE: + return genArithOpDouble(cUnit,mir,vSrc1Dest,vSrc1Dest,vSrc2); + case OP_MOVE_WIDE: + loadValuePair(cUnit, mir->dalvikInsn.vB, r0, r1); + storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2); + break; + case OP_INT_TO_LONG: + loadValue(cUnit, mir->dalvikInsn.vB, r0); + newLIR3(cUnit, ARMV5TE_ASR, r1, r0, 31); + storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2); + break; + case OP_MOVE: + case OP_MOVE_OBJECT: + case OP_LONG_TO_INT: + loadValue(cUnit, vSrc2, r0); + storeValue(cUnit, r0, vSrc1Dest, r1); + break; + case OP_INT_TO_BYTE: + loadValue(cUnit, vSrc2, r0); + newLIR3(cUnit, ARMV5TE_LSL, r0, r0, 24); + newLIR3(cUnit, ARMV5TE_ASR, r0, r0, 24); + storeValue(cUnit, r0, vSrc1Dest, r1); + break; + case OP_INT_TO_SHORT: + loadValue(cUnit, vSrc2, r0); + newLIR3(cUnit, ARMV5TE_LSL, r0, r0, 16); + newLIR3(cUnit, ARMV5TE_ASR, r0, r0, 16); + storeValue(cUnit, r0, vSrc1Dest, r1); + break; + case OP_INT_TO_CHAR: + loadValue(cUnit, vSrc2, r0); + newLIR3(cUnit, ARMV5TE_LSL, r0, r0, 16); + newLIR3(cUnit, ARMV5TE_LSR, r0, r0, 16); + storeValue(cUnit, r0, vSrc1Dest, r1); + break; + case OP_ARRAY_LENGTH: { + int lenOffset = offsetof(ArrayObject, length); + loadValue(cUnit, vSrc2, r0); + genNullCheck(cUnit, r0, mir->offset, NULL); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, lenOffset >> 2); + storeValue(cUnit, r0, vSrc1Dest, r1); + break; + } + default: + return true; + } + return false; +} + 
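+/* + * Reviewer note (editorial sketch, not part of the original change): the + * int-to-byte/short/char cases in handleFmt12x above use the usual shift + * idiom for narrowing, e.g. int-to-byte emits "lsl r0, r0, #24; asr r0, r0, + * #24", the Thumb equivalent of the C expression (int)(int8_t)x (sign-extend + * the low 8 bits), while int-to-char uses lsl/lsr #16 to zero-extend instead. + */ +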
+static bool handleFmt21s(CompilationUnit *cUnit, MIR *mir) +{ + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + /* It takes a few instructions to handle OP_CONST_WIDE_16 inline */ + if (dalvikOpCode == OP_CONST_WIDE_16) { + int rDest = mir->dalvikInsn.vA; + int BBBB = mir->dalvikInsn.vB; + int rLow = r0, rHigh = r1; + if (BBBB == 0) { + newLIR2(cUnit, ARMV5TE_MOV_IMM, rLow, 0); + rHigh = rLow; + } else if (BBBB > 0 && BBBB <= 255) { + /* rLow = ssssBBBB */ + newLIR2(cUnit, ARMV5TE_MOV_IMM, rLow, BBBB); + /* rHigh = 0 */ + newLIR2(cUnit, ARMV5TE_MOV_IMM, rHigh, 0); + } else { + loadConstant(cUnit, rLow, BBBB); + /* + * arithmetic-shift-right 32 bits to get the high half of long + * [63..32] + */ + newLIR3(cUnit, ARMV5TE_ASR, rHigh, rLow, 0); + } + + /* Save the long values to the specified Dalvik register pair */ + /* + * If rDest is no greater than 30, use two "str rd, [rFP + immed_5]" + * instructions to store the results. Effective address is + * rFP + immed_5 << 2. + */ + if (rDest < 31) { + newLIR3(cUnit, ARMV5TE_STR_RRI5, rLow, rFP, rDest); + newLIR3(cUnit, ARMV5TE_STR_RRI5, rHigh, rFP, rDest+1); + } else { + /* + * Otherwise just load the frame offset from the constant pool and add + * it to rFP. Then use stmia to store the results to the specified + * register pair. + */ + /* Need to replicate the content in r0 to r1 */ + if (rLow == rHigh) { + newLIR3(cUnit, ARMV5TE_ADD_RRI3, rLow+1, rLow, 0); + } + /* load the rFP offset into r2 */ + loadConstant(cUnit, r2, rDest*4); + newLIR3(cUnit, ARMV5TE_ADD_RRR, r2, rFP, r2); + newLIR2(cUnit, ARMV5TE_STMIA, r2, (1<<r0 | 1<<r1)); + } + } else if (dalvikOpCode == OP_CONST_16) { + int rDest = mir->dalvikInsn.vA; + int BBBB = mir->dalvikInsn.vB; + if (BBBB >= 0 && BBBB <= 255) { + /* r0 = BBBB */ + newLIR2(cUnit, ARMV5TE_MOV_IMM, r0, BBBB); + } else { + loadConstant(cUnit, r0, BBBB); + } + + /* Save the constant to the specified Dalvik register */ + /* + * If rDest is no greater than 31, effective address is + * rFP + immed_5 << 2. + */ + if (rDest < 32) { + newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rFP, rDest); + } else { + /* + * Otherwise just load the frame offset from the constant pool and add + * it to rFP. Then use stmia to store the results to the specified + * register pair. 
+ */ + /* load the rFP offset into r2 */ + loadConstant(cUnit, r2, rDest*4); + newLIR3(cUnit, ARMV5TE_ADD_RRR, r2, rFP, r2); + newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, r2, 0); + } + } else { + return true; + } + return false; +} + +/* Compare agaist zero */ +static bool handleFmt21t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, + Armv5teLIR *labelList) +{ + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + Armv5teConditionCode cond; + + loadValue(cUnit, mir->dalvikInsn.vA, r0); + newLIR2(cUnit, ARMV5TE_CMP_RI8, r0, 0); + + switch (dalvikOpCode) { + case OP_IF_EQZ: + cond = ARM_COND_EQ; + break; + case OP_IF_NEZ: + cond = ARM_COND_NE; + break; + case OP_IF_LTZ: + cond = ARM_COND_LT; + break; + case OP_IF_GEZ: + cond = ARM_COND_GE; + break; + case OP_IF_GTZ: + cond = ARM_COND_GT; + break; + case OP_IF_LEZ: + cond = ARM_COND_LE; + break; + default: + cond = 0; + LOGE("Unexpected opcode (%d) for Fmt21t\n", dalvikOpCode); + dvmAbort(); + } + genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]); + /* This mostly likely will be optimized away in a later phase */ + genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]); + return false; +} + +static bool handleFmt22b_Fmt22s(CompilationUnit *cUnit, MIR *mir) +{ + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + int vSrc = mir->dalvikInsn.vB; + int vDest = mir->dalvikInsn.vA; + int lit = mir->dalvikInsn.vC; + int armOp; + + /* TODO: find the proper .h file to declare these */ + int __aeabi_idivmod(int op1, int op2); + int __aeabi_idiv(int op1, int op2); + + switch (dalvikOpCode) { + case OP_ADD_INT_LIT8: + case OP_ADD_INT_LIT16: + loadValue(cUnit, vSrc, r0); + if (lit <= 255 && lit >= 0) { + newLIR2(cUnit, ARMV5TE_ADD_RI8, r0, lit); + storeValue(cUnit, r0, vDest, r1); + } else if (lit >= -255 && lit <= 0) { + /* Convert to a small constant subtraction */ + newLIR2(cUnit, ARMV5TE_SUB_RI8, r0, -lit); + storeValue(cUnit, r0, vDest, r1); + } else { + loadConstant(cUnit, r1, lit); + genBinaryOp(cUnit, vDest, ARMV5TE_ADD_RRR); + } + break; + + case OP_RSUB_INT_LIT8: + case OP_RSUB_INT: + loadValue(cUnit, vSrc, r1); + loadConstant(cUnit, r0, lit); + genBinaryOp(cUnit, vDest, ARMV5TE_SUB_RRR); + break; + + case OP_MUL_INT_LIT8: + case OP_MUL_INT_LIT16: + case OP_AND_INT_LIT8: + case OP_AND_INT_LIT16: + case OP_OR_INT_LIT8: + case OP_OR_INT_LIT16: + case OP_XOR_INT_LIT8: + case OP_XOR_INT_LIT16: + loadValue(cUnit, vSrc, r0); + loadConstant(cUnit, r1, lit); + switch (dalvikOpCode) { + case OP_MUL_INT_LIT8: + case OP_MUL_INT_LIT16: + armOp = ARMV5TE_MUL; + break; + case OP_AND_INT_LIT8: + case OP_AND_INT_LIT16: + armOp = ARMV5TE_AND_RR; + break; + case OP_OR_INT_LIT8: + case OP_OR_INT_LIT16: + armOp = ARMV5TE_ORR; + break; + case OP_XOR_INT_LIT8: + case OP_XOR_INT_LIT16: + armOp = ARMV5TE_EOR; + break; + default: + dvmAbort(); + } + genBinaryOp(cUnit, vDest, armOp); + break; + + case OP_SHL_INT_LIT8: + case OP_SHR_INT_LIT8: + case OP_USHR_INT_LIT8: + loadValue(cUnit, vSrc, r0); + switch (dalvikOpCode) { + case OP_SHL_INT_LIT8: + armOp = ARMV5TE_LSL; + break; + case OP_SHR_INT_LIT8: + armOp = ARMV5TE_ASR; + break; + case OP_USHR_INT_LIT8: + armOp = ARMV5TE_LSR; + break; + default: dvmAbort(); + } + newLIR3(cUnit, armOp, r0, r0, lit); + storeValue(cUnit, r0, vDest, r1); + break; + + case OP_DIV_INT_LIT8: + case OP_DIV_INT_LIT16: + if (lit == 0) { + /* Let the interpreter deal with div by 0 */ + genInterpSingleStep(cUnit, mir); + return false; + } + loadConstant(cUnit, r2, (int)__aeabi_idiv); + loadConstant(cUnit, r1, lit); + loadValue(cUnit, vSrc, 
r0); + newLIR1(cUnit, ARMV5TE_BLX_R, r2); + storeValue(cUnit, r0, vDest, r2); + break; + + case OP_REM_INT_LIT8: + case OP_REM_INT_LIT16: + if (lit == 0) { + /* Let the interpreter deal with div by 0 */ + genInterpSingleStep(cUnit, mir); + return false; + } + loadConstant(cUnit, r2, (int)__aeabi_idivmod); + loadConstant(cUnit, r1, lit); + loadValue(cUnit, vSrc, r0); + newLIR1(cUnit, ARMV5TE_BLX_R, r2); + storeValue(cUnit, r1, vDest, r2); + break; + default: + return true; + } + return false; +} + +static bool handleFmt22c(CompilationUnit *cUnit, MIR *mir) +{ + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + int fieldOffset; + + if (dalvikOpCode >= OP_IGET && dalvikOpCode <= OP_IPUT_SHORT) { + InstField *pInstField = (InstField *) + cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC]; + int fieldOffset; + + assert(pInstField != NULL); + fieldOffset = pInstField->byteOffset; + } else { + /* To make the compiler happy */ + fieldOffset = 0; + } + switch (dalvikOpCode) { + /* + * TODO: I may be assuming too much here. + * Verify what is known at JIT time. + */ + case OP_NEW_ARRAY: { + void *classPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]); + assert(classPtr != NULL); + loadValue(cUnit, mir->dalvikInsn.vB, r1); /* Len */ + loadConstant(cUnit, r0, (int) classPtr ); + loadConstant(cUnit, r4PC, (int)dvmAllocArrayByClass); + Armv5teLIR *pcrLabel = + genRegImmCheck(cUnit, ARM_COND_MI, r1, 0, mir->offset, NULL); + genExportPC(cUnit, mir, r2, r3 ); + newLIR2(cUnit, ARMV5TE_MOV_IMM,r2,ALLOC_DONT_TRACK); + newLIR1(cUnit, ARMV5TE_BLX_R, r4PC); + /* + * TODO: As coded, we'll bail and reinterpret on alloc failure. + * Need a general mechanism to bail to thrown exception code. + */ + genNullCheck(cUnit, r0, mir->offset, pcrLabel); + storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); + break; + } + /* + * TODO: I may be assuming too much here. + * Verify what is known at JIT time. + */ + case OP_INSTANCE_OF: { + ClassObject *classPtr = + (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]); + assert(classPtr != NULL); + loadValue(cUnit, mir->dalvikInsn.vB, r1); /* Ref */ + loadConstant(cUnit, r2, (int) classPtr ); + loadConstant(cUnit, r0, 1); /* Assume true */ + newLIR2(cUnit, ARMV5TE_CMP_RI8, r1, 0); /* Null? 
*/ + Armv5teLIR *branch1 = newLIR2(cUnit, ARMV5TE_B_COND, 4, + ARM_COND_EQ); + /* r1 now contains object->clazz */ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, r1, + offsetof(Object, clazz) >> 2); + loadConstant(cUnit, r4PC, (int)dvmInstanceofNonTrivial); + newLIR2(cUnit, ARMV5TE_CMP_RR, r1, r2); + Armv5teLIR *branch2 = newLIR2(cUnit, ARMV5TE_B_COND, 2, + ARM_COND_EQ); + newLIR2(cUnit, ARMV5TE_MOV_RR, r0, r1); + newLIR2(cUnit, ARMV5TE_MOV_RR, r1, r2); + newLIR1(cUnit, ARMV5TE_BLX_R, r4PC); + /* branch target here */ + Armv5teLIR *target = newLIR0(cUnit, ARMV5TE_PSEUDO_TARGET_LABEL); + storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); + branch1->generic.target = (LIR *)target; + branch2->generic.target = (LIR *)target; + break; + } + case OP_IGET_WIDE: + genIGetWide(cUnit, mir, fieldOffset); + break; + case OP_IGET: + case OP_IGET_OBJECT: + genIGet(cUnit, mir, ARMV5TE_LDR_RRR, fieldOffset); + break; + case OP_IGET_BOOLEAN: + genIGet(cUnit, mir, ARMV5TE_LDRB_RRR, fieldOffset); + break; + case OP_IGET_BYTE: + genIGet(cUnit, mir, ARMV5TE_LDRSB_RRR, fieldOffset); + break; + case OP_IGET_CHAR: + genIGet(cUnit, mir, ARMV5TE_LDRH_RRR, fieldOffset); + break; + case OP_IGET_SHORT: + genIGet(cUnit, mir, ARMV5TE_LDRSH_RRR, fieldOffset); + break; + case OP_IPUT_WIDE: + genIPutWide(cUnit, mir, fieldOffset); + break; + case OP_IPUT: + case OP_IPUT_OBJECT: + genIPut(cUnit, mir, ARMV5TE_STR_RRR, fieldOffset); + break; + case OP_IPUT_SHORT: + case OP_IPUT_CHAR: + genIPut(cUnit, mir, ARMV5TE_STRH_RRR, fieldOffset); + break; + case OP_IPUT_BYTE: + case OP_IPUT_BOOLEAN: + genIPut(cUnit, mir, ARMV5TE_STRB_RRR, fieldOffset); + break; + default: + return true; + } + return false; +} + +static bool handleFmt22cs(CompilationUnit *cUnit, MIR *mir) +{ + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + int fieldOffset = mir->dalvikInsn.vC; + switch (dalvikOpCode) { + case OP_IGET_QUICK: + case OP_IGET_OBJECT_QUICK: + genIGet(cUnit, mir, ARMV5TE_LDR_RRR, fieldOffset); + break; + case OP_IPUT_QUICK: + case OP_IPUT_OBJECT_QUICK: + genIPut(cUnit, mir, ARMV5TE_STR_RRR, fieldOffset); + break; + case OP_IGET_WIDE_QUICK: + genIGetWide(cUnit, mir, fieldOffset); + break; + case OP_IPUT_WIDE_QUICK: + genIPutWide(cUnit, mir, fieldOffset); + break; + default: + return true; + } + return false; + +} + +/* Compare agaist zero */ +static bool handleFmt22t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, + Armv5teLIR *labelList) +{ + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + Armv5teConditionCode cond; + + loadValue(cUnit, mir->dalvikInsn.vA, r0); + loadValue(cUnit, mir->dalvikInsn.vB, r1); + newLIR2(cUnit, ARMV5TE_CMP_RR, r0, r1); + + switch (dalvikOpCode) { + case OP_IF_EQ: + cond = ARM_COND_EQ; + break; + case OP_IF_NE: + cond = ARM_COND_NE; + break; + case OP_IF_LT: + cond = ARM_COND_LT; + break; + case OP_IF_GE: + cond = ARM_COND_GE; + break; + case OP_IF_GT: + cond = ARM_COND_GT; + break; + case OP_IF_LE: + cond = ARM_COND_LE; + break; + default: + cond = 0; + LOGE("Unexpected opcode (%d) for Fmt22t\n", dalvikOpCode); + dvmAbort(); + } + genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]); + /* This mostly likely will be optimized away in a later phase */ + genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]); + return false; +} + +static bool handleFmt22x_Fmt32x(CompilationUnit *cUnit, MIR *mir) +{ + OpCode opCode = mir->dalvikInsn.opCode; + int vSrc1Dest = mir->dalvikInsn.vA; + int vSrc2 = mir->dalvikInsn.vB; + + switch (opCode) { + case OP_MOVE_16: + case OP_MOVE_OBJECT_16: + case OP_MOVE_FROM16: + case 
OP_MOVE_OBJECT_FROM16: + loadValue(cUnit, vSrc2, r0); + storeValue(cUnit, r0, vSrc1Dest, r1); + break; + case OP_MOVE_WIDE_16: + case OP_MOVE_WIDE_FROM16: + loadValuePair(cUnit, vSrc2, r0, r1); + storeValuePair(cUnit, r0, r1, vSrc1Dest, r2); + break; + default: + return true; + } + return false; +} + +static bool handleFmt23x(CompilationUnit *cUnit, MIR *mir) +{ + OpCode opCode = mir->dalvikInsn.opCode; + int vA = mir->dalvikInsn.vA; + int vB = mir->dalvikInsn.vB; + int vC = mir->dalvikInsn.vC; + + if ( (opCode >= OP_ADD_INT) && (opCode <= OP_REM_DOUBLE)) { + return genArithOp( cUnit, mir ); + } + + switch (opCode) { + case OP_CMP_LONG: + loadValuePair(cUnit,vB, r0, r1); + loadValuePair(cUnit, vC, r2, r3); + genDispatchToHandler(cUnit, TEMPLATE_CMP_LONG); + storeValue(cUnit, r0, vA, r1); + break; + case OP_CMPL_FLOAT: + loadValue(cUnit, vB, r0); + loadValue(cUnit, vC, r1); + genDispatchToHandler(cUnit, TEMPLATE_CMPL_FLOAT); + storeValue(cUnit, r0, vA, r1); + break; + case OP_CMPG_FLOAT: + loadValue(cUnit, vB, r0); + loadValue(cUnit, vC, r1); + genDispatchToHandler(cUnit, TEMPLATE_CMPG_FLOAT); + storeValue(cUnit, r0, vA, r1); + break; + case OP_CMPL_DOUBLE: + loadValueAddress(cUnit, vB, r0); + loadValueAddress(cUnit, vC, r1); + genDispatchToHandler(cUnit, TEMPLATE_CMPL_DOUBLE); + storeValue(cUnit, r0, vA, r1); + break; + case OP_CMPG_DOUBLE: + loadValueAddress(cUnit, vB, r0); + loadValueAddress(cUnit, vC, r1); + genDispatchToHandler(cUnit, TEMPLATE_CMPG_DOUBLE); + storeValue(cUnit, r0, vA, r1); + break; + case OP_AGET_WIDE: + genArrayGet(cUnit, mir, ARMV5TE_LDR_RRR, vB, vC, vA, 3); + break; + case OP_AGET: + case OP_AGET_OBJECT: + genArrayGet(cUnit, mir, ARMV5TE_LDR_RRR, vB, vC, vA, 2); + break; + case OP_AGET_BOOLEAN: + genArrayGet(cUnit, mir, ARMV5TE_LDRB_RRR, vB, vC, vA, 0); + break; + case OP_AGET_BYTE: + genArrayGet(cUnit, mir, ARMV5TE_LDRSB_RRR, vB, vC, vA, 0); + break; + case OP_AGET_CHAR: + genArrayGet(cUnit, mir, ARMV5TE_LDRH_RRR, vB, vC, vA, 1); + break; + case OP_AGET_SHORT: + genArrayGet(cUnit, mir, ARMV5TE_LDRSH_RRR, vB, vC, vA, 1); + break; + case OP_APUT_WIDE: + genArrayPut(cUnit, mir, ARMV5TE_STR_RRR, vB, vC, vA, 3); + break; + case OP_APUT: + case OP_APUT_OBJECT: + genArrayPut(cUnit, mir, ARMV5TE_STR_RRR, vB, vC, vA, 2); + break; + case OP_APUT_SHORT: + case OP_APUT_CHAR: + genArrayPut(cUnit, mir, ARMV5TE_STRH_RRR, vB, vC, vA, 1); + break; + case OP_APUT_BYTE: + case OP_APUT_BOOLEAN: + genArrayPut(cUnit, mir, ARMV5TE_STRB_RRR, vB, vC, vA, 0); + break; + default: + return true; + } + return false; +} + +static bool handleFmt31t(CompilationUnit *cUnit, MIR *mir) +{ + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + switch (dalvikOpCode) { + case OP_FILL_ARRAY_DATA: { + loadConstant(cUnit, r4PC, (int)dvmInterpHandleFillArrayData); + loadValue(cUnit, mir->dalvikInsn.vA, r0); + loadConstant(cUnit, r1, (mir->dalvikInsn.vB << 1) + + (int) (cUnit->method->insns + mir->offset)); + genExportPC(cUnit, mir, r2, r3 ); + newLIR1(cUnit, ARMV5TE_BLX_R, r4PC); + genNullCheck(cUnit, r0, mir->offset, NULL); + break; + } + /* + * TODO + * - Add a 1 to 3-entry per-location cache here to completely + * bypass the dvmInterpHandle[Packed/Sparse]Switch call w/ chaining + * - Use out-of-line handlers for both of these + */ + case OP_PACKED_SWITCH: + case OP_SPARSE_SWITCH: { + if (dalvikOpCode == OP_PACKED_SWITCH) { + loadConstant(cUnit, r4PC, (int)dvmInterpHandlePackedSwitch); + } else { + loadConstant(cUnit, r4PC, (int)dvmInterpHandleSparseSwitch); + } + loadValue(cUnit, mir->dalvikInsn.vA, r1); + 
loadConstant(cUnit, r0, (mir->dalvikInsn.vB << 1) + + (int) (cUnit->method->insns + mir->offset)); + newLIR1(cUnit, ARMV5TE_BLX_R, r4PC); + loadConstant(cUnit, r1, (int)(cUnit->method->insns + mir->offset)); + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r2, rGLUE, + offsetof(InterpState, jitToInterpEntries.dvmJitToInterpNoChain) + >> 2); + newLIR3(cUnit, ARMV5TE_ADD_RRR, r0, r0, r0); + newLIR3(cUnit, ARMV5TE_ADD_RRR, r4PC, r0, r1); + newLIR1(cUnit, ARMV5TE_BLX_R, r2); + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt35c_3rc(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, + Armv5teLIR *labelList) +{ + Armv5teLIR *retChainingCell = &labelList[bb->fallThrough->id]; + Armv5teLIR *pcrLabel = NULL; + + DecodedInstruction *dInsn = &mir->dalvikInsn; + switch (mir->dalvikInsn.opCode) { + /* + * calleeMethod = this->clazz->vtable[ + * method->clazz->pDvmDex->pResMethods[BBBB]->methodIndex + * ] + */ + case OP_INVOKE_VIRTUAL: + case OP_INVOKE_VIRTUAL_RANGE: { + int methodIndex = + cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB]-> + methodIndex; + + if (mir->dalvikInsn.opCode == OP_INVOKE_VIRTUAL) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + /* r0 now contains this->clazz */ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, + offsetof(Object, clazz) >> 2); + /* r1 = &retChainingCell */ + Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL, + r1, 0); + /* r4PC = dalvikCallsite */ + loadConstant(cUnit, r4PC, + (int) (cUnit->method->insns + mir->offset)); + + /* r0 now contains this->clazz->vtable */ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, + offsetof(ClassObject, vtable) >> 2); + addrRetChain->generic.target = (LIR *) retChainingCell; + + if (methodIndex < 32) { + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, methodIndex); + } else { + loadConstant(cUnit, r7, methodIndex<<2); + newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r0, r7); + } + + /* + * r0 = calleeMethod, + * r1 = &ChainingCell, + * r4PC = callsiteDPC, + */ + genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT); +#if defined(INVOKE_STATS) + gDvmJit.invokeNoOpt++; +#endif + /* Handle exceptions using the interpreter */ + genTrap(cUnit, mir->offset, pcrLabel); + break; + } + /* + * calleeMethod = method->clazz->super->vtable[method->clazz->pDvmDex + * ->pResMethods[BBBB]->methodIndex] + */ + /* TODO - not excersized in RunPerf.jar */ + case OP_INVOKE_SUPER: + case OP_INVOKE_SUPER_RANGE: { + int mIndex = cUnit->method->clazz->pDvmDex-> + pResMethods[dInsn->vB]->methodIndex; + const Method *calleeMethod = + cUnit->method->clazz->super->vtable[mIndex]; + + if (mir->dalvikInsn.opCode == OP_INVOKE_SUPER) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + /* r0 = calleeMethod */ + loadConstant(cUnit, r0, (int) calleeMethod); + + genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel, + calleeMethod); + break; + } + /* calleeMethod = method->clazz->pDvmDex->pResMethods[BBBB] */ + case OP_INVOKE_DIRECT: + case OP_INVOKE_DIRECT_RANGE: { + const Method *calleeMethod = + cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB]; + + if (mir->dalvikInsn.opCode == OP_INVOKE_DIRECT) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + /* r0 = calleeMethod */ + loadConstant(cUnit, r0, (int) calleeMethod); + + genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel, + calleeMethod); + break; + } + /* calleeMethod = 
method->clazz->pDvmDex->pResMethods[BBBB] */ + case OP_INVOKE_STATIC: + case OP_INVOKE_STATIC_RANGE: { + const Method *calleeMethod = + cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB]; + + if (mir->dalvikInsn.opCode == OP_INVOKE_STATIC) + genProcessArgsNoRange(cUnit, mir, dInsn, + NULL /* no null check */); + else + genProcessArgsRange(cUnit, mir, dInsn, + NULL /* no null check */); + + /* r0 = calleeMethod */ + loadConstant(cUnit, r0, (int) calleeMethod); + + genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel, + calleeMethod); + break; + } + /* + * calleeMethod = dvmFindInterfaceMethodInCache(this->clazz, + * BBBB, method, method->clazz->pDvmDex) + */ + case OP_INVOKE_INTERFACE: + case OP_INVOKE_INTERFACE_RANGE: { + int methodIndex = dInsn->vB; + + if (mir->dalvikInsn.opCode == OP_INVOKE_INTERFACE) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + /* r0 now contains this->clazz */ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, + offsetof(Object, clazz) >> 2); + + /* r1 = BBBB */ + loadConstant(cUnit, r1, dInsn->vB); + + /* r2 = method (caller) */ + loadConstant(cUnit, r2, (int) cUnit->method); + + /* r3 = pDvmDex */ + loadConstant(cUnit, r3, (int) cUnit->method->clazz->pDvmDex); + + loadConstant(cUnit, r7, + (intptr_t) dvmFindInterfaceMethodInCache); + newLIR1(cUnit, ARMV5TE_BLX_R, r7); + + /* r0 = calleeMethod (returned from dvmFindInterfaceMethodInCache */ + + /* r1 = &retChainingCell */ + Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL, + r1, 0); + /* r4PC = dalvikCallsite */ + loadConstant(cUnit, r4PC, + (int) (cUnit->method->insns + mir->offset)); + + addrRetChain->generic.target = (LIR *) retChainingCell; + /* + * r0 = this, r1 = calleeMethod, + * r1 = &ChainingCell, + * r4PC = callsiteDPC, + */ + genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT); +#if defined(INVOKE_STATS) + gDvmJit.invokeNoOpt++; +#endif + /* Handle exceptions using the interpreter */ + genTrap(cUnit, mir->offset, pcrLabel); + break; + } + /* NOP */ + case OP_INVOKE_DIRECT_EMPTY: { + return false; + } + case OP_FILLED_NEW_ARRAY: + case OP_FILLED_NEW_ARRAY_RANGE: { + /* Just let the interpreter deal with these */ + genInterpSingleStep(cUnit, mir); + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt35ms_3rms(CompilationUnit *cUnit, MIR *mir, + BasicBlock *bb, Armv5teLIR *labelList) +{ + Armv5teLIR *retChainingCell = &labelList[bb->fallThrough->id]; + Armv5teLIR *pcrLabel = NULL; + + DecodedInstruction *dInsn = &mir->dalvikInsn; + switch (mir->dalvikInsn.opCode) { + /* calleeMethod = this->clazz->vtable[BBBB] */ + case OP_INVOKE_VIRTUAL_QUICK_RANGE: + case OP_INVOKE_VIRTUAL_QUICK: { + int methodIndex = dInsn->vB; + if (mir->dalvikInsn.opCode == OP_INVOKE_VIRTUAL_QUICK) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + /* r0 now contains this->clazz */ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, + offsetof(Object, clazz) >> 2); + /* r1 = &retChainingCell */ + Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL, + r1, 0); + /* r4PC = dalvikCallsite */ + loadConstant(cUnit, r4PC, + (int) (cUnit->method->insns + mir->offset)); + + /* r0 now contains this->clazz->vtable */ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, + offsetof(ClassObject, vtable) >> 2); + addrRetChain->generic.target = (LIR *) retChainingCell; + + if (methodIndex < 32) { + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, methodIndex); + } else { + 
loadConstant(cUnit, r7, methodIndex<<2); + newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r0, r7); + } + + /* + * r0 = calleeMethod, + * r1 = &ChainingCell, + * r4PC = callsiteDPC, + */ + genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT); +#if defined(INVOKE_STATS) + gDvmJit.invokeNoOpt++; +#endif + break; + } + /* calleeMethod = method->clazz->super->vtable[BBBB] */ + case OP_INVOKE_SUPER_QUICK: + case OP_INVOKE_SUPER_QUICK_RANGE: { + const Method *calleeMethod = + cUnit->method->clazz->super->vtable[dInsn->vB]; + + if (mir->dalvikInsn.opCode == OP_INVOKE_SUPER_QUICK) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + /* r0 = calleeMethod */ + loadConstant(cUnit, r0, (int) calleeMethod); + + genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel, + calleeMethod); + break; + } + /* calleeMethod = method->clazz->super->vtable[BBBB] */ + default: + return true; + } + /* Handle exceptions using the interpreter */ + genTrap(cUnit, mir->offset, pcrLabel); + return false; +} + +/* + * NOTE: We assume here that the special native inline routines + * are side-effect free. By making this assumption, we can safely + * re-execute the routine from the interpreter if it decides it + * wants to throw an exception. We still need to EXPORT_PC(), though. + */ +static bool handleFmt3inline(CompilationUnit *cUnit, MIR *mir) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + switch( mir->dalvikInsn.opCode) { + case OP_EXECUTE_INLINE: { + unsigned int i; + const InlineOperation* inLineTable = dvmGetInlineOpsTable(); + int offset = (int) &((InterpState *) NULL)->retval; + int operation = dInsn->vB; + + if (!strcmp(inLineTable[operation].classDescriptor, + "Ljava/lang/String;") && + !strcmp(inLineTable[operation].methodName, + "length") && + !strcmp(inLineTable[operation].methodSignature, + "()I")) { + return genInlinedStringLength(cUnit,mir); + } + + /* Materialize pointer to retval & push */ + newLIR2(cUnit, ARMV5TE_MOV_RR, r4PC, rGLUE); + newLIR2(cUnit, ARMV5TE_ADD_RI8, r4PC, offset); + /* Push r4 and (just to take up space) r5 */ + newLIR1(cUnit, ARMV5TE_PUSH, (1<<r4PC | 1<<rFP)); + + /* Get code pointer to inline routine */ + loadConstant(cUnit, r4PC, (int)inLineTable[operation].func); + + /* Export PC & SP */ + genExportPC(cUnit, mir, r0, r1); + + /* Load arguments to r0..r4 */ + for (i = 0; i < dInsn->vA; i++) { + loadValue(cUnit, dInsn->arg[i], i); + } + /* Call inline routine */ + newLIR1(cUnit, ARMV5TE_BLX_R, r4PC); + + /* Strip frame */ + newLIR1(cUnit, ARMV5TE_ADD_SPI7, 2); + + /* Did we throw? If so, redo under interpreter */ + genNullCheck(cUnit, r0, mir->offset, NULL); + + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt51l(CompilationUnit *cUnit, MIR *mir) +{ + loadConstant(cUnit, r0, mir->dalvikInsn.vB_wide & 0xFFFFFFFFUL); + loadConstant(cUnit, r1, (mir->dalvikInsn.vB_wide>>32) & 0xFFFFFFFFUL); + storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2); + return false; +} + +/*****************************************************************************/ +/* + * The following are special processing routines that handle transfer of + * control between compiled code and the interpreter. Certain VM states like + * Dalvik PC and special-purpose registers are reconstructed here. 
+ */ + +/* Chaining cell for normal-ending compiles (eg branches) */ +static void handleGenericChainingCell(CompilationUnit *cUnit, + unsigned int offset) +{ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, + offsetof(InterpState, jitToInterpEntries.dvmJitToInterpNormal) >> 2); + newLIR1(cUnit, ARMV5TE_BLX_R, r0); + addWordData(cUnit, (int) (cUnit->method->insns + offset), true); +} + +/* + * Chaining cell for instructions that immediately following a method + * invocation. + */ +static void handlePostInvokeChainingCell(CompilationUnit *cUnit, + unsigned int offset) +{ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, + offsetof(InterpState, jitToInterpEntries.dvmJitToTraceSelect) >> 2); + newLIR1(cUnit, ARMV5TE_BLX_R, r0); + addWordData(cUnit, (int) (cUnit->method->insns + offset), true); +} + +/* Chaining cell for monomorphic method invocations. */ +static void handleInvokeChainingCell(CompilationUnit *cUnit, + const Method *callee) +{ + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, + offsetof(InterpState, jitToInterpEntries.dvmJitToTraceSelect) >> 2); + newLIR1(cUnit, ARMV5TE_BLX_R, r0); + addWordData(cUnit, (int) (callee->insns), true); +} + +/* Load the Dalvik PC into r0 and jump to the specified target */ +static void handlePCReconstruction(CompilationUnit *cUnit, + Armv5teLIR *targetLabel) +{ + Armv5teLIR **pcrLabel = + (Armv5teLIR **) cUnit->pcReconstructionList.elemList; + int numElems = cUnit->pcReconstructionList.numUsed; + int i; + for (i = 0; i < numElems; i++) { + dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]); + /* r0 = dalvik PC */ + loadConstant(cUnit, r0, pcrLabel[i]->operands[0]); + genUnconditionalBranch(cUnit, targetLabel); + } +} + +/* Entry function to invoke the backend of the JIT compiler */ +void dvmCompilerMIR2LIR(CompilationUnit *cUnit) +{ + /* Used to hold the labels of each block */ + Armv5teLIR *labelList = + dvmCompilerNew(sizeof(Armv5teLIR) * cUnit->numBlocks, true); + GrowableList chainingListByType[CHAINING_CELL_LAST]; + int i; + + /* + * Initialize the three chaining lists for generic, post-invoke, and invoke + * chains. + */ + for (i = 0; i < CHAINING_CELL_LAST; i++) { + dvmInitGrowableList(&chainingListByType[i], 2); + } + + BasicBlock **blockList = cUnit->blockList; + + /* Handle the content in each basic block */ + for (i = 0; i < cUnit->numBlocks; i++) { + blockList[i]->visited = true; + MIR *mir; + + labelList[i].operands[0] = blockList[i]->startOffset; + + if (blockList[i]->blockType >= CHAINING_CELL_LAST) { + /* + * Append the label pseudo LIR first. Chaining cells will be handled + * separately afterwards. 
+ */ + dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]); + } + + if (blockList[i]->blockType == DALVIK_BYTECODE) { + labelList[i].opCode = ARMV5TE_PSEUDO_NORMAL_BLOCK_LABEL; + } else { + switch (blockList[i]->blockType) { + case CHAINING_CELL_GENERIC: + labelList[i].opCode = ARMV5TE_PSEUDO_CHAINING_CELL_GENERIC; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[CHAINING_CELL_GENERIC], (void *) i); + break; + case CHAINING_CELL_INVOKE: + labelList[i].opCode = ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE; + labelList[i].operands[0] = + (int) blockList[i]->containingMethod; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[CHAINING_CELL_INVOKE], (void *) i); + break; + case CHAINING_CELL_POST_INVOKE: + labelList[i].opCode = + ARMV5TE_PSEUDO_CHAINING_CELL_POST_INVOKE; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[CHAINING_CELL_POST_INVOKE], + (void *) i); + break; + case PC_RECONSTRUCTION: + /* Make sure exception handling block is next */ + labelList[i].opCode = + ARMV5TE_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL; + assert (i == cUnit->numBlocks - 2); + handlePCReconstruction(cUnit, &labelList[i+1]); + break; + case EXCEPTION_HANDLING: + labelList[i].opCode = ARMV5TE_PSEUDO_EH_BLOCK_LABEL; + if (cUnit->pcReconstructionList.numUsed) { + newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, rGLUE, + offsetof(InterpState, + jitToInterpEntries.dvmJitToInterpPunt) + >> 2); + newLIR1(cUnit, ARMV5TE_BLX_R, r1); + } + break; + default: + break; + } + continue; + } + for (mir = blockList[i]->firstMIRInsn; mir; mir = mir->next) { + OpCode dalvikOpCode = mir->dalvikInsn.opCode; + InstructionFormat dalvikFormat = + dexGetInstrFormat(gDvm.instrFormat, dalvikOpCode); + newLIR2(cUnit, ARMV5TE_PSEUDO_DALVIK_BYTECODE_BOUNDARY, + mir->offset,dalvikOpCode); + bool notHandled; + /* + * Debugging: screen the opcode first to see if it is in the + * do[-not]-compile list + */ + bool singleStepMe = + gDvmJit.includeSelectedOp != + ((gDvmJit.opList[dalvikOpCode >> 3] & + (1 << (dalvikOpCode & 0x7))) != + 0); + if (singleStepMe || cUnit->allSingleStep) { + notHandled = false; + genInterpSingleStep(cUnit, mir); + } else { + opcodeCoverage[dalvikOpCode]++; + switch (dalvikFormat) { + case kFmt10t: + case kFmt20t: + case kFmt30t: + notHandled = handleFmt10t_Fmt20t_Fmt30t(cUnit, + mir, blockList[i], labelList); + break; + case kFmt10x: + notHandled = handleFmt10x(cUnit, mir); + break; + case kFmt11n: + case kFmt31i: + notHandled = handleFmt11n_Fmt31i(cUnit, mir); + break; + case kFmt11x: + notHandled = handleFmt11x(cUnit, mir); + break; + case kFmt12x: + notHandled = handleFmt12x(cUnit, mir); + break; + case kFmt20bc: + notHandled = handleFmt20bc(cUnit, mir); + break; + case kFmt21c: + case kFmt31c: + notHandled = handleFmt21c_Fmt31c(cUnit, mir); + break; + case kFmt21h: + notHandled = handleFmt21h(cUnit, mir); + break; + case kFmt21s: + notHandled = handleFmt21s(cUnit, mir); + break; + case kFmt21t: + notHandled = handleFmt21t(cUnit, mir, blockList[i], + labelList); + break; + case kFmt22b: + case kFmt22s: + notHandled = handleFmt22b_Fmt22s(cUnit, mir); + break; + case kFmt22c: + notHandled = handleFmt22c(cUnit, mir); + break; + case kFmt22cs: + notHandled = handleFmt22cs(cUnit, mir); + break; + case kFmt22t: + notHandled = handleFmt22t(cUnit, mir, blockList[i], + labelList); + break; + case kFmt22x: + case kFmt32x: + notHandled = handleFmt22x_Fmt32x(cUnit, mir); + break; + case kFmt23x: + notHandled = handleFmt23x(cUnit, mir); + break; + case kFmt31t: + 
notHandled = handleFmt31t(cUnit, mir); + break; + case kFmt3rc: + case kFmt35c: + notHandled = handleFmt35c_3rc(cUnit, mir, blockList[i], + labelList); + break; + case kFmt3rms: + case kFmt35ms: + notHandled = handleFmt35ms_3rms(cUnit, mir,blockList[i], + labelList); + break; + case kFmt3inline: + notHandled = handleFmt3inline(cUnit, mir); + break; + case kFmt51l: + notHandled = handleFmt51l(cUnit, mir); + break; + default: + notHandled = true; + break; + } + } + if (notHandled) { + LOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled\n", + mir->offset, + dalvikOpCode, getOpcodeName(dalvikOpCode), + dalvikFormat); + dvmAbort(); + break; + } else { + gDvmJit.opHistogram[dalvikOpCode]++; + } + } + } + + /* Handle the codegen in predefined order */ + for (i = 0; i < CHAINING_CELL_LAST; i++) { + size_t j; + int *blockIdList = (int *) chainingListByType[i].elemList; + + cUnit->numChainingCells[i] = chainingListByType[i].numUsed; + + /* No chaining cells of this type */ + if (cUnit->numChainingCells[i] == 0) + continue; + + /* Record the first LIR for a new type of chaining cell */ + cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]]; + + for (j = 0; j < chainingListByType[i].numUsed; j++) { + int blockId = blockIdList[j]; + + /* Align this chaining cell first */ + newLIR0(cUnit, ARMV5TE_PSEUDO_ALIGN4); + + /* Insert the pseudo chaining instruction */ + dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]); + + + switch (blockList[blockId]->blockType) { + case CHAINING_CELL_GENERIC: + handleGenericChainingCell(cUnit, + blockList[blockId]->startOffset); + break; + case CHAINING_CELL_INVOKE: + handleInvokeChainingCell(cUnit, + blockList[blockId]->containingMethod); + break; + case CHAINING_CELL_POST_INVOKE: + handlePostInvokeChainingCell(cUnit, + blockList[blockId]->startOffset); + break; + default: + dvmAbort(); + break; + } + } + } +} + +/* Accept the work and start compiling */ +void *dvmCompilerDoWork(CompilerWorkOrder *work) +{ + void *res; + + if (gDvmJit.codeCacheFull) { + return NULL; + } + + switch (work->kind) { + case kWorkOrderMethod: + res = dvmCompileMethod(work->info); + break; + case kWorkOrderTrace: + res = dvmCompileTrace(work->info); + break; + default: + res = NULL; + dvmAbort(); + } + return res; +} + +/* Architecture-specific initializations and checks go here */ +bool dvmCompilerArchInit(void) +{ + /* First, declare dvmCompiler_TEMPLATE_XXX for each template */ +#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X(); +#include "../../template/armv5te/TemplateOpList.h" +#undef JIT_TEMPLATE + + int i = 0; + extern void dvmCompilerTemplateStart(void); + + /* + * Then, populate the templateEntryOffsets array with the offsets from the + * the dvmCompilerTemplateStart symbol for each template. 
+ */ +#define JIT_TEMPLATE(X) templateEntryOffsets[i++] = \ + (intptr_t) dvmCompiler_TEMPLATE_##X - (intptr_t) dvmCompilerTemplateStart; +#include "../../template/armv5te/TemplateOpList.h" +#undef JIT_TEMPLATE + + /* Codegen-specific assumptions */ + assert(offsetof(ClassObject, vtable) < 128 && + (offsetof(ClassObject, vtable) & 0x3) == 0); + assert(offsetof(ArrayObject, length) < 128 && + (offsetof(ArrayObject, length) & 0x3) == 0); + assert(offsetof(ArrayObject, contents) < 256); + + /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */ + assert(sizeof(StackSaveArea) < 236); + + /* + * EA is calculated by doing "Rn + imm5 << 2", and there are 5 entry points + * that codegen may access, make sure that the offset from the top of the + * struct is less than 108. + */ + assert(offsetof(InterpState, jitToInterpEntries) < 108); + return true; +} + +/* Architectural-specific debugging helpers go here */ +void dvmCompilerArchDump(void) +{ + /* Print compiled opcode in this VM instance */ + int i, start, streak; + char buf[1024]; + + streak = i = 0; + buf[0] = 0; + while (opcodeCoverage[i] == 0 && i < 256) { + i++; + } + if (i == 256) { + return; + } + for (start = i++, streak = 1; i < 256; i++) { + if (opcodeCoverage[i]) { + streak++; + } else { + if (streak == 1) { + sprintf(buf+strlen(buf), "%x,", start); + } else { + sprintf(buf+strlen(buf), "%x-%x,", start, start + streak - 1); + } + streak = 0; + while (opcodeCoverage[i] == 0 && i < 256) { + i++; + } + if (i < 256) { + streak = 1; + start = i; + } + } + } + if (streak) { + if (streak == 1) { + sprintf(buf+strlen(buf), "%x", start); + } else { + sprintf(buf+strlen(buf), "%x-%x", start, start + streak - 1); + } + } + if (strlen(buf)) { + LOGD("dalvik.vm.jitop = %s", buf); + } +} diff --git a/vm/compiler/template/Makefile-template b/vm/compiler/template/Makefile-template new file mode 100644 index 000000000..920318335 --- /dev/null +++ b/vm/compiler/template/Makefile-template @@ -0,0 +1,49 @@ +# Copyright (C) 2008 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Makefile for the Dalvik modular interpreter. This is not currently +# integrated into the build system. +# + +SHELL := /bin/sh + +# Build system has TARGET_ARCH=arm, but we need the exact architecture. +# The base assumption for an ARM platform is ARMv5TE, but we may want to +# support older ARMv4 devices, or use special features from ARMv6 or VFP. +# The simulator build is "desktop". +# +# To generate sources for all targets: +# for arch in desktop armv5te; do TARGET_ARCH_EXT=$arch make -f Makefile-mterp; done +# +#TARGET_ARCH_EXT := armv5te + +OUTPUT_DIR := out + +# Accumulate all possible dependencies for the generated files in a very +# conservative fashion. If it's not one of the generated files in "out", +# assume it's a dependency. +SOURCE_DEPS := \ + $(shell find . -path ./$(OUTPUT_DIR) -prune -o -type f -print) + +# Source files generated by the script. 
There's always one C and one +# assembly file, though in practice one or the other could be empty. +GEN_SOURCES := \ + $(OUTPUT_DIR)/CompilerTemplateAsm-$(TARGET_ARCH_EXT).S + +target: $(GEN_SOURCES) + +$(GEN_SOURCES): $(SOURCE_DEPS) + @mkdir -p out + ./gen-template.py $(TARGET_ARCH_EXT) $(OUTPUT_DIR) diff --git a/vm/compiler/template/README.txt b/vm/compiler/template/README.txt new file mode 100644 index 000000000..fced412e9 --- /dev/null +++ b/vm/compiler/template/README.txt @@ -0,0 +1 @@ +See README.txt under dalvik/vm/mterp for details. diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPG_DOUBLE.S b/vm/compiler/template/armv5te/TEMPLATE_CMPG_DOUBLE.S new file mode 100644 index 000000000..f18f6d3ae --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_CMPG_DOUBLE.S @@ -0,0 +1 @@ +%include "armv5te/TEMPLATE_CMPL_DOUBLE.S" { "naninst":"mov r0, #1" } diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT.S b/vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT.S new file mode 100644 index 000000000..02887e57d --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT.S @@ -0,0 +1 @@ +%include "armv5te/TEMPLATE_CMPL_FLOAT.S" { "naninst":"mov r0, #1" } diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPL_DOUBLE.S b/vm/compiler/template/armv5te/TEMPLATE_CMPL_DOUBLE.S new file mode 100644 index 000000000..dfafd2c2d --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_CMPL_DOUBLE.S @@ -0,0 +1,39 @@ +%default { "naninst":"mvn r0, #0" } + /* + * For the JIT: incoming arguments are pointers to the arguments in r0/r1 + * result in r0 + * + * Compare two floating-point values. Puts 0, 1, or -1 into the + * destination register based on the results of the comparison. + * + * Provide a "naninst" instruction that puts 1 or -1 into r1 depending + * on what value we'd like to return when one of the operands is NaN. + * + * See OP_CMPL_FLOAT for an explanation. + * + * For: cmpl-double, cmpg-double + */ + /* op vAA, vBB, vCC */ + mov r4, lr @ save return address + mov r9, r0 @ save copy of &arg1 + mov r10, r1 @ save copy of &arg2 + ldmia r9, {r0-r1} @ r0/r1<- vBB/vBB+1 + ldmia r10, {r2-r3} @ r2/r3<- vCC/vCC+1 + LDR_PC_LR ".L__aeabi_cdcmple" @ PIC way of "bl __aeabi_cdcmple" + bhi .L${opcode}_gt_or_nan @ C set and Z clear, disambiguate + mvncc r0, #0 @ (less than) r1<- -1 + moveq r0, #0 @ (equal) r1<- 0, trumps less than + bx r4 + + @ Test for NaN with a second comparison. EABI forbids testing bit + @ patterns, and we can't represent 0x7fc00000 in immediate form, so + @ make the library call. +.L${opcode}_gt_or_nan: + ldmia r10, {r0-r1} @ reverse order + ldmia r9, {r2-r3} + LDR_PC_LR ".L__aeabi_cdcmple" @ r0<- Z set if eq, C clear if < + movcc r0, #1 @ (greater than) r1<- 1 + bxcc r4 + $naninst @ r1<- 1 or -1 for NaN + bx r4 + diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT.S b/vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT.S new file mode 100644 index 000000000..31d4cd82b --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT.S @@ -0,0 +1,56 @@ +%default { "naninst":"mvn r0, #0" } + /* + * For the JIT: incoming arguments in r0, r1 + * result in r0 + * + * Compare two floating-point values. Puts 0, 1, or -1 into the + * destination register based on the results of the comparison. + * + * Provide a "naninst" instruction that puts 1 or -1 into r1 depending + * on what value we'd like to return when one of the operands is NaN. 
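+ * (Here "naninst" defaults to "mvn r0, #0", i.e. -1, for cmpl-float;
+ * TEMPLATE_CMPG_FLOAT.S re-includes this file with "mov r0, #1" to get
+ * the cmpg-float behavior.)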
+ * + * The operation we're implementing is: + * if (x == y) + * return 0; + * else if (x < y) + * return -1; + * else if (x > y) + * return 1; + * else + * return {-1,1}; // one or both operands was NaN + * + * The straightforward implementation requires 3 calls to functions + * that return a result in r0. We can do it with two calls if our + * EABI library supports __aeabi_cfcmple (only one if we want to check + * for NaN directly): + * check x <= y + * if <, return -1 + * if ==, return 0 + * check y <= x + * if <, return 1 + * return {-1,1} + * + * for: cmpl-float, cmpg-float + */ + /* op vAA, vBB, vCC */ + mov r4, lr @ save return address + mov r9, r0 @ Save copies - we may need to redo + mov r10, r1 + LDR_PC_LR ".L__aeabi_cfcmple" @ cmp <=: C clear if <, Z set if eq + bhi .L${opcode}_gt_or_nan @ C set and Z clear, disambiguate + mvncc r0, #0 @ (less than) r0<- -1 + moveq r0, #0 @ (equal) r0<- 0, trumps less than + bx r4 + @ Test for NaN with a second comparison. EABI forbids testing bit + @ patterns, and we can't represent 0x7fc00000 in immediate form, so + @ make the library call. +.L${opcode}_gt_or_nan: + mov r1, r9 @ reverse order + mov r0, r10 + LDR_PC_LR ".L__aeabi_cfcmple" @ r0<- Z set if eq, C clear if < + movcc r0, #1 @ (greater than) r1<- 1 + bxcc r4 + $naninst @ r1<- 1 or -1 for NaN + bx r4 + + diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMP_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_CMP_LONG.S new file mode 100644 index 000000000..5f1e16b3c --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_CMP_LONG.S @@ -0,0 +1,34 @@ + /* + * Compare two 64-bit values. Puts 0, 1, or -1 into the destination + * register based on the results of the comparison. + * + * We load the full values with LDM, but in practice many values could + * be resolved by only looking at the high word. This could be made + * faster or slower by splitting the LDM into a pair of LDRs. + * + * If we just wanted to set condition flags, we could do this: + * subs ip, r0, r2 + * sbcs ip, r1, r3 + * subeqs ip, r0, r2 + * Leaving { <0, 0, >0 } in ip. However, we have to set it to a specific + * integer value, which we can do with 2 conditional mov/mvn instructions + * (set 1, set -1; if they're equal we already have 0 in ip), giving + * us a constant 5-cycle path plus a branch at the end to the + * instruction epilogue code. The multi-compare approach below needs + * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch + * in the worst case (the 64-bit values are equal). + */ + /* cmp-long vAA, vBB, vCC */ + cmp r1, r3 @ compare (vBB+1, vCC+1) + blt .L${opcode}_less @ signed compare on high part + bgt .L${opcode}_greater + subs r0, r0, r2 @ r0<- r0 - r2 + bxeq lr + bhi .L${opcode}_greater @ unsigned compare on low part +.L${opcode}_less: + mvn r0, #0 @ r0<- -1 + bx lr +.L${opcode}_greater: + mov r0, #1 @ r0<- 1 + bx lr + diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S new file mode 100644 index 000000000..6994f269f --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S @@ -0,0 +1,54 @@ + /* + * For monomorphic callsite, setup the Dalvik frame and return to the + * Thumb code through the link register to transfer control to the callee + * method through a dedicated chaining cell. 
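+ * (Contrast with TEMPLATE_INVOKE_METHOD_NO_OPT, which handles polymorphic
+ * callsites by jumping to dvmJitToInterpNoChain instead of returning
+ * through lr.)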
+ */ + @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite + ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize + ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize + ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd + ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount + add r3, r1, #1 @ Thumb addr is odd + SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area + sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize) + SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area + add r12, lr, #2 @ setup the punt-to-interp address + sub r10, r10, r2, lsl #2 @ r10<- bottom (newsave - outsSize) + ldr r8, [r8] @ r3<- suspendCount (int) + cmp r10, r9 @ bottom < interpStackEnd? + bxlt r12 @ return to raise stack overflow excep. + @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite + ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz + ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags + str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] + str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] + ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns + + + @ set up newSaveArea + str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] + str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] + str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] + cmp r8, #0 @ suspendCount != 0 + bxne r12 @ bail to the interpreter + tst r10, #ACC_NATIVE + bne .LinvokeNative + /* + * If we want to punt to the interpreter for native call, swap the bne with + * the following + * bxne r12 + */ + + + ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex + ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self + + @ Update "glue" values for the new method + str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall + str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... + mov rFP, r1 @ fp = newFp + str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp + + bx lr @ return to the callee-chaining cell + + diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S new file mode 100644 index 000000000..003459dc8 --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S @@ -0,0 +1,53 @@ + /* + * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC + * into rPC then jump to dvmJitToInterpNoChain to dispatch the + * runtime-resolved callee. + */ + @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite + ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize + ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize + ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd + ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount + add r3, r1, #1 @ Thumb addr is odd + SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area + sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize) + SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area + sub r10, r10, r2, lsl #2 @ r10<- bottom (newsave - outsSize) + ldr r8, [r8] @ r3<- suspendCount (int) + cmp r10, r9 @ bottom < interpStackEnd? + bxlt lr @ return to raise stack overflow excep. 
+ @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite + ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz + ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags + str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] + str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] + ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns + + + @ set up newSaveArea + str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] + str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] + str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] + cmp r8, #0 @ suspendCount != 0 + bxne lr @ bail to the interpreter + tst r10, #ACC_NATIVE + bne .LinvokeNative + /* + * If we want to punt to the interpreter for native call, swap the bne with + * the following + * bxne lr + */ + + + ldr r10, .LdvmJitToInterpNoChain + ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex + ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self + + @ Update "glue" values for the new method + str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall + str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... + mov rFP, r1 @ fp = newFp + str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp + + @ Start executing the callee + mov pc, r10 @ dvmJitToInterpNoChain diff --git a/vm/compiler/template/armv5te/TEMPLATE_MUL_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_MUL_LONG.S new file mode 100644 index 000000000..8a9b11574 --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_MUL_LONG.S @@ -0,0 +1,28 @@ + /* + * Signed 64-bit integer multiply. + * + * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1 + * + * Consider WXxYZ (r1r0 x r3r2) with a long multiply: + * WX + * x YZ + * -------- + * ZW ZX + * YW YX + * + * The low word of the result holds ZX, the high word holds + * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because + * it doesn't fit in the low 64 bits. + * + * Unlike most ARM math operations, multiply instructions have + * restrictions on using the same register more than once (Rd and Rm + * cannot be the same). + */ + /* mul-long vAA, vBB, vCC */ + mul ip, r2, r1 @ ip<- ZxW + umull r9, r10, r2, r0 @ r9/r10 <- ZxX + mla r2, r0, r3, ip @ r2<- YxX + (ZxW) + add r10, r2, r10 @ r10<- r10 + low(ZxW + (YxX)) + mov r0,r9 + mov r1,r10 + bx lr diff --git a/vm/compiler/template/armv5te/TEMPLATE_RETURN.S b/vm/compiler/template/armv5te/TEMPLATE_RETURN.S new file mode 100644 index 000000000..f0a4623a0 --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_RETURN.S @@ -0,0 +1,38 @@ + /* + * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX. + * If the stored value in returnAddr + * is non-zero, the caller is compiled by the JIT thus return to the + * address in the code cache following the invoke instruction. Otherwise + * return to the special dvmJitToInterpNoChain entry point. + */ + SAVEAREA_FROM_FP(r0, rFP) @ r0<- saveArea (old) + ldr r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame + ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount + ldr rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc + ldr r9, [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret + ldr r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)] + @ r2<- method we're returning to + ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self + cmp r2, #0 @ break frame? 
+ beq 1f @ bail to interpreter + ldr r0, .LdvmJitToInterpNoChain @ defined in footer.S + mov rFP, r10 @ publish new FP + ldrne r10, [r2, #offMethod_clazz] @ r10<- method->clazz + ldr r8, [r8] @ r8<- suspendCount + + str r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method + ldr r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex + str rFP, [r3, #offThread_curFrame] @ self->curFrame = fp + add rPC, rPC, #6 @ publish new rPC (advance 6 bytes) + str r1, [rGLUE, #offGlue_methodClassDex] + cmp r8, #0 @ check the suspendCount + movne r9, #0 @ clear the chaining cell address + cmp r9, #0 @ chaining cell exists? + blxne r9 @ jump to the chaining cell + mov pc, r0 @ callsite is interpreted +1: + stmia rGLUE, {rPC, rFP} @ SAVE_PC_FP_TO_GLUE() + ldr r2, .LdvmMterpStdBail @ defined in footer.S + mov r1, #0 @ changeInterp = false + mov r0, rGLUE @ Expecting rGLUE in r0 + blx r2 @ exit the interpreter diff --git a/vm/compiler/template/armv5te/TEMPLATE_SHL_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_SHL_LONG.S new file mode 100644 index 000000000..532f8a47a --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_SHL_LONG.S @@ -0,0 +1,15 @@ + /* + * Long integer shift. This is different from the generic 32/64-bit + * binary operations because vAA/vBB are 64-bit but vCC (the shift + * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low + * 6 bits. + */ + /* shl-long vAA, vBB, vCC */ + and r2, r2, #63 @ r2<- r2 & 0x3f + mov r1, r1, asl r2 @ r1<- r1 << r2 + rsb r3, r2, #32 @ r3<- 32 - r2 + orr r1, r1, r0, lsr r3 @ r1<- r1 | (r0 << (32-r2)) + subs ip, r2, #32 @ ip<- r2 - 32 + movpl r1, r0, asl ip @ if r2 >= 32, r1<- r0 << (r2-32) + mov r0, r0, asl r2 @ r0<- r0 << r2 + bx lr diff --git a/vm/compiler/template/armv5te/TEMPLATE_SHR_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_SHR_LONG.S new file mode 100644 index 000000000..ca7545a18 --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_SHR_LONG.S @@ -0,0 +1,16 @@ + /* + * Long integer shift. This is different from the generic 32/64-bit + * binary operations because vAA/vBB are 64-bit but vCC (the shift + * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low + * 6 bits. + */ + /* shr-long vAA, vBB, vCC */ + and r2, r2, #63 @ r0<- r0 & 0x3f + mov r0, r0, lsr r2 @ r0<- r2 >> r2 + rsb r3, r2, #32 @ r3<- 32 - r2 + orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) + subs ip, r2, #32 @ ip<- r2 - 32 + movpl r0, r1, asr ip @ if r2 >= 32, r0<-r1 >> (r2-32) + mov r1, r1, asr r2 @ r1<- r1 >> r2 + bx lr + diff --git a/vm/compiler/template/armv5te/TEMPLATE_USHR_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_USHR_LONG.S new file mode 100644 index 000000000..d7c71d9df --- /dev/null +++ b/vm/compiler/template/armv5te/TEMPLATE_USHR_LONG.S @@ -0,0 +1,16 @@ + /* + * Long integer shift. This is different from the generic 32/64-bit + * binary operations because vAA/vBB are 64-bit but vCC (the shift + * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low + * 6 bits. 
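+ *
+ * Roughly: for a shift distance n in [1,31] the new low word is
+ * (low >>> n) | (high << (32-n)) and the new high word is high >>> n;
+ * the movpl below substitutes high >>> (n-32) for the low word when n >= 32.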
+ */ + /* ushr-long vAA, vBB, vCC */ + and r2, r2, #63 @ r0<- r0 & 0x3f + mov r0, r0, lsr r2 @ r0<- r2 >> r2 + rsb r3, r2, #32 @ r3<- 32 - r2 + orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) + subs ip, r2, #32 @ ip<- r2 - 32 + movpl r0, r1, lsr ip @ if r2 >= 32, r0<-r1 >>> (r2-32) + mov r1, r1, lsr r2 @ r1<- r1 >>> r2 + bx lr + diff --git a/vm/compiler/template/armv5te/TemplateOpList.h b/vm/compiler/template/armv5te/TemplateOpList.h new file mode 100644 index 000000000..6428ccf12 --- /dev/null +++ b/vm/compiler/template/armv5te/TemplateOpList.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Dalvik opcode list that uses additional templates to complete JIT execution. + */ +#ifndef JIT_TEMPLATE +#define JIT_TEMPLATE(X) +#endif + +JIT_TEMPLATE(CMP_LONG) +JIT_TEMPLATE(RETURN) +JIT_TEMPLATE(INVOKE_METHOD_NO_OPT) +JIT_TEMPLATE(INVOKE_METHOD_CHAIN) +JIT_TEMPLATE(CMPG_DOUBLE) +JIT_TEMPLATE(CMPL_DOUBLE) +JIT_TEMPLATE(CMPG_FLOAT) +JIT_TEMPLATE(CMPL_FLOAT) +JIT_TEMPLATE(MUL_LONG) +JIT_TEMPLATE(SHL_LONG) +JIT_TEMPLATE(SHR_LONG) +JIT_TEMPLATE(USHR_LONG) diff --git a/vm/compiler/template/armv5te/footer.S b/vm/compiler/template/armv5te/footer.S new file mode 100644 index 000000000..e961e298b --- /dev/null +++ b/vm/compiler/template/armv5te/footer.S @@ -0,0 +1,58 @@ +/* + * =========================================================================== + * Common subroutines and data + * =========================================================================== + */ + + .text + .align 2 +.LinvokeNative: + @ Prep for the native call + @ r1 = newFP, r0 = methodToCall + ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self + ldr r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext + str r1, [r3, #offThread_curFrame] @ self->curFrame = newFp + str r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)] + @ newFp->localRefTop=refNext + mov r9, r3 @ r9<- glue->self (preserve) + SAVEAREA_FROM_FP(r10, r1) @ r10<- new stack save area + + mov r2, r0 @ r2<- methodToCall + mov r0, r1 @ r0<- newFP + add r1, rGLUE, #offGlue_retval @ r1<- &retval + + LDR_PC_LR "[r2, #offMethod_nativeFunc]" + + @ native return; r9=self, r10=newSaveArea + @ equivalent to dvmPopJniLocals + ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret + ldr r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop + ldr r1, [r9, #offThread_exception] @ check for exception + str rFP, [r9, #offThread_curFrame] @ self->curFrame = fp + cmp r1, #0 @ null? 
+ str r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0 + bne .LhandleException @ no, handle exception + bx r2 + +/* FIXME - untested */ +.LhandleException: + ldr rIBASE, .LdvmAsmInstructionStart + ldr rPC, [r10, #offStackSaveArea_savedPc] @ reload rPC + b dvmMterpCommonExceptionThrown + + .align 2 +.LdvmAsmInstructionStart: + .word dvmAsmInstructionStart +.LdvmJitToInterpNoChain: + .word dvmJitToInterpNoChain +.LdvmMterpStdBail: + .word dvmMterpStdBail +.L__aeabi_cdcmple: + .word __aeabi_cdcmple +.L__aeabi_cfcmple: + .word __aeabi_cfcmple + + .global dmvCompilerTemplateEnd +dmvCompilerTemplateEnd: + +#endif /* WITH_JIT */ diff --git a/vm/compiler/template/armv5te/header.S b/vm/compiler/template/armv5te/header.S new file mode 100644 index 000000000..65daf8d73 --- /dev/null +++ b/vm/compiler/template/armv5te/header.S @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined(WITH_JIT) + +/* + * ARMv5 definitions and declarations. + */ + +/* +ARM EABI general notes: + +r0-r3 hold first 4 args to a method; they are not preserved across method calls +r4-r8 are available for general use +r9 is given special treatment in some situations, but not for us +r10 (sl) seems to be generally available +r11 (fp) is used by gcc (unless -fomit-frame-pointer is set) +r12 (ip) is scratch -- not preserved across method calls +r13 (sp) should be managed carefully in case a signal arrives +r14 (lr) must be preserved +r15 (pc) can be tinkered with directly + +r0 holds returns of <= 4 bytes +r0-r1 hold returns of 8 bytes, low word in r0 + +Callee must save/restore r4+ (except r12) if it modifies them. + +Stack is "full descending". Only the arguments that don't fit in the first 4 +registers are placed on the stack. "sp" points at the first stacked argument +(i.e. the 5th arg). + +VFP: single-precision results in s0, double-precision results in d0. + +In the EABI, "sp" must be 64-bit aligned on entry to a function, and any +64-bit quantities (long long, double) must be 64-bit aligned. +*/ + +/* +JIT and ARM notes: + +The following registers have fixed assignments: + + reg nick purpose + r5 rFP interpreted frame pointer, used for accessing locals and args + r6 rGLUE MterpGlue pointer + +The following registers have fixed assignments in mterp but are scratch +registers in compiled code + + reg nick purpose + r4 rPC interpreted program counter, used for fetching instructions + r7 rIBASE interpreted instruction base pointer, used for computed goto + r8 rINST first 16-bit code unit of current instruction + +Macros are provided for common operations. Each macro MUST emit only +one instruction to make instruction-counting easier. They MUST NOT alter +unspecified registers or condition codes. +*/ + +/* single-purpose registers, given names for clarity */ +#define rPC r4 +#define rFP r5 +#define rGLUE r6 +#define rIBASE r7 +#define rINST r8 + +/* + * Given a frame pointer, find the stack save area. 
+ * + * In C this is "((StackSaveArea*)(_fp) -1)". + */ +#define SAVEAREA_FROM_FP(_reg, _fpreg) \ + sub _reg, _fpreg, #sizeofStackSaveArea + +/* + * This is a #include, not a %include, because we want the C pre-processor + * to expand the macros into assembler assignment statements. + */ +#include "../../../mterp/common/asm-constants.h" + diff --git a/vm/compiler/template/armv5te/platform.S b/vm/compiler/template/armv5te/platform.S new file mode 100644 index 000000000..b960a9364 --- /dev/null +++ b/vm/compiler/template/armv5te/platform.S @@ -0,0 +1,16 @@ +/* + * =========================================================================== + * CPU-version-specific defines + * =========================================================================== + */ + +/* + * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5. + * Jump to subroutine. + * + * May modify IP and LR. + */ +.macro LDR_PC_LR source + mov lr, pc + ldr pc, \source +.endm diff --git a/vm/compiler/template/config-armv5te b/vm/compiler/template/config-armv5te new file mode 100644 index 000000000..668df1b92 --- /dev/null +++ b/vm/compiler/template/config-armv5te @@ -0,0 +1,45 @@ +# Copyright (C) 2009 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Configuration for ARMv5TE architecture targets. +# + +# file header and basic definitions +#import c/header.c +import armv5te/header.S + +# C pre-processor defines for stub C instructions +#import cstubs/stubdefs.c + +# highly-platform-specific defs +import armv5te/platform.S + +# common defs for the C helpers; include this before the instruction handlers +#import c/opcommon.c + +# opcode list; argument to op-start is default directory +op-start armv5te + +op-end + +# "helper" code for C; include if you use any of the C stubs (this generates +# object code, so it's normally excluded) +##import c/gotoTargets.c + +# end of defs; include this when cstubs/stubdefs.c is included +#import cstubs/enddefs.c + +# common subroutines for asm +import armv5te/footer.S diff --git a/vm/compiler/template/gen-template.py b/vm/compiler/template/gen-template.py new file mode 100755 index 000000000..8a1ba0cb9 --- /dev/null +++ b/vm/compiler/template/gen-template.py @@ -0,0 +1,422 @@ +#!/usr/bin/env python +# +# Copyright (C) 2007 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Using instructions from an architecture-specific config file, generate C +# and assembly source files for the Dalvik JIT. 
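+#
+# Typical invocation (see Makefile-template and rebuild.sh):
+#   ./gen-template.py armv5te out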
+# + +import sys, string, re, time +from string import Template + +interp_defs_file = "TemplateOpList.h" # need opcode list + +handler_size_bits = -1000 +handler_size_bytes = -1000 +in_op_start = 0 # 0=not started, 1=started, 2=ended +default_op_dir = None +opcode_locations = {} +asm_stub_text = [] +label_prefix = ".L" # use ".L" to hide labels from gdb + + +# Exception class. +class DataParseError(SyntaxError): + "Failure when parsing data file" + +# +# Set any omnipresent substitution values. +# +def getGlobalSubDict(): + return { "handler_size_bits":handler_size_bits, + "handler_size_bytes":handler_size_bytes } + +# +# Parse arch config file -- +# Set handler_size_bytes to the value of tokens[1], and handler_size_bits to +# log2(handler_size_bytes). Throws an exception if "bytes" is not a power +# of two. +# +def setHandlerSize(tokens): + global handler_size_bits, handler_size_bytes + if len(tokens) != 2: + raise DataParseError("handler-size requires one argument") + if handler_size_bits != -1000: + raise DataParseError("handler-size may only be set once") + + # compute log2(n), and make sure n is a power of 2 + handler_size_bytes = bytes = int(tokens[1]) + bits = -1 + while bytes > 0: + bytes //= 2 # halve with truncating division + bits += 1 + + if handler_size_bytes == 0 or handler_size_bytes != (1 << bits): + raise DataParseError("handler-size (%d) must be power of 2 and > 0" \ + % orig_bytes) + handler_size_bits = bits + +# +# Parse arch config file -- +# Copy a file in to the C or asm output file. +# +def importFile(tokens): + if len(tokens) != 2: + raise DataParseError("import requires one argument") + source = tokens[1] + if source.endswith(".S"): + appendSourceFile(tokens[1], getGlobalSubDict(), asm_fp, None) + else: + raise DataParseError("don't know how to import %s (expecting .c/.S)" + % source) + +# +# Parse arch config file -- +# Copy a file in to the C or asm output file. +# +def setAsmStub(tokens): + global asm_stub_text + if len(tokens) != 2: + raise DataParseError("import requires one argument") + try: + stub_fp = open(tokens[1]) + asm_stub_text = stub_fp.readlines() + except IOError, err: + stub_fp.close() + raise DataParseError("unable to load asm-stub: %s" % str(err)) + stub_fp.close() + +# +# Parse arch config file -- +# Start of opcode list. +# +def opStart(tokens): + global in_op_start + global default_op_dir + if len(tokens) != 2: + raise DataParseError("opStart takes a directory name argument") + if in_op_start != 0: + raise DataParseError("opStart can only be specified once") + default_op_dir = tokens[1] + in_op_start = 1 + +# +# Parse arch config file -- +# Set location of a single opcode's source file. +# +def opEntry(tokens): + #global opcode_locations + if len(tokens) != 3: + raise DataParseError("op requires exactly two arguments") + if in_op_start != 1: + raise DataParseError("op statements must be between opStart/opEnd") + try: + index = opcodes.index(tokens[1]) + except ValueError: + raise DataParseError("unknown opcode %s" % tokens[1]) + opcode_locations[tokens[1]] = tokens[2] + +# +# Parse arch config file -- +# End of opcode list; emit instruction blocks. +# +def opEnd(tokens): + global in_op_start + if len(tokens) != 1: + raise DataParseError("opEnd takes no arguments") + if in_op_start != 1: + raise DataParseError("opEnd must follow opStart, and only appear once") + in_op_start = 2 + + loadAndEmitOpcodes() + + +# +# Extract an ordered list of instructions from the VM sources. 
We use the +# "goto table" definition macro, which has exactly 256 entries. +# +def getOpcodeList(): + opcodes = [] + opcode_fp = open("%s/%s" % (target_arch, interp_defs_file)) + opcode_re = re.compile(r"^JIT_TEMPLATE\((\w+)\)", re.DOTALL) + for line in opcode_fp: + match = opcode_re.match(line) + if not match: + continue + opcodes.append("TEMPLATE_" + match.group(1)) + opcode_fp.close() + + return opcodes + + +# +# Load and emit opcodes for all 256 instructions. +# +def loadAndEmitOpcodes(): + sister_list = [] + + # point dvmAsmInstructionStart at the first handler or stub + asm_fp.write("\n .global dvmCompilerTemplateStart\n") + asm_fp.write(" .type dvmCompilerTemplateStart, %function\n") + asm_fp.write(" .text\n\n") + asm_fp.write("dvmCompilerTemplateStart:\n\n") + + for i in xrange(len(opcodes)): + op = opcodes[i] + + if opcode_locations.has_key(op): + location = opcode_locations[op] + else: + location = default_op_dir + + loadAndEmitAsm(location, i, sister_list) + + # Use variable sized handlers now + # asm_fp.write("\n .balign %d\n" % handler_size_bytes) + asm_fp.write(" .size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart\n") + +# +# Load an assembly fragment and emit it. +# +def loadAndEmitAsm(location, opindex, sister_list): + op = opcodes[opindex] + source = "%s/%s.S" % (location, op) + dict = getGlobalSubDict() + dict.update({ "opcode":op, "opnum":opindex }) + print " emit %s --> asm" % source + + emitAsmHeader(asm_fp, dict) + appendSourceFile(source, dict, asm_fp, sister_list) + +# +# Output the alignment directive and label for an assembly piece. +# +def emitAsmHeader(outfp, dict): + outfp.write("/* ------------------------------ */\n") + # The alignment directive ensures that the handler occupies + # at least the correct amount of space. We don't try to deal + # with overflow here. + outfp.write(" .balign 4\n") + # Emit a label so that gdb will say the right thing. We prepend an + # underscore so the symbol name doesn't clash with the OpCode enum. + template_name = "dvmCompiler_%(opcode)s" % dict + outfp.write(" .global %s\n" % template_name); + outfp.write("%s:\n" % template_name); + +# +# Output a generic instruction stub that updates the "glue" struct and +# calls the C implementation. +# +def emitAsmStub(outfp, dict): + emitAsmHeader(outfp, dict) + for line in asm_stub_text: + templ = Template(line) + outfp.write(templ.substitute(dict)) + +# +# Append the file specified by "source" to the open "outfp". Each line will +# be template-replaced using the substitution dictionary "dict". +# +# If the first line of the file starts with "%" it is taken as a directive. +# A "%include" line contains a filename and, optionally, a Python-style +# dictionary declaration with substitution strings. (This is implemented +# with recursion.) +# +# If "sister_list" is provided, and we find a line that contains only "&", +# all subsequent lines from the file will be appended to sister_list instead +# of copied to the output. +# +# This may modify "dict". 
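+#
+# For example, TEMPLATE_CMPG_DOUBLE.S consists of the single directive
+#   %include "armv5te/TEMPLATE_CMPL_DOUBLE.S" { "naninst":"mov r0, #1" }
+# which re-emits the CMPL_DOUBLE template with $naninst substituted.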
+# +def appendSourceFile(source, dict, outfp, sister_list): + outfp.write("/* File: %s */\n" % source) + infp = open(source, "r") + in_sister = False + for line in infp: + if line.startswith("%include"): + # Parse the "include" line + tokens = line.strip().split(' ', 2) + if len(tokens) < 2: + raise DataParseError("malformed %%include in %s" % source) + + alt_source = tokens[1].strip("\"") + if alt_source == source: + raise DataParseError("self-referential %%include in %s" + % source) + + new_dict = dict.copy() + if len(tokens) == 3: + new_dict.update(eval(tokens[2])) + #print " including src=%s dict=%s" % (alt_source, new_dict) + appendSourceFile(alt_source, new_dict, outfp, sister_list) + continue + + elif line.startswith("%default"): + # copy keywords into dictionary + tokens = line.strip().split(' ', 1) + if len(tokens) < 2: + raise DataParseError("malformed %%default in %s" % source) + defaultValues = eval(tokens[1]) + for entry in defaultValues: + dict.setdefault(entry, defaultValues[entry]) + continue + + elif line.startswith("%verify"): + # more to come, someday + continue + + elif line.startswith("%break") and sister_list != None: + # allow more than one %break, ignoring all following the first + if not in_sister: + in_sister = True + sister_list.append("\n/* continuation for %(opcode)s */\n"%dict) + continue + + # perform keyword substitution if a dictionary was provided + if dict != None: + templ = Template(line) + try: + subline = templ.substitute(dict) + except KeyError, err: + raise DataParseError("keyword substitution failed in %s: %s" + % (source, str(err))) + except: + print "ERROR: substitution failed: " + line + raise + else: + subline = line + + # write output to appropriate file + if in_sister: + sister_list.append(subline) + else: + outfp.write(subline) + outfp.write("\n") + infp.close() + +# +# Emit a C-style section header comment. +# +def emitSectionComment(str, fp): + equals = "========================================" \ + "===================================" + + fp.write("\n/*\n * %s\n * %s\n * %s\n */\n" % + (equals, str, equals)) + + +# +# =========================================================================== +# "main" code +# + +# +# Check args. +# +if len(sys.argv) != 3: + print "Usage: %s target-arch output-dir" % sys.argv[0] + sys.exit(2) + +target_arch = sys.argv[1] +output_dir = sys.argv[2] + +# +# Extract opcode list. +# +opcodes = getOpcodeList() +#for op in opcodes: +# print " %s" % op + +# +# Open config file. +# +try: + config_fp = open("config-%s" % target_arch) +except: + print "Unable to open config file 'config-%s'" % target_arch + sys.exit(1) + +# +# Open and prepare output files. +# +try: + asm_fp = open("%s/CompilerTemplateAsm-%s.S" % (output_dir, target_arch), "w") +except: + print "Unable to open output files" + print "Make sure directory '%s' exists and existing files are writable" \ + % output_dir + # Ideally we'd remove the files to avoid confusing "make", but if they + # failed to open we probably won't be able to remove them either. + sys.exit(1) + +print "Generating %s" % (asm_fp.name) + +file_header = """/* + * This file was generated automatically by gen-template.py for '%s'. + * + * --> DO NOT EDIT <-- + */ + +""" % (target_arch) + +asm_fp.write(file_header) + +# +# Process the config file. 
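+#
+# Note that config-armv5te only uses "import" and "op-start"/"op-end";
+# "handler-size", "asm-stub" and "op" are parsed below but unused by it.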
+# +failed = False +try: + for line in config_fp: + line = line.strip() # remove CRLF, leading spaces + tokens = line.split(' ') # tokenize + #print "%d: %s" % (len(tokens), tokens) + if len(tokens[0]) == 0: + #print " blank" + pass + elif tokens[0][0] == '#': + #print " comment" + pass + else: + if tokens[0] == "handler-size": + setHandlerSize(tokens) + elif tokens[0] == "import": + importFile(tokens) + elif tokens[0] == "asm-stub": + setAsmStub(tokens) + elif tokens[0] == "op-start": + opStart(tokens) + elif tokens[0] == "op-end": + opEnd(tokens) + elif tokens[0] == "op": + opEntry(tokens) + else: + raise DataParseError, "unrecognized command '%s'" % tokens[0] +except DataParseError, err: + print "Failed: " + str(err) + # TODO: remove output files so "make" doesn't get confused + failed = True + asm_fp.close() + c_fp = asm_fp = None + +config_fp.close() + +# +# Done! +# +if asm_fp: + asm_fp.close() + +sys.exit(failed) diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S new file mode 100644 index 000000000..59aa7906f --- /dev/null +++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S @@ -0,0 +1,703 @@ +/* + * This file was generated automatically by gen-template.py for 'armv5te'. + * + * --> DO NOT EDIT <-- + */ + +/* File: armv5te/header.S */ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined(WITH_JIT) + +/* + * ARMv5 definitions and declarations. + */ + +/* +ARM EABI general notes: + +r0-r3 hold first 4 args to a method; they are not preserved across method calls +r4-r8 are available for general use +r9 is given special treatment in some situations, but not for us +r10 (sl) seems to be generally available +r11 (fp) is used by gcc (unless -fomit-frame-pointer is set) +r12 (ip) is scratch -- not preserved across method calls +r13 (sp) should be managed carefully in case a signal arrives +r14 (lr) must be preserved +r15 (pc) can be tinkered with directly + +r0 holds returns of <= 4 bytes +r0-r1 hold returns of 8 bytes, low word in r0 + +Callee must save/restore r4+ (except r12) if it modifies them. + +Stack is "full descending". Only the arguments that don't fit in the first 4 +registers are placed on the stack. "sp" points at the first stacked argument +(i.e. the 5th arg). + +VFP: single-precision results in s0, double-precision results in d0. + +In the EABI, "sp" must be 64-bit aligned on entry to a function, and any +64-bit quantities (long long, double) must be 64-bit aligned. 
+*/ + +/* +JIT and ARM notes: + +The following registers have fixed assignments: + + reg nick purpose + r5 rFP interpreted frame pointer, used for accessing locals and args + r6 rGLUE MterpGlue pointer + +The following registers have fixed assignments in mterp but are scratch +registers in compiled code + + reg nick purpose + r4 rPC interpreted program counter, used for fetching instructions + r7 rIBASE interpreted instruction base pointer, used for computed goto + r8 rINST first 16-bit code unit of current instruction + +Macros are provided for common operations. Each macro MUST emit only +one instruction to make instruction-counting easier. They MUST NOT alter +unspecified registers or condition codes. +*/ + +/* single-purpose registers, given names for clarity */ +#define rPC r4 +#define rFP r5 +#define rGLUE r6 +#define rIBASE r7 +#define rINST r8 + +/* + * Given a frame pointer, find the stack save area. + * + * In C this is "((StackSaveArea*)(_fp) -1)". + */ +#define SAVEAREA_FROM_FP(_reg, _fpreg) \ + sub _reg, _fpreg, #sizeofStackSaveArea + +/* + * This is a #include, not a %include, because we want the C pre-processor + * to expand the macros into assembler assignment statements. + */ +#include "../../../mterp/common/asm-constants.h" + + +/* File: armv5te/platform.S */ +/* + * =========================================================================== + * CPU-version-specific defines + * =========================================================================== + */ + +/* + * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5. + * Jump to subroutine. + * + * May modify IP and LR. + */ +.macro LDR_PC_LR source + mov lr, pc + ldr pc, \source +.endm + + + .global dvmCompilerTemplateStart + .type dvmCompilerTemplateStart, %function + .text + +dvmCompilerTemplateStart: + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_CMP_LONG +dvmCompiler_TEMPLATE_CMP_LONG: +/* File: armv5te/TEMPLATE_CMP_LONG.S */ + /* + * Compare two 64-bit values. Puts 0, 1, or -1 into the destination + * register based on the results of the comparison. + * + * We load the full values with LDM, but in practice many values could + * be resolved by only looking at the high word. This could be made + * faster or slower by splitting the LDM into a pair of LDRs. + * + * If we just wanted to set condition flags, we could do this: + * subs ip, r0, r2 + * sbcs ip, r1, r3 + * subeqs ip, r0, r2 + * Leaving { <0, 0, >0 } in ip. However, we have to set it to a specific + * integer value, which we can do with 2 conditional mov/mvn instructions + * (set 1, set -1; if they're equal we already have 0 in ip), giving + * us a constant 5-cycle path plus a branch at the end to the + * instruction epilogue code. The multi-compare approach below needs + * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch + * in the worst case (the 64-bit values are equal). + */ + /* cmp-long vAA, vBB, vCC */ + cmp r1, r3 @ compare (vBB+1, vCC+1) + blt .LTEMPLATE_CMP_LONG_less @ signed compare on high part + bgt .LTEMPLATE_CMP_LONG_greater + subs r0, r0, r2 @ r0<- r0 - r2 + bxeq lr + bhi .LTEMPLATE_CMP_LONG_greater @ unsigned compare on low part +.LTEMPLATE_CMP_LONG_less: + mvn r0, #0 @ r0<- -1 + bx lr +.LTEMPLATE_CMP_LONG_greater: + mov r0, #1 @ r0<- 1 + bx lr + + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_RETURN +dvmCompiler_TEMPLATE_RETURN: +/* File: armv5te/TEMPLATE_RETURN.S */ + /* + * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX. 
+ * If the stored value in returnAddr + * is non-zero, the caller is compiled by the JIT thus return to the + * address in the code cache following the invoke instruction. Otherwise + * return to the special dvmJitToInterpNoChain entry point. + */ + SAVEAREA_FROM_FP(r0, rFP) @ r0<- saveArea (old) + ldr r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame + ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount + ldr rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc + ldr r9, [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret + ldr r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)] + @ r2<- method we're returning to + ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self + cmp r2, #0 @ break frame? + beq 1f @ bail to interpreter + ldr r0, .LdvmJitToInterpNoChain @ defined in footer.S + mov rFP, r10 @ publish new FP + ldrne r10, [r2, #offMethod_clazz] @ r10<- method->clazz + ldr r8, [r8] @ r8<- suspendCount + + str r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method + ldr r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex + str rFP, [r3, #offThread_curFrame] @ self->curFrame = fp + add rPC, rPC, #6 @ publish new rPC (advance 6 bytes) + str r1, [rGLUE, #offGlue_methodClassDex] + cmp r8, #0 @ check the suspendCount + movne r9, #0 @ clear the chaining cell address + cmp r9, #0 @ chaining cell exists? + blxne r9 @ jump to the chaining cell + mov pc, r0 @ callsite is interpreted +1: + stmia rGLUE, {rPC, rFP} @ SAVE_PC_FP_TO_GLUE() + ldr r2, .LdvmMterpStdBail @ defined in footer.S + mov r1, #0 @ changeInterp = false + mov r0, rGLUE @ Expecting rGLUE in r0 + blx r2 @ exit the interpreter + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT +dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT: +/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */ + /* + * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC + * into rPC then jump to dvmJitToInterpNoChain to dispatch the + * runtime-resolved callee. + */ + @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite + ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize + ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize + ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd + ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount + add r3, r1, #1 @ Thumb addr is odd + SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area + sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize) + SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area + sub r10, r10, r2, lsl #2 @ r10<- bottom (newsave - outsSize) + ldr r8, [r8] @ r3<- suspendCount (int) + cmp r10, r9 @ bottom < interpStackEnd? + bxlt lr @ return to raise stack overflow excep. 
+ @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite + ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz + ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags + str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] + str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] + ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns + + + @ set up newSaveArea + str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] + str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] + str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] + cmp r8, #0 @ suspendCount != 0 + bxne lr @ bail to the interpreter + tst r10, #ACC_NATIVE + bne .LinvokeNative + /* + * If we want to punt to the interpreter for native call, swap the bne with + * the following + * bxne lr + */ + + + ldr r10, .LdvmJitToInterpNoChain + ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex + ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self + + @ Update "glue" values for the new method + str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall + str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... + mov rFP, r1 @ fp = newFp + str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp + + @ Start executing the callee + mov pc, r10 @ dvmJitToInterpNoChain + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN +dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN: +/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */ + /* + * For monomorphic callsite, setup the Dalvik frame and return to the + * Thumb code through the link register to transfer control to the callee + * method through a dedicated chaining cell. + */ + @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite + ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize + ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize + ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd + ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount + add r3, r1, #1 @ Thumb addr is odd + SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area + sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize) + SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area + add r12, lr, #2 @ setup the punt-to-interp address + sub r10, r10, r2, lsl #2 @ r10<- bottom (newsave - outsSize) + ldr r8, [r8] @ r3<- suspendCount (int) + cmp r10, r9 @ bottom < interpStackEnd? + bxlt r12 @ return to raise stack overflow excep. 
+ @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite + ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz + ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags + str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] + str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] + ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns + + + @ set up newSaveArea + str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] + str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] + str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] + cmp r8, #0 @ suspendCount != 0 + bxne r12 @ bail to the interpreter + tst r10, #ACC_NATIVE + bne .LinvokeNative + /* + * If we want to punt to the interpreter for native call, swap the bne with + * the following + * bxne r12 + */ + + + ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex + ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self + + @ Update "glue" values for the new method + str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall + str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... + mov rFP, r1 @ fp = newFp + str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp + + bx lr @ return to the callee-chaining cell + + + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_CMPG_DOUBLE +dvmCompiler_TEMPLATE_CMPG_DOUBLE: +/* File: armv5te/TEMPLATE_CMPG_DOUBLE.S */ +/* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */ + /* + * For the JIT: incoming arguments are pointers to the arguments in r0/r1 + * result in r0 + * + * Compare two floating-point values. Puts 0, 1, or -1 into the + * destination register based on the results of the comparison. + * + * Provide a "naninst" instruction that puts 1 or -1 into r1 depending + * on what value we'd like to return when one of the operands is NaN. + * + * See OP_CMPL_FLOAT for an explanation. + * + * For: cmpl-double, cmpg-double + */ + /* op vAA, vBB, vCC */ + mov r4, lr @ save return address + mov r9, r0 @ save copy of &arg1 + mov r10, r1 @ save copy of &arg2 + ldmia r9, {r0-r1} @ r0/r1<- vBB/vBB+1 + ldmia r10, {r2-r3} @ r2/r3<- vCC/vCC+1 + LDR_PC_LR ".L__aeabi_cdcmple" @ PIC way of "bl __aeabi_cdcmple" + bhi .LTEMPLATE_CMPG_DOUBLE_gt_or_nan @ C set and Z clear, disambiguate + mvncc r0, #0 @ (less than) r1<- -1 + moveq r0, #0 @ (equal) r1<- 0, trumps less than + bx r4 + + @ Test for NaN with a second comparison. EABI forbids testing bit + @ patterns, and we can't represent 0x7fc00000 in immediate form, so + @ make the library call. +.LTEMPLATE_CMPG_DOUBLE_gt_or_nan: + ldmia r10, {r0-r1} @ reverse order + ldmia r9, {r2-r3} + LDR_PC_LR ".L__aeabi_cdcmple" @ r0<- Z set if eq, C clear if < + movcc r0, #1 @ (greater than) r1<- 1 + bxcc r4 + mov r0, #1 @ r1<- 1 or -1 for NaN + bx r4 + + + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_CMPL_DOUBLE +dvmCompiler_TEMPLATE_CMPL_DOUBLE: +/* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */ + /* + * For the JIT: incoming arguments are pointers to the arguments in r0/r1 + * result in r0 + * + * Compare two floating-point values. Puts 0, 1, or -1 into the + * destination register based on the results of the comparison. + * + * Provide a "naninst" instruction that puts 1 or -1 into r1 depending + * on what value we'd like to return when one of the operands is NaN. + * + * See OP_CMPL_FLOAT for an explanation. 
+ * + * For: cmpl-double, cmpg-double + */ + /* op vAA, vBB, vCC */ + mov r4, lr @ save return address + mov r9, r0 @ save copy of &arg1 + mov r10, r1 @ save copy of &arg2 + ldmia r9, {r0-r1} @ r0/r1<- vBB/vBB+1 + ldmia r10, {r2-r3} @ r2/r3<- vCC/vCC+1 + LDR_PC_LR ".L__aeabi_cdcmple" @ PIC way of "bl __aeabi_cdcmple" + bhi .LTEMPLATE_CMPL_DOUBLE_gt_or_nan @ C set and Z clear, disambiguate + mvncc r0, #0 @ (less than) r1<- -1 + moveq r0, #0 @ (equal) r1<- 0, trumps less than + bx r4 + + @ Test for NaN with a second comparison. EABI forbids testing bit + @ patterns, and we can't represent 0x7fc00000 in immediate form, so + @ make the library call. +.LTEMPLATE_CMPL_DOUBLE_gt_or_nan: + ldmia r10, {r0-r1} @ reverse order + ldmia r9, {r2-r3} + LDR_PC_LR ".L__aeabi_cdcmple" @ r0<- Z set if eq, C clear if < + movcc r0, #1 @ (greater than) r1<- 1 + bxcc r4 + mvn r0, #0 @ r1<- 1 or -1 for NaN + bx r4 + + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_CMPG_FLOAT +dvmCompiler_TEMPLATE_CMPG_FLOAT: +/* File: armv5te/TEMPLATE_CMPG_FLOAT.S */ +/* File: armv5te/TEMPLATE_CMPL_FLOAT.S */ + /* + * For the JIT: incoming arguments in r0, r1 + * result in r0 + * + * Compare two floating-point values. Puts 0, 1, or -1 into the + * destination register based on the results of the comparison. + * + * Provide a "naninst" instruction that puts 1 or -1 into r1 depending + * on what value we'd like to return when one of the operands is NaN. + * + * The operation we're implementing is: + * if (x == y) + * return 0; + * else if (x < y) + * return -1; + * else if (x > y) + * return 1; + * else + * return {-1,1}; // one or both operands was NaN + * + * The straightforward implementation requires 3 calls to functions + * that return a result in r0. We can do it with two calls if our + * EABI library supports __aeabi_cfcmple (only one if we want to check + * for NaN directly): + * check x <= y + * if <, return -1 + * if ==, return 0 + * check y <= x + * if <, return 1 + * return {-1,1} + * + * for: cmpl-float, cmpg-float + */ + /* op vAA, vBB, vCC */ + mov r4, lr @ save return address + mov r9, r0 @ Save copies - we may need to redo + mov r10, r1 + LDR_PC_LR ".L__aeabi_cfcmple" @ cmp <=: C clear if <, Z set if eq + bhi .LTEMPLATE_CMPG_FLOAT_gt_or_nan @ C set and Z clear, disambiguate + mvncc r0, #0 @ (less than) r0<- -1 + moveq r0, #0 @ (equal) r0<- 0, trumps less than + bx r4 + @ Test for NaN with a second comparison. EABI forbids testing bit + @ patterns, and we can't represent 0x7fc00000 in immediate form, so + @ make the library call. +.LTEMPLATE_CMPG_FLOAT_gt_or_nan: + mov r1, r9 @ reverse order + mov r0, r10 + LDR_PC_LR ".L__aeabi_cfcmple" @ r0<- Z set if eq, C clear if < + movcc r0, #1 @ (greater than) r1<- 1 + bxcc r4 + mov r0, #1 @ r1<- 1 or -1 for NaN + bx r4 + + + + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_CMPL_FLOAT +dvmCompiler_TEMPLATE_CMPL_FLOAT: +/* File: armv5te/TEMPLATE_CMPL_FLOAT.S */ + /* + * For the JIT: incoming arguments in r0, r1 + * result in r0 + * + * Compare two floating-point values. Puts 0, 1, or -1 into the + * destination register based on the results of the comparison. + * + * Provide a "naninst" instruction that puts 1 or -1 into r1 depending + * on what value we'd like to return when one of the operands is NaN. 
+ * + * The operation we're implementing is: + * if (x == y) + * return 0; + * else if (x < y) + * return -1; + * else if (x > y) + * return 1; + * else + * return {-1,1}; // one or both operands was NaN + * + * The straightforward implementation requires 3 calls to functions + * that return a result in r0. We can do it with two calls if our + * EABI library supports __aeabi_cfcmple (only one if we want to check + * for NaN directly): + * check x <= y + * if <, return -1 + * if ==, return 0 + * check y <= x + * if <, return 1 + * return {-1,1} + * + * for: cmpl-float, cmpg-float + */ + /* op vAA, vBB, vCC */ + mov r4, lr @ save return address + mov r9, r0 @ Save copies - we may need to redo + mov r10, r1 + LDR_PC_LR ".L__aeabi_cfcmple" @ cmp <=: C clear if <, Z set if eq + bhi .LTEMPLATE_CMPL_FLOAT_gt_or_nan @ C set and Z clear, disambiguate + mvncc r0, #0 @ (less than) r0<- -1 + moveq r0, #0 @ (equal) r0<- 0, trumps less than + bx r4 + @ Test for NaN with a second comparison. EABI forbids testing bit + @ patterns, and we can't represent 0x7fc00000 in immediate form, so + @ make the library call. +.LTEMPLATE_CMPL_FLOAT_gt_or_nan: + mov r1, r9 @ reverse order + mov r0, r10 + LDR_PC_LR ".L__aeabi_cfcmple" @ r0<- Z set if eq, C clear if < + movcc r0, #1 @ (greater than) r1<- 1 + bxcc r4 + mvn r0, #0 @ r1<- 1 or -1 for NaN + bx r4 + + + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_MUL_LONG +dvmCompiler_TEMPLATE_MUL_LONG: +/* File: armv5te/TEMPLATE_MUL_LONG.S */ + /* + * Signed 64-bit integer multiply. + * + * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1 + * + * Consider WXxYZ (r1r0 x r3r2) with a long multiply: + * WX + * x YZ + * -------- + * ZW ZX + * YW YX + * + * The low word of the result holds ZX, the high word holds + * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because + * it doesn't fit in the low 64 bits. + * + * Unlike most ARM math operations, multiply instructions have + * restrictions on using the same register more than once (Rd and Rm + * cannot be the same). + */ + /* mul-long vAA, vBB, vCC */ + mul ip, r2, r1 @ ip<- ZxW + umull r9, r10, r2, r0 @ r9/r10 <- ZxX + mla r2, r0, r3, ip @ r2<- YxX + (ZxW) + add r10, r2, r10 @ r10<- r10 + low(ZxW + (YxX)) + mov r0,r9 + mov r1,r10 + bx lr + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_SHL_LONG +dvmCompiler_TEMPLATE_SHL_LONG: +/* File: armv5te/TEMPLATE_SHL_LONG.S */ + /* + * Long integer shift. This is different from the generic 32/64-bit + * binary operations because vAA/vBB are 64-bit but vCC (the shift + * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low + * 6 bits. + */ + /* shl-long vAA, vBB, vCC */ + and r2, r2, #63 @ r2<- r2 & 0x3f + mov r1, r1, asl r2 @ r1<- r1 << r2 + rsb r3, r2, #32 @ r3<- 32 - r2 + orr r1, r1, r0, lsr r3 @ r1<- r1 | (r0 << (32-r2)) + subs ip, r2, #32 @ ip<- r2 - 32 + movpl r1, r0, asl ip @ if r2 >= 32, r1<- r0 << (r2-32) + mov r0, r0, asl r2 @ r0<- r0 << r2 + bx lr + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_SHR_LONG +dvmCompiler_TEMPLATE_SHR_LONG: +/* File: armv5te/TEMPLATE_SHR_LONG.S */ + /* + * Long integer shift. This is different from the generic 32/64-bit + * binary operations because vAA/vBB are 64-bit but vCC (the shift + * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low + * 6 bits. 
+ */ + /* shr-long vAA, vBB, vCC */ + and r2, r2, #63 @ r0<- r0 & 0x3f + mov r0, r0, lsr r2 @ r0<- r2 >> r2 + rsb r3, r2, #32 @ r3<- 32 - r2 + orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) + subs ip, r2, #32 @ ip<- r2 - 32 + movpl r0, r1, asr ip @ if r2 >= 32, r0<-r1 >> (r2-32) + mov r1, r1, asr r2 @ r1<- r1 >> r2 + bx lr + + +/* ------------------------------ */ + .balign 4 + .global dvmCompiler_TEMPLATE_USHR_LONG +dvmCompiler_TEMPLATE_USHR_LONG: +/* File: armv5te/TEMPLATE_USHR_LONG.S */ + /* + * Long integer shift. This is different from the generic 32/64-bit + * binary operations because vAA/vBB are 64-bit but vCC (the shift + * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low + * 6 bits. + */ + /* ushr-long vAA, vBB, vCC */ + and r2, r2, #63 @ r0<- r0 & 0x3f + mov r0, r0, lsr r2 @ r0<- r2 >> r2 + rsb r3, r2, #32 @ r3<- 32 - r2 + orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) + subs ip, r2, #32 @ ip<- r2 - 32 + movpl r0, r1, lsr ip @ if r2 >= 32, r0<-r1 >>> (r2-32) + mov r1, r1, lsr r2 @ r1<- r1 >>> r2 + bx lr + + + .size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart +/* File: armv5te/footer.S */ +/* + * =========================================================================== + * Common subroutines and data + * =========================================================================== + */ + + .text + .align 2 +.LinvokeNative: + @ Prep for the native call + @ r1 = newFP, r0 = methodToCall + ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self + ldr r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext + str r1, [r3, #offThread_curFrame] @ self->curFrame = newFp + str r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)] + @ newFp->localRefTop=refNext + mov r9, r3 @ r9<- glue->self (preserve) + SAVEAREA_FROM_FP(r10, r1) @ r10<- new stack save area + + mov r2, r0 @ r2<- methodToCall + mov r0, r1 @ r0<- newFP + add r1, rGLUE, #offGlue_retval @ r1<- &retval + + LDR_PC_LR "[r2, #offMethod_nativeFunc]" + + @ native return; r9=self, r10=newSaveArea + @ equivalent to dvmPopJniLocals + ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret + ldr r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop + ldr r1, [r9, #offThread_exception] @ check for exception + str rFP, [r9, #offThread_curFrame] @ self->curFrame = fp + cmp r1, #0 @ null? + str r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0 + bne .LhandleException @ no, handle exception + bx r2 + +/* FIXME - untested */ +.LhandleException: + ldr rIBASE, .LdvmAsmInstructionStart + ldr rPC, [r10, #offStackSaveArea_savedPc] @ reload rPC + b dvmMterpCommonExceptionThrown + + .align 2 +.LdvmAsmInstructionStart: + .word dvmAsmInstructionStart +.LdvmJitToInterpNoChain: + .word dvmJitToInterpNoChain +.LdvmMterpStdBail: + .word dvmMterpStdBail +.L__aeabi_cdcmple: + .word __aeabi_cdcmple +.L__aeabi_cfcmple: + .word __aeabi_cfcmple + + .global dmvCompilerTemplateEnd +dmvCompilerTemplateEnd: + +#endif /* WITH_JIT */ + diff --git a/vm/compiler/template/rebuild.sh b/vm/compiler/template/rebuild.sh new file mode 100755 index 000000000..07d951650 --- /dev/null +++ b/vm/compiler/template/rebuild.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# +# Copyright (C) 2008 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Rebuild for all known targets. Necessary until the stuff in "out" gets +# generated as part of the build. +# +set -e +for arch in armv5te; do TARGET_ARCH_EXT=$arch make -f Makefile-template; done + diff --git a/vm/interp/Interp.c b/vm/interp/Interp.c index de3d6760f..db86e7865 100644 --- a/vm/interp/Interp.c +++ b/vm/interp/Interp.c @@ -789,7 +789,6 @@ void dvmThrowVerificationError(const Method* method, int kind, int ref) free(msg); } - /* * Main interpreter loop entry point. Select "standard" or "debug" * interpreter and switch between them as required. @@ -805,6 +804,27 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult) { InterpState interpState; bool change; +#if defined(WITH_JIT) + /* Interpreter entry points from compiled code */ + extern void dvmJitToInterpNormal(); + extern void dvmJitToInterpNoChain(); + extern void dvmJitToInterpPunt(); + extern void dvmJitToInterpSingleStep(); + extern void dvmJitToTraceSelect(); + + /* + * Reserve a static entity here to quickly setup runtime contents as + * gcc will issue block copy instructions. + */ + static struct JitToInterpEntries jitToInterpEntries = { + dvmJitToInterpNormal, + dvmJitToInterpNoChain, + dvmJitToInterpPunt, + dvmJitToInterpSingleStep, + dvmJitToTraceSelect, + }; +#endif + #if defined(WITH_TRACKREF_CHECKS) interpState.debugTrackedRefStart = @@ -813,6 +833,12 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult) #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) interpState.debugIsMethodEntry = true; #endif +#if defined(WITH_JIT) + interpState.jitState = gDvmJit.pJitEntryTable ? kJitNormal : kJitOff; + + /* Setup the Jit-to-interpreter entry points */ + interpState.jitToInterpEntries = jitToInterpEntries; +#endif /* * Initialize working state. @@ -848,6 +874,14 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult) Interpreter stdInterp; if (gDvm.executionMode == kExecutionModeInterpFast) stdInterp = dvmMterpStd; +#if defined(WITH_JIT) + else if (gDvm.executionMode == kExecutionModeJit) +/* If profiling overhead can be kept low enough, we can use a profiling + * mterp fast for both Jit and "fast" modes. If overhead is too high, + * create a specialized profiling interpreter. 
+ */ + stdInterp = dvmMterpStd; +#endif else stdInterp = dvmInterpretStd; @@ -858,7 +892,7 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult) LOGVV("threadid=%d: interp STD\n", self->threadId); change = (*stdInterp)(self, &interpState); break; -#if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) +#if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) || defined(WITH_JIT) case INTERP_DBG: LOGVV("threadid=%d: interp DBG\n", self->threadId); change = dvmInterpretDbg(self, &interpState); @@ -871,4 +905,3 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult) *pResult = interpState.retval; } - diff --git a/vm/interp/InterpDefs.h b/vm/interp/InterpDefs.h index 856c2f586..6c67cb56d 100644 --- a/vm/interp/InterpDefs.h +++ b/vm/interp/InterpDefs.h @@ -32,8 +32,46 @@ typedef enum InterpEntry { kInterpEntryInstr = 0, // continue to next instruction kInterpEntryReturn = 1, // jump to method return kInterpEntryThrow = 2, // jump to exception throw +#if defined(WITH_JIT) + kInterpEntryResume = 3, // Resume after single-step +#endif } InterpEntry; +#if defined(WITH_JIT) +/* + * There are five entry points from the compiled code to the interpreter: + * 1) dvmJitToInterpNormal: find if there is a corresponding compilation for + * the new dalvik PC. If so, chain the originating compilation with the + * target then jump to it. + * 2) dvmJitToInterpInvokeNoChain: similar to 1) but don't chain. This is + * for handling 1-to-many mappings like virtual method call and + * packed switch. + * 3) dvmJitToInterpPunt: use the fast interpreter to execute the next + * instruction(s) and stay there as long as it is appropriate to return + * to the compiled land. This is used when the jit'ed code is about to + * throw an exception. + * 4) dvmJitToInterpSingleStep: use the portable interpreter to execute the + * next instruction only and return to pre-specified location in the + * compiled code to resume execution. This is mainly used as debugging + * feature to bypass problematic opcode implementations without + * disturbing the trace formation. + * 5) dvmJitToTraceSelect: if there is a single exit from a translation that + * has already gone hot enough to be translated, we should assume that + * the exit point should also be translated (this is a common case for + * invokes). This trace exit will first check for a chaining + * opportunity, and if none is available will switch to the debug + * interpreter immediately for trace selection (as if threshold had + * just been reached). + */ +struct JitToInterpEntries { + void *dvmJitToInterpNormal; + void *dvmJitToInterpNoChain; + void *dvmJitToInterpPunt; + void *dvmJitToInterpSingleStep; + void *dvmJitToTraceSelect; +}; +#endif + /* * Interpreter context, used when switching from one interpreter to * another. We also tuck "mterp" state in here. @@ -78,8 +116,18 @@ typedef struct InterpState { * Interpreter switching. 
*/ InterpEntry entryPoint; // what to do when we start - int nextMode; // INTERP_STD or INTERP_DBG + int nextMode; // INTERP_STD, INTERP_DBG +#if defined(WITH_JIT) + /* + * Local copies of field from gDvm placed here for fast access + */ + struct JitEntry* pJitTable; + unsigned char* pJitProfTable; + JitState jitState; + void* jitResume; + u2* jitResumePC; +#endif #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) bool debugIsMethodEntry; // used for method entry event triggers @@ -88,6 +136,17 @@ typedef struct InterpState { int debugTrackedRefStart; // tracked refs from prior invocations #endif +#if defined(WITH_JIT) + struct JitToInterpEntries jitToInterpEntries; + + int currTraceRun; + int totalTraceLen; // Number of Dalvik insts in trace + const u2* currTraceHead; // Start of the trace we're building + const u2* currRunHead; // Start of run we're building + int currRunLen; // Length of run in 16-bit words + JitTraceRun trace[MAX_JIT_RUN_LEN]; +#endif + } InterpState; /* @@ -123,7 +182,7 @@ s4 dvmInterpHandleSparseSwitch(const u2* switchData, s4 testVal); /* * Process fill-array-data. */ -bool dvmInterpHandleFillArrayData(ArrayObject* arrayObject, +bool dvmInterpHandleFillArrayData(ArrayObject* arrayObject, const u2* arrayData); /* @@ -145,4 +204,19 @@ static inline bool dvmDebuggerOrProfilerActive(void) ; } +#if defined(WITH_JIT) +/* + * Determine if the jit, debugger or profiler is currently active. Used when + * selecting which interpreter to switch to. + */ +static inline bool dvmJitDebuggerOrProfilerActive(int jitState) +{ + return jitState != kJitOff +#if defined(WITH_PROFILER) + || gDvm.activeProfilers != 0 +#endif + ||gDvm.debuggerActive; +} +#endif + #endif /*_DALVIK_INTERP_DEFS*/ diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c new file mode 100644 index 000000000..e23361b4a --- /dev/null +++ b/vm/interp/Jit.c @@ -0,0 +1,567 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifdef WITH_JIT + +/* + * Target independent portion of Android's Jit + */ + +#include "Dalvik.h" +#include "Jit.h" + + +#include "dexdump/OpCodeNames.h" +#include +#include +#include +#include +#include "compiler/Compiler.h" +#include + +/* + * Reset profile counts. Note that we could easily lose + * one or more of these write because of threading. Because these + * counts are considered hints, absolute correctness is not a + * problem and the cost of synchronizing would be prohibitive. + * NOTE: Experimental - 5/21/09. Keep rough track of the last + * time the counts were reset to allow trace builder to ignore + * stale thresholds. This is just a hint, and the only penalty + * for getting it wrong is a slight performance hit (far less than + * the cost of synchronization). 
+ */ +static u8 lastProfileResetTimeUsec; +static void resetProfileCounts() { + int i; + unsigned char *pJitProfTable = gDvmJit.pProfTable; + lastProfileResetTimeUsec = dvmGetRelativeTimeUsec(); + if (pJitProfTable != NULL) { + for (i=0; i < JIT_PROF_SIZE; i++) { + pJitProfTable[i] = gDvmJit.threshold; + } + } +} + +int dvmJitStartup(void) +{ + unsigned int i; + bool res = true; /* Assume success */ + + // Create the compiler thread and setup miscellaneous chores */ + res &= dvmCompilerStartup(); + + dvmInitMutex(&gDvmJit.tableLock); + if (res && gDvm.executionMode == kExecutionModeJit) { + struct JitEntry *pJitTable = NULL; + int tableSize = sizeof(*pJitTable) * gDvmJit.maxTableEntries; + unsigned char *pJitProfTable = NULL; + dvmLockMutex(&gDvmJit.tableLock); + assert(sizeof(*pJitTable) == 12); + pJitTable = (struct JitEntry*)malloc(tableSize); + if (!pJitTable) { + LOGE("jit table allocation failed\n"); + res = false; + goto done; + } + memset(pJitTable,0,tableSize); + /* + * NOTE: the profile table must only be allocated once, globally. + * Profiling is turned on and off by nulling out gDvm.pJitProfTable + * and then restoring its original value. However, this action + * is not syncronized for speed so threads may continue to hold + * and update the profile table after profiling has been turned + * off by null'ng the global pointer. Be aware. + */ + pJitProfTable = (unsigned char *)malloc(JIT_PROF_SIZE); + if (!pJitProfTable) { + LOGE("jit prof table allocation failed\n"); + res = false; + goto done; + } + memset(pJitProfTable,0,JIT_PROF_SIZE); + for (i=0; i < gDvmJit.maxTableEntries; i++) { + pJitTable[i].chain = gDvmJit.maxTableEntries; + } + /* Is chain field wide enough for termination pattern? */ + assert(pJitTable[0].chain == gDvm.maxJitTableEntries); + resetProfileCounts(); + +done: + gDvmJit.pJitEntryTable = pJitTable; + gDvmJit.pProfTableCopy = gDvmJit.pProfTable = pJitProfTable; + dvmUnlockMutex(&gDvmJit.tableLock); + } + return res; +} + +/* + * If one of our fixed tables or the translation buffer fills up, + * call this routine to avoid wasting cycles on future translation requests. + */ +void dvmJitStopTranslationRequests() +{ + /* + * Note 1: This won't necessarily stop all translation requests, and + * operates on a delayed mechanism. Running threads look to the copy + * of this value in their private InterpState structures and won't see + * this change until it is refreshed (which happens on interpreter + * entry). + * Note 2: This is a one-shot memory leak on this table. Because this is a + * permanent off switch for Jit profiling, it is a one-time leak of 1K + * bytes, and no further attempt will be made to re-allocate it. Can't + * free it because some thread may be holding a reference. 
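+ *
+ * For illustration, the "refresh" that eventually makes a running thread
+ * observe the NULL happens when the interpreter is (re)entered and copies
+ * the global pointer into its private state (see the Mterp.c change below):
+ *
+ *     glue->pJitProfTable = gDvmJit.pProfTable;   // thread-local copy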
+ */ + gDvmJit.pProfTable = gDvmJit.pProfTableCopy = NULL; +} + +#if defined(EXIT_STATS) +/* Convenience function to increment counter from assembly code */ +void dvmBumpNoChain() +{ + gDvm.jitNoChainExit++; +} + +/* Convenience function to increment counter from assembly code */ +void dvmBumpNormal() +{ + gDvm.jitNormalExit++; +} + +/* Convenience function to increment counter from assembly code */ +void dvmBumpPunt(int from) +{ + gDvm.jitPuntExit++; +} +#endif + +/* Dumps debugging & tuning stats to the log */ +void dvmJitStats() +{ + int i; + int hit; + int not_hit; + int chains; + if (gDvmJit.pJitEntryTable) { + for (i=0, chains=hit=not_hit=0; + i < (int) gDvmJit.maxTableEntries; + i++) { + if (gDvmJit.pJitEntryTable[i].dPC != 0) + hit++; + else + not_hit++; + if (gDvmJit.pJitEntryTable[i].chain != gDvmJit.maxTableEntries) + chains++; + } + LOGD( + "JIT: %d traces, %d slots, %d chains, %d maxQ, %d thresh, %s", + hit, not_hit + hit, chains, gDvmJit.compilerMaxQueued, + gDvmJit.threshold, gDvmJit.blockingMode ? "Blocking" : "Non-blocking"); +#if defined(EXIT_STATS) + LOGD( + "JIT: Lookups: %d hits, %d misses; %d NoChain, %d normal, %d punt", + gDvmJit.addrLookupsFound, gDvmJit.addrLookupsNotFound, + gDvmJit.noChainExit, gDvmJit.normalExit, gDvmJit.puntExit); +#endif + LOGD("JIT: %d Translation chains", gDvmJit.translationChains); +#if defined(INVOKE_STATS) + LOGD("JIT: Invoke: %d noOpt, %d chainable, %d return", + gDvmJit.invokeNoOpt, gDvmJit.invokeChain, gDvmJit.returnOp); +#endif + } +} + +/* + * Final JIT shutdown. Only do this once, and do not attempt to restart + * the JIT later. + */ +void dvmJitShutdown(void) +{ + /* Shutdown the compiler thread */ + dvmCompilerShutdown(); + + dvmCompilerDumpStats(); + + dvmDestroyMutex(&gDvmJit.tableLock); + + if (gDvmJit.pJitEntryTable) { + free(gDvmJit.pJitEntryTable); + gDvmJit.pJitEntryTable = NULL; + } + + if (gDvmJit.pProfTable) { + free(gDvmJit.pProfTable); + gDvmJit.pProfTable = NULL; + } +} + +/* Returns the signed branch displacement of a Dalvik instruction. */ +int dvmGetBranchDisplacement( DecodedInstruction* decInsn ) +{ + int res = 0; + switch (dexGetInstrFormat(gDvm.instrFormat, decInsn->opCode)) { + case kFmt22t: + res = decInsn->vC; + break; + case kFmt20t: + case kFmt21t: + res = decInsn->vB; + break; + case kFmt10t: + case kFmt30t: + res = decInsn->vA; + break; + default: + dvmAbort(); + } + return res; +} + +/* + * Adds to the current trace request one instruction at a time, just + * before that instruction is interpreted. This is the primary trace + * selection function. NOTE: return instruction are handled a little + * differently. In general, instructions are "proposed" to be added + * to the current trace prior to interpretation. If the interpreter + * then successfully completes the instruction, is will be considered + * part of the request. This allows us to examine machine state prior + * to interpretation, and also abort the trace request if the instruction + * throws or does something unexpected. However, return instructions + * will cause an immediate end to the translation request - which will + * be passed to the compiler before the return completes. This is done + * in response to special handling of returns by the interpreter (and + * because returns cannot throw in a way that causes problems for the + * translated code. 
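+ *
+ * Sketch of the intended calling pattern from the interpreter loop
+ * (illustrative only - the names and placement here are approximate, not
+ * the actual interpreter macros):
+ *
+ *     // just before interpreting the instruction at pc
+ *     if (interpState->jitState is an active trace-building state) {
+ *         if (dvmCheckJit(pc, self, interpState))
+ *             bail out and switch interpreters;
+ *     }
+ *     // successfully interpreting pc then "commits" it to the trace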
+ */ +#define MAX_TRACE_LEN 100 +int dvmCheckJit(const u2* pc, Thread* self, InterpState* interpState) +{ + int flags,i,len; + int switchInterp = false; + int debugOrProfile = (gDvm.debuggerActive || self->suspendCount +#if defined(WITH_PROFILER) + || gDvm.activeProfilers +#endif + ); + + switch (interpState->jitState) { + char* nopStr; + int target; + int offset; + DecodedInstruction decInsn; + case kJitTSelect: + dexDecodeInstruction(gDvm.instrFormat, pc, &decInsn); +#if defined(SHOW_TRACE) + LOGD("TraceGen: adding %s",getOpcodeName(decInsn.opCode)); +#endif + flags = dexGetInstrFlags(gDvm.instrFlags, decInsn.opCode); + len = dexGetInstrOrTableWidthAbs(gDvm.instrWidth, pc); + offset = pc - interpState->method->insns; + if ((flags & kInstrNoJit) == kInstrNoJit) { + interpState->jitState = kJitTSelectEnd; + break; + } else { + if (pc != interpState->currRunHead + interpState->currRunLen) { + int currTraceRun; + /* We need to start a new trace run */ + currTraceRun = ++interpState->currTraceRun; + interpState->currRunLen = 0; + interpState->currRunHead = (u2*)pc; + interpState->trace[currTraceRun].frag.startOffset = offset; + interpState->trace[currTraceRun].frag.numInsts = 0; + interpState->trace[currTraceRun].frag.runEnd = false; + interpState->trace[currTraceRun].frag.hint = kJitHintNone; + } + interpState->trace[interpState->currTraceRun].frag.numInsts++; + interpState->totalTraceLen++; + interpState->currRunLen += len; + if ( ((flags & kInstrUnconditional) == 0) && + ((flags & (kInstrCanBranch | + kInstrCanSwitch | + kInstrCanReturn | + kInstrInvoke)) != 0)) { + interpState->jitState = kJitTSelectEnd; +#if defined(SHOW_TRACE) + LOGD("TraceGen: ending on %s, basic block end", + getOpcodeName(decInsn.opCode)); +#endif + } + if (decInsn.opCode == OP_THROW) { + interpState->jitState = kJitTSelectEnd; + } + if (interpState->totalTraceLen >= MAX_TRACE_LEN) { + interpState->jitState = kJitTSelectEnd; + } + if (debugOrProfile) { + interpState->jitState = kJitTSelectAbort; + switchInterp = !debugOrProfile; + break; + } + if ((flags & kInstrCanReturn) != kInstrCanReturn) { + break; + } + } + /* NOTE: intentional fallthrough for returns */ + case kJitTSelectEnd: + { + if (interpState->totalTraceLen == 0) { + switchInterp = !debugOrProfile; + break; + } + JitTraceDescription* desc = + (JitTraceDescription*)malloc(sizeof(JitTraceDescription) + + sizeof(JitTraceRun) * (interpState->currTraceRun+1)); + if (desc == NULL) { + LOGE("Out of memory in trace selection"); + dvmJitStopTranslationRequests(); + interpState->jitState = kJitTSelectAbort; + switchInterp = !debugOrProfile; + break; + } + interpState->trace[interpState->currTraceRun].frag.runEnd = + true; + interpState->jitState = kJitNormal; + desc->method = interpState->method; + memcpy((char*)&(desc->trace[0]), + (char*)&(interpState->trace[0]), + sizeof(JitTraceRun) * (interpState->currTraceRun+1)); +#if defined(SHOW_TRACE) + LOGD("TraceGen: trace done, adding to queue"); +#endif + dvmCompilerWorkEnqueue( + interpState->currTraceHead,kWorkOrderTrace,desc); + if (gDvmJit.blockingMode) { + dvmCompilerDrainQueue(); + } + switchInterp = !debugOrProfile; + } + break; + case kJitSingleStep: + interpState->jitState = kJitSingleStepEnd; + break; + case kJitSingleStepEnd: + interpState->entryPoint = kInterpEntryResume; + switchInterp = !debugOrProfile; + break; + case kJitTSelectAbort: +#if defined(SHOW_TRACE) + LOGD("TraceGen: trace abort"); +#endif + interpState->jitState = kJitNormal; + switchInterp = !debugOrProfile; + break; + case kJitNormal: + 
break; + default: + dvmAbort(); + } + return switchInterp; +} + +static inline struct JitEntry *findJitEntry(const u2* pc) +{ + int idx = dvmJitHash(pc); + + /* Expect a high hit rate on 1st shot */ + if (gDvmJit.pJitEntryTable[idx].dPC == pc) + return &gDvmJit.pJitEntryTable[idx]; + else { + int chainEndMarker = gDvmJit.maxTableEntries; + while (gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) { + idx = gDvmJit.pJitEntryTable[idx].chain; + if (gDvmJit.pJitEntryTable[idx].dPC == pc) + return &gDvmJit.pJitEntryTable[idx]; + } + } + return NULL; +} + +/* + * If a translated code address exists for the davik byte code + * pointer return it. This routine needs to be fast. + */ +void* dvmJitGetCodeAddr(const u2* dPC) +{ + int idx = dvmJitHash(dPC); + + /* Expect a high hit rate on 1st shot */ + if (gDvmJit.pJitEntryTable[idx].dPC == dPC) { +#if defined(EXIT_STATS) + gDvmJit.addrLookupsFound++; +#endif + return gDvmJit.pJitEntryTable[idx].codeAddress; + } else { + int chainEndMarker = gDvmJit.maxTableEntries; + while (gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) { + idx = gDvmJit.pJitEntryTable[idx].chain; + if (gDvmJit.pJitEntryTable[idx].dPC == dPC) { +#if defined(EXIT_STATS) + gDvmJit.addrLookupsFound++; +#endif + return gDvmJit.pJitEntryTable[idx].codeAddress; + } + } + } +#if defined(EXIT_STATS) + gDvmJit.addrLookupsNotFound++; +#endif + return NULL; +} + +/* + * Register the translated code pointer into the JitTable. + * NOTE: Once a codeAddress field transitions from NULL to + * JIT'd code, it must not be altered without first halting all + * threads. + */ +void dvmJitSetCodeAddr(const u2* dPC, void *nPC) { + struct JitEntry *jitEntry = findJitEntry(dPC); + assert(jitEntry); + /* Thumb code has odd PC */ + jitEntry->codeAddress = (void *) ((intptr_t) nPC |1); +} + +/* + * Determine if valid trace-bulding request is active. Return true + * if we need to abort and switch back to the fast interpreter, false + * otherwise. NOTE: may be called even when trace selection is not being + * requested + */ + +#define PROFILE_STALENESS_THRESHOLD 250000LL +bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState) +{ + bool res = false; /* Assume success */ + if (gDvmJit.pJitEntryTable != NULL) { + u8 delta = dvmGetRelativeTimeUsec() - lastProfileResetTimeUsec; + /* + * If the compiler is backlogged, or if a debugger or profiler is + * active, cancel any JIT actions + */ + if ( (gDvmJit.compilerQueueLength >= gDvmJit.compilerHighWater) || + gDvm.debuggerActive || self->suspendCount +#if defined(WITH_PROFILER) + || gDvm.activeProfilers +#endif + ) { + if (interpState->jitState != kJitOff) { + interpState->jitState = kJitNormal; + } + } else if (delta > PROFILE_STALENESS_THRESHOLD) { + resetProfileCounts(); + res = true; /* Stale profile - abort */ + } else if (interpState->jitState == kJitTSelectRequest) { + u4 chainEndMarker = gDvmJit.maxTableEntries; + u4 idx = dvmJitHash(interpState->pc); + + /* Walk the bucket chain to find an exact match for our PC */ + while ((gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) && + (gDvmJit.pJitEntryTable[idx].dPC != interpState->pc)) { + idx = gDvmJit.pJitEntryTable[idx].chain; + } + + if (gDvmJit.pJitEntryTable[idx].dPC == interpState->pc) { + /* + * Got a match. This means a trace has already + * been requested for this address. Bail back to + * mterp, which will check if the translation is ready + * for execution + */ + interpState->jitState = kJitTSelectAbort; + } else { + /* + * No match. 
Aquire jitTableLock and find the last + * slot in the chain. Possibly continue the chain walk in case + * some other thread allocated the slot we were looking + * at previuosly + */ + dvmLockMutex(&gDvmJit.tableLock); + /* + * At this point, if .dPC is NULL, then the slot we're + * looking at is the target slot from the primary hash + * (the simple, and expected case). Otherwise we're going + * to have to find a free slot and chain it. + */ + MEM_BARRIER(); + if (gDvmJit.pJitEntryTable[idx].dPC != NULL) { + u4 prev; + while (gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) { + idx = gDvmJit.pJitEntryTable[idx].chain; + } + /* Here, idx should be pointing to the last cell of an + * active chain whose last member contains a valid dPC */ + assert(gDvmJit.pJitEntryTable[idx].dPC != NULL); + /* Now, do a linear walk to find a free cell and add it to + * end of this chain */ + prev = idx; + while (true) { + idx++; + if (idx == chainEndMarker) + idx = 0; /* Wraparound */ + if ((gDvmJit.pJitEntryTable[idx].dPC == NULL) || + (idx == prev)) + break; + } + if (idx != prev) { + /* Got it - chain */ + gDvmJit.pJitEntryTable[prev].chain = idx; + } + } + if (gDvmJit.pJitEntryTable[idx].dPC == NULL) { + /* Allocate the slot */ + gDvmJit.pJitEntryTable[idx].dPC = interpState->pc; + } else { + /* + * Table is full. We could resize it, but that would + * be better handled by the translator thread. It + * will be aware of how full the table is getting. + * Disable further profiling and continue. + */ + interpState->jitState = kJitTSelectAbort; + LOGD("JIT: JitTable full, disabling profiling"); + dvmJitStopTranslationRequests(); + } + dvmUnlockMutex(&gDvmJit.tableLock); + } + } + switch (interpState->jitState) { + case kJitTSelectRequest: + interpState->jitState = kJitTSelect; + interpState->currTraceHead = interpState->pc; + interpState->currTraceRun = 0; + interpState->totalTraceLen = 0; + interpState->currRunHead = interpState->pc; + interpState->currRunLen = 0; + interpState->trace[0].frag.startOffset = + interpState->pc - interpState->method->insns; + interpState->trace[0].frag.numInsts = 0; + interpState->trace[0].frag.runEnd = false; + interpState->trace[0].frag.hint = kJitHintNone; + break; + case kJitTSelect: + case kJitTSelectAbort: + res = true; + case kJitSingleStep: + case kJitSingleStepEnd: + case kJitOff: + case kJitNormal: + break; + default: + dvmAbort(); + } + } + return res; +} + +#endif /* WITH_JIT */ diff --git a/vm/interp/Jit.h b/vm/interp/Jit.h new file mode 100644 index 000000000..2b2cb8e26 --- /dev/null +++ b/vm/interp/Jit.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Jit control + */ +#ifndef _DALVIK_INTERP_JIT +#define _DALVIK_INTERP_JIT + +#include "InterpDefs.h" + +#define JIT_PROF_SIZE 512 +/* + * JitTable hash function. + */ +static inline u4 dvmJitHash( const u2* p ) { + /* + * TODO - Might make sense to keep "maxTableEntries - 1" as its own + * variable for speed reasons. 
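+ *
+ * Note that the "& (gDvmJit.maxTableEntries - 1)" below only behaves as a
+ * modulo when maxTableEntries is a power of two, which the table size is
+ * presumably required to be. For example, with maxTableEntries == 512 the
+ * hash is simply the low 9 bits of ((((u4)p >> 12) ^ (u4)p) >> 1).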
+ */ + return ((((u4)p>>12)^(u4)p)>>1) & (gDvmJit.maxTableEntries-1); +} + +/* + * Entries in the JIT's address lookup hash table. + * with assembly hash function in mterp. + * TODO: rework this structure now that the profile counts have + * moved into their own table. + */ +typedef struct JitEntry { + u2 unused; /* was execution count */ + u2 chain; /* Index of next in chain */ + const u2* dPC; /* Dalvik code address */ + void* codeAddress; /* Code address of native translation */ +} JitEntry; + +int dvmJitStartup(void); +void dvmJitShutdown(void); +int dvmCheckJit(const u2* pc, Thread* self, InterpState* interpState); +void* dvmJitGetCodeAddr(const u2* dPC); +void dvmJitSetCodeAddr(const u2* dPC, void *nPC); +bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState); +void* dvmJitChain(void* tgtAddr, u4* branchAddr); +void dvmJitStopTranslationRequests(); +void dvmJitStats(); + +#endif /*_DALVIK_INTERP_JIT*/ diff --git a/vm/interp/Stack.h b/vm/interp/Stack.h index 1b28d49d2..f2a481b29 100644 --- a/vm/interp/Stack.h +++ b/vm/interp/Stack.h @@ -146,6 +146,8 @@ struct StackSaveArea { const u2* currentPc; } xtra; + /* Native return pointer for JIT, or 0 if interpreted */ + const u2* returnAddr; #ifdef PAD_SAVE_AREA u4 pad3, pad4, pad5; #endif diff --git a/vm/mterp/Mterp.c b/vm/mterp/Mterp.c index 53ddeb418..80a7b2b14 100644 --- a/vm/mterp/Mterp.c +++ b/vm/mterp/Mterp.c @@ -77,6 +77,10 @@ bool dvmMterpStd(Thread* self, InterpState* glue) glue->interpStackEnd = self->interpStackEnd; glue->pSelfSuspendCount = &self->suspendCount; +#if defined(WITH_JIT) + glue->pJitTable = gDvmJit.pJitEntryTable; + glue->pJitProfTable = gDvmJit.pProfTable; +#endif #if defined(WITH_DEBUGGER) glue->pDebuggerActive = &gDvm.debuggerActive; #endif @@ -111,4 +115,3 @@ bool dvmMterpStd(Thread* self, InterpState* glue) return true; } } - diff --git a/vm/mterp/Mterp.h b/vm/mterp/Mterp.h index ae2d2076d..8b3f7b48b 100644 --- a/vm/mterp/Mterp.h +++ b/vm/mterp/Mterp.h @@ -22,6 +22,9 @@ #include "Dalvik.h" #include "interp/InterpDefs.h" +#if defined(WITH_JIT) +#include "interp/Jit.h" +#endif /* * Interpreter state, passed into C functions from assembly stubs. 
The diff --git a/vm/mterp/armv5te/OP_GOTO.S b/vm/mterp/armv5te/OP_GOTO.S index 3433a7325..26f0c8f54 100644 --- a/vm/mterp/armv5te/OP_GOTO.S +++ b/vm/mterp/armv5te/OP_GOTO.S @@ -11,7 +11,15 @@ movs r9, r0, asr #24 @ r9<- ssssssAA (sign-extended) mov r9, r9, lsl #1 @ r9<- byte offset bmi common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction - +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif diff --git a/vm/mterp/armv5te/OP_GOTO_16.S b/vm/mterp/armv5te/OP_GOTO_16.S index 479438eed..f738a985b 100644 --- a/vm/mterp/armv5te/OP_GOTO_16.S +++ b/vm/mterp/armv5te/OP_GOTO_16.S @@ -10,7 +10,16 @@ FETCH_S(r0, 1) @ r0<- ssssAAAA (sign-extended) movs r9, r0, asl #1 @ r9<- byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif diff --git a/vm/mterp/armv5te/OP_GOTO_32.S b/vm/mterp/armv5te/OP_GOTO_32.S index 617b8ba3a..17780b9e8 100644 --- a/vm/mterp/armv5te/OP_GOTO_32.S +++ b/vm/mterp/armv5te/OP_GOTO_32.S @@ -18,7 +18,15 @@ orrs r0, r0, r1, lsl #16 @ r0<- AAAAaaaa, check sign mov r9, r0, asl #1 @ r9<- byte offset ble common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction - +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif diff --git a/vm/mterp/armv5te/OP_PACKED_SWITCH.S b/vm/mterp/armv5te/OP_PACKED_SWITCH.S index 6fde05ba4..72e742a9e 100644 --- a/vm/mterp/armv5te/OP_PACKED_SWITCH.S +++ b/vm/mterp/armv5te/OP_PACKED_SWITCH.S @@ -20,7 +20,16 @@ movs r9, r0, asl #1 @ r9<- branch byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks beq common_backwardBranch @ (want to use BLE but V is unknown) +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif diff --git a/vm/mterp/armv5te/bincmp.S b/vm/mterp/armv5te/bincmp.S index 9b574a3fc..1f43918e5 100644 --- a/vm/mterp/armv5te/bincmp.S +++ b/vm/mterp/armv5te/bincmp.S @@ -19,7 +19,14 @@ FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b 
common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif diff --git a/vm/mterp/armv5te/entry.S b/vm/mterp/armv5te/entry.S index 5b2cde089..f9e01a35b 100644 --- a/vm/mterp/armv5te/entry.S +++ b/vm/mterp/armv5te/entry.S @@ -66,10 +66,21 @@ dvmMterpStdRun: cmp r1, #kInterpEntryInstr @ usual case? bne .Lnot_instr @ no, handle it +#if defined(WITH_JIT) +.Lno_singleStep: + /* Entry is always a possible trace start */ + GET_JIT_PROF_TABLE(r0) + FETCH_INST() + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) +#else /* start executing the instruction at rPC */ FETCH_INST() @ load rINST from rPC GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif .Lnot_instr: cmp r1, #kInterpEntryReturn @ were we returning from a method? @@ -79,6 +90,22 @@ dvmMterpStdRun: cmp r1, #kInterpEntryThrow @ were we throwing an exception? beq common_exceptionThrown +#if defined(WITH_JIT) +.Lnot_throw: + ldr r0,[rGLUE, #offGlue_jitResume] + ldr r2,[rGLUE, #offGlue_jitResumePC] + cmp r1, #kInterpEntryResume @ resuming after Jit single-step? + bne .Lbad_arg + cmp rPC,r2 + bne .Lno_singleStep @ must have branched, don't resume + mov r1, #kInterpEntryInstr + strb r1, [rGLUE, #offGlue_entryPoint] + ldr rINST, .LdvmCompilerTemplate + bx r0 @ re-enter the translation +.LdvmCompilerTemplate: + .word dvmCompilerTemplateStart +#endif + .Lbad_arg: ldr r0, strBadEntryPoint @ r1 holds value of entryPoint diff --git a/vm/mterp/armv5te/footer.S b/vm/mterp/armv5te/footer.S index 22ad65a8d..aaab70ebe 100644 --- a/vm/mterp/armv5te/footer.S +++ b/vm/mterp/armv5te/footer.S @@ -1,12 +1,189 @@ + /* * =========================================================================== * Common subroutines and data * =========================================================================== */ + + .text .align 2 +#if defined(WITH_JIT) +/* + * Return from the translation cache to the interpreter when the compiler is + * having issues translating/executing a Dalvik instruction. We have to skip + * the code cache lookup otherwise it is possible to indefinitely bouce + * between the interpreter and the code cache if the instruction that fails + * to be compiled happens to be at a trace start. + */ + .global dvmJitToInterpPunt +dvmJitToInterpPunt: + mov rPC, r0 +#ifdef EXIT_STATS + mov r0,lr + bl dvmBumpPunt; +#endif + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + FETCH_INST() + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) + +/* + * Return to the interpreter to handle a single instruction. + * On entry: + * r0 <= PC + * r1 <= PC of resume instruction + * lr <= resume point in translation + */ + .global dvmJitToInterpSingleStep +dvmJitToInterpSingleStep: + str lr,[rGLUE,#offGlue_jitResume] + str r1,[rGLUE,#offGlue_jitResumePC] + mov r1,#kInterpEntryInstr + @ enum is 4 byte in aapcs-EABI + str r1, [rGLUE, #offGlue_entryPoint] + mov rPC,r0 + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + mov r2,#kJitSingleStep @ Ask for single step and then revert + str r2,[rGLUE,#offGlue_jitState] + mov r1,#1 @ set changeInterp to bail to debug interp + b common_gotoBail + + +/* + * Return from the translation cache and immediately request + * a translation for the exit target. Commonly used following + * invokes. 
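+ *
+ * The return from the translation is a Thumb-mode BLX, so lr arrives with
+ * its low bit set and points just past the branch; the 32-bit word that
+ * follows the BLX holds the Dalvik target PC. In rough C terms (lr stands
+ * for the incoming r14, purely for illustration):
+ *
+ *     const u2* targetPC   = *(const u2**)(lr - 1);  // "ldr rPC,[r14, #-1]"
+ *     u4        chainBranch = lr - 5;                // "add rINST,r14,#-5"
+ *
+ * i.e. lr - 1 clears the Thumb bit and lands on the literal word, and
+ * lr - 5 backs up over the 4-byte branch to the start of the chaining cell.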
+ */ + .global dvmJitToTraceSelect +dvmJitToTraceSelect: + ldr rPC,[r14, #-1] @ get our target PC + add rINST,r14,#-5 @ save start of chain branch + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + beq 2f + mov r1,rINST + bl dvmJitChain @ r0<- dvmJitChain(codeAddr,chainAddr) + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + bx r0 @ continue native execution + +/* No translation, so request one if profiling isn't disabled*/ +2: + GET_JIT_PROF_TABLE(r0) + FETCH_INST() + cmp r0, #0 + bne common_selectTrace + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) + +/* + * Return from the translation cache to the interpreter. + * The return was done with a BLX from thumb mode, and + * the following 32-bit word contains the target rPC value. + * Note that lr (r14) will have its low-order bit set to denote + * its thumb-mode origin. + * + * We'll need to stash our lr origin away, recover the new + * target and then check to see if there is a translation available + * for our new target. If so, we do a translation chain and + * go back to native execution. Otherwise, it's back to the + * interpreter (after treating this entry as a potential + * trace start). + */ + .global dvmJitToInterpNormal +dvmJitToInterpNormal: + ldr rPC,[r14, #-1] @ get our target PC + add rINST,r14,#-5 @ save start of chain branch +#ifdef EXIT_STATS + bl dvmBumpNormal +#endif + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + beq 1f @ go if not, otherwise do chain + mov r1,rINST + bl dvmJitChain @ r0<- dvmJitChain(codeAddr,chainAddr) + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + bx r0 @ continue native execution + +/* + * Return from the translation cache to the interpreter to do method invocation. + * Check if translation exists for the callee, but don't chain to it. + */ + .global dvmJitToInterpNoChain +dvmJitToInterpNoChain: +#ifdef EXIT_STATS + bl dvmBumpNoChain +#endif + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + bxne r0 @ continue native execution if so + +/* + * No translation, restore interpreter regs and start interpreting. + * rGLUE & rFP were preserved in the translated code, and rPC has + * already been restored by the time we get here. We'll need to set + * up rIBASE & rINST, and load the address of the JitTable into r0. + */ +1: + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + FETCH_INST() + GET_JIT_PROF_TABLE(r0) + @ NOTE: intended fallthrough +/* + * Common code to update potential trace start counter, and initiate + * a trace-build if appropriate. On entry, rPC should point to the + * next instruction to execute, and rINST should be already loaded with + * the next opcode word, and r0 holds a pointer to the jit profile + * table (pJitProfTable). + */ +common_testUpdateProfile: + cmp r0,#0 + GET_INST_OPCODE(ip) + GOTO_OPCODE_IFEQ(ip) @ if not profiling, fallthrough otherwise */ + +common_updateProfile: + eor r3,rPC,rPC,lsr #12 @ cheap, but fast hash function + lsl r3,r3,#23 @ shift out excess 511 + ldrb r1,[r0,r3,lsr #23] @ get counter + GET_INST_OPCODE(ip) + subs r1,r1,#1 @ decrement counter + strb r1,[r0,r3,lsr #23] @ and store it + GOTO_OPCODE_IFNE(ip) @ if not threshold, fallthrough otherwise */ + +/* + * Here, we switch to the debug interpreter to request + * trace selection. First, though, check to see if there + * is already a native translation in place (and, if so, + * jump to it now). 
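+ *
+ * Roughly, the counter update above plus this check correspond to the
+ * following C (idx and the other local names are descriptive only;
+ * JIT_PROF_SIZE is 512, and pJitProfTable is the pointer held in r0):
+ *
+ *     unsigned idx = ((u4)rPC ^ ((u4)rPC >> 12)) & (JIT_PROF_SIZE - 1);
+ *     if (--pJitProfTable[idx] != 0)
+ *         continue interpreting;            // threshold not reached yet
+ *     pJitProfTable[idx] = 255;             // reset the counter
+ *     if ((addr = dvmJitGetCodeAddr(rPC)) != NULL)
+ *         jump to addr;                     // translation already exists
+ *     else
+ *         set jitState = kJitTSelectRequest and bail to the debug interpreter;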
+ */ + mov r1,#255 + strb r1,[r0,r3,lsr #23] @ reset counter + EXPORT_PC() + mov r0,rPC + bl dvmJitGetCodeAddr @ r0<- dvmJitGetCodeAddr(rPC) + cmp r0,#0 + ldrne rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + beq common_selectTrace + bxne r0 @ jump to the translation +common_selectTrace: + mov r2,#kJitTSelectRequest @ ask for trace selection + str r2,[rGLUE,#offGlue_jitState] + mov r1,#1 @ set changeInterp + b common_gotoBail + +.LdvmCompilerTemplateStart: + .word dvmCompilerTemplateStart + +#endif + /* * Common code when a backward branch is taken. * @@ -16,9 +193,18 @@ common_backwardBranch: mov r0, #kInterpEntryInstr bl common_periodicChecks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) +#else FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* @@ -192,6 +378,10 @@ common_invokeMethodNoRange: #endif str rFP, [r10, #offStackSaveArea_prevFrame] str rPC, [r10, #offStackSaveArea_savedPc] +#if defined(WITH_JIT) + mov r9, #0 + str r9, [r10, #offStackSaveArea_returnAddr] +#endif str r0, [r10, #offStackSaveArea_method] tst r3, #ACC_NATIVE bne .LinvokeNative @@ -220,11 +410,22 @@ common_invokeMethodNoRange: @ r0=methodToCall, r1=newFp, r2=self, r3=newMethodClass, r9=newINST str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) mov rFP, r1 @ fp = newFp GET_PREFETCHED_OPCODE(ip, r9) @ extract prefetched opcode from r9 mov rINST, r9 @ publish new rINST str r1, [r2, #offThread_curFrame] @ self->curFrame = newFp + cmp r0,#0 + bne common_updateProfile GOTO_OPCODE(ip) @ jump to next instruction +#else + mov rFP, r1 @ fp = newFp + GET_PREFETCHED_OPCODE(ip, r9) @ extract prefetched opcode from r9 + mov rINST, r9 @ publish new rINST + str r1, [r2, #offThread_curFrame] @ self->curFrame = newFp + GOTO_OPCODE(ip) @ jump to next instruction +#endif .LinvokeNative: @ Prep for the native call @@ -330,10 +531,26 @@ common_returnFromMethod: str r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method ldr r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex str rFP, [r3, #offThread_curFrame] @ self->curFrame = fp +#if defined(WITH_JIT) + ldr r3, [r0, #offStackSaveArea_returnAddr] @ r3 = saveArea->returnAddr + GET_JIT_PROF_TABLE(r0) + mov rPC, r9 @ publish new rPC + str r1, [rGLUE, #offGlue_methodClassDex] + cmp r3, #0 @ caller is compiled code + bne 1f + GET_INST_OPCODE(ip) @ extract opcode from rINST + cmp r0,#0 + bne common_updateProfile + GOTO_OPCODE(ip) @ jump to next instruction +1: + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + blx r3 +#else GET_INST_OPCODE(ip) @ extract opcode from rINST mov rPC, r9 @ publish new rPC str r1, [rGLUE, #offGlue_methodClassDex] GOTO_OPCODE(ip) @ jump to next instruction +#endif /* * Return handling, calls through "glue code". @@ -356,12 +573,19 @@ common_returnFromMethod: * * This does not return. 
*/ + .global dvmMterpCommonExceptionThrown +dvmMterpCommonExceptionThrown: common_exceptionThrown: .LexceptionNew: mov r0, #kInterpEntryThrow mov r9, #0 bl common_periodicChecks +#if defined(WITH_JIT) + mov r2,#kJitTSelectAbort @ abandon trace selection in progress + str r2,[rGLUE,#offGlue_jitState] +#endif + ldr r10, [rGLUE, #offGlue_self] @ r10<- glue->self ldr r9, [r10, #offThread_exception] @ r9<- self->exception mov r1, r10 @ r1<- self diff --git a/vm/mterp/armv5te/header.S b/vm/mterp/armv5te/header.S index 586418fca..c256f4017 100644 --- a/vm/mterp/armv5te/header.S +++ b/vm/mterp/armv5te/header.S @@ -168,6 +168,8 @@ unspecified registers or condition codes. * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork. */ #define GOTO_OPCODE(_reg) add pc, rIBASE, _reg, lsl #${handler_size_bits} +#define GOTO_OPCODE_IFEQ(_reg) addeq pc, rIBASE, _reg, lsl #${handler_size_bits} +#define GOTO_OPCODE_IFNE(_reg) addne pc, rIBASE, _reg, lsl #${handler_size_bits} /* * Get/set the 32-bit value from a Dalvik register. @@ -175,6 +177,14 @@ unspecified registers or condition codes. #define GET_VREG(_reg, _vreg) ldr _reg, [rFP, _vreg, lsl #2] #define SET_VREG(_reg, _vreg) str _reg, [rFP, _vreg, lsl #2] +#if defined(WITH_JIT) +/* + * Null definition for overhead measuring purposes + */ +#define GET_JIT_TABLE(_reg) ldr _reg,[rGLUE,#offGlue_pJitTable] +#define GET_JIT_PROF_TABLE(_reg) ldr _reg,[rGLUE,#offGlue_pJitProfTable] +#endif + /* * Convert a virtual register index into an address. */ diff --git a/vm/mterp/armv5te/zcmp.S b/vm/mterp/armv5te/zcmp.S index 7942632bf..861ca5b0b 100644 --- a/vm/mterp/armv5te/zcmp.S +++ b/vm/mterp/armv5te/zcmp.S @@ -16,7 +16,17 @@ FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif diff --git a/vm/mterp/c/gotoTargets.c b/vm/mterp/c/gotoTargets.c index f52e3f020..37eaa2051 100644 --- a/vm/mterp/c/gotoTargets.c +++ b/vm/mterp/c/gotoTargets.c @@ -836,6 +836,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, #endif newSaveArea->prevFrame = fp; newSaveArea->savedPc = pc; +#if defined(WITH_JIT) + newSaveArea->returnAddr = 0; +#endif newSaveArea->method = methodToCall; if (!dvmIsNativeMethod(methodToCall)) { @@ -929,4 +932,3 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, } assert(false); // should not get here GOTO_TARGET_END - diff --git a/vm/mterp/c/header.c b/vm/mterp/c/header.c index d2fca9cb4..174c226a7 100644 --- a/vm/mterp/c/header.c +++ b/vm/mterp/c/header.c @@ -19,6 +19,7 @@ #include "interp/InterpDefs.h" #include "mterp/Mterp.h" #include // needed for fmod, fmodf +#include "mterp/common/FindInterface.h" /* * Configuration defines. These affect the C implementations, i.e. the @@ -327,29 +328,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval) * If we're building without debug and profiling support, we never switch. 
*/ #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) +#if defined(WITH_JIT) +# define NEED_INTERP_SWITCH(_current) ( \ + (_current == INTERP_STD) ? \ + dvmJitDebuggerOrProfilerActive(interpState->jitState) : \ + !dvmJitDebuggerOrProfilerActive(interpState->jitState) ) +#else # define NEED_INTERP_SWITCH(_current) ( \ (_current == INTERP_STD) ? \ dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() ) +#endif #else # define NEED_INTERP_SWITCH(_current) (false) #endif /* - * Look up an interface on a class using the cache. - */ -INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, - u4 methodIdx, const Method* method, DvmDex* methodClassDex) -{ -#define ATOMIC_CACHE_CALC \ - dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex) - - return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache, - DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx); - -#undef ATOMIC_CACHE_CALC -} - -/* * Check to see if "obj" is NULL. If so, throw an exception. Assumes the * pc has already been exported to the stack. * @@ -412,4 +405,3 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) #endif return true; } - diff --git a/vm/mterp/common/FindInterface.h b/vm/mterp/common/FindInterface.h new file mode 100644 index 000000000..021ed6591 --- /dev/null +++ b/vm/mterp/common/FindInterface.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* common includes */ +#include "Dalvik.h" + +/* + * Look up an interface on a class using the cache. + * + * This function used to be defined in mterp/c/header.c, but it is now used by + * the JIT compiler as well so it is separated into its own header file to + * avoid potential out-of-sync changes in the future. 
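+ *
+ * Typical use (illustrative; both the interpreter's invoke-interface path
+ * and JIT-generated code are expected to resolve interface calls this way):
+ *
+ *     Method* m = dvmFindInterfaceMethodInCache(thisClass, methodIdx,
+ *                                               curMethod, methodClassDex);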
+ */ +INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, + u4 methodIdx, const Method* method, DvmDex* methodClassDex) +{ +#define ATOMIC_CACHE_CALC \ + dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex) + + return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache, + DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx); + +#undef ATOMIC_CACHE_CALC +} diff --git a/vm/mterp/common/asm-constants.h b/vm/mterp/common/asm-constants.h index 73292a9f2..b4bb1c244 100644 --- a/vm/mterp/common/asm-constants.h +++ b/vm/mterp/common/asm-constants.h @@ -101,14 +101,42 @@ MTERP_OFFSET(offGlue_pSelfSuspendCount, MterpGlue, pSelfSuspendCount, 36) MTERP_OFFSET(offGlue_pDebuggerActive, MterpGlue, pDebuggerActive, 40) MTERP_OFFSET(offGlue_pActiveProfilers, MterpGlue, pActiveProfilers, 44) MTERP_OFFSET(offGlue_entryPoint, MterpGlue, entryPoint, 48) +#if defined(WITH_JIT) +MTERP_OFFSET(offGlue_pJitTable, MterpGlue, pJitTable, 56) +MTERP_OFFSET(offGlue_pJitProfTable, MterpGlue, pJitProfTable, 60) +MTERP_OFFSET(offGlue_jitState, MterpGlue, jitState, 64) +MTERP_OFFSET(offGlue_jitResume, MterpGlue, jitResume, 68) +MTERP_OFFSET(offGlue_jitResumePC, MterpGlue, jitResumePC, 72) +#endif #elif defined(WITH_DEBUGGER) MTERP_OFFSET(offGlue_pDebuggerActive, MterpGlue, pDebuggerActive, 40) MTERP_OFFSET(offGlue_entryPoint, MterpGlue, entryPoint, 44) +#if defined(WITH_JIT) +MTERP_OFFSET(offGlue_pJitTable, MterpGlue, pJitTable, 52) +MTERP_OFFSET(offGlue_pJitProfTable, MterpGlue, pJitProfTable, 56) +MTERP_OFFSET(offGlue_jitState, MterpGlue, jitState, 60) +MTERP_OFFSET(offGlue_jitResume, MterpGlue, jitResume, 64) +MTERP_OFFSET(offGlue_jitResumePC, MterpGlue, jitResumePC, 68) +#endif #elif defined(WITH_PROFILER) MTERP_OFFSET(offGlue_pActiveProfilers, MterpGlue, pActiveProfilers, 40) MTERP_OFFSET(offGlue_entryPoint, MterpGlue, entryPoint, 44) +#if defined(WITH_JIT) +MTERP_OFFSET(offGlue_pJitTable, MterpGlue, pJitTable, 52) +MTERP_OFFSET(offGlue_pJitProfTable, MterpGlue, pJitProfTable, 56) +MTERP_OFFSET(offGlue_jitState, MterpGlue, jitState, 60) +MTERP_OFFSET(offGlue_jitResume, MterpGlue, jitResume, 64) +MTERP_OFFSET(offGlue_jitResumePC, MterpGlue, jitResumePC, 68) +#endif #else MTERP_OFFSET(offGlue_entryPoint, MterpGlue, entryPoint, 40) +#if defined(WITH_JIT) +MTERP_OFFSET(offGlue_pJitTable, MterpGlue, pJitTable, 48) +MTERP_OFFSET(offGlue_pJitProfTable, MterpGlue, pJitProfTable, 52) +MTERP_OFFSET(offGlue_jitState, MterpGlue, jitState, 56) +MTERP_OFFSET(offGlue_jitResume, MterpGlue, jitResume, 60) +MTERP_OFFSET(offGlue_jitResumePC, MterpGlue, jitResumePC, 64) +#endif #endif /* make sure all JValue union members are stored at the same offset */ MTERP_OFFSET(offGlue_retval_z, MterpGlue, retval.z, 8) @@ -131,14 +159,16 @@ MTERP_OFFSET(offStackSaveArea_savedPc, StackSaveArea, savedPc, 8) MTERP_OFFSET(offStackSaveArea_method, StackSaveArea, method, 12) MTERP_OFFSET(offStackSaveArea_currentPc, StackSaveArea, xtra.currentPc, 16) MTERP_OFFSET(offStackSaveArea_localRefTop, StackSaveArea, xtra.localRefTop, 16) -MTERP_SIZEOF(sizeofStackSaveArea, StackSaveArea, 20) +MTERP_OFFSET(offStackSaveArea_returnAddr, StackSaveArea, returnAddr, 20) +MTERP_SIZEOF(sizeofStackSaveArea, StackSaveArea, 24) #else MTERP_OFFSET(offStackSaveArea_prevFrame, StackSaveArea, prevFrame, 0) MTERP_OFFSET(offStackSaveArea_savedPc, StackSaveArea, savedPc, 4) MTERP_OFFSET(offStackSaveArea_method, StackSaveArea, method, 8) MTERP_OFFSET(offStackSaveArea_currentPc, StackSaveArea, xtra.currentPc, 12) 
MTERP_OFFSET(offStackSaveArea_localRefTop, StackSaveArea, xtra.localRefTop, 12) -MTERP_SIZEOF(sizeofStackSaveArea, StackSaveArea, 16) +MTERP_OFFSET(offStackSaveArea_returnAddr, StackSaveArea, returnAddr, 16) +MTERP_SIZEOF(sizeofStackSaveArea, StackSaveArea, 20) #endif /* InstField fields */ @@ -200,6 +230,20 @@ MTERP_SIZEOF(sizeofClassStatus, InterpEntry, MTERP_SMALL_ENUM) MTERP_CONSTANT(kInterpEntryInstr, 0) MTERP_CONSTANT(kInterpEntryReturn, 1) MTERP_CONSTANT(kInterpEntryThrow, 2) +#if defined(WITH_JIT) +MTERP_CONSTANT(kInterpEntryResume, 3) +#endif + +#if defined(WITH_JIT) +MTERP_CONSTANT(kJitOff, 0) +MTERP_CONSTANT(kJitNormal, 1) +MTERP_CONSTANT(kJitTSelectRequest, 2) +MTERP_CONSTANT(kJitTSelect, 3) +MTERP_CONSTANT(kJitTSelectAbort, 4) +MTERP_CONSTANT(kJitTSelectEnd, 5) +MTERP_CONSTANT(kJitSingleStep, 6) +MTERP_CONSTANT(kJitSingleStepEnd, 7) +#endif /* ClassStatus enumeration */ MTERP_SIZEOF(sizeofClassStatus, ClassStatus, MTERP_SMALL_ENUM) diff --git a/vm/mterp/out/InterpAsm-armv4t.S b/vm/mterp/out/InterpAsm-armv4t.S index a348e31e4..61fc16bfc 100644 --- a/vm/mterp/out/InterpAsm-armv4t.S +++ b/vm/mterp/out/InterpAsm-armv4t.S @@ -175,6 +175,8 @@ unspecified registers or condition codes. * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork. */ #define GOTO_OPCODE(_reg) add pc, rIBASE, _reg, lsl #6 +#define GOTO_OPCODE_IFEQ(_reg) addeq pc, rIBASE, _reg, lsl #6 +#define GOTO_OPCODE_IFNE(_reg) addne pc, rIBASE, _reg, lsl #6 /* * Get/set the 32-bit value from a Dalvik register. @@ -182,6 +184,14 @@ unspecified registers or condition codes. #define GET_VREG(_reg, _vreg) ldr _reg, [rFP, _vreg, lsl #2] #define SET_VREG(_reg, _vreg) str _reg, [rFP, _vreg, lsl #2] +#if defined(WITH_JIT) +/* + * Null definition for overhead measuring purposes + */ +#define GET_JIT_TABLE(_reg) ldr _reg,[rGLUE,#offGlue_pJitTable] +#define GET_JIT_PROF_TABLE(_reg) ldr _reg,[rGLUE,#offGlue_pJitProfTable] +#endif + /* * Convert a virtual register index into an address. */ @@ -302,10 +312,21 @@ dvmMterpStdRun: cmp r1, #kInterpEntryInstr @ usual case? bne .Lnot_instr @ no, handle it +#if defined(WITH_JIT) +.Lno_singleStep: + /* Entry is always a possible trace start */ + GET_JIT_PROF_TABLE(r0) + FETCH_INST() + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) +#else /* start executing the instruction at rPC */ FETCH_INST() @ load rINST from rPC GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif .Lnot_instr: cmp r1, #kInterpEntryReturn @ were we returning from a method? @@ -315,6 +336,22 @@ dvmMterpStdRun: cmp r1, #kInterpEntryThrow @ were we throwing an exception? beq common_exceptionThrown +#if defined(WITH_JIT) +.Lnot_throw: + ldr r0,[rGLUE, #offGlue_jitResume] + ldr r2,[rGLUE, #offGlue_jitResumePC] + cmp r1, #kInterpEntryResume @ resuming after Jit single-step? 
+ bne .Lbad_arg + cmp rPC,r2 + bne .Lno_singleStep @ must have branched, don't resume + mov r1, #kInterpEntryInstr + strb r1, [rGLUE, #offGlue_entryPoint] + ldr rINST, .LdvmCompilerTemplate + bx r0 @ re-enter the translation +.LdvmCompilerTemplate: + .word dvmCompilerTemplateStart +#endif + .Lbad_arg: ldr r0, strBadEntryPoint @ r1 holds value of entryPoint @@ -1111,10 +1148,18 @@ dalvik_inst: movs r9, r0, asr #24 @ r9<- ssssssAA (sign-extended) mov r9, r9, lsl #1 @ r9<- byte offset bmi common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction - +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ .balign 64 @@ -1130,9 +1175,18 @@ dalvik_inst: FETCH_S(r0, 1) @ r0<- ssssAAAA (sign-extended) movs r9, r0, asl #1 @ r9<- byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ @@ -1157,10 +1211,18 @@ dalvik_inst: orrs r0, r0, r1, lsl #16 @ r0<- AAAAaaaa, check sign mov r9, r0, asl #1 @ r9<- byte offset ble common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction - +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ .balign 64 @@ -1186,9 +1248,18 @@ dalvik_inst: movs r9, r0, asl #1 @ r9<- branch byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks beq common_backwardBranch @ (want to use BLE but V is unknown) +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ @@ -1216,9 +1287,18 @@ dalvik_inst: movs r9, r0, asl #1 @ r9<- branch byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks beq common_backwardBranch @ (want to use BLE but V is unknown) +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1465,9 +1545,16 @@ dalvik_inst: 
FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1495,9 +1582,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1525,9 +1619,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1555,9 +1656,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1585,9 +1693,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1615,9 +1730,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1642,9 +1764,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST 
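
Every taken branch in the hunks above (and in the equivalent armv5te and armv5te-vfp files later in the patch) gains the same few instructions: load glue->pJitProfTable, advance rPC, then either test the pointer inline and branch to common_updateProfile (the unconditional gotos) or defer the test to the shared common_testUpdateProfile stub (the conditional if-* opcodes). A compact C rendering of that hook is below; updateProfile and interpretNext stand in for common_updateProfile and GOTO_OPCODE and are not real VM entry points.

typedef unsigned short u2;   /* Dalvik code unit */

/* Stand-ins for interpreter pieces outside this sketch. */
extern void updateProfile(void *glue, const u2 *pc);  /* ~ common_updateProfile */
extern void interpretNext(void *glue, const u2 *pc);  /* ~ GET_INST_OPCODE/GOTO_OPCODE */

/* What a taken branch now does under WITH_JIT:
 *   GET_JIT_PROF_TABLE(r0); FETCH_ADVANCE_INST_RB(r9);
 *   cmp r0,#0; bne common_updateProfile; ...dispatch...
 */
static void onTakenBranch(void *glue, unsigned char *pJitProfTable,
                          const u2 **pc, int byteOffset)
{
    *pc = (const u2 *)((const char *)*pc + byteOffset); /* advance rPC by the
                                                           signed byte offset */
    if (pJitProfTable != NULL)
        updateProfile(glue, *pc);   /* profiling on: count this trace head */
    else
        interpretNext(glue, *pc);   /* profiling off: dispatch as before */
}
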
+ cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1669,9 +1801,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1696,9 +1838,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1723,9 +1875,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1750,9 +1912,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1777,9 +1949,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next 
instruction +#endif @@ -9298,15 +9480,192 @@ d2l_doconv: dvmAsmSisterEnd: /* File: armv5te/footer.S */ + /* * =========================================================================== * Common subroutines and data * =========================================================================== */ + + .text .align 2 +#if defined(WITH_JIT) +/* + * Return from the translation cache to the interpreter when the compiler is + * having issues translating/executing a Dalvik instruction. We have to skip + * the code cache lookup otherwise it is possible to indefinitely bouce + * between the interpreter and the code cache if the instruction that fails + * to be compiled happens to be at a trace start. + */ + .global dvmJitToInterpPunt +dvmJitToInterpPunt: + mov rPC, r0 +#ifdef EXIT_STATS + mov r0,lr + bl dvmBumpPunt; +#endif + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + FETCH_INST() + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) + +/* + * Return to the interpreter to handle a single instruction. + * On entry: + * r0 <= PC + * r1 <= PC of resume instruction + * lr <= resume point in translation + */ + .global dvmJitToInterpSingleStep +dvmJitToInterpSingleStep: + str lr,[rGLUE,#offGlue_jitResume] + str r1,[rGLUE,#offGlue_jitResumePC] + mov r1,#kInterpEntryInstr + @ enum is 4 byte in aapcs-EABI + str r1, [rGLUE, #offGlue_entryPoint] + mov rPC,r0 + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + mov r2,#kJitSingleStep @ Ask for single step and then revert + str r2,[rGLUE,#offGlue_jitState] + mov r1,#1 @ set changeInterp to bail to debug interp + b common_gotoBail + + +/* + * Return from the translation cache and immediately request + * a translation for the exit target. Commonly used following + * invokes. + */ + .global dvmJitToTraceSelect +dvmJitToTraceSelect: + ldr rPC,[r14, #-1] @ get our target PC + add rINST,r14,#-5 @ save start of chain branch + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + beq 2f + mov r1,rINST + bl dvmJitChain @ r0<- dvmJitChain(codeAddr,chainAddr) + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + bx r0 @ continue native execution + +/* No translation, so request one if profiling isn't disabled*/ +2: + GET_JIT_PROF_TABLE(r0) + FETCH_INST() + cmp r0, #0 + bne common_selectTrace + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) + +/* + * Return from the translation cache to the interpreter. + * The return was done with a BLX from thumb mode, and + * the following 32-bit word contains the target rPC value. + * Note that lr (r14) will have its low-order bit set to denote + * its thumb-mode origin. + * + * We'll need to stash our lr origin away, recover the new + * target and then check to see if there is a translation available + * for our new target. If so, we do a translation chain and + * go back to native execution. Otherwise, it's back to the + * interpreter (after treating this entry as a potential + * trace start). + */ + .global dvmJitToInterpNormal +dvmJitToInterpNormal: + ldr rPC,[r14, #-1] @ get our target PC + add rINST,r14,#-5 @ save start of chain branch +#ifdef EXIT_STATS + bl dvmBumpNormal +#endif + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + beq 1f @ go if not, otherwise do chain + mov r1,rINST + bl dvmJitChain @ r0<- dvmJitChain(codeAddr,chainAddr) + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + bx r0 @ continue native execution + +/* + * Return from the translation cache to the interpreter to do method invocation. 
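
dvmJitToTraceSelect and dvmJitToInterpNormal above both start by recovering state from lr: a translation exits through a Thumb BLX whose following 32-bit literal holds the target Dalvik PC, and lr arrives with its low (Thumb) bit set. The sketch below spells out that address arithmetic and the chain-or-interpret decision in C; recoverExitInfo and requestTraceOrInterpret are illustrative helpers, and the dvmJitChain signature is inferred from the "r0<- dvmJitChain(codeAddr,chainAddr)" comment rather than quoted from a header.

#include <stdint.h>

typedef unsigned short u2;   /* Dalvik code unit */

extern void *dvmJitGetCodeAddr(const u2 *dPC);             /* added by this patch (Jit.h) */
extern void *dvmJitChain(void *codeAddr, void *chainAddr); /* signature assumed from the
                                                              register comment above */
extern void  requestTraceOrInterpret(const u2 *dPC);       /* stand-in for the
                                                              "no translation" fall-back */

/* "ldr rPC,[r14,#-1]" / "add rINST,r14,#-5" in C terms. */
static void recoverExitInfo(uintptr_t lr, const u2 **dalvikPC, void **chainCell)
{
    /* lr - 1 clears the Thumb bit and lands on the literal word that the
     * translation placed right after its exit BLX. */
    *dalvikPC  = *(const u2 *const *)(lr - 1);
    /* The 4-byte BLX pair itself starts one word earlier: that is the chain
     * cell dvmJitChain may later patch to jump straight to the target trace. */
    *chainCell = (void *)(lr - 5);
}

/* Exit handling along the lines of dvmJitToInterpNormal above. */
static void onTranslationExit(uintptr_t lr)
{
    const u2 *dPC;
    void *chainCell;
    recoverExitInfo(lr, &dPC, &chainCell);

    void *code = dvmJitGetCodeAddr(dPC);            /* already translated? */
    if (code != NULL) {
        void *entry = dvmJitChain(code, chainCell); /* patch the exit branch... */
        (void)entry;                                /* ...and "bx" into the translation */
    } else {
        requestTraceOrInterpret(dPC);               /* back to the interpreter, treating
                                                       this PC as a potential trace head */
    }
}
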
+ * Check if translation exists for the callee, but don't chain to it. + */ + .global dvmJitToInterpNoChain +dvmJitToInterpNoChain: +#ifdef EXIT_STATS + bl dvmBumpNoChain +#endif + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + bxne r0 @ continue native execution if so + +/* + * No translation, restore interpreter regs and start interpreting. + * rGLUE & rFP were preserved in the translated code, and rPC has + * already been restored by the time we get here. We'll need to set + * up rIBASE & rINST, and load the address of the JitTable into r0. + */ +1: + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + FETCH_INST() + GET_JIT_PROF_TABLE(r0) + @ NOTE: intended fallthrough +/* + * Common code to update potential trace start counter, and initiate + * a trace-build if appropriate. On entry, rPC should point to the + * next instruction to execute, and rINST should be already loaded with + * the next opcode word, and r0 holds a pointer to the jit profile + * table (pJitProfTable). + */ +common_testUpdateProfile: + cmp r0,#0 + GET_INST_OPCODE(ip) + GOTO_OPCODE_IFEQ(ip) @ if not profiling, fallthrough otherwise */ + +common_updateProfile: + eor r3,rPC,rPC,lsr #12 @ cheap, but fast hash function + lsl r3,r3,#23 @ shift out excess 511 + ldrb r1,[r0,r3,lsr #23] @ get counter + GET_INST_OPCODE(ip) + subs r1,r1,#1 @ decrement counter + strb r1,[r0,r3,lsr #23] @ and store it + GOTO_OPCODE_IFNE(ip) @ if not threshold, fallthrough otherwise */ + +/* + * Here, we switch to the debug interpreter to request + * trace selection. First, though, check to see if there + * is already a native translation in place (and, if so, + * jump to it now). + */ + mov r1,#255 + strb r1,[r0,r3,lsr #23] @ reset counter + EXPORT_PC() + mov r0,rPC + bl dvmJitGetCodeAddr @ r0<- dvmJitGetCodeAddr(rPC) + cmp r0,#0 + ldrne rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + beq common_selectTrace + bxne r0 @ jump to the translation +common_selectTrace: + mov r2,#kJitTSelectRequest @ ask for trace selection + str r2,[rGLUE,#offGlue_jitState] + mov r1,#1 @ set changeInterp + b common_gotoBail + +.LdvmCompilerTemplateStart: + .word dvmCompilerTemplateStart + +#endif + /* * Common code when a backward branch is taken. * @@ -9316,9 +9675,18 @@ dvmAsmSisterEnd: common_backwardBranch: mov r0, #kInterpEntryInstr bl common_periodicChecks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) +#else FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* @@ -9492,6 +9860,10 @@ common_invokeMethodNoRange: #endif str rFP, [r10, #offStackSaveArea_prevFrame] str rPC, [r10, #offStackSaveArea_savedPc] +#if defined(WITH_JIT) + mov r9, #0 + str r9, [r10, #offStackSaveArea_returnAddr] +#endif str r0, [r10, #offStackSaveArea_method] tst r3, #ACC_NATIVE bne .LinvokeNative @@ -9520,11 +9892,22 @@ common_invokeMethodNoRange: @ r0=methodToCall, r1=newFp, r2=self, r3=newMethodClass, r9=newINST str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... 
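
common_updateProfile above keeps a table of one-byte countdown counters indexed by a cheap hash of the Dalvik PC ("eor rPC,rPC,lsr #12", with the lsl/lsr #23 pair keeping the low nine bits, i.e. 512 entries); when a counter hits zero it is reset to 255 and the VM either jumps to an existing translation or requests trace selection. common_testUpdateProfile is the same logic behind an extra null check on the table pointer. A C rendering follows; updateProfile, enterTranslation and requestTraceSelection are illustrative names, while dvmJitGetCodeAddr is the lookup the assembly actually calls.

#include <stdint.h>

typedef unsigned short u2;   /* Dalvik code unit */

extern void *dvmJitGetCodeAddr(const u2 *dPC);     /* translation lookup (Jit.h) */
extern void  enterTranslation(void *codeAddr);     /* ~ "bx r0" with rINST preloaded */
extern void  requestTraceSelection(const u2 *dPC); /* ~ jitState = kJitTSelectRequest,
                                                        set changeInterp, common_gotoBail */

/* C rendering of common_updateProfile: 512 byte-sized countdown counters. */
static void updateProfile(uint8_t pJitProfTable[512], const u2 *dPC)
{
    uintptr_t pc  = (uintptr_t)dPC;
    uint32_t  idx = (uint32_t)((pc ^ (pc >> 12)) & 0x1ff); /* eor + keep low 9 bits */

    if (--pJitProfTable[idx] != 0)
        return;                        /* not hot yet: keep interpreting */

    pJitProfTable[idx] = 255;          /* reset the counter */
    void *code = dvmJitGetCodeAddr(dPC);
    if (code != NULL)
        enterTranslation(code);        /* a trace already exists: jump to it */
    else
        requestTraceSelection(dPC);    /* bail to the debug interpreter and
                                          start recording a trace at dPC */
}
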
+#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) mov rFP, r1 @ fp = newFp GET_PREFETCHED_OPCODE(ip, r9) @ extract prefetched opcode from r9 mov rINST, r9 @ publish new rINST str r1, [r2, #offThread_curFrame] @ self->curFrame = newFp + cmp r0,#0 + bne common_updateProfile GOTO_OPCODE(ip) @ jump to next instruction +#else + mov rFP, r1 @ fp = newFp + GET_PREFETCHED_OPCODE(ip, r9) @ extract prefetched opcode from r9 + mov rINST, r9 @ publish new rINST + str r1, [r2, #offThread_curFrame] @ self->curFrame = newFp + GOTO_OPCODE(ip) @ jump to next instruction +#endif .LinvokeNative: @ Prep for the native call @@ -9630,10 +10013,26 @@ common_returnFromMethod: str r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method ldr r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex str rFP, [r3, #offThread_curFrame] @ self->curFrame = fp +#if defined(WITH_JIT) + ldr r3, [r0, #offStackSaveArea_returnAddr] @ r3 = saveArea->returnAddr + GET_JIT_PROF_TABLE(r0) + mov rPC, r9 @ publish new rPC + str r1, [rGLUE, #offGlue_methodClassDex] + cmp r3, #0 @ caller is compiled code + bne 1f + GET_INST_OPCODE(ip) @ extract opcode from rINST + cmp r0,#0 + bne common_updateProfile + GOTO_OPCODE(ip) @ jump to next instruction +1: + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + blx r3 +#else GET_INST_OPCODE(ip) @ extract opcode from rINST mov rPC, r9 @ publish new rPC str r1, [rGLUE, #offGlue_methodClassDex] GOTO_OPCODE(ip) @ jump to next instruction +#endif /* * Return handling, calls through "glue code". @@ -9656,12 +10055,19 @@ common_returnFromMethod: * * This does not return. */ + .global dvmMterpCommonExceptionThrown +dvmMterpCommonExceptionThrown: common_exceptionThrown: .LexceptionNew: mov r0, #kInterpEntryThrow mov r9, #0 bl common_periodicChecks +#if defined(WITH_JIT) + mov r2,#kJitTSelectAbort @ abandon trace selection in progress + str r2,[rGLUE,#offGlue_jitState] +#endif + ldr r10, [rGLUE, #offGlue_self] @ r10<- glue->self ldr r9, [r10, #offThread_exception] @ r9<- self->exception mov r1, r10 @ r1<- self diff --git a/vm/mterp/out/InterpAsm-armv5te-vfp.S b/vm/mterp/out/InterpAsm-armv5te-vfp.S index 77f621326..3e557003e 100644 --- a/vm/mterp/out/InterpAsm-armv5te-vfp.S +++ b/vm/mterp/out/InterpAsm-armv5te-vfp.S @@ -175,6 +175,8 @@ unspecified registers or condition codes. * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork. */ #define GOTO_OPCODE(_reg) add pc, rIBASE, _reg, lsl #6 +#define GOTO_OPCODE_IFEQ(_reg) addeq pc, rIBASE, _reg, lsl #6 +#define GOTO_OPCODE_IFNE(_reg) addne pc, rIBASE, _reg, lsl #6 /* * Get/set the 32-bit value from a Dalvik register. @@ -182,6 +184,14 @@ unspecified registers or condition codes. #define GET_VREG(_reg, _vreg) ldr _reg, [rFP, _vreg, lsl #2] #define SET_VREG(_reg, _vreg) str _reg, [rFP, _vreg, lsl #2] +#if defined(WITH_JIT) +/* + * Null definition for overhead measuring purposes + */ +#define GET_JIT_TABLE(_reg) ldr _reg,[rGLUE,#offGlue_pJitTable] +#define GET_JIT_PROF_TABLE(_reg) ldr _reg,[rGLUE,#offGlue_pJitProfTable] +#endif + /* * Convert a virtual register index into an address. */ @@ -302,10 +312,21 @@ dvmMterpStdRun: cmp r1, #kInterpEntryInstr @ usual case? 
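
The common_returnFromMethod change just above is where the new StackSaveArea returnAddr slot pays off: interpreted calls store 0 there (see the common_invokeMethodNoRange hunk earlier), compiled traces record a return point, so a method return either dispatches back into the interpreter (possibly via common_updateProfile when profiling is on) or BLXes straight back into the caller's translation. A minimal C sketch, assuming illustrative helper names (interpretAt, reenterTranslation) that are not VM API:

#include <stddef.h>

typedef unsigned short u2;   /* Dalvik code unit */

/* Only the field this hunk cares about; the real StackSaveArea
 * (vm/interp/Stack.h) gains returnAddr elsewhere in this patch. */
typedef struct SaveAreaSketch {
    void *returnAddr;   /* NULL: caller was interpreted; otherwise a point in a
                           translation to return to */
} SaveAreaSketch;

extern void interpretAt(const u2 *pc);       /* ~ GOTO_OPCODE once rPC is published,
                                                 possibly via common_updateProfile */
extern void reenterTranslation(void *addr);  /* ~ "blx r3" with rINST preloaded with
                                                 dvmCompilerTemplateStart */

/* C rendering of the WITH_JIT tail of common_returnFromMethod above. */
static void returnToCaller(const SaveAreaSketch *callerSave, const u2 *callerPC)
{
    if (callerSave->returnAddr == NULL)
        interpretAt(callerPC);                      /* caller was interpreted */
    else
        reenterTranslation(callerSave->returnAddr); /* caller was compiled code */
}
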
bne .Lnot_instr @ no, handle it +#if defined(WITH_JIT) +.Lno_singleStep: + /* Entry is always a possible trace start */ + GET_JIT_PROF_TABLE(r0) + FETCH_INST() + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) +#else /* start executing the instruction at rPC */ FETCH_INST() @ load rINST from rPC GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif .Lnot_instr: cmp r1, #kInterpEntryReturn @ were we returning from a method? @@ -315,6 +336,22 @@ dvmMterpStdRun: cmp r1, #kInterpEntryThrow @ were we throwing an exception? beq common_exceptionThrown +#if defined(WITH_JIT) +.Lnot_throw: + ldr r0,[rGLUE, #offGlue_jitResume] + ldr r2,[rGLUE, #offGlue_jitResumePC] + cmp r1, #kInterpEntryResume @ resuming after Jit single-step? + bne .Lbad_arg + cmp rPC,r2 + bne .Lno_singleStep @ must have branched, don't resume + mov r1, #kInterpEntryInstr + strb r1, [rGLUE, #offGlue_entryPoint] + ldr rINST, .LdvmCompilerTemplate + bx r0 @ re-enter the translation +.LdvmCompilerTemplate: + .word dvmCompilerTemplateStart +#endif + .Lbad_arg: ldr r0, strBadEntryPoint @ r1 holds value of entryPoint @@ -1111,10 +1148,18 @@ dalvik_inst: movs r9, r0, asr #24 @ r9<- ssssssAA (sign-extended) mov r9, r9, lsl #1 @ r9<- byte offset bmi common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction - +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ .balign 64 @@ -1130,9 +1175,18 @@ dalvik_inst: FETCH_S(r0, 1) @ r0<- ssssAAAA (sign-extended) movs r9, r0, asl #1 @ r9<- byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ @@ -1157,10 +1211,18 @@ dalvik_inst: orrs r0, r0, r1, lsl #16 @ r0<- AAAAaaaa, check sign mov r9, r0, asl #1 @ r9<- byte offset ble common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction - +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ .balign 64 @@ -1186,9 +1248,18 @@ dalvik_inst: movs r9, r0, asl #1 @ r9<- branch byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks beq common_backwardBranch @ (want to use BLE but V is unknown) +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else FETCH_ADVANCE_INST_RB(r9) @ update 
rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ @@ -1216,9 +1287,18 @@ dalvik_inst: movs r9, r0, asl #1 @ r9<- branch byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks beq common_backwardBranch @ (want to use BLE but V is unknown) +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1443,9 +1523,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1473,9 +1560,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1503,9 +1597,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1533,9 +1634,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1563,9 +1671,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1593,9 +1708,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, 
asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1620,9 +1742,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1647,9 +1779,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1674,9 +1816,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1701,9 +1853,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1728,9 +1890,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction 
+#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1755,9 +1927,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -8812,15 +8994,192 @@ d2l_doconv: dvmAsmSisterEnd: /* File: armv5te/footer.S */ + /* * =========================================================================== * Common subroutines and data * =========================================================================== */ + + .text .align 2 +#if defined(WITH_JIT) +/* + * Return from the translation cache to the interpreter when the compiler is + * having issues translating/executing a Dalvik instruction. We have to skip + * the code cache lookup otherwise it is possible to indefinitely bouce + * between the interpreter and the code cache if the instruction that fails + * to be compiled happens to be at a trace start. + */ + .global dvmJitToInterpPunt +dvmJitToInterpPunt: + mov rPC, r0 +#ifdef EXIT_STATS + mov r0,lr + bl dvmBumpPunt; +#endif + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + FETCH_INST() + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) + +/* + * Return to the interpreter to handle a single instruction. + * On entry: + * r0 <= PC + * r1 <= PC of resume instruction + * lr <= resume point in translation + */ + .global dvmJitToInterpSingleStep +dvmJitToInterpSingleStep: + str lr,[rGLUE,#offGlue_jitResume] + str r1,[rGLUE,#offGlue_jitResumePC] + mov r1,#kInterpEntryInstr + @ enum is 4 byte in aapcs-EABI + str r1, [rGLUE, #offGlue_entryPoint] + mov rPC,r0 + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + mov r2,#kJitSingleStep @ Ask for single step and then revert + str r2,[rGLUE,#offGlue_jitState] + mov r1,#1 @ set changeInterp to bail to debug interp + b common_gotoBail + + +/* + * Return from the translation cache and immediately request + * a translation for the exit target. Commonly used following + * invokes. + */ + .global dvmJitToTraceSelect +dvmJitToTraceSelect: + ldr rPC,[r14, #-1] @ get our target PC + add rINST,r14,#-5 @ save start of chain branch + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + beq 2f + mov r1,rINST + bl dvmJitChain @ r0<- dvmJitChain(codeAddr,chainAddr) + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + bx r0 @ continue native execution + +/* No translation, so request one if profiling isn't disabled*/ +2: + GET_JIT_PROF_TABLE(r0) + FETCH_INST() + cmp r0, #0 + bne common_selectTrace + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) + +/* + * Return from the translation cache to the interpreter. + * The return was done with a BLX from thumb mode, and + * the following 32-bit word contains the target rPC value. + * Note that lr (r14) will have its low-order bit set to denote + * its thumb-mode origin. + * + * We'll need to stash our lr origin away, recover the new + * target and then check to see if there is a translation available + * for our new target. 
If so, we do a translation chain and + * go back to native execution. Otherwise, it's back to the + * interpreter (after treating this entry as a potential + * trace start). + */ + .global dvmJitToInterpNormal +dvmJitToInterpNormal: + ldr rPC,[r14, #-1] @ get our target PC + add rINST,r14,#-5 @ save start of chain branch +#ifdef EXIT_STATS + bl dvmBumpNormal +#endif + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + beq 1f @ go if not, otherwise do chain + mov r1,rINST + bl dvmJitChain @ r0<- dvmJitChain(codeAddr,chainAddr) + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + bx r0 @ continue native execution + +/* + * Return from the translation cache to the interpreter to do method invocation. + * Check if translation exists for the callee, but don't chain to it. + */ + .global dvmJitToInterpNoChain +dvmJitToInterpNoChain: +#ifdef EXIT_STATS + bl dvmBumpNoChain +#endif + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + bxne r0 @ continue native execution if so + +/* + * No translation, restore interpreter regs and start interpreting. + * rGLUE & rFP were preserved in the translated code, and rPC has + * already been restored by the time we get here. We'll need to set + * up rIBASE & rINST, and load the address of the JitTable into r0. + */ +1: + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + FETCH_INST() + GET_JIT_PROF_TABLE(r0) + @ NOTE: intended fallthrough +/* + * Common code to update potential trace start counter, and initiate + * a trace-build if appropriate. On entry, rPC should point to the + * next instruction to execute, and rINST should be already loaded with + * the next opcode word, and r0 holds a pointer to the jit profile + * table (pJitProfTable). + */ +common_testUpdateProfile: + cmp r0,#0 + GET_INST_OPCODE(ip) + GOTO_OPCODE_IFEQ(ip) @ if not profiling, fallthrough otherwise */ + +common_updateProfile: + eor r3,rPC,rPC,lsr #12 @ cheap, but fast hash function + lsl r3,r3,#23 @ shift out excess 511 + ldrb r1,[r0,r3,lsr #23] @ get counter + GET_INST_OPCODE(ip) + subs r1,r1,#1 @ decrement counter + strb r1,[r0,r3,lsr #23] @ and store it + GOTO_OPCODE_IFNE(ip) @ if not threshold, fallthrough otherwise */ + +/* + * Here, we switch to the debug interpreter to request + * trace selection. First, though, check to see if there + * is already a native translation in place (and, if so, + * jump to it now). + */ + mov r1,#255 + strb r1,[r0,r3,lsr #23] @ reset counter + EXPORT_PC() + mov r0,rPC + bl dvmJitGetCodeAddr @ r0<- dvmJitGetCodeAddr(rPC) + cmp r0,#0 + ldrne rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + beq common_selectTrace + bxne r0 @ jump to the translation +common_selectTrace: + mov r2,#kJitTSelectRequest @ ask for trace selection + str r2,[rGLUE,#offGlue_jitState] + mov r1,#1 @ set changeInterp + b common_gotoBail + +.LdvmCompilerTemplateStart: + .word dvmCompilerTemplateStart + +#endif + /* * Common code when a backward branch is taken. 
* @@ -8830,9 +9189,18 @@ dvmAsmSisterEnd: common_backwardBranch: mov r0, #kInterpEntryInstr bl common_periodicChecks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) +#else FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* @@ -9006,6 +9374,10 @@ common_invokeMethodNoRange: #endif str rFP, [r10, #offStackSaveArea_prevFrame] str rPC, [r10, #offStackSaveArea_savedPc] +#if defined(WITH_JIT) + mov r9, #0 + str r9, [r10, #offStackSaveArea_returnAddr] +#endif str r0, [r10, #offStackSaveArea_method] tst r3, #ACC_NATIVE bne .LinvokeNative @@ -9034,11 +9406,22 @@ common_invokeMethodNoRange: @ r0=methodToCall, r1=newFp, r2=self, r3=newMethodClass, r9=newINST str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) mov rFP, r1 @ fp = newFp GET_PREFETCHED_OPCODE(ip, r9) @ extract prefetched opcode from r9 mov rINST, r9 @ publish new rINST str r1, [r2, #offThread_curFrame] @ self->curFrame = newFp + cmp r0,#0 + bne common_updateProfile GOTO_OPCODE(ip) @ jump to next instruction +#else + mov rFP, r1 @ fp = newFp + GET_PREFETCHED_OPCODE(ip, r9) @ extract prefetched opcode from r9 + mov rINST, r9 @ publish new rINST + str r1, [r2, #offThread_curFrame] @ self->curFrame = newFp + GOTO_OPCODE(ip) @ jump to next instruction +#endif .LinvokeNative: @ Prep for the native call @@ -9144,10 +9527,26 @@ common_returnFromMethod: str r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method ldr r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex str rFP, [r3, #offThread_curFrame] @ self->curFrame = fp +#if defined(WITH_JIT) + ldr r3, [r0, #offStackSaveArea_returnAddr] @ r3 = saveArea->returnAddr + GET_JIT_PROF_TABLE(r0) + mov rPC, r9 @ publish new rPC + str r1, [rGLUE, #offGlue_methodClassDex] + cmp r3, #0 @ caller is compiled code + bne 1f + GET_INST_OPCODE(ip) @ extract opcode from rINST + cmp r0,#0 + bne common_updateProfile + GOTO_OPCODE(ip) @ jump to next instruction +1: + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + blx r3 +#else GET_INST_OPCODE(ip) @ extract opcode from rINST mov rPC, r9 @ publish new rPC str r1, [rGLUE, #offGlue_methodClassDex] GOTO_OPCODE(ip) @ jump to next instruction +#endif /* * Return handling, calls through "glue code". @@ -9170,12 +9569,19 @@ common_returnFromMethod: * * This does not return. */ + .global dvmMterpCommonExceptionThrown +dvmMterpCommonExceptionThrown: common_exceptionThrown: .LexceptionNew: mov r0, #kInterpEntryThrow mov r9, #0 bl common_periodicChecks +#if defined(WITH_JIT) + mov r2,#kJitTSelectAbort @ abandon trace selection in progress + str r2,[rGLUE,#offGlue_jitState] +#endif + ldr r10, [rGLUE, #offGlue_self] @ r10<- glue->self ldr r9, [r10, #offThread_exception] @ r9<- self->exception mov r1, r10 @ r1<- self diff --git a/vm/mterp/out/InterpAsm-armv5te.S b/vm/mterp/out/InterpAsm-armv5te.S index 8deb4aa70..9c380eaaf 100644 --- a/vm/mterp/out/InterpAsm-armv5te.S +++ b/vm/mterp/out/InterpAsm-armv5te.S @@ -175,6 +175,8 @@ unspecified registers or condition codes. * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork. 
*/ #define GOTO_OPCODE(_reg) add pc, rIBASE, _reg, lsl #6 +#define GOTO_OPCODE_IFEQ(_reg) addeq pc, rIBASE, _reg, lsl #6 +#define GOTO_OPCODE_IFNE(_reg) addne pc, rIBASE, _reg, lsl #6 /* * Get/set the 32-bit value from a Dalvik register. @@ -182,6 +184,14 @@ unspecified registers or condition codes. #define GET_VREG(_reg, _vreg) ldr _reg, [rFP, _vreg, lsl #2] #define SET_VREG(_reg, _vreg) str _reg, [rFP, _vreg, lsl #2] +#if defined(WITH_JIT) +/* + * Null definition for overhead measuring purposes + */ +#define GET_JIT_TABLE(_reg) ldr _reg,[rGLUE,#offGlue_pJitTable] +#define GET_JIT_PROF_TABLE(_reg) ldr _reg,[rGLUE,#offGlue_pJitProfTable] +#endif + /* * Convert a virtual register index into an address. */ @@ -302,10 +312,21 @@ dvmMterpStdRun: cmp r1, #kInterpEntryInstr @ usual case? bne .Lnot_instr @ no, handle it +#if defined(WITH_JIT) +.Lno_singleStep: + /* Entry is always a possible trace start */ + GET_JIT_PROF_TABLE(r0) + FETCH_INST() + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) +#else /* start executing the instruction at rPC */ FETCH_INST() @ load rINST from rPC GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif .Lnot_instr: cmp r1, #kInterpEntryReturn @ were we returning from a method? @@ -315,6 +336,22 @@ dvmMterpStdRun: cmp r1, #kInterpEntryThrow @ were we throwing an exception? beq common_exceptionThrown +#if defined(WITH_JIT) +.Lnot_throw: + ldr r0,[rGLUE, #offGlue_jitResume] + ldr r2,[rGLUE, #offGlue_jitResumePC] + cmp r1, #kInterpEntryResume @ resuming after Jit single-step? + bne .Lbad_arg + cmp rPC,r2 + bne .Lno_singleStep @ must have branched, don't resume + mov r1, #kInterpEntryInstr + strb r1, [rGLUE, #offGlue_entryPoint] + ldr rINST, .LdvmCompilerTemplate + bx r0 @ re-enter the translation +.LdvmCompilerTemplate: + .word dvmCompilerTemplateStart +#endif + .Lbad_arg: ldr r0, strBadEntryPoint @ r1 holds value of entryPoint @@ -1111,10 +1148,18 @@ dalvik_inst: movs r9, r0, asr #24 @ r9<- ssssssAA (sign-extended) mov r9, r9, lsl #1 @ r9<- byte offset bmi common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction - +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ .balign 64 @@ -1130,9 +1175,18 @@ dalvik_inst: FETCH_S(r0, 1) @ r0<- ssssAAAA (sign-extended) movs r9, r0, asl #1 @ r9<- byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ @@ -1157,10 +1211,18 @@ dalvik_inst: orrs r0, r0, r1, lsl #16 @ r0<- AAAAaaaa, check sign mov r9, r0, asl #1 @ r9<- byte offset ble common_backwardBranch @ backward branch, do periodic checks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile 
GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction - +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ .balign 64 @@ -1186,9 +1248,18 @@ dalvik_inst: movs r9, r0, asl #1 @ r9<- branch byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks beq common_backwardBranch @ (want to use BLE but V is unknown) +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* ------------------------------ */ @@ -1216,9 +1287,18 @@ dalvik_inst: movs r9, r0, asl #1 @ r9<- branch byte offset, check sign bmi common_backwardBranch @ backward branch, do periodic checks beq common_backwardBranch @ (want to use BLE but V is unknown) +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1465,9 +1545,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1495,9 +1582,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1525,9 +1619,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1555,9 +1656,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b 
common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1585,9 +1693,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1615,9 +1730,16 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ yes, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + b common_testUpdateProfile +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1642,9 +1764,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1669,9 +1801,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1696,9 +1838,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1723,9 +1875,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + 
FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1750,9 +1912,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -1777,9 +1949,19 @@ dalvik_inst: FETCH_S(r9, 1) @ r9<- branch offset, in code units movs r9, r9, asl #1 @ convert to bytes, check sign bmi common_backwardBranch @ backward branch, do periodic checks -1: FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST +1: +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#else + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + GET_INST_OPCODE(ip) @ extract opcode from rINST + GOTO_OPCODE(ip) @ jump to next instruction +#endif @@ -9292,15 +9474,192 @@ d2l_doconv: dvmAsmSisterEnd: /* File: armv5te/footer.S */ + /* * =========================================================================== * Common subroutines and data * =========================================================================== */ + + .text .align 2 +#if defined(WITH_JIT) +/* + * Return from the translation cache to the interpreter when the compiler is + * having issues translating/executing a Dalvik instruction. We have to skip + * the code cache lookup otherwise it is possible to indefinitely bouce + * between the interpreter and the code cache if the instruction that fails + * to be compiled happens to be at a trace start. + */ + .global dvmJitToInterpPunt +dvmJitToInterpPunt: + mov rPC, r0 +#ifdef EXIT_STATS + mov r0,lr + bl dvmBumpPunt; +#endif + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + FETCH_INST() + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) + +/* + * Return to the interpreter to handle a single instruction. + * On entry: + * r0 <= PC + * r1 <= PC of resume instruction + * lr <= resume point in translation + */ + .global dvmJitToInterpSingleStep +dvmJitToInterpSingleStep: + str lr,[rGLUE,#offGlue_jitResume] + str r1,[rGLUE,#offGlue_jitResumePC] + mov r1,#kInterpEntryInstr + @ enum is 4 byte in aapcs-EABI + str r1, [rGLUE, #offGlue_entryPoint] + mov rPC,r0 + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + mov r2,#kJitSingleStep @ Ask for single step and then revert + str r2,[rGLUE,#offGlue_jitState] + mov r1,#1 @ set changeInterp to bail to debug interp + b common_gotoBail + + +/* + * Return from the translation cache and immediately request + * a translation for the exit target. Commonly used following + * invokes. 
+ */ + .global dvmJitToTraceSelect +dvmJitToTraceSelect: + ldr rPC,[r14, #-1] @ get our target PC + add rINST,r14,#-5 @ save start of chain branch + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + beq 2f + mov r1,rINST + bl dvmJitChain @ r0<- dvmJitChain(codeAddr,chainAddr) + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + bx r0 @ continue native execution + +/* No translation, so request one if profiling isn't disabled*/ +2: + GET_JIT_PROF_TABLE(r0) + FETCH_INST() + cmp r0, #0 + bne common_selectTrace + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) + +/* + * Return from the translation cache to the interpreter. + * The return was done with a BLX from thumb mode, and + * the following 32-bit word contains the target rPC value. + * Note that lr (r14) will have its low-order bit set to denote + * its thumb-mode origin. + * + * We'll need to stash our lr origin away, recover the new + * target and then check to see if there is a translation available + * for our new target. If so, we do a translation chain and + * go back to native execution. Otherwise, it's back to the + * interpreter (after treating this entry as a potential + * trace start). + */ + .global dvmJitToInterpNormal +dvmJitToInterpNormal: + ldr rPC,[r14, #-1] @ get our target PC + add rINST,r14,#-5 @ save start of chain branch +#ifdef EXIT_STATS + bl dvmBumpNormal +#endif + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + beq 1f @ go if not, otherwise do chain + mov r1,rINST + bl dvmJitChain @ r0<- dvmJitChain(codeAddr,chainAddr) + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + bx r0 @ continue native execution + +/* + * Return from the translation cache to the interpreter to do method invocation. + * Check if translation exists for the callee, but don't chain to it. + */ + .global dvmJitToInterpNoChain +dvmJitToInterpNoChain: +#ifdef EXIT_STATS + bl dvmBumpNoChain +#endif + mov r0,rPC + bl dvmJitGetCodeAddr @ Is there a translation? + cmp r0,#0 + bxne r0 @ continue native execution if so + +/* + * No translation, restore interpreter regs and start interpreting. + * rGLUE & rFP were preserved in the translated code, and rPC has + * already been restored by the time we get here. We'll need to set + * up rIBASE & rINST, and load the address of the JitTable into r0. + */ +1: + EXPORT_PC() + adrl rIBASE, dvmAsmInstructionStart + FETCH_INST() + GET_JIT_PROF_TABLE(r0) + @ NOTE: intended fallthrough +/* + * Common code to update potential trace start counter, and initiate + * a trace-build if appropriate. On entry, rPC should point to the + * next instruction to execute, and rINST should be already loaded with + * the next opcode word, and r0 holds a pointer to the jit profile + * table (pJitProfTable). + */ +common_testUpdateProfile: + cmp r0,#0 + GET_INST_OPCODE(ip) + GOTO_OPCODE_IFEQ(ip) @ if not profiling, fallthrough otherwise */ + +common_updateProfile: + eor r3,rPC,rPC,lsr #12 @ cheap, but fast hash function + lsl r3,r3,#23 @ shift out excess 511 + ldrb r1,[r0,r3,lsr #23] @ get counter + GET_INST_OPCODE(ip) + subs r1,r1,#1 @ decrement counter + strb r1,[r0,r3,lsr #23] @ and store it + GOTO_OPCODE_IFNE(ip) @ if not threshold, fallthrough otherwise */ + +/* + * Here, we switch to the debug interpreter to request + * trace selection. First, though, check to see if there + * is already a native translation in place (and, if so, + * jump to it now). 
+ */ + mov r1,#255 + strb r1,[r0,r3,lsr #23] @ reset counter + EXPORT_PC() + mov r0,rPC + bl dvmJitGetCodeAddr @ r0<- dvmJitGetCodeAddr(rPC) + cmp r0,#0 + ldrne rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + beq common_selectTrace + bxne r0 @ jump to the translation +common_selectTrace: + mov r2,#kJitTSelectRequest @ ask for trace selection + str r2,[rGLUE,#offGlue_jitState] + mov r1,#1 @ set changeInterp + b common_gotoBail + +.LdvmCompilerTemplateStart: + .word dvmCompilerTemplateStart + +#endif + /* * Common code when a backward branch is taken. * @@ -9310,9 +9669,18 @@ dvmAsmSisterEnd: common_backwardBranch: mov r0, #kInterpEntryInstr bl common_periodicChecks +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) + FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST + cmp r0,#0 + bne common_updateProfile + GET_INST_OPCODE(ip) + GOTO_OPCODE(ip) +#else FETCH_ADVANCE_INST_RB(r9) @ update rPC, load rINST GET_INST_OPCODE(ip) @ extract opcode from rINST GOTO_OPCODE(ip) @ jump to next instruction +#endif /* @@ -9486,6 +9854,10 @@ common_invokeMethodNoRange: #endif str rFP, [r10, #offStackSaveArea_prevFrame] str rPC, [r10, #offStackSaveArea_savedPc] +#if defined(WITH_JIT) + mov r9, #0 + str r9, [r10, #offStackSaveArea_returnAddr] +#endif str r0, [r10, #offStackSaveArea_method] tst r3, #ACC_NATIVE bne .LinvokeNative @@ -9514,11 +9886,22 @@ common_invokeMethodNoRange: @ r0=methodToCall, r1=newFp, r2=self, r3=newMethodClass, r9=newINST str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... +#if defined(WITH_JIT) + GET_JIT_PROF_TABLE(r0) mov rFP, r1 @ fp = newFp GET_PREFETCHED_OPCODE(ip, r9) @ extract prefetched opcode from r9 mov rINST, r9 @ publish new rINST str r1, [r2, #offThread_curFrame] @ self->curFrame = newFp + cmp r0,#0 + bne common_updateProfile GOTO_OPCODE(ip) @ jump to next instruction +#else + mov rFP, r1 @ fp = newFp + GET_PREFETCHED_OPCODE(ip, r9) @ extract prefetched opcode from r9 + mov rINST, r9 @ publish new rINST + str r1, [r2, #offThread_curFrame] @ self->curFrame = newFp + GOTO_OPCODE(ip) @ jump to next instruction +#endif .LinvokeNative: @ Prep for the native call @@ -9624,10 +10007,26 @@ common_returnFromMethod: str r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method ldr r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex str rFP, [r3, #offThread_curFrame] @ self->curFrame = fp +#if defined(WITH_JIT) + ldr r3, [r0, #offStackSaveArea_returnAddr] @ r3 = saveArea->returnAddr + GET_JIT_PROF_TABLE(r0) + mov rPC, r9 @ publish new rPC + str r1, [rGLUE, #offGlue_methodClassDex] + cmp r3, #0 @ caller is compiled code + bne 1f + GET_INST_OPCODE(ip) @ extract opcode from rINST + cmp r0,#0 + bne common_updateProfile + GOTO_OPCODE(ip) @ jump to next instruction +1: + ldr rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code + blx r3 +#else GET_INST_OPCODE(ip) @ extract opcode from rINST mov rPC, r9 @ publish new rPC str r1, [rGLUE, #offGlue_methodClassDex] GOTO_OPCODE(ip) @ jump to next instruction +#endif /* * Return handling, calls through "glue code". @@ -9650,12 +10049,19 @@ common_returnFromMethod: * * This does not return. 
*/ + .global dvmMterpCommonExceptionThrown +dvmMterpCommonExceptionThrown: common_exceptionThrown: .LexceptionNew: mov r0, #kInterpEntryThrow mov r9, #0 bl common_periodicChecks +#if defined(WITH_JIT) + mov r2,#kJitTSelectAbort @ abandon trace selection in progress + str r2,[rGLUE,#offGlue_jitState] +#endif + ldr r10, [rGLUE, #offGlue_self] @ r10<- glue->self ldr r9, [r10, #offThread_exception] @ r9<- self->exception mov r1, r10 @ r1<- self diff --git a/vm/mterp/out/InterpC-allstubs.c b/vm/mterp/out/InterpC-allstubs.c index 420873e1c..0b70c9ef6 100644 --- a/vm/mterp/out/InterpC-allstubs.c +++ b/vm/mterp/out/InterpC-allstubs.c @@ -26,6 +26,7 @@ #include "interp/InterpDefs.h" #include "mterp/Mterp.h" #include // needed for fmod, fmodf +#include "mterp/common/FindInterface.h" /* * Configuration defines. These affect the C implementations, i.e. the @@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval) * If we're building without debug and profiling support, we never switch. */ #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) +#if defined(WITH_JIT) +# define NEED_INTERP_SWITCH(_current) ( \ + (_current == INTERP_STD) ? \ + dvmJitDebuggerOrProfilerActive(interpState->jitState) : \ + !dvmJitDebuggerOrProfilerActive(interpState->jitState) ) +#else # define NEED_INTERP_SWITCH(_current) ( \ (_current == INTERP_STD) ? \ dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() ) +#endif #else # define NEED_INTERP_SWITCH(_current) (false) #endif /* - * Look up an interface on a class using the cache. - */ -INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, - u4 methodIdx, const Method* method, DvmDex* methodClassDex) -{ -#define ATOMIC_CACHE_CALC \ - dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex) - - return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache, - DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx); - -#undef ATOMIC_CACHE_CALC -} - -/* * Check to see if "obj" is NULL. If so, throw an exception. Assumes the * pc has already been exported to the stack. * @@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) return true; } - /* File: cstubs/stubdefs.c */ /* this is a standard (no debug support) interpreter */ #define INTERP_TYPE INTERP_STD @@ -3877,6 +3869,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, #endif newSaveArea->prevFrame = fp; newSaveArea->savedPc = pc; +#if defined(WITH_JIT) + newSaveArea->returnAddr = 0; +#endif newSaveArea->method = methodToCall; if (!dvmIsNativeMethod(methodToCall)) { @@ -3971,7 +3966,6 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, assert(false); // should not get here GOTO_TARGET_END - /* File: cstubs/enddefs.c */ /* undefine "magic" name remapping */ diff --git a/vm/mterp/out/InterpC-armv4t.c b/vm/mterp/out/InterpC-armv4t.c index 2f4b2e80a..80e87cf53 100644 --- a/vm/mterp/out/InterpC-armv4t.c +++ b/vm/mterp/out/InterpC-armv4t.c @@ -26,6 +26,7 @@ #include "interp/InterpDefs.h" #include "mterp/Mterp.h" #include // needed for fmod, fmodf +#include "mterp/common/FindInterface.h" /* * Configuration defines. These affect the C implementations, i.e. the @@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval) * If we're building without debug and profiling support, we never switch. */ #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) +#if defined(WITH_JIT) +# define NEED_INTERP_SWITCH(_current) ( \ + (_current == INTERP_STD) ? 
\ + dvmJitDebuggerOrProfilerActive(interpState->jitState) : \ + !dvmJitDebuggerOrProfilerActive(interpState->jitState) ) +#else # define NEED_INTERP_SWITCH(_current) ( \ (_current == INTERP_STD) ? \ dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() ) +#endif #else # define NEED_INTERP_SWITCH(_current) (false) #endif /* - * Look up an interface on a class using the cache. - */ -INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, - u4 methodIdx, const Method* method, DvmDex* methodClassDex) -{ -#define ATOMIC_CACHE_CALC \ - dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex) - - return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache, - DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx); - -#undef ATOMIC_CACHE_CALC -} - -/* * Check to see if "obj" is NULL. If so, throw an exception. Assumes the * pc has already been exported to the stack. * @@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) return true; } - /* File: cstubs/stubdefs.c */ /* this is a standard (no debug support) interpreter */ #define INTERP_TYPE INTERP_STD diff --git a/vm/mterp/out/InterpC-armv5te-vfp.c b/vm/mterp/out/InterpC-armv5te-vfp.c index 92c29fb80..326ba3db9 100644 --- a/vm/mterp/out/InterpC-armv5te-vfp.c +++ b/vm/mterp/out/InterpC-armv5te-vfp.c @@ -26,6 +26,7 @@ #include "interp/InterpDefs.h" #include "mterp/Mterp.h" #include // needed for fmod, fmodf +#include "mterp/common/FindInterface.h" /* * Configuration defines. These affect the C implementations, i.e. the @@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval) * If we're building without debug and profiling support, we never switch. */ #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) +#if defined(WITH_JIT) +# define NEED_INTERP_SWITCH(_current) ( \ + (_current == INTERP_STD) ? \ + dvmJitDebuggerOrProfilerActive(interpState->jitState) : \ + !dvmJitDebuggerOrProfilerActive(interpState->jitState) ) +#else # define NEED_INTERP_SWITCH(_current) ( \ (_current == INTERP_STD) ? \ dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() ) +#endif #else # define NEED_INTERP_SWITCH(_current) (false) #endif /* - * Look up an interface on a class using the cache. - */ -INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, - u4 methodIdx, const Method* method, DvmDex* methodClassDex) -{ -#define ATOMIC_CACHE_CALC \ - dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex) - - return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache, - DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx); - -#undef ATOMIC_CACHE_CALC -} - -/* * Check to see if "obj" is NULL. If so, throw an exception. Assumes the * pc has already been exported to the stack. * @@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) return true; } - /* File: cstubs/stubdefs.c */ /* this is a standard (no debug support) interpreter */ #define INTERP_TYPE INTERP_STD diff --git a/vm/mterp/out/InterpC-armv5te.c b/vm/mterp/out/InterpC-armv5te.c index 70794cc12..9cf78811d 100644 --- a/vm/mterp/out/InterpC-armv5te.c +++ b/vm/mterp/out/InterpC-armv5te.c @@ -26,6 +26,7 @@ #include "interp/InterpDefs.h" #include "mterp/Mterp.h" #include // needed for fmod, fmodf +#include "mterp/common/FindInterface.h" /* * Configuration defines. These affect the C implementations, i.e. 
the @@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval) * If we're building without debug and profiling support, we never switch. */ #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) +#if defined(WITH_JIT) +# define NEED_INTERP_SWITCH(_current) ( \ + (_current == INTERP_STD) ? \ + dvmJitDebuggerOrProfilerActive(interpState->jitState) : \ + !dvmJitDebuggerOrProfilerActive(interpState->jitState) ) +#else # define NEED_INTERP_SWITCH(_current) ( \ (_current == INTERP_STD) ? \ dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() ) +#endif #else # define NEED_INTERP_SWITCH(_current) (false) #endif /* - * Look up an interface on a class using the cache. - */ -INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, - u4 methodIdx, const Method* method, DvmDex* methodClassDex) -{ -#define ATOMIC_CACHE_CALC \ - dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex) - - return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache, - DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx); - -#undef ATOMIC_CACHE_CALC -} - -/* * Check to see if "obj" is NULL. If so, throw an exception. Assumes the * pc has already been exported to the stack. * @@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) return true; } - /* File: cstubs/stubdefs.c */ /* this is a standard (no debug support) interpreter */ #define INTERP_TYPE INTERP_STD diff --git a/vm/mterp/out/InterpC-portdbg.c b/vm/mterp/out/InterpC-portdbg.c index 188639c16..c8f428c45 100644 --- a/vm/mterp/out/InterpC-portdbg.c +++ b/vm/mterp/out/InterpC-portdbg.c @@ -26,6 +26,7 @@ #include "interp/InterpDefs.h" #include "mterp/Mterp.h" #include // needed for fmod, fmodf +#include "mterp/common/FindInterface.h" /* * Configuration defines. These affect the C implementations, i.e. the @@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval) * If we're building without debug and profiling support, we never switch. */ #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) +#if defined(WITH_JIT) +# define NEED_INTERP_SWITCH(_current) ( \ + (_current == INTERP_STD) ? \ + dvmJitDebuggerOrProfilerActive(interpState->jitState) : \ + !dvmJitDebuggerOrProfilerActive(interpState->jitState) ) +#else # define NEED_INTERP_SWITCH(_current) ( \ (_current == INTERP_STD) ? \ dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() ) +#endif #else # define NEED_INTERP_SWITCH(_current) (false) #endif /* - * Look up an interface on a class using the cache. - */ -INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, - u4 methodIdx, const Method* method, DvmDex* methodClassDex) -{ -#define ATOMIC_CACHE_CALC \ - dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex) - - return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache, - DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx); - -#undef ATOMIC_CACHE_CALC -} - -/* * Check to see if "obj" is NULL. If so, throw an exception. Assumes the * pc has already been exported to the stack. 
* @@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) return true; } - /* File: portable/portdbg.c */ #define INTERP_FUNC_NAME dvmInterpretDbg #define INTERP_TYPE INTERP_DBG @@ -428,6 +420,14 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) #define CHECK_DEBUG_AND_PROF() \ checkDebugAndProf(pc, fp, self, curMethod, &debugIsMethodEntry) +#if defined(WITH_JIT) +#define CHECK_JIT() \ + if (dvmCheckJit(pc, self, interpState)) GOTO_bail_switch() +#else +#define CHECK_JIT() \ + ((void)0) +#endif + /* File: portable/stubdefs.c */ /* * In the C mterp stubs, "goto" is a function call followed immediately @@ -459,6 +459,7 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) inst = FETCH(0); \ CHECK_DEBUG_AND_PROF(); \ CHECK_TRACKED_REFS(); \ + CHECK_JIT(); \ goto *handlerTable[INST_INST(inst)]; \ } #else @@ -1479,11 +1480,32 @@ bool INTERP_FUNC_NAME(Thread* self, InterpState* interpState) const Method* methodToCall; bool methodCallRange; + #if defined(THREADED_INTERP) /* static computed goto table */ DEFINE_GOTO_TABLE(handlerTable); #endif +#if defined(WITH_JIT) +#if 0 + LOGD("*DebugInterp - entrypoint is %d, tgt is 0x%x, %s\n", + interpState->entryPoint, + interpState->pc, + interpState->method->name); +#endif + +#if INTERP_TYPE == INTERP_DBG + /* Check to see if we've got a trace selection request. If we do, + * but something is amiss, revert to the fast interpreter. + */ + if (dvmJitCheckTraceRequest(self,interpState)) { + interpState->nextMode = INTERP_STD; + //LOGD("** something wrong, exiting\n"); + return true; + } +#endif +#endif + /* copy state in */ curMethod = interpState->method; pc = interpState->pc; @@ -4138,6 +4160,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, #endif newSaveArea->prevFrame = fp; newSaveArea->savedPc = pc; +#if defined(WITH_JIT) + newSaveArea->returnAddr = 0; +#endif newSaveArea->method = methodToCall; if (!dvmIsNativeMethod(methodToCall)) { @@ -4232,7 +4257,6 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, assert(false); // should not get here GOTO_TARGET_END - /* File: portable/enddefs.c */ /*--- end of opcodes ---*/ diff --git a/vm/mterp/out/InterpC-portstd.c b/vm/mterp/out/InterpC-portstd.c index 90d4ab41c..baf7a86f3 100644 --- a/vm/mterp/out/InterpC-portstd.c +++ b/vm/mterp/out/InterpC-portstd.c @@ -26,6 +26,7 @@ #include "interp/InterpDefs.h" #include "mterp/Mterp.h" #include // needed for fmod, fmodf +#include "mterp/common/FindInterface.h" /* * Configuration defines. These affect the C implementations, i.e. the @@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval) * If we're building without debug and profiling support, we never switch. */ #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) +#if defined(WITH_JIT) +# define NEED_INTERP_SWITCH(_current) ( \ + (_current == INTERP_STD) ? \ + dvmJitDebuggerOrProfilerActive(interpState->jitState) : \ + !dvmJitDebuggerOrProfilerActive(interpState->jitState) ) +#else # define NEED_INTERP_SWITCH(_current) ( \ (_current == INTERP_STD) ? \ dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() ) +#endif #else # define NEED_INTERP_SWITCH(_current) (false) #endif /* - * Look up an interface on a class using the cache. 
- */ -INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, - u4 methodIdx, const Method* method, DvmDex* methodClassDex) -{ -#define ATOMIC_CACHE_CALC \ - dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex) - - return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache, - DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx); - -#undef ATOMIC_CACHE_CALC -} - -/* * Check to see if "obj" is NULL. If so, throw an exception. Assumes the * pc has already been exported to the stack. * @@ -420,13 +413,14 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) return true; } - /* File: portable/portstd.c */ #define INTERP_FUNC_NAME dvmInterpretStd #define INTERP_TYPE INTERP_STD #define CHECK_DEBUG_AND_PROF() ((void)0) +#define CHECK_JIT() ((void)0) + /* File: portable/stubdefs.c */ /* * In the C mterp stubs, "goto" is a function call followed immediately @@ -458,6 +452,7 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) inst = FETCH(0); \ CHECK_DEBUG_AND_PROF(); \ CHECK_TRACKED_REFS(); \ + CHECK_JIT(); \ goto *handlerTable[INST_INST(inst)]; \ } #else @@ -1199,11 +1194,32 @@ bool INTERP_FUNC_NAME(Thread* self, InterpState* interpState) const Method* methodToCall; bool methodCallRange; + #if defined(THREADED_INTERP) /* static computed goto table */ DEFINE_GOTO_TABLE(handlerTable); #endif +#if defined(WITH_JIT) +#if 0 + LOGD("*DebugInterp - entrypoint is %d, tgt is 0x%x, %s\n", + interpState->entryPoint, + interpState->pc, + interpState->method->name); +#endif + +#if INTERP_TYPE == INTERP_DBG + /* Check to see if we've got a trace selection request. If we do, + * but something is amiss, revert to the fast interpreter. + */ + if (dvmJitCheckTraceRequest(self,interpState)) { + interpState->nextMode = INTERP_STD; + //LOGD("** something wrong, exiting\n"); + return true; + } +#endif +#endif + /* copy state in */ curMethod = interpState->method; pc = interpState->pc; @@ -3858,6 +3874,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, #endif newSaveArea->prevFrame = fp; newSaveArea->savedPc = pc; +#if defined(WITH_JIT) + newSaveArea->returnAddr = 0; +#endif newSaveArea->method = methodToCall; if (!dvmIsNativeMethod(methodToCall)) { @@ -3952,7 +3971,6 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, assert(false); // should not get here GOTO_TARGET_END - /* File: portable/enddefs.c */ /*--- end of opcodes ---*/ diff --git a/vm/mterp/out/InterpC-x86.c b/vm/mterp/out/InterpC-x86.c index 469b69075..b17e53021 100644 --- a/vm/mterp/out/InterpC-x86.c +++ b/vm/mterp/out/InterpC-x86.c @@ -26,6 +26,7 @@ #include "interp/InterpDefs.h" #include "mterp/Mterp.h" #include // needed for fmod, fmodf +#include "mterp/common/FindInterface.h" /* * Configuration defines. These affect the C implementations, i.e. the @@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval) * If we're building without debug and profiling support, we never switch. */ #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) +#if defined(WITH_JIT) +# define NEED_INTERP_SWITCH(_current) ( \ + (_current == INTERP_STD) ? \ + dvmJitDebuggerOrProfilerActive(interpState->jitState) : \ + !dvmJitDebuggerOrProfilerActive(interpState->jitState) ) +#else # define NEED_INTERP_SWITCH(_current) ( \ (_current == INTERP_STD) ? 
\ dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() ) +#endif #else # define NEED_INTERP_SWITCH(_current) (false) #endif /* - * Look up an interface on a class using the cache. - */ -INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, - u4 methodIdx, const Method* method, DvmDex* methodClassDex) -{ -#define ATOMIC_CACHE_CALC \ - dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex) - - return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache, - DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx); - -#undef ATOMIC_CACHE_CALC -} - -/* * Check to see if "obj" is NULL. If so, throw an exception. Assumes the * pc has already been exported to the stack. * @@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc) return true; } - /* File: cstubs/stubdefs.c */ /* this is a standard (no debug support) interpreter */ #define INTERP_TYPE INTERP_STD @@ -2041,6 +2033,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, #endif newSaveArea->prevFrame = fp; newSaveArea->savedPc = pc; +#if defined(WITH_JIT) + newSaveArea->returnAddr = 0; +#endif newSaveArea->method = methodToCall; if (!dvmIsNativeMethod(methodToCall)) { @@ -2135,7 +2130,6 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall, assert(false); // should not get here GOTO_TARGET_END - /* File: cstubs/enddefs.c */ /* undefine "magic" name remapping */ diff --git a/vm/mterp/portable/entry.c b/vm/mterp/portable/entry.c index 66989597b..9c7c2d62d 100644 --- a/vm/mterp/portable/entry.c +++ b/vm/mterp/portable/entry.c @@ -29,11 +29,32 @@ bool INTERP_FUNC_NAME(Thread* self, InterpState* interpState) const Method* methodToCall; bool methodCallRange; + #if defined(THREADED_INTERP) /* static computed goto table */ DEFINE_GOTO_TABLE(handlerTable); #endif +#if defined(WITH_JIT) +#if 0 + LOGD("*DebugInterp - entrypoint is %d, tgt is 0x%x, %s\n", + interpState->entryPoint, + interpState->pc, + interpState->method->name); +#endif + +#if INTERP_TYPE == INTERP_DBG + /* Check to see if we've got a trace selection request. If we do, + * but something is amiss, revert to the fast interpreter. + */ + if (dvmJitCheckTraceRequest(self,interpState)) { + interpState->nextMode = INTERP_STD; + //LOGD("** something wrong, exiting\n"); + return true; + } +#endif +#endif + /* copy state in */ curMethod = interpState->method; pc = interpState->pc; diff --git a/vm/mterp/portable/portdbg.c b/vm/mterp/portable/portdbg.c index a657f0919..04132cb41 100644 --- a/vm/mterp/portable/portdbg.c +++ b/vm/mterp/portable/portdbg.c @@ -3,3 +3,11 @@ #define CHECK_DEBUG_AND_PROF() \ checkDebugAndProf(pc, fp, self, curMethod, &debugIsMethodEntry) + +#if defined(WITH_JIT) +#define CHECK_JIT() \ + if (dvmCheckJit(pc, self, interpState)) GOTO_bail_switch() +#else +#define CHECK_JIT() \ + ((void)0) +#endif diff --git a/vm/mterp/portable/portstd.c b/vm/mterp/portable/portstd.c index 01fbda112..f55e8e7d6 100644 --- a/vm/mterp/portable/portstd.c +++ b/vm/mterp/portable/portstd.c @@ -2,3 +2,5 @@ #define INTERP_TYPE INTERP_STD #define CHECK_DEBUG_AND_PROF() ((void)0) + +#define CHECK_JIT() ((void)0) diff --git a/vm/mterp/portable/stubdefs.c b/vm/mterp/portable/stubdefs.c index b809cafef..305aebbc5 100644 --- a/vm/mterp/portable/stubdefs.c +++ b/vm/mterp/portable/stubdefs.c @@ -28,6 +28,7 @@ inst = FETCH(0); \ CHECK_DEBUG_AND_PROF(); \ CHECK_TRACKED_REFS(); \ + CHECK_JIT(); \ goto *handlerTable[INST_INST(inst)]; \ } #else -- 2.11.0
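
For readers working through the armv5te/footer.S additions above: common_updateProfile implements the JIT's hot-trace trigger with a table of one-byte counters indexed by a cheap hash of the Dalvik PC. The C below is a minimal model of that logic, not code from this change; profTable, lookupTranslation, enterTranslation and requestTraceSelection are hypothetical stand-ins for pJitProfTable, dvmJitGetCodeAddr(), the "bx r0" exit and the kJitTSelectRequest/common_gotoBail path, and the 512-entry size is inferred from the 9-bit mask (lsl #23 / lsr #23) used in this revision.

#include <stdint.h>
#include <stddef.h>

#define PROF_TABLE_BITS    9                        /* lsl #23 / lsr #23 keeps 9 bits  */
#define PROF_TABLE_SIZE    (1 << PROF_TABLE_BITS)   /* 512 one-byte counters           */
#define PROF_COUNTER_INIT  255                      /* "mov r1,#255" after a trigger   */

static uint8_t profTable[PROF_TABLE_SIZE];          /* stand-in for pJitProfTable      */

/* Hypothetical stand-ins for dvmJitGetCodeAddr(), the "bx r0" exit and the
 * kJitTSelectRequest/common_gotoBail path in the assembly above. */
static void *lookupTranslation(uint32_t dalvikPC)      { (void)dalvikPC; return NULL; }
static void  enterTranslation(void *codeAddr)          { (void)codeAddr; }
static void  requestTraceSelection(uint32_t dalvikPC)  { (void)dalvikPC; }

/* "eor r3,rPC,rPC,lsr #12": cheap hash of the PC, then keep the low 9 bits. */
static inline uint32_t profHash(uint32_t dalvikPC)
{
    return (dalvikPC ^ (dalvikPC >> 12)) & (PROF_TABLE_SIZE - 1);
}

/* Called at every potential trace head (taken branches, returns, invokes). */
static void updateProfile(uint32_t dalvikPC)
{
    uint32_t idx = profHash(dalvikPC);

    if (--profTable[idx] != 0)            /* "subs r1,r1,#1; strb; GOTO_OPCODE_IFNE" */
        return;                           /* not hot yet: keep interpreting          */

    profTable[idx] = PROF_COUNTER_INIT;   /* reset so this head doesn't re-trigger   */

    void *codeAddr = lookupTranslation(dalvikPC);
    if (codeAddr != NULL)
        enterTranslation(codeAddr);       /* a translation already exists: run it    */
    else
        requestTraceSelection(dalvikPC);  /* otherwise bail to the debug interpreter */
}

Note that the assembly reaches the translation lookup only when a counter underflows, so the common case costs just a load, a decrement and a store per potential trace head.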
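
A second mechanism worth restating is the exit path from compiled code. When a translation returns through dvmJitToInterpNormal or dvmJitToTraceSelect, the word following the BLX holds the target Dalvik PC and lr-5 points at the exit branch (the chaining cell). The handler asks dvmJitGetCodeAddr() whether the target already has a translation; if so it calls dvmJitChain() to patch the exit so future transfers stay native, otherwise it falls back to the interpreter and treats the target as a potential trace head. The sketch below restates that decision in C. dvmJitGetCodeAddr and dvmJitChain are entry points added elsewhere in this patch (their exact prototypes live in vm/interp/Jit.h; the signatures here are only inferred from how the assembly calls them), while onTranslationExit, resumeNative and resumeInterpreter are hypothetical glue.

#include <stdint.h>
#include <stddef.h>

/* Inferred signatures for the entry points this patch adds. */
extern void *dvmJitGetCodeAddr(const uint16_t *dPC);
extern void *dvmJitChain(void *tgtAddr, uint32_t *branchAddr);

/* Hypothetical stand-ins for "bx r0" and for re-entering the interpreter. */
static void resumeNative(void *codeAddr)                { (void)codeAddr; }
static void resumeInterpreter(const uint16_t *dalvikPC) { (void)dalvikPC; }

/* What dvmJitToInterpNormal does after recovering targetPC (the word that
 * follows the BLX) and chainCell (lr - 5, the exit branch to patch). */
static void onTranslationExit(const uint16_t *targetPC, uint32_t *chainCell)
{
    void *codeAddr = dvmJitGetCodeAddr(targetPC);

    if (codeAddr != NULL) {
        /* Chain: rewrite the exit branch so the next time this edge is taken,
         * control passes translation-to-translation without touching the
         * interpreter. */
        codeAddr = dvmJitChain(codeAddr, chainCell);
        resumeNative(codeAddr);
    } else {
        /* No translation yet: resume interpreting at targetPC and let
         * common_testUpdateProfile decide whether to start a trace there. */
        resumeInterpreter(targetPC);
    }
}

The same idea shows up in common_returnFromMethod: invoke sites zero saveArea->returnAddr, and a non-zero value on return means the caller was compiled code, so the interpreter re-enters it with a blx instead of dispatching the next opcode.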