OSDN Git Service

Initial port of the Dalvik JIT engine to the internal repository.
authorBen Cheng <bccheng@android.com>
Mon, 1 Jun 2009 20:00:29 +0000 (13:00 -0700)
committerBen Cheng <bccheng@android.com>
Thu, 4 Jun 2009 19:46:11 +0000 (12:46 -0700)
Fixed files with trailing spaces.
Addressed review comments from Dan.
Addressed review comments from fadden.
Addressed review comments from Dan x 2.
Addressed review comments from Dan x 3.

76 files changed:
libdex/InstrUtils.c
libdex/InstrUtils.h
vm/Android.mk
vm/Dalvik.h
vm/Globals.h
vm/Init.c
vm/SignalCatcher.c
vm/compiler/Compiler.c [new file with mode: 0644]
vm/compiler/Compiler.h [new file with mode: 0644]
vm/compiler/CompilerIR.h [new file with mode: 0644]
vm/compiler/CompilerInternals.h [new file with mode: 0644]
vm/compiler/CompilerUtility.h [new file with mode: 0644]
vm/compiler/Frontend.c [new file with mode: 0644]
vm/compiler/IntermediateRep.c [new file with mode: 0644]
vm/compiler/Utility.c [new file with mode: 0644]
vm/compiler/codegen/CompilerCodegen.h [new file with mode: 0644]
vm/compiler/codegen/armv5te/ArchUtility.c [new file with mode: 0644]
vm/compiler/codegen/armv5te/Armv5teLIR.h [new file with mode: 0644]
vm/compiler/codegen/armv5te/Assemble.c [new file with mode: 0644]
vm/compiler/codegen/armv5te/Codegen.c [new file with mode: 0644]
vm/compiler/template/Makefile-template [new file with mode: 0644]
vm/compiler/template/README.txt [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_CMPG_DOUBLE.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_CMPL_DOUBLE.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_CMP_LONG.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_MUL_LONG.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_RETURN.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_SHL_LONG.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_SHR_LONG.S [new file with mode: 0644]
vm/compiler/template/armv5te/TEMPLATE_USHR_LONG.S [new file with mode: 0644]
vm/compiler/template/armv5te/TemplateOpList.h [new file with mode: 0644]
vm/compiler/template/armv5te/footer.S [new file with mode: 0644]
vm/compiler/template/armv5te/header.S [new file with mode: 0644]
vm/compiler/template/armv5te/platform.S [new file with mode: 0644]
vm/compiler/template/config-armv5te [new file with mode: 0644]
vm/compiler/template/gen-template.py [new file with mode: 0755]
vm/compiler/template/out/CompilerTemplateAsm-armv5te.S [new file with mode: 0644]
vm/compiler/template/rebuild.sh [new file with mode: 0755]
vm/interp/Interp.c
vm/interp/InterpDefs.h
vm/interp/Jit.c [new file with mode: 0644]
vm/interp/Jit.h [new file with mode: 0644]
vm/interp/Stack.h
vm/mterp/Mterp.c
vm/mterp/Mterp.h
vm/mterp/armv5te/OP_GOTO.S
vm/mterp/armv5te/OP_GOTO_16.S
vm/mterp/armv5te/OP_GOTO_32.S
vm/mterp/armv5te/OP_PACKED_SWITCH.S
vm/mterp/armv5te/bincmp.S
vm/mterp/armv5te/entry.S
vm/mterp/armv5te/footer.S
vm/mterp/armv5te/header.S
vm/mterp/armv5te/zcmp.S
vm/mterp/c/gotoTargets.c
vm/mterp/c/header.c
vm/mterp/common/FindInterface.h [new file with mode: 0644]
vm/mterp/common/asm-constants.h
vm/mterp/out/InterpAsm-armv4t.S
vm/mterp/out/InterpAsm-armv5te-vfp.S
vm/mterp/out/InterpAsm-armv5te.S
vm/mterp/out/InterpC-allstubs.c
vm/mterp/out/InterpC-armv4t.c
vm/mterp/out/InterpC-armv5te-vfp.c
vm/mterp/out/InterpC-armv5te.c
vm/mterp/out/InterpC-portdbg.c
vm/mterp/out/InterpC-portstd.c
vm/mterp/out/InterpC-x86.c
vm/mterp/portable/entry.c
vm/mterp/portable/portdbg.c
vm/mterp/portable/portstd.c
vm/mterp/portable/stubdefs.c

index 33c7e7d..b58e647 100644 (file)
@@ -539,16 +539,6 @@ InstructionFlags* dexCreateInstrFlagsTable(void)
         case OP_SPUT_SHORT:
         case OP_SPUT_WIDE:
         case OP_SPUT_OBJECT:
-        case OP_INVOKE_VIRTUAL:
-        case OP_INVOKE_VIRTUAL_RANGE:
-        case OP_INVOKE_SUPER:
-        case OP_INVOKE_SUPER_RANGE:
-        case OP_INVOKE_DIRECT:
-        case OP_INVOKE_DIRECT_RANGE:
-        case OP_INVOKE_STATIC:
-        case OP_INVOKE_STATIC_RANGE:
-        case OP_INVOKE_INTERFACE:
-        case OP_INVOKE_INTERFACE_RANGE:
         case OP_DIV_INT:
         case OP_REM_INT:
         case OP_DIV_LONG:
@@ -564,6 +554,19 @@ InstructionFlags* dexCreateInstrFlagsTable(void)
             flags = kInstrCanContinue | kInstrCanThrow;
             break;
 
+        case OP_INVOKE_VIRTUAL:
+        case OP_INVOKE_VIRTUAL_RANGE:
+        case OP_INVOKE_SUPER:
+        case OP_INVOKE_SUPER_RANGE:
+        case OP_INVOKE_DIRECT:
+        case OP_INVOKE_DIRECT_RANGE:
+        case OP_INVOKE_STATIC:
+        case OP_INVOKE_STATIC_RANGE:
+        case OP_INVOKE_INTERFACE:
+        case OP_INVOKE_INTERFACE_RANGE:
+            flags = kInstrCanContinue | kInstrCanThrow | kInstrInvoke;
+            break;
+
         case OP_RETURN_VOID:
         case OP_RETURN:
         case OP_RETURN_WIDE:
@@ -579,7 +582,7 @@ InstructionFlags* dexCreateInstrFlagsTable(void)
         case OP_GOTO:
         case OP_GOTO_16:
         case OP_GOTO_32:
-            flags = kInstrCanBranch;
+            flags = kInstrCanBranch | kInstrUnconditional;
             break;
 
         /* conditional branches */
@@ -617,12 +620,15 @@ InstructionFlags* dexCreateInstrFlagsTable(void)
         case OP_IPUT_QUICK:
         case OP_IPUT_WIDE_QUICK:
         case OP_IPUT_OBJECT_QUICK:
+            flags = kInstrCanContinue | kInstrCanThrow;
+            break;
+
         case OP_INVOKE_VIRTUAL_QUICK:
         case OP_INVOKE_VIRTUAL_QUICK_RANGE:
         case OP_INVOKE_SUPER_QUICK:
         case OP_INVOKE_SUPER_QUICK_RANGE:
         case OP_INVOKE_DIRECT_EMPTY:
-            flags = kInstrCanContinue | kInstrCanThrow;
+            flags = kInstrCanContinue | kInstrCanThrow | kInstrInvoke;
             break;
 
         /* these should never appear */
@@ -651,6 +657,7 @@ InstructionFlags* dexCreateInstrFlagsTable(void)
         case OP_UNUSED_FD:
         case OP_UNUSED_FE:
         case OP_UNUSED_FF:
+            flags = kInstrNoJit;
             break;
 
         /*
@@ -1238,4 +1245,3 @@ int dexGetInstrOrTableWidthAbs(const InstructionWidth* widths, const u2* insns)
     }
     return width;
 }
-
index 7621b8e..5ca175e 100644 (file)
@@ -99,6 +99,9 @@ enum InstructionFlags {
     kInstrCanSwitch     = 1 << 2,   // switch statement
     kInstrCanThrow      = 1 << 3,   // could cause an exception to be thrown
     kInstrCanReturn     = 1 << 4,   // returns, no additional statements
+    kInstrInvoke        = 1 << 5,   // a flavor of invoke
+    kInstrUnconditional = 1 << 6,   // unconditional branch
+    kInstrNoJit         = 1 << 7,   // don't jit trace containing this
 };
 
 
index 3592910..a2c2d57 100644 (file)
@@ -184,6 +184,21 @@ LOCAL_SRC_FILES := \
        test/AtomicSpeed.c \
        test/TestHash.c
 
+ifeq ($(WITH_JIT_TUNING),true)
+  LOCAL_CFLAGS += -DWITH_JIT_TUNING
+endif
+
+ifeq ($(WITH_JIT),true)
+  LOCAL_CFLAGS += -DWITH_JIT
+  LOCAL_SRC_FILES += \
+       ../dexdump/OpCodeNames.c \
+       compiler/Compiler.c \
+       compiler/Frontend.c \
+       compiler/Utility.c \
+       compiler/IntermediateRep.c \
+       interp/Jit.c
+endif
+
 WITH_HPROF := $(strip $(WITH_HPROF))
 ifeq ($(WITH_HPROF),)
   WITH_HPROF := true
@@ -242,6 +257,14 @@ ifeq ($(TARGET_ARCH),arm)
                mterp/out/InterpC-$(TARGET_ARCH_VARIANT).c.arm \
                mterp/out/InterpAsm-$(TARGET_ARCH_VARIANT).S
   LOCAL_SHARED_LIBRARIES += libdl
+  # TODO - may become TARGET_ARCH_VARIANT specific
+  ifeq ($(WITH_JIT),true)
+    LOCAL_SRC_FILES += \
+               compiler/codegen/armv5te/Codegen.c \
+               compiler/codegen/armv5te/Assemble.c \
+               compiler/codegen/armv5te/ArchUtility.c \
+               compiler/template/out/CompilerTemplateAsm-armv5te.S
+  endif
 else
   ifeq ($(TARGET_ARCH),x86)
     LOCAL_SRC_FILES += \
index 29abc2c..618d51a 100644 (file)
@@ -73,6 +73,9 @@
 #include "libdex/InstrUtils.h"
 #include "AllocTracker.h"
 #include "PointerSet.h"
+#if defined(WITH_JIT)
+#include "compiler/Compiler.h"
+#endif
 #include "Globals.h"
 #include "reflect/Reflect.h"
 #include "oo/TypeCheck.h"
index 1a81b93..b9c73fe 100644 (file)
@@ -54,6 +54,9 @@ typedef enum ExecutionMode {
     kExecutionModeUnknown = 0,
     kExecutionModeInterpPortable,
     kExecutionModeInterpFast,
+#if defined(WITH_JIT)
+    kExecutionModeJit,
+#endif
 } ExecutionMode;
 
 /*
@@ -328,6 +331,8 @@ struct DvmGlobals {
      *  (3) a thread has hit a breakpoint or exception that the debugger
      *      has marked as a "suspend all" event;
      *  (4) the SignalCatcher caught a signal that requires suspension.
+     *  (5) (if implemented) the JIT needs to perform a heavyweight
+     *      rearrangement of the translation cache or JitTable.
      *
      * Because we use "safe point" self-suspension, it is never safe to
      * do a blocking "lock" call on this mutex -- if it has been acquired,
@@ -612,4 +617,100 @@ struct DvmGlobals {
 
 extern struct DvmGlobals gDvm;
 
+#if defined(WITH_JIT)
+/*
+ * JIT-specific global state
+ */
+struct DvmJitGlobals {
+    /*
+     * Guards writes to Dalvik PC (dPC), translated code address (codeAddr) and
+     * chain fields within the JIT hash table.  Note carefully the access
+     * mechanism.
+     * Only writes are guarded, and the guarded fields must be updated in a
+     * specific order using atomic operations.  Further, once a field is
+     * written it cannot be changed without halting all threads.
+     *
+     * The write order is:
+     *    1) codeAddr
+     *    2) dPC
+     *    3) chain [if necessary]
+     *
+     * This mutex also guards both read and write of curJitTableEntries.
+     */
+    pthread_mutex_t tableLock;
+
+    /* The JIT hash table.  Note that for access speed, copies of this pointer
+     * are stored in each thread. */
+    struct JitEntry *pJitEntryTable;
+
+    /* Array of profile threshold counters */
+    unsigned char *pProfTable;
+    unsigned char *pProfTableCopy;
+
+    /* Size of JIT hash table in entries.  Must be a power of 2 */
+    unsigned int maxTableEntries;
+
+    /* Trigger for trace selection */
+    unsigned short threshold;
+
+    /* JIT Compiler Control */
+    bool               haltCompilerThread;
+    bool               blockingMode;
+    pthread_t          compilerHandle;
+    pthread_mutex_t    compilerLock;
+    pthread_cond_t     compilerQueueActivity;
+    pthread_cond_t     compilerQueueEmpty;
+    int                compilerQueueLength;
+    int                compilerHighWater;
+    int                compilerWorkEnqueueIndex;
+    int                compilerWorkDequeueIndex;
+    CompilerWorkOrder  compilerWorkQueue[COMPILER_WORK_QUEUE_SIZE];
+
+    /* JIT internal stats */
+    int                compilerMaxQueued;
+    int                addrLookupsFound;
+    int                addrLookupsNotFound;
+    int                noChainExit;
+    int                normalExit;
+    int                puntExit;
+    int                translationChains;
+    int                invokeNoOpt;
+    int                InvokeChain;
+    int                returnOp;
+
+    /* Compiled code cache */
+    void* codeCache;
+
+    /* Bytes already used in the code cache */
+    unsigned int codeCacheByteUsed;
+
+    /* Number of installed compilations in the cache */
+    unsigned int numCompilations;
+
+    /* Flag to indicate that the code cache is full */
+    bool codeCacheFull;
+
+    /* true/false: compile/reject opcodes specified in the -Xjitop list */
+    bool includeSelectedOp;
+
+    /* true/false: compile/reject methods specified in the -Xjitmethod list */
+    bool includeSelectedMethod;
+
+    /* Disable JIT for selected opcodes - one bit for each opcode */
+    char opList[32];
+
+    /* Disable JIT for selected methods */
+    HashTable *methodTable;
+
+    /* Record how many times an opcode has been JIT'ed */
+    int opHistogram[256];
+
+    /* Flag to dump all compiled code */
+    bool printMe;
+};
+
+extern struct DvmJitGlobals gDvmJit;
+
+#endif
+
 #endif /*_DALVIK_GLOBALS*/
index 4ba10b7..5295f49 100644 (file)
--- a/vm/Init.c
+++ b/vm/Init.c
@@ -20,6 +20,7 @@
 #include "Dalvik.h"
 #include "test/Test.h"
 #include "mterp/Mterp.h"
+#include "Hash.h"
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -49,6 +50,11 @@ static bool dvmInitZygote(void);
 /* global state */
 struct DvmGlobals gDvm;
 
+/* JIT-specific global state */
+#if defined(WITH_JIT)
+struct DvmJitGlobals gDvmJit;
+#endif
+
 /*
  * Show usage.
  *
@@ -83,8 +89,13 @@ static void dvmUsage(const char* progName)
         kMinStackSize / 1024, kMaxStackSize / 1024);
     dvmFprintf(stderr, "  -Xverify:{none,remote,all}\n");
     dvmFprintf(stderr, "  -Xrs\n");
+#if defined(WITH_JIT)
+    dvmFprintf(stderr,
+                "  -Xint  (extended to accept ':portable', ':fast' and ':jit')\n");
+#else
     dvmFprintf(stderr,
                 "  -Xint  (extended to accept ':portable' and ':fast')\n");
+#endif
     dvmFprintf(stderr, "\n");
     dvmFprintf(stderr, "These are unique to Dalvik:\n");
     dvmFprintf(stderr, "  -Xzygote\n");
@@ -98,6 +109,17 @@ static void dvmUsage(const char* progName)
     dvmFprintf(stderr, "  -Xgc:[no]precise\n");
     dvmFprintf(stderr, "  -Xgenregmap\n");
     dvmFprintf(stderr, "  -Xcheckdexsum\n");
+#if defined(WITH_JIT)
+    dvmFprintf(stderr, "  -Xincludeselectedop\n");
+    dvmFprintf(stderr, "  -Xjitop:hexopvalue[-endvalue]"
+                       "[,hexopvalue[-endvalue]]*\n");
+    dvmFprintf(stderr, "  -Xincludeselectedmethod\n");
+    dvmFprintf(stderr, "  -Xthreshold:decimalvalue\n");
+    dvmFprintf(stderr, "  -Xblocking\n");
+    dvmFprintf(stderr, "  -Xjitmethod:signature[,signature]* "
+                       "(eg Ljava/lang/String\\;replace)\n");
+    dvmFprintf(stderr, "  -Xjitverbose\n");
+#endif
     dvmFprintf(stderr, "\n");
     dvmFprintf(stderr, "Configured with:"
 #ifdef WITH_DEBUGGER
@@ -161,6 +183,9 @@ static void dvmUsage(const char* progName)
 #elif DVM_RESOLVER_CACHE == DVM_RC_NO_CACHE
         " resolver_cache_disabled"
 #endif
+#if defined(WITH_JIT)
+        " with_jit"
+#endif
     );
 #ifdef DVM_SHOW_EXCEPTION
     dvmFprintf(stderr, " show_exception=%d", DVM_SHOW_EXCEPTION);
@@ -531,6 +556,97 @@ static void freeAssertionCtrl(void)
     free(gDvm.assertionCtrl);
 }
 
+#if defined(WITH_JIT)
+/* Parse -Xjitop to selectively turn on/off certain opcodes for JIT */
+static void processXjitop(const char *opt)
+{
+    if (opt[7] == ':') {
+        const char *startPtr = &opt[8];
+        char *endPtr = NULL;
+
+        do {
+            long startValue, endValue;
+
+            startValue = strtol(startPtr, &endPtr, 16);
+            if (startPtr != endPtr) {
+                /* Just in case value is out of range */
+                startValue &= 0xff;
+
+                if (*endPtr == '-') {
+                    endValue = strtol(endPtr+1, &endPtr, 16);
+                    endValue &= 0xff;
+                } else {
+                    endValue = startValue;
+                }
+
+                for (; startValue <= endValue; startValue++) {
+                    LOGW("Dalvik opcode %x is selected for debugging",
+                         (unsigned int) startValue);
+                    /* Mark the corresponding bit to 1 */
+                    gDvmJit.opList[startValue >> 3] |=
+                        1 << (startValue & 0x7);
+                }
+
+                if (*endPtr == 0) {
+                    break;
+                }
+
+                startPtr = endPtr + 1;
+
+                continue;
+            } else {
+                if (*endPtr != 0) {
+                    dvmFprintf(stderr,
+                        "Warning: Unrecognized opcode value substring "
+                        "%s\n", endPtr);
+                }
+                break;
+            }
+        } while (1);
+    } else {
+        int i;
+        for (i = 0; i < 32; i++) {
+            gDvmJit.opList[i] = 0xff;
+        }
+        dvmFprintf(stderr, "Warning: select all opcodes\n");
+    }
+}
+
+/* Parse -Xjitmethod to selectively turn on/off certain methods for JIT */
+static void processXjitmethod(const char *opt)
+{
+    char *buf = strdup(&opt[12]);
+    char *start, *end;
+
+    gDvmJit.methodTable = dvmHashTableCreate(8, NULL);
+
+    start = buf;
+    /* 
+     * Break comma-separated method signatures and enter them into the hash
+     * table individually.
+     */
+    do {
+        int hashValue;
+
+        end = strchr(start, ',');
+        if (end) {
+            *end = 0;
+        }
+
+        hashValue = dvmComputeUtf8Hash(start);
+
+        dvmHashTableLookup(gDvmJit.methodTable, hashValue,
+                           strdup(start),
+                           (HashCompareFunc) strcmp, true);
+        if (end) {
+            start = end + 1;
+        } else {
+            break;
+        }
+    } while (1);
+    free(buf);
+}
+#endif
 
 /*
  * Process an argument vector full of options.  Unlike standard C programs,
@@ -760,6 +876,10 @@ static int dvmProcessOptions(int argc, const char* const argv[],
                     gDvm.executionMode = kExecutionModeInterpPortable;
                 else if (strcmp(argv[i] + 6, "fast") == 0)
                     gDvm.executionMode = kExecutionModeInterpFast;
+#ifdef WITH_JIT
+                else if (strcmp(argv[i] + 6, "jit") == 0)
+                    gDvm.executionMode = kExecutionModeJit;
+#endif
                 else {
                     dvmFprintf(stderr,
                         "Warning: Unrecognized interpreter mode %s\n",argv[i]);
@@ -769,6 +889,23 @@ static int dvmProcessOptions(int argc, const char* const argv[],
                 /* disable JIT -- nothing to do here for now */
             }
 
+#ifdef WITH_JIT
+        } else if (strncmp(argv[i], "-Xjitop", 7) == 0) {
+            processXjitop(argv[i]);
+        } else if (strncmp(argv[i], "-Xjitmethod", 11) == 0) {
+            processXjitmethod(argv[i]);
+        } else if (strncmp(argv[i], "-Xblocking", 10) == 0) {
+          gDvmJit.blockingMode = true;
+        } else if (strncmp(argv[i], "-Xthreshold:", 12) == 0) {
+          gDvmJit.threshold = atoi(argv[i] + 12);
+        } else if (strncmp(argv[i], "-Xincludeselectedop", 19) == 0) {
+          gDvmJit.includeSelectedOp = true;
+        } else if (strncmp(argv[i], "-Xincludeselectedmethod", 23) == 0) {
+          gDvmJit.includeSelectedMethod = true;
+        } else if (strncmp(argv[i], "-Xjitverbose", 12) == 0) {
+          gDvmJit.printMe = true;
+#endif
+
         } else if (strncmp(argv[i], "-Xdeadlockpredict:", 18) == 0) {
 #ifdef WITH_DEADLOCK_PREDICTION
             if (strcmp(argv[i] + 18, "off") == 0)
@@ -867,7 +1004,18 @@ static void setCommandLineDefaults()
      * we know we're using the "desktop" build we should probably be
      * using "portable" rather than "fast".
      */
+#if defined(WITH_JIT)
+    gDvm.executionMode = kExecutionModeJit;
+    /* 
+     * TODO - check system property and insert command-line options in 
+     *        frameworks/base/core/jni/AndroidRuntime.cpp
+     */
+    gDvmJit.blockingMode = false;
+    gDvmJit.maxTableEntries = 2048;
+    gDvmJit.threshold = 200;
+#else
     gDvm.executionMode = kExecutionModeInterpFast;
+#endif
 }
 
 
@@ -904,6 +1052,9 @@ static void blockSignals()
     sigemptyset(&mask);
     sigaddset(&mask, SIGQUIT);
     sigaddset(&mask, SIGUSR1);      // used to initiate heap dump
+#if defined(WITH_JIT) && defined(WITH_JIT_TUNING)
+    sigaddset(&mask, SIGUSR2);      // used to investigate JIT internals
+#endif
     //sigaddset(&mask, SIGPIPE);
     cc = sigprocmask(SIG_BLOCK, &mask, NULL);
     assert(cc == 0);
@@ -1195,6 +1346,11 @@ bool dvmInitAfterZygote(void)
         (int)(endHeap-startHeap), (int)(endQuit-startQuit),
         (int)(endJdwp-startJdwp), (int)(endJdwp-startHeap));
 
+#ifdef WITH_JIT
+    if (!dvmJitStartup())
+        return false;
+#endif
+
     return true;
 }
 
@@ -1389,6 +1545,9 @@ void dvmShutdown(void)
 
     LOGD("VM cleaning up\n");
 
+#ifdef WITH_JIT
+    dvmJitShutdown();
+#endif
     dvmDebuggerShutdown();
     dvmReflectShutdown();
 #ifdef WITH_PROFILER
index 550f777..adcff9f 100644 (file)
@@ -192,6 +192,9 @@ static void* signalCatcherThreadStart(void* arg)
     sigemptyset(&mask);
     sigaddset(&mask, SIGQUIT);
     sigaddset(&mask, SIGUSR1);
+#if defined(WITH_JIT) && defined(WITH_JIT_TUNING)
+    sigaddset(&mask, SIGUSR2);
+#endif
 
     while (true) {
         int rcvd;
@@ -253,6 +256,11 @@ loop:
             LOGI("SIGUSR1 forcing GC (no HPROF)\n");
             dvmCollectGarbage(false);
 #endif
+#if defined(WITH_JIT) && defined(WITH_JIT_TUNING)
+        } else if (rcvd == SIGUSR2) {
+            gDvmJit.printMe ^= true;
+            dvmCompilerDumpStats();
+#endif
         } else {
             LOGE("unexpected signal %d\n", rcvd);
         }
@@ -260,4 +268,3 @@ loop:
 
     return NULL;
 }
-
diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c
new file mode 100644 (file)
index 0000000..dc24977
--- /dev/null
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/mman.h>
+#include <errno.h>
+
+#include "Dalvik.h"
+#include "interp/Jit.h"
+#include "CompilerInternals.h"
+
+
+static inline bool workQueueLength(void)
+{
+    return gDvmJit.compilerQueueLength;
+}
+
+static CompilerWorkOrder workDequeue(void)
+{
+    assert(gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkDequeueIndex].kind
+           != kWorkOrderInvalid);
+    CompilerWorkOrder work =
+        gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkDequeueIndex];
+    gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkDequeueIndex++].kind =
+        kWorkOrderInvalid;
+    if (gDvmJit.compilerWorkDequeueIndex == COMPILER_WORK_QUEUE_SIZE) {
+        gDvmJit.compilerWorkDequeueIndex = 0;
+    }
+    gDvmJit.compilerQueueLength--;
+
+    /* Remember the high water mark of the queue length */
+    if (gDvmJit.compilerQueueLength > gDvmJit.compilerMaxQueued)
+        gDvmJit.compilerMaxQueued = gDvmJit.compilerQueueLength;
+
+    return work;
+}
+
+bool dvmCompilerWorkEnqueue(const u2 *pc, WorkOrderKind kind, void* info)
+{
+    int cc;
+    int i;
+    int numWork;
+
+    dvmLockMutex(&gDvmJit.compilerLock);
+
+    /* Queue full */
+    if (gDvmJit.compilerQueueLength == COMPILER_WORK_QUEUE_SIZE ||
+        gDvmJit.codeCacheFull == true) {
+        dvmUnlockMutex(&gDvmJit.compilerLock);
+        return false;
+    }
+
+    for (numWork = gDvmJit.compilerQueueLength,
+           i = gDvmJit.compilerWorkDequeueIndex;
+         numWork > 0;
+         numWork--) {
+        /* Already enqueued */
+        if (gDvmJit.compilerWorkQueue[i++].pc == pc)
+            goto done;
+        /* Wrap around */
+        if (i == COMPILER_WORK_QUEUE_SIZE)
+            i = 0;
+    }
+
+    gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkEnqueueIndex].pc = pc;
+    gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkEnqueueIndex].kind = kind;
+    gDvmJit.compilerWorkQueue[gDvmJit.compilerWorkEnqueueIndex].info = info;
+    gDvmJit.compilerWorkEnqueueIndex++;
+    if (gDvmJit.compilerWorkEnqueueIndex == COMPILER_WORK_QUEUE_SIZE)
+        gDvmJit.compilerWorkEnqueueIndex = 0;
+    gDvmJit.compilerQueueLength++;
+    cc = pthread_cond_signal(&gDvmJit.compilerQueueActivity);
+    assert(cc == 0);
+
+done:
+    dvmUnlockMutex(&gDvmJit.compilerLock);
+    return true;
+}
+
+/* Block until queue length is 0 */
+void dvmCompilerDrainQueue(void)
+{
+    dvmLockMutex(&gDvmJit.compilerLock);
+    while (workQueueLength() != 0 && !gDvmJit.haltCompilerThread) {
+        pthread_cond_wait(&gDvmJit.compilerQueueEmpty, &gDvmJit.compilerLock);
+    }
+    dvmUnlockMutex(&gDvmJit.compilerLock);
+}
+
+static void *compilerThreadStart(void *arg)
+{
+    dvmLockMutex(&gDvmJit.compilerLock);
+    /*
+     * Since the compiler thread will not touch any objects on the heap once
+     * being created, we just fake its state as VMWAIT so that it can be a
+     * bit late when there is a suspend request pending.
+     */
+    dvmChangeStatus(NULL, THREAD_VMWAIT);
+    while (!gDvmJit.haltCompilerThread) {
+        if (workQueueLength() == 0) {
+            int cc;
+            cc = pthread_cond_signal(&gDvmJit.compilerQueueEmpty);
+            assert(cc == 0);
+            pthread_cond_wait(&gDvmJit.compilerQueueActivity,
+                              &gDvmJit.compilerLock);
+            continue;
+        } else {
+            do {
+                void *compiledCodePtr;
+                CompilerWorkOrder work = workDequeue();
+                dvmUnlockMutex(&gDvmJit.compilerLock);
+                /* Check whether there is a suspend request on me */
+                dvmCheckSuspendPending(NULL);
+                if (gDvmJit.haltCompilerThread) {
+                    LOGD("Compiler shutdown in progress - discarding request");
+                } else {
+                    compiledCodePtr = dvmCompilerDoWork(&work);
+                    /* Compilation is successful */
+                    if (compiledCodePtr) {
+                        dvmJitSetCodeAddr(work.pc, compiledCodePtr);
+                    }
+                }
+                free(work.info);
+                dvmLockMutex(&gDvmJit.compilerLock);
+            } while (workQueueLength() != 0);
+        }
+    }
+    pthread_cond_signal(&gDvmJit.compilerQueueEmpty);
+    dvmUnlockMutex(&gDvmJit.compilerLock);
+    return NULL;
+}
+
+bool dvmCompilerSetupCodeCache(void)
+{
+    extern void dvmCompilerTemplateStart(void);
+    extern void dmvCompilerTemplateEnd(void);
+
+    /* Allocate the code cache */
+    gDvmJit.codeCache = mmap(0, CODE_CACHE_SIZE,
+                          PROT_READ | PROT_WRITE | PROT_EXEC,
+                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (gDvmJit.codeCache == MAP_FAILED) {
+        LOGE("Failed to create the code cache: %s\n", strerror(errno));
+        return false;
+    }
+
+    /* Copy the template code into the beginning of the code cache */
+    int templateSize = (intptr_t) dmvCompilerTemplateEnd -
+                       (intptr_t) dvmCompilerTemplateStart;
+    memcpy((void *) gDvmJit.codeCache,
+           (void *) dvmCompilerTemplateStart,
+           templateSize);
+    gDvmJit.codeCacheByteUsed = templateSize;
+
+    /* Flush dcache and invalidate the icache to maintain coherence */
+    cacheflush((intptr_t) gDvmJit.codeCache,
+               (intptr_t) gDvmJit.codeCache + CODE_CACHE_SIZE, 0);
+    return true;
+}
+
+bool dvmCompilerStartup(void)
+{
+    /* Make sure the BBType enum is in sane state */
+    assert(CHAINING_CELL_GENERIC == 0);
+
+    /* Architecture-specific chores to initialize */
+    if (!dvmCompilerArchInit())
+        goto fail;
+
+    /*
+     * Setup the code cache if it is not done so already. For apps it should be
+     * done by the Zygote already, but for command-line dalvikvm invocation we
+     * need to do it here.
+     */
+    if (gDvmJit.codeCache == NULL) {
+        if (!dvmCompilerSetupCodeCache())
+            goto fail;
+    }
+
+    /* Allocate the initial arena block */
+    if (dvmCompilerHeapInit() == false) {
+        goto fail;
+    }
+
+    dvmInitMutex(&gDvmJit.compilerLock);
+    pthread_cond_init(&gDvmJit.compilerQueueActivity, NULL);
+    pthread_cond_init(&gDvmJit.compilerQueueEmpty, NULL);
+
+    dvmLockMutex(&gDvmJit.compilerLock);
+
+    gDvmJit.haltCompilerThread = false;
+
+    /* Reset the work queue */
+    memset(gDvmJit.compilerWorkQueue, 0,
+           sizeof(CompilerWorkOrder) * COMPILER_WORK_QUEUE_SIZE);
+    gDvmJit.compilerWorkEnqueueIndex = gDvmJit.compilerWorkDequeueIndex = 0;
+    gDvmJit.compilerQueueLength = 0;
+    gDvmJit.compilerHighWater =
+        COMPILER_WORK_QUEUE_SIZE - (COMPILER_WORK_QUEUE_SIZE/4);
+
+    assert(gDvmJit.compilerHighWater < COMPILER_WORK_QUEUE_SIZE);
+    if (!dvmCreateInternalThread(&gDvmJit.compilerHandle, "Compiler",
+                                 compilerThreadStart, NULL)) {
+        dvmUnlockMutex(&gDvmJit.compilerLock);
+        goto fail;
+    }
+
+    dvmUnlockMutex(&gDvmJit.compilerLock);
+
+    return true;
+
+fail:
+    return false;
+}
+
+void dvmCompilerShutdown(void)
+{
+    void *threadReturn;
+
+    if (gDvmJit.compilerHandle) {
+
+        gDvmJit.haltCompilerThread = true;
+
+        dvmLockMutex(&gDvmJit.compilerLock);
+        pthread_cond_signal(&gDvmJit.compilerQueueActivity);
+        dvmUnlockMutex(&gDvmJit.compilerLock);
+
+        pthread_join(gDvmJit.compilerHandle, &threadReturn);
+    }
+}
diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h
new file mode 100644 (file)
index 0000000..7209701
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_COMPILER
+#define _DALVIK_VM_COMPILER
+
+#define CODE_CACHE_SIZE                 1024*1024
+#define MAX_JIT_RUN_LEN                 64
+#define COMPILER_WORK_QUEUE_SIZE        100
+
+#define COMPILER_TRACED(X)
+#define COMPILER_TRACEE(X)
+#define COMPILER_TRACE_CHAINING(X)
+
+typedef enum WorkOrderKind {
+    kWorkOrderInvalid = 0,      // Should never be seen by the backend
+    kWorkOrderMethod = 1,       // Work is to compile a whole method
+    kWorkOrderTrace = 2,        // Work is to compile code fragment(s)
+} WorkOrderKind;
+
+typedef struct CompilerWorkOrder {
+    const u2* pc;
+    WorkOrderKind kind;
+    void* info;
+} CompilerWorkOrder;
+
+typedef enum JitState {
+    kJitOff = 0,
+    kJitNormal = 1,            // Profiling in mterp or running native
+    kJitTSelectRequest = 2,    // Transition state - start trace selection
+    kJitTSelect = 3,           // Actively selecting trace in dbg interp
+    kJitTSelectAbort = 4,      // Something threw during selection - abort
+    kJitTSelectEnd = 5,        // Done with the trace - wrap it up
+    kJitSingleStep = 6,        // Single step interpretation
+    kJitSingleStepEnd = 7,     // Done with single step, return to mterp
+} JitState;
+
+typedef enum JitHint {
+   kJitHintNone = 0,
+   kJitHintTaken = 1,         // Last inst in run was taken branch
+   kJitHintNotTaken = 2,      // Last inst in run was not taken branch
+   kJitHintNoBias = 3,        // Last inst in run was unbiased branch
+} jitHint;
+
+/*
+ * Element of a Jit trace description.  Describes a contiguous
+ * sequence of Dalvik byte codes, the last of which can be
+ * associated with a hint.
+ * Dalvik byte code
+ */
+typedef struct {
+    u2    startOffset;       // Starting offset for trace run
+    unsigned numInsts:8;     // Number of Byte codes in run
+    unsigned runEnd:1;       // Run ends with last byte code
+    jitHint  hint:7;         // Hint to apply to final code of run
+} JitCodeDesc;
+
+typedef union {
+    JitCodeDesc frag;
+    void*       hint;
+} JitTraceRun;
+
+/*
+ * Trace description as will appear in the translation cache.  Note
+ * flexible array at end, as these will be of variable size.  To
+ * conserve space in the translation cache, total length of JitTraceRun
+ * array must be recomputed via sequential scan if needed.
+ */
+typedef struct {
+    const Method* method;
+    JitTraceRun trace[];
+} JitTraceDescription;
+
+bool dvmCompilerSetupCodeCache(void);
+bool dvmCompilerArchInit(void);
+void dvmCompilerArchDump(void);
+bool dvmCompilerStartup(void);
+void dvmCompilerShutdown(void);
+bool dvmCompilerWorkEnqueue(const u2* pc, WorkOrderKind kind, void* info);
+void *dvmCheckCodeCache(void *method);
+void *dvmCompileMethod(Method *method);
+void *dvmCompileTrace(JitTraceDescription *trace);
+void dvmCompilerDumpStats(void);
+void dvmCompilerDrainQueue(void);
+
+#endif /* _DALVIK_VM_COMPILER */
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
new file mode 100644 (file)
index 0000000..6ffdf44
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_COMPILER_IR
+#define _DALVIK_VM_COMPILER_IR
+
+/* Kinds of basic blocks handled by the compiler */
+typedef enum BBType {
+    /* For coding convenience reasons chaining cell types should appear first */
+    CHAINING_CELL_GENERIC = 0,
+    CHAINING_CELL_POST_INVOKE,
+    CHAINING_CELL_INVOKE,
+    CHAINING_CELL_LAST,                 // Sentinel: number of chaining kinds
+    DALVIK_BYTECODE,                    // Block of real Dalvik bytecode
+    PC_RECONSTRUCTION,                  // Pseudo block hosting PC rebuild code
+    EXCEPTION_HANDLING,                 // Pseudo block raising the exception
+} BBType;
+
+/* Low-level IR instruction: doubly-linked list node with a branch target */
+typedef struct LIR {
+    int offset;                         // Offset of this LIR in the code
+    struct LIR *next;
+    struct LIR *prev;
+    struct LIR *target;                 // Branch target, if any
+} LIR;
+
+/* Middle-level IR instruction: a decoded Dalvik bytecode */
+typedef struct MIR {
+    DecodedInstruction dalvikInsn;
+    unsigned int width;                 // Width in 16-bit code units
+    unsigned int offset;                // Bytecode offset within the method
+    struct MIR *prev;
+    struct MIR *next;
+} MIR;
+
+/* A basic block of MIR instructions plus control-flow links */
+typedef struct BasicBlock {
+    int id;
+    int visited;
+    unsigned int startOffset;           // Bytecode offset of the first insn
+    const Method *containingMethod;     // For blocks from the callee
+    BBType blockType;
+    MIR *firstMIRInsn;
+    MIR *lastMIRInsn;
+    struct BasicBlock *fallThrough;
+    struct BasicBlock *taken;
+    struct BasicBlock *next;            // Serial link for book keeping purposes
+} BasicBlock;
+
+/* Per-compilation state: blocks in, LIR/machine code out */
+typedef struct CompilationUnit {
+    int numBlocks;
+    BasicBlock **blockList;
+    const Method *method;
+    const JitTraceDescription *traceDesc;
+    LIR *firstLIRInsn;
+    LIR *lastLIRInsn;
+    LIR *wordList;
+    GrowableList pcReconstructionList;
+    int dataOffset;                     // Offset of data section in codeBuffer
+    int totalSize;
+    unsigned char *codeBuffer;
+    void *baseAddr;                     // Start of the installed translation
+    bool printMe;                       // Verbose-dump flag for this unit
+    bool allSingleStep;                 // Conservative single-step compilation
+    int numChainingCells[CHAINING_CELL_LAST];
+    LIR *firstChainingLIR[CHAINING_CELL_LAST];
+} CompilationUnit;
+
+BasicBlock *dvmCompilerNewBB(BBType blockType);
+
+void dvmCompilerAppendMIR(BasicBlock *bb, MIR *mir);
+
+void dvmCompilerAppendLIR(CompilationUnit *cUnit, LIR *lir);
+
+/* Debug Utilities */
+void dvmCompilerDumpCompilationUnit(CompilationUnit *cUnit);
+
+#endif /* _DALVIK_VM_COMPILER_IR */
diff --git a/vm/compiler/CompilerInternals.h b/vm/compiler/CompilerInternals.h
new file mode 100644 (file)
index 0000000..410213a
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_COMPILER_INTERNAL
+#define _DALVIK_VM_COMPILER_INTERNAL
+
+#include "Dalvik.h"
+#include "CompilerUtility.h"
+#include "CompilerIR.h"
+#include "codegen/CompilerCodegen.h"
+#include "interp/Jit.h"
+
+#endif /* _DALVIK_VM_COMPILER_INTERNAL */
diff --git a/vm/compiler/CompilerUtility.h b/vm/compiler/CompilerUtility.h
new file mode 100644 (file)
index 0000000..7b4de11
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_COMPILER_UTILITY
+#define _DALVIK_VM_COMPILER_UTILITY
+
+#define ARENA_DEFAULT_SIZE 4096
+
+/* Allocate the initial memory block for arena-based allocation */
+bool dvmCompilerHeapInit(void);
+
+typedef struct ArenaMemBlock {
+    size_t bytesAllocated;
+    struct ArenaMemBlock *next;
+    char ptr[0];
+} ArenaMemBlock;
+
+void *dvmCompilerNew(size_t size, bool zero);
+
+void dvmCompilerArenaReset(void);
+
+typedef struct GrowableList {
+    size_t numAllocated;
+    size_t numUsed;
+    void **elemList;
+} GrowableList;
+
+void dvmInitGrowableList(GrowableList *gList, size_t initLength);
+void dvmInsertGrowableList(GrowableList *gList, void *elem);
+
+#endif /* _DALVIK_VM_COMPILER_UTILITY */
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
new file mode 100644 (file)
index 0000000..59a7455
--- /dev/null
@@ -0,0 +1,603 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "libdex/OpCode.h"
+#include "dexdump/OpCodeNames.h"
+#include "interp/Jit.h"
+#include "CompilerInternals.h"
+
+/*
+ * Parse an instruction, return the length of the instruction in 16-bit
+ * code units.
+ *
+ * codePtr - position in the instruction stream to decode
+ * decInsn - receives the decoded instruction
+ * printMe - if true, log the parsed opcode
+ *
+ * Fix: the computed widths for switch/array-data payloads were previously
+ * clobbered by an unconditional "insnWidth = 0" after the signature checks,
+ * so callers advancing by the returned width would never step past a data
+ * payload.  The zero width is now only the fallback for an unrecognized
+ * pseudo opcode.
+ */
+static inline int parseInsn(const u2 *codePtr, DecodedInstruction *decInsn,
+                            bool printMe)
+{
+    u2 instr = *codePtr;
+    OpCode opcode = instr & 0xff;
+    int insnWidth;
+
+    // Need to check if this is a real NOP or a pseudo opcode
+    if (opcode == OP_NOP && instr != 0) {
+        if (instr == kPackedSwitchSignature) {
+            insnWidth = 4 + codePtr[1] * 2;
+        } else if (instr == kSparseSwitchSignature) {
+            insnWidth = 2 + codePtr[1] * 4;
+        } else if (instr == kArrayDataSignature) {
+            int width = codePtr[1];
+            int size = codePtr[2] | (codePtr[3] << 16);
+            // The plus 1 is to round up for odd size and width
+            insnWidth = 4 + ((size * width) + 1) / 2;
+        } else {
+            /* Unrecognized pseudo opcode - no width information */
+            insnWidth = 0;
+        }
+    } else {
+        insnWidth = gDvm.instrWidth[opcode];
+        if (insnWidth < 0) {
+            insnWidth = -insnWidth;
+        }
+    }
+
+    dexDecodeInstruction(gDvm.instrFormat, codePtr, decInsn);
+    if (printMe) {
+        LOGD("%p: %#06x %s\n", codePtr, opcode, getOpcodeName(opcode));
+    }
+    return insnWidth;
+}
+
+/*
+ * Identify block-ending instructions and collect supplemental information
+ * regarding the following instructions.
+ *
+ * caller    - method containing the instruction
+ * insn      - instruction to inspect
+ * curOffset - bytecode offset of insn within the method
+ * target    - out: branch target offset, or callee entry address for
+ *             statically bound invokes (unchanged otherwise)
+ * isInvoke  - out: set to true if insn is an invoke
+ * callee    - out: statically resolved callee, when one is determined
+ *
+ * Returns true if insn terminates the current basic block.
+ */
+static inline bool findBlockBoundary(const Method *caller, MIR *insn,
+                                     unsigned int curOffset,
+                                     unsigned int *target, bool *isInvoke,
+                                     const Method **callee)
+{
+    switch (insn->dalvikInsn.opCode) {
+        /* Target is not compile-time constant */
+        case OP_RETURN_VOID:
+        case OP_RETURN:
+        case OP_RETURN_WIDE:
+        case OP_RETURN_OBJECT:
+        case OP_THROW:
+        case OP_INVOKE_VIRTUAL:
+        case OP_INVOKE_VIRTUAL_RANGE:
+        case OP_INVOKE_INTERFACE:
+        case OP_INVOKE_INTERFACE_RANGE:
+        case OP_INVOKE_VIRTUAL_QUICK:
+        case OP_INVOKE_VIRTUAL_QUICK_RANGE:
+            *isInvoke = true;
+            break;
+        case OP_INVOKE_SUPER:
+        case OP_INVOKE_SUPER_RANGE: {
+            /*
+             * NOTE(review): assumes the method reference is already resolved
+             * (non-NULL pResMethods entry) - confirm this is guaranteed for
+             * code reached by the trace builder.
+             */
+            int mIndex = caller->clazz->pDvmDex->
+                pResMethods[insn->dalvikInsn.vB]->methodIndex;
+            const Method *calleeMethod =
+                caller->clazz->super->vtable[mIndex];
+
+            if (!dvmIsNativeMethod(calleeMethod)) {
+                /* NOTE(review): cast assumes 32-bit pointers - confirm */
+                *target = (unsigned int) calleeMethod->insns;
+            }
+            *isInvoke = true;
+            *callee = calleeMethod;
+            break;
+        }
+        case OP_INVOKE_STATIC:
+        case OP_INVOKE_STATIC_RANGE: {
+            const Method *calleeMethod =
+                caller->clazz->pDvmDex->pResMethods[insn->dalvikInsn.vB];
+
+            if (!dvmIsNativeMethod(calleeMethod)) {
+                *target = (unsigned int) calleeMethod->insns;
+            }
+            *isInvoke = true;
+            *callee = calleeMethod;
+            break;
+        }
+        case OP_INVOKE_SUPER_QUICK:
+        case OP_INVOKE_SUPER_QUICK_RANGE: {
+            const Method *calleeMethod =
+                caller->clazz->super->vtable[insn->dalvikInsn.vB];
+
+            if (!dvmIsNativeMethod(calleeMethod)) {
+                *target = (unsigned int) calleeMethod->insns;
+            }
+            *isInvoke = true;
+            *callee = calleeMethod;
+            break;
+        }
+        case OP_INVOKE_DIRECT:
+        case OP_INVOKE_DIRECT_RANGE: {
+            const Method *calleeMethod =
+                caller->clazz->pDvmDex->pResMethods[insn->dalvikInsn.vB];
+            if (!dvmIsNativeMethod(calleeMethod)) {
+                *target = (unsigned int) calleeMethod->insns;
+            }
+            *isInvoke = true;
+            *callee = calleeMethod;
+            break;
+        }
+        /* Unconditional branches: signed offset lives in vA */
+        case OP_GOTO:
+        case OP_GOTO_16:
+        case OP_GOTO_32:
+            *target = curOffset + (int) insn->dalvikInsn.vA;
+            break;
+
+        /* Two-register compares: signed offset lives in vC */
+        case OP_IF_EQ:
+        case OP_IF_NE:
+        case OP_IF_LT:
+        case OP_IF_GE:
+        case OP_IF_GT:
+        case OP_IF_LE:
+            *target = curOffset + (int) insn->dalvikInsn.vC;
+            break;
+
+        /* Compare-against-zero: signed offset lives in vB */
+        case OP_IF_EQZ:
+        case OP_IF_NEZ:
+        case OP_IF_LTZ:
+        case OP_IF_GEZ:
+        case OP_IF_GTZ:
+        case OP_IF_LEZ:
+            *target = curOffset + (int) insn->dalvikInsn.vB;
+            break;
+
+        default:
+            return false;
+    } return true;
+}
+
+/*
+ * Identify unconditional branch instructions - ones after which control
+ * never falls through to the next bytecode (returns and gotos).
+ *
+ * NOTE(review): OP_THROW also never falls through; confirm its omission
+ * here is intentional.
+ */
+static inline bool isUnconditionalBranch(MIR *insn)
+{
+    switch (insn->dalvikInsn.opCode) {
+        case OP_RETURN_VOID:
+        case OP_RETURN:
+        case OP_RETURN_WIDE:
+        case OP_RETURN_OBJECT:
+        case OP_GOTO:
+        case OP_GOTO_16:
+        case OP_GOTO_32:
+            return true;
+        default:
+            return false;
+    }
+}
+
+/*
+ * Main entry point to start trace compilation. Basic blocks are constructed
+ * first and they will be passed to the codegen routines to convert Dalvik
+ * bytecode into machine code.
+ *
+ * desc - trace description (method plus a list of trace runs) to compile
+ *
+ * Returns the start address of the translation (cUnit.baseAddr, filled in
+ * by the assembler).
+ */
+void *dvmCompileTrace(JitTraceDescription *desc)
+{
+    const DexCode *dexCode = dvmGetMethodCode(desc->method);
+    const JitTraceRun* currRun = &desc->trace[0];
+    bool done = false;
+    unsigned int curOffset = currRun->frag.startOffset;
+    unsigned int numInsts = currRun->frag.numInsts;
+    const u2 *codePtr = dexCode->insns + curOffset;
+    int traceSize = 0;
+    const u2 *startCodePtr = codePtr;  // NOTE(review): unused below - confirm
+    BasicBlock *startBB, *curBB, *lastBB;
+    int numBlocks = 0;
+    /*
+     * NOTE(review): unsynchronized counter - assumes a single compiler
+     * thread; confirm.
+     */
+    static int compilationId;
+    CompilationUnit cUnit;
+    memset(&cUnit, 0, sizeof(CompilationUnit));
+
+    /* Initialize the printMe flag */
+    cUnit.printMe = gDvmJit.printMe;
+
+    /* Identify traces that we don't want to compile */
+    if (gDvmJit.methodTable) {
+        /* "descriptor" + "name" concatenation; +1 for the NUL terminator */
+        int len = strlen(desc->method->clazz->descriptor) +
+                  strlen(desc->method->name) + 1;
+        char *fullSignature = dvmCompilerNew(len, true);
+        strcpy(fullSignature, desc->method->clazz->descriptor);
+        strcat(fullSignature, desc->method->name);
+
+        int hashValue = dvmComputeUtf8Hash(fullSignature);
+
+        /*
+         * Doing three levels of screening to see whether we want to skip
+         * compiling this method
+         */
+
+        /* First, check the full "class;method" signature */
+        bool methodFound =
+            dvmHashTableLookup(gDvmJit.methodTable, hashValue,
+                               fullSignature, (HashCompareFunc) strcmp,
+                               false) !=
+            NULL;
+
+        /* Full signature not found - check the enclosing class */
+        if (methodFound == false) {
+            int hashValue = dvmComputeUtf8Hash(desc->method->clazz->descriptor);
+            methodFound =
+                dvmHashTableLookup(gDvmJit.methodTable, hashValue,
+                               (char *) desc->method->clazz->descriptor,
+                               (HashCompareFunc) strcmp, false) !=
+                NULL;
+            /* Enclosing class not found - check the method name */
+            if (methodFound == false) {
+                int hashValue = dvmComputeUtf8Hash(desc->method->name);
+                methodFound =
+                    dvmHashTableLookup(gDvmJit.methodTable, hashValue,
+                                   (char *) desc->method->name,
+                                   (HashCompareFunc) strcmp, false) !=
+                    NULL;
+            }
+        }
+
+        /*
+         * Under the following conditions, the trace will be *conservatively*
+         * compiled by only containing single-step instructions to and from the
+         * interpreter.
+         * 1) If includeSelectedMethod == false, the method matches the full or
+         *    partial signature stored in the hash table.
+         *
+         * 2) If includeSelectedMethod == true, the method does not match the
+         *    full and partial signature stored in the hash table.
+         */
+        if (gDvmJit.includeSelectedMethod != methodFound) {
+            cUnit.allSingleStep = true;
+        } else {
+            /* Compile the trace as normal */
+
+            /* Print the method we cherry picked */
+            if (gDvmJit.includeSelectedMethod == true) {
+                cUnit.printMe = true;
+            }
+        }
+    }
+
+    /* Allocate the first basic block */
+    lastBB = startBB = curBB = dvmCompilerNewBB(DALVIK_BYTECODE);
+    curBB->startOffset = curOffset;
+    curBB->id = numBlocks++;
+
+    if (cUnit.printMe) {
+        LOGD("--------\nCompiler: Building trace for %s, offset 0x%x\n",
+             desc->method->name, curOffset);
+    }
+
+    /* Parse the bytecodes of each trace run and append them as MIRs */
+    while (!done) {
+        MIR *insn;
+        int width;
+        insn = dvmCompilerNew(sizeof(MIR),false);
+        insn->offset = curOffset;
+        width = parseInsn(codePtr, &insn->dalvikInsn, cUnit.printMe);
+        insn->width = width;
+        traceSize += width;
+        dvmCompilerAppendMIR(curBB, insn);
+        /* Current run exhausted - stop, or start a block for the next run */
+        if (--numInsts==0) {
+            if (currRun->frag.runEnd) {
+                done = true;
+            } else {
+                curBB = dvmCompilerNewBB(DALVIK_BYTECODE);
+                lastBB->next = curBB;
+                lastBB = curBB;
+                curBB->id = numBlocks++;
+                currRun++;
+                curOffset = currRun->frag.startOffset;
+                numInsts = currRun->frag.numInsts;
+                curBB->startOffset = curOffset;
+                codePtr = dexCode->insns + curOffset;
+            }
+        } else {
+            curOffset += width;
+            codePtr += width;
+        }
+    }
+
+    /*
+     * Now scan basic blocks containing real code to connect the
+     * taken/fallthrough links. Also create chaining cells for code not included
+     * in the trace.
+     */
+    for (curBB = startBB; curBB; curBB = curBB->next) {
+        MIR *lastInsn = curBB->lastMIRInsn;
+        /* Hit a pseudo block - exit the search now */
+        if (lastInsn == NULL) {
+            break;
+        }
+        curOffset = lastInsn->offset;
+        unsigned int targetOffset = curOffset;
+        unsigned int fallThroughOffset = curOffset + lastInsn->width;
+        bool isInvoke = false;
+        const Method *callee = NULL;
+
+        findBlockBoundary(desc->method, curBB->lastMIRInsn, curOffset,
+                          &targetOffset, &isInvoke, &callee);
+
+        /* Link the taken and fallthrough blocks */
+        BasicBlock *searchBB;
+
+        /* No backward branch in the trace - start searching the next BB */
+        for (searchBB = curBB->next; searchBB; searchBB = searchBB->next) {
+            if (targetOffset == searchBB->startOffset) {
+                curBB->taken = searchBB;
+            }
+            if (fallThroughOffset == searchBB->startOffset) {
+                curBB->fallThrough = searchBB;
+            }
+        }
+
+        /* Target block not included in the trace */
+        if (targetOffset != curOffset && curBB->taken == NULL) {
+            lastBB->next = dvmCompilerNewBB(
+                isInvoke ? CHAINING_CELL_INVOKE : CHAINING_CELL_GENERIC);
+            lastBB = lastBB->next;
+            lastBB->id = numBlocks++;
+            if (isInvoke) {
+                lastBB->startOffset = 0;
+                lastBB->containingMethod = callee;
+            } else {
+                lastBB->startOffset = targetOffset;
+            }
+            curBB->taken = lastBB;
+        }
+
+        /* Fallthrough block not included in the trace */
+        if (!isUnconditionalBranch(lastInsn) && curBB->fallThrough == NULL) {
+            lastBB->next = dvmCompilerNewBB(
+                isInvoke ? CHAINING_CELL_POST_INVOKE : CHAINING_CELL_GENERIC);
+            lastBB = lastBB->next;
+            lastBB->id = numBlocks++;
+            lastBB->startOffset = fallThroughOffset;
+            curBB->fallThrough = lastBB;
+        }
+    }
+
+    /* Now create a special block to host PC reconstruction code */
+    lastBB->next = dvmCompilerNewBB(PC_RECONSTRUCTION);
+    lastBB = lastBB->next;
+    lastBB->id = numBlocks++;
+
+    /* And one final block that publishes the PC and raises the exception */
+    lastBB->next = dvmCompilerNewBB(EXCEPTION_HANDLING);
+    lastBB = lastBB->next;
+    lastBB->id = numBlocks++;
+
+    if (cUnit.printMe) {
+        LOGD("TRACEINFO (%d): 0x%08x %s%s 0x%x %d of %d, %d blocks",
+            compilationId++,
+            (intptr_t) desc->method->insns,
+            desc->method->clazz->descriptor,
+            desc->method->name,
+            desc->trace[0].frag.startOffset,
+            traceSize,
+            dexCode->insnsSize,
+            numBlocks);
+    }
+
+    BasicBlock **blockList;
+
+    cUnit.method = desc->method;
+    cUnit.traceDesc = desc;
+    cUnit.numBlocks = numBlocks;
+    dvmInitGrowableList(&cUnit.pcReconstructionList, 8);
+    blockList = cUnit.blockList =
+        dvmCompilerNew(sizeof(BasicBlock *) * numBlocks, true);
+
+    int i;
+
+    /* Flatten the serially-linked block chain into the block list */
+    for (i = 0, curBB = startBB; i < numBlocks; i++) {
+        blockList[i] = curBB;
+        curBB = curBB->next;
+    }
+    /* Make sure all blocks are added to the cUnit */
+    assert(curBB == NULL);
+
+    if (cUnit.printMe) {
+        dvmCompilerDumpCompilationUnit(&cUnit);
+    }
+
+    /* Convert MIR to LIR, etc. */
+    dvmCompilerMIR2LIR(&cUnit);
+
+    /* Convert LIR into machine code */
+    dvmCompilerAssembleLIR(&cUnit);
+
+    if (cUnit.printMe) {
+        dvmCompilerCodegenDump(&cUnit);
+        LOGD("End %s%s", desc->method->clazz->descriptor, desc->method->name);
+    }
+
+    /* Reset the compiler resource pool */
+    dvmCompilerArenaReset();
+
+    return cUnit.baseAddr;
+}
+
+/*
+ * Similar to dvmCompileTrace, but the entity processed here is the whole
+ * method.
+ *
+ * Returns the start address of the translation (cUnit.baseAddr, filled in
+ * by the assembler).
+ *
+ * TODO: implementation will be revisited when the trace builder can provide
+ * whole-method traces.
+ */
+void *dvmCompileMethod(Method *method)
+{
+    const DexCode *dexCode = dvmGetMethodCode(method);
+    const u2 *codePtr = dexCode->insns;
+    const u2 *codeEnd = dexCode->insns + dexCode->insnsSize;
+    int blockID = 0;
+    unsigned int curOffset = 0;
+
+    BasicBlock *firstBlock = dvmCompilerNewBB(DALVIK_BYTECODE);
+    firstBlock->id = blockID++;
+
+    /* Allocate the bit-vector to track the beginning of basic blocks */
+    BitVector *bbStartAddr = dvmAllocBitVector(dexCode->insnsSize+1, false);
+    dvmSetBit(bbStartAddr, 0);
+
+    /*
+     * Sequentially go through every instruction first and put them in a single
+     * basic block. Identify block boundaries along the way.
+     */
+    while (codePtr < codeEnd) {
+        MIR *insn = dvmCompilerNew(sizeof(MIR), false);
+        insn->offset = curOffset;
+        int width = parseInsn(codePtr, &insn->dalvikInsn, false);
+        bool isInvoke = false;
+        const Method *callee;       // Only written by findBlockBoundary here
+        insn->width = width;
+
+        dvmCompilerAppendMIR(firstBlock, insn);
+        /*
+         * Check whether this is a block ending instruction and whether it
+         * suggests the start of a new block
+         */
+        unsigned int target = curOffset;
+
+        /*
+         * If findBlockBoundary returns true, it means the current instruction
+         * is terminating the current block. If it is a branch, the target
+         * address will be recorded in target.
+         */
+        if (findBlockBoundary(method, insn, curOffset, &target, &isInvoke,
+                              &callee)) {
+            dvmSetBit(bbStartAddr, curOffset + width);
+            if (target != curOffset) {
+                dvmSetBit(bbStartAddr, target);
+            }
+        }
+
+        codePtr += width;
+        /* each bit represents 16-bit quantity */
+        curOffset += width;
+    }
+
+    /*
+     * The number of blocks will be equal to the number of bits set to 1 in the
+     * bit vector minus 1, because the bit representing the location after the
+     * last instruction is set to one.
+     */
+    int numBlocks = dvmCountSetBits(bbStartAddr);
+    if (dvmIsBitSet(bbStartAddr, dexCode->insnsSize)) {
+        numBlocks--;
+    }
+
+    CompilationUnit cUnit;
+    BasicBlock **blockList;
+
+    memset(&cUnit, 0, sizeof(CompilationUnit));
+    cUnit.method = method;
+    blockList = cUnit.blockList =
+        dvmCompilerNew(sizeof(BasicBlock *) * numBlocks, true);
+
+    /*
+     * Register the first block onto the list and start splitting it into
+     * block boundaries from there.
+     */
+    blockList[0] = firstBlock;
+    cUnit.numBlocks = 1;
+
+    int i;
+    /* Note: cUnit.numBlocks grows as new blocks are split off and enqueued */
+    for (i = 0; i < numBlocks; i++) {
+        MIR *insn;
+        BasicBlock *curBB = blockList[i];
+        /* NOTE(review): curOffset appears unused below - confirm */
+        curOffset = curBB->lastMIRInsn->offset;
+
+        for (insn = curBB->firstMIRInsn->next; insn; insn = insn->next) {
+            /* Found the beginning of a new block, see if it is created yet */
+            if (dvmIsBitSet(bbStartAddr, insn->offset)) {
+                int j;
+                for (j = 0; j < cUnit.numBlocks; j++) {
+                    if (blockList[j]->firstMIRInsn->offset == insn->offset)
+                        break;
+                }
+
+                /* Block not split yet - do it now */
+                if (j == cUnit.numBlocks) {
+                    BasicBlock *newBB = dvmCompilerNewBB(DALVIK_BYTECODE);
+                    newBB->id = blockID++;
+                    newBB->firstMIRInsn = insn;
+                    newBB->lastMIRInsn = curBB->lastMIRInsn;
+                    curBB->lastMIRInsn = insn->prev;
+                    insn->prev->next = NULL;
+                    insn->prev = NULL;
+
+                    /*
+                     * If the insn is not an unconditional branch, set up the
+                     * fallthrough link.
+                     */
+                    if (!isUnconditionalBranch(curBB->lastMIRInsn)) {
+                        curBB->fallThrough = newBB;
+                    }
+
+                    /* enqueue the new block */
+                    blockList[cUnit.numBlocks++] = newBB;
+                    break;
+                }
+            }
+        }
+    }
+
+    if (numBlocks != cUnit.numBlocks) {
+        LOGE("Expect %d vs %d basic blocks\n", numBlocks, cUnit.numBlocks);
+        dvmAbort();
+    }
+
+    dvmFreeBitVector(bbStartAddr);
+
+    /* Connect the basic blocks through the taken links */
+    for (i = 0; i < numBlocks; i++) {
+        BasicBlock *curBB = blockList[i];
+        MIR *insn = curBB->lastMIRInsn;
+        unsigned int target = insn->offset;
+        bool isInvoke;
+        const Method *callee;
+
+        findBlockBoundary(method, insn, target, &target, &isInvoke, &callee);
+
+        /* Found a block ended on a branch */
+        if (target != insn->offset) {
+            int j;
+            /* Forward branch */
+            if (target > insn->offset) {
+                j = i + 1;
+            } else {
+                /* Backward branch */
+                j = 0;
+            }
+            for (; j < numBlocks; j++) {
+                if (blockList[j]->firstMIRInsn->offset == target) {
+                    curBB->taken = blockList[j];
+                    break;
+                }
+            }
+
+            /* A branch target must start one of the blocks we created */
+            if (j == numBlocks) {
+                LOGE("Target not found for insn %x: expect target %x\n",
+                     curBB->lastMIRInsn->offset, target);
+                dvmAbort();
+            }
+        }
+    }
+
+    dvmCompilerMIR2LIR(&cUnit);
+
+    dvmCompilerAssembleLIR(&cUnit);
+
+    dvmCompilerDumpCompilationUnit(&cUnit);
+
+    dvmCompilerArenaReset();
+
+    return cUnit.baseAddr;
+}
diff --git a/vm/compiler/IntermediateRep.c b/vm/compiler/IntermediateRep.c
new file mode 100644 (file)
index 0000000..2596aab
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "CompilerInternals.h"
+
+/*
+ * Allocate a new basic block of the given type from the compiler arena.
+ *
+ * NOTE(review): dvmCompilerNew can return NULL on exhaustion; the result
+ * is dereferenced unchecked here - confirm the intended OOM policy.
+ */
+BasicBlock *dvmCompilerNewBB(BBType blockType)
+{
+    BasicBlock *bb = dvmCompilerNew(sizeof(BasicBlock), true);
+    bb->blockType = blockType;
+    return bb;
+}
+
+/*
+ * Insert an MIR instruction to the end of a basic block.
+ *
+ * Fix: the empty-list branch previously asserted the condition just
+ * tested (firstMIRInsn == NULL); it now asserts that the tail pointer is
+ * also clear, mirroring dvmCompilerAppendLIR.
+ */
+void dvmCompilerAppendMIR(BasicBlock *bb, MIR *mir)
+{
+    if (bb->firstMIRInsn == NULL) {
+        /* Empty block - head and tail must both be clear */
+        assert(bb->lastMIRInsn == NULL);
+        bb->lastMIRInsn = bb->firstMIRInsn = mir;
+        mir->prev = mir->next = NULL;
+    } else {
+        bb->lastMIRInsn->next = mir;
+        mir->prev = bb->lastMIRInsn;
+        mir->next = NULL;
+        bb->lastMIRInsn = mir;
+    }
+}
+
+/*
+ * Append an LIR instruction to the LIR list maintained by a compilation
+ * unit.  The LIR's prev/next links are rewritten so it becomes the new
+ * tail of the list.
+ */
+void dvmCompilerAppendLIR(CompilationUnit *cUnit, LIR *lir)
+{
+    if (cUnit->firstLIRInsn == NULL) {
+        /* Empty list - head and tail must both be clear */
+        assert(cUnit->lastLIRInsn == NULL);
+        cUnit->lastLIRInsn = cUnit->firstLIRInsn = lir;
+        lir->prev = lir->next = NULL;
+    } else {
+        cUnit->lastLIRInsn->next = lir;
+        lir->prev = cUnit->lastLIRInsn;
+        lir->next = NULL;
+        cUnit->lastLIRInsn = lir;
+    }
+}
diff --git a/vm/compiler/Utility.c b/vm/compiler/Utility.c
new file mode 100644 (file)
index 0000000..9d8e088
--- /dev/null
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "CompilerInternals.h"
+
+static ArenaMemBlock *arenaHead, *currentArena;
+static int numArenaBlocks;
+
+/*
+ * Allocate the initial memory block for arena-based allocation.
+ *
+ * Must be called exactly once before dvmCompilerNew is used (asserts the
+ * arena is not already initialized).  Returns true on success, false if
+ * the initial block cannot be allocated.
+ */
+bool dvmCompilerHeapInit(void)
+{
+    assert(arenaHead == NULL);
+    arenaHead =
+        (ArenaMemBlock *) malloc(sizeof(ArenaMemBlock) + ARENA_DEFAULT_SIZE);
+    if (arenaHead == NULL) {
+        LOGE("No memory left to create compiler heap memory\n");
+        return false;
+    }
+    currentArena = arenaHead;
+    currentArena->bytesAllocated = 0;
+    currentArena->next = NULL;
+    numArenaBlocks = 1;
+
+    return true;
+}
+
+/*
+ * Arena-based malloc for compilation tasks.
+ *
+ * size - number of bytes requested (rounded up to a 4-byte multiple)
+ * zero - if true the returned memory is zero-filled
+ *
+ * Returns NULL if the rounded request exceeds ARENA_DEFAULT_SIZE or a new
+ * arena block cannot be allocated.
+ *
+ * Fix: the malloc of a new arena block was dereferenced without a NULL
+ * check; an allocation failure now logs and returns NULL instead of
+ * crashing.
+ */
+void * dvmCompilerNew(size_t size, bool zero)
+{
+    size = (size + 3) & ~3;
+retry:
+    /* Normal case - space is available in the current page */
+    if (size + currentArena->bytesAllocated <= ARENA_DEFAULT_SIZE) {
+        void *ptr;
+        ptr = &currentArena->ptr[currentArena->bytesAllocated];
+        currentArena->bytesAllocated += size;
+        if (zero) {
+            memset(ptr, 0, size);
+        }
+        return ptr;
+    } else {
+        /*
+         * See if there are previously allocated arena blocks before the last
+         * reset
+         */
+        if (currentArena->next) {
+            currentArena = currentArena->next;
+            goto retry;
+        }
+        /*
+         * If we allocate really large variable-sized data structures that
+         * could go above the limit we need to enhance the allocation
+         * mechanism.
+         */
+        if (size > ARENA_DEFAULT_SIZE) {
+            LOGE("Requesting %d bytes which exceed the maximal size allowed\n",
+                 size);
+            return NULL;
+        }
+        /* Time to allocate a new arena */
+        ArenaMemBlock *newArena = (ArenaMemBlock *)
+            malloc(sizeof(ArenaMemBlock) + ARENA_DEFAULT_SIZE);
+        /* The block is dereferenced immediately - check the result */
+        if (newArena == NULL) {
+            LOGE("No memory left to expand compiler heap memory\n");
+            return NULL;
+        }
+        newArena->bytesAllocated = 0;
+        newArena->next = NULL;
+        currentArena->next = newArena;
+        currentArena = newArena;
+        numArenaBlocks++;
+        goto retry;
+    }
+    return NULL;
+}
+
+/*
+ * Reclaim all the arena blocks allocated so far.  The blocks themselves
+ * are kept (not freed) and their contents are reused by subsequent
+ * compilations.
+ */
+void dvmCompilerArenaReset(void)
+{
+    ArenaMemBlock *block;
+
+    for (block = arenaHead; block; block = block->next) {
+        block->bytesAllocated = 0;
+    }
+    currentArena = arenaHead;
+}
+
+/*
+ * Growable List initialization.  The element array is allocated from the
+ * compiler arena, so the list lives only until the next arena reset.
+ */
+void dvmInitGrowableList(GrowableList *gList, size_t initLength)
+{
+    gList->numAllocated = initLength;
+    gList->numUsed = 0;
+    gList->elemList = (void **) dvmCompilerNew(sizeof(void *) * initLength,
+                                               true);
+}
+
+/*
+ * Expand the capacity of a growable list: double while under 128
+ * elements, then grow linearly by 128.  The old array is arena memory,
+ * so it is abandoned rather than freed and reclaimed at the next reset.
+ *
+ * NOTE(review): dvmCompilerNew can return NULL; the result is used
+ * unchecked - confirm the intended OOM policy.
+ */
+static void expandGrowableList(GrowableList *gList)
+{
+    int newLength = gList->numAllocated;
+    if (newLength < 128) {
+        newLength <<= 1;
+    } else {
+        newLength += 128;
+    }
+    void *newArray = dvmCompilerNew(sizeof(void *) * newLength, true);
+    memcpy(newArray, gList->elemList, sizeof(void *) * gList->numAllocated);
+    gList->numAllocated = newLength;
+    gList->elemList = newArray;
+}
+
+/*
+ * Insert a new element into the growable list, expanding the backing
+ * array first if it is full.
+ */
+void dvmInsertGrowableList(GrowableList *gList, void *elem)
+{
+    if (gList->numUsed == gList->numAllocated) {
+        expandGrowableList(gList);
+    }
+    gList->elemList[gList->numUsed++] = elem;
+}
+
+/*
+ * Debug Utility - dump a compilation unit: each block's id, bytecode
+ * offset range, and its taken/fallthrough successors.
+ */
+void dvmCompilerDumpCompilationUnit(CompilationUnit *cUnit)
+{
+    int i;
+    BasicBlock *bb;
+    LOGD("%d blocks in total\n", cUnit->numBlocks);
+
+    for (i = 0; i < cUnit->numBlocks; i++) {
+        bb = cUnit->blockList[i];
+        /* Pseudo blocks have no MIRs - report them as empty */
+        LOGD("Block %d (insn %04x - %04x%s)\n",
+             bb->id, bb->startOffset,
+             bb->lastMIRInsn ? bb->lastMIRInsn->offset : bb->startOffset,
+             bb->lastMIRInsn ? "" : " empty");
+        if (bb->taken) {
+            LOGD("  Taken branch: block %d (%04x)\n",
+                 bb->taken->id, bb->taken->startOffset);
+        }
+        if (bb->fallThrough) {
+            LOGD("  Fallthrough : block %d (%04x)\n",
+                 bb->fallThrough->id, bb->fallThrough->startOffset);
+        }
+    }
+}
+
+/*
+ * Dump the current stats of the compiler, including number of bytes used in
+ * the code cache, arena size, and work queue length, and various JIT stats.
+ * Also delegates to dvmJitStats and the architecture-specific dump.
+ */
+void dvmCompilerDumpStats(void)
+{
+    LOGD("%d compilations using %d bytes",
+         gDvmJit.numCompilations, gDvmJit.codeCacheByteUsed);
+    LOGD("Compiler arena uses %d blocks (%d bytes each)",
+         numArenaBlocks, ARENA_DEFAULT_SIZE);
+    LOGD("Compiler work queue length is %d/%d", gDvmJit.compilerQueueLength,
+         gDvmJit.compilerMaxQueued);
+    dvmJitStats();
+    dvmCompilerArchDump();
+}
diff --git a/vm/compiler/codegen/CompilerCodegen.h b/vm/compiler/codegen/CompilerCodegen.h
new file mode 100644 (file)
index 0000000..97077b4
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_COMPILERCODEGEN_H_
+#define _DALVIK_VM_COMPILERCODEGEN_H_
+
+#include "../CompilerIR.h"
+
+/* Work unit is architecture dependent */
+void *dvmCompilerDoWork(CompilerWorkOrder *work);
+
+/* Lower middle-level IR to low-level IR */
+void dvmCompilerMIR2LIR(CompilationUnit *cUnit);
+
+/* Assemble LIR into machine code */
+void dvmCompilerAssembleLIR(CompilationUnit *cUnit);
+
+/* Implemented in the codegen/<target>/ArchUtility.c */
+void dvmCompilerCodegenDump(CompilationUnit *cUnit);
+
+/* Implemented in the codegen/<target>/Assembler.c */
+void* dvmJitChain(void *tgtAddr, u4* branchAddr);
+
+#endif /* _DALVIK_VM_COMPILERCODEGEN_H_ */
diff --git a/vm/compiler/codegen/armv5te/ArchUtility.c b/vm/compiler/codegen/armv5te/ArchUtility.c
new file mode 100644 (file)
index 0000000..58b181b
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../../CompilerInternals.h"
+#include "dexdump/OpCodeNames.h"
+#include "Armv5teLIR.h"
+
+/*
+ * Decode a register list bit vector (bits 0-7 -> r0-r7) into a
+ * comma-separated printable string in the caller-supplied buffer.
+ * Returns buf for convenience.
+ */
+static char * decodeRegList(int vector, char *buf)
+{
+    int i;
+    bool printed = false;
+    buf[0] = 0;
+    for (i = 0; i < 8; i++, vector >>= 1) {
+        if (vector & 0x1) {
+            /* Emit the comma separator only after the first register */
+            if (printed) {
+                sprintf(buf + strlen(buf), ", r%d", i);
+            } else {
+                printed = true;
+                sprintf(buf, "r%d", i);
+            }
+        }
+    }
+    return buf;
+}
+
+/*
+ * Interpret a format string and build a string no longer than size
+ * See format key in Assemble.c.
+ *
+ * A '!' introduces an operand key ("!!" emits a literal '!'): the next
+ * character selects operands[0..2] and the one after selects the print
+ * format.  Output that would overflow the buffer is truncated.
+ */
+static void buildInsnString(char *fmt, Armv5teLIR *lir, char* buf,
+                            unsigned char *baseAddr, int size)
+{
+    char *bufEnd = &buf[size-1];
+    char *fmtEnd = &fmt[strlen(fmt)];
+    char tbuf[256];
+    char nc;
+    while (fmt < fmtEnd) {
+        int operand;
+        if (*fmt == '!') {
+            fmt++;
+            assert(fmt < fmtEnd);
+            nc = *fmt++;
+            if (nc=='!') {
+                strcpy(tbuf, "!");
+            } else {
+               assert(fmt < fmtEnd);
+               assert((unsigned)(nc-'0') < 3);
+               operand = lir->operands[nc-'0'];
+               switch(*fmt++) {
+                   case 'h':
+                       sprintf(tbuf,"%04x", operand);
+                       break;
+                   case 'd':
+                       sprintf(tbuf,"%d", operand);
+                       break;
+                   case 'D':
+                       /* 3-bit register field denoting a high register */
+                       sprintf(tbuf,"%d", operand+8);
+                       break;
+                   case 'E':
+                       sprintf(tbuf,"%d", operand*4);
+                       break;
+                   case 'F':
+                       sprintf(tbuf,"%d", operand*2);
+                       break;
+                   case 'c':
+                       /* Branch condition mnemonic */
+                       switch (operand) {
+                           case ARM_COND_EQ:
+                               strcpy(tbuf, "beq");
+                               break;
+                           case ARM_COND_NE:
+                               strcpy(tbuf, "bne");
+                               break;
+                           case ARM_COND_LT:
+                               strcpy(tbuf, "blt");
+                               break;
+                           case ARM_COND_GE:
+                               strcpy(tbuf, "bge");
+                               break;
+                           case ARM_COND_GT:
+                               strcpy(tbuf, "bgt");
+                               break;
+                           case ARM_COND_LE:
+                               strcpy(tbuf, "ble");
+                               break;
+                           case ARM_COND_CS:
+                               strcpy(tbuf, "bcs");
+                               break;
+                           default:
+                               strcpy(tbuf, "");
+                               break;
+                       }
+                       break;
+                   case 't':
+                       /* PC-relative branch target (Thumb: pc = insn + 4) */
+                       sprintf(tbuf,"0x%08x",
+                               (int) baseAddr + lir->generic.offset + 4 +
+                               (operand << 1));
+                       break;
+                   case 'u': {
+                       /*
+                        * Reassemble a 32-bit bl/blx target from the offset
+                        * halves held in this LIR and the following one.
+                        */
+                       int offset_1 = lir->operands[0];
+                       int offset_2 = NEXT_LIR(lir)->operands[0];
+                       intptr_t target =
+                           ((((intptr_t) baseAddr + lir->generic.offset + 4) &
+                            ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) &
+                           0xfffffffc;
+                       sprintf(tbuf, "%p", (void *) target);
+                       break;
+                    }
+
+                   /* Nothing to print for BLX_2 */
+                   case 'v':
+                       strcpy(tbuf, "see above");
+                       break;
+                   case 'R':
+                       decodeRegList(operand, tbuf);
+                       break;
+                   default:
+                       strcpy(tbuf,"DecodeError");
+                       break;
+               }
+               /* Append only if the expansion still fits in the buffer */
+               if (buf+strlen(tbuf) <= bufEnd) {
+                   strcpy(buf, tbuf);
+                   buf += strlen(tbuf);
+               } else {
+                   break;
+               }
+            }
+        } else {
+           *buf++ = *fmt++;
+        }
+        if (buf == bufEnd)
+            break;
+    }
+    *buf = 0;
+}
+
+/*
+ * Pretty-print a LIR instruction.  Pseudo-ops (labels, chaining cells,
+ * etc.) are printed individually; all real Thumb insns are formatted via
+ * their EncodingMap name/fmt strings.
+ */
+static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr)
+{
+    Armv5teLIR *lir = (Armv5teLIR *) arg;
+    char buf[256];
+    char opName[256];
+    int offset = lir->generic.offset;
+    int dest = lir->operands[0];
+    /* Handle pseudo-ops individually, and all regular insns as a group */
+    switch(lir->opCode) {
+        case ARMV5TE_PSEUDO_TARGET_LABEL:
+            break;
+        case ARMV5TE_PSEUDO_CHAINING_CELL_GENERIC:
+            LOGD("-------- chaining cell (generic): 0x%04x\n", dest);
+            break;
+        case ARMV5TE_PSEUDO_CHAINING_CELL_POST_INVOKE:
+            LOGD("-------- chaining cell (post-invoke): 0x%04x\n", dest);
+            break;
+        case ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE:
+            /* For invoke cells operands[0] holds the target Method pointer */
+            LOGD("-------- chaining cell (invoke): %s/%p\n",
+                 ((Method *)dest)->name,
+                 ((Method *)dest)->insns);
+            break;
+        case ARMV5TE_PSEUDO_DALVIK_BYTECODE_BOUNDARY:
+            LOGD("-------- dalvik offset: 0x%04x @ %s\n", dest,
+                   getOpcodeName(lir->operands[1]));
+            break;
+        case ARMV5TE_PSEUDO_ALIGN4:
+            LOGD("%p (%04x): .align4\n", baseAddr + offset, offset);
+            break;
+        case ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL:
+            LOGD("-------- reconstruct dalvik PC : 0x%04x @ +0x%04x\n", dest,
+                 lir->operands[1]);
+            break;
+        case ARMV5TE_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL:
+            /* Do nothing */
+            break;
+        case ARMV5TE_PSEUDO_EH_BLOCK_LABEL:
+            LOGD("Exception_Handling:\n");
+            break;
+        case ARMV5TE_PSEUDO_NORMAL_BLOCK_LABEL:
+            LOGD("L%#06x:\n", dest);
+            break;
+        default:
+            buildInsnString(EncodingMap[lir->opCode].name, lir, opName,
+                            baseAddr, 256);
+            buildInsnString(EncodingMap[lir->opCode].fmt, lir, buf, baseAddr,
+                            256);
+            LOGD("%p (%04x): %-8s%s\n", baseAddr + offset, offset, opName, buf);
+            break;
+    }
+}
+
+/* Dump instructions and constant pool contents */
+void dvmCompilerCodegenDump(CompilationUnit *cUnit)
+{
+    LOGD("Dumping LIR insns\n");
+    LIR *lirInsn;
+    Armv5teLIR *armLIR;
+
+    LOGD("installed code is at %p\n", cUnit->baseAddr);
+    LOGD("total size is %d bytes\n", cUnit->totalSize);
+    /* First pass: the instruction stream */
+    for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) {
+        dumpLIRInsn(lirInsn, cUnit->baseAddr);
+    }
+    /* Second pass: the .word literals kept on the wordList */
+    for (lirInsn = cUnit->wordList; lirInsn; lirInsn = lirInsn->next) {
+        armLIR = (Armv5teLIR *) lirInsn;
+        LOGD("%p (%04x): .word (0x%x)\n",
+             cUnit->baseAddr + armLIR->generic.offset, armLIR->generic.offset,
+             armLIR->operands[0]);
+    }
+}
diff --git a/vm/compiler/codegen/armv5te/Armv5teLIR.h b/vm/compiler/codegen/armv5te/Armv5teLIR.h
new file mode 100644 (file)
index 0000000..208e6c0
--- /dev/null
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "compiler/CompilerInternals.h"
+
+/* NOTE(review): the #includes above precede the include guard; consider
+ * moving them inside the guard. */
+#ifndef _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_H
+#define _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_H
+
+/*
+ * r0, r1, r2, r3, and r7 are always scratch
+ * r4PC is scratch if used solely in the compiled land. Otherwise it holds the
+ * Dalvik PC.
+ * rFP holds the current frame pointer
+ * rGLUE holds &InterpState
+ */
+typedef enum NativeRegisterPool {
+    r0 = 0,
+    r1 = 1,
+    r2 = 2,
+    r3 = 3,
+    r4PC = 4,
+    rFP = 5,
+    rGLUE = 6,
+    r7 = 7,
+} NativeRegisterPool;
+
+/* Thumb condition encodings */
+typedef enum Armv5teConditionCode {
+    ARM_COND_EQ = 0x0,    /* 0000 */
+    ARM_COND_NE = 0x1,    /* 0001 */
+    ARM_COND_LT = 0xb,    /* 1011 */
+    ARM_COND_GE = 0xa,    /* 1010 */
+    ARM_COND_GT = 0xc,    /* 1100 */
+    ARM_COND_LE = 0xd,    /* 1101 */
+    ARM_COND_CS = 0x2,    /* 0010 */
+    ARM_COND_MI = 0x4,    /* 0100 */
+} Armv5teConditionCode;
+
+/* Negative opCode values denote pseudo instructions (labels, cells, etc.) */
+#define isPseudoOpCode(opCode) ((int)(opCode) < 0)
+
+/*
+ * The following enum defines the list of Thumb instructions supported by
+ * the assembler. Their corresponding snippet positions will be defined in
+ * Assemble.c.
+ */
+typedef enum Armv5teOpCode {
+    ARMV5TE_PSEUDO_TARGET_LABEL = -10,
+    ARMV5TE_PSEUDO_CHAINING_CELL_POST_INVOKE = -9,
+    ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE = -8,
+    ARMV5TE_PSEUDO_CHAINING_CELL_GENERIC = -7,
+    ARMV5TE_PSEUDO_DALVIK_BYTECODE_BOUNDARY = -6,
+    ARMV5TE_PSEUDO_ALIGN4 = -5,
+    ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL = -4,
+    ARMV5TE_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL = -3,
+    ARMV5TE_PSEUDO_EH_BLOCK_LABEL = -2,
+    ARMV5TE_PSEUDO_NORMAL_BLOCK_LABEL = -1,
+    /************************************************************************/
+    ARMV5TE_16BIT_DATA,     /* DATA   [0] rd[15..0] */
+    ARMV5TE_ADC,            /* adc     [0100000101] rm[5..3] rd[2..0] */
+    ARMV5TE_ADD_RRI3,       /* add(1)  [0001110] imm_3[8..6] rn[5..3] rd[2..0]*/
+    ARMV5TE_ADD_RI8,        /* add(2)  [00110] rd[10..8] imm_8[7..0] */
+    ARMV5TE_ADD_RRR,        /* add(3)  [0001100] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_ADD_RR_LH,      /* add(4)  [01000100] H12[01] rm[5..3] rd[2..0] */
+    ARMV5TE_ADD_RR_HL,      /* add(4)  [01001000] H12[10] rm[5..3] rd[2..0] */
+    ARMV5TE_ADD_RR_HH,      /* add(4)  [01001100] H12[11] rm[5..3] rd[2..0] */
+    ARMV5TE_ADD_PC_REL,     /* add(5)  [10100] rd[10..8] imm_8[7..0] */
+    ARMV5TE_ADD_SP_REL,     /* add(6)  [10101] rd[10..8] imm_8[7..0] */
+    ARMV5TE_ADD_SPI7,       /* add(7)  [101100000] imm_7[6..0] */
+    ARMV5TE_AND_RR,         /* and     [0100000000] rm[5..3] rd[2..0] */
+    ARMV5TE_ASR,            /* asr(1)  [00010] imm_5[10..6] rm[5..3] rd[2..0] */
+    ARMV5TE_ASRV,           /* asr(2)  [0100000100] rs[5..3] rd[2..0] */
+    ARMV5TE_B_COND,         /* b(1)    [1101] cond[11..8] offset_8[7..0] */
+    ARMV5TE_B_UNCOND,       /* b(2)    [11100] offset_11[10..0] */
+    ARMV5TE_BIC,            /* bic     [0100001110] rm[5..3] rd[2..0] */
+    ARMV5TE_BKPT,           /* bkpt    [10111110] imm_8[7..0] */
+    ARMV5TE_BLX_1,          /* blx(1)  [111] H[10] offset_11[10..0] */
+    ARMV5TE_BLX_2,          /* blx(1)  [111] H[01] offset_11[10..0] */
+    ARMV5TE_BL_1,           /* bl(1)   [111] H[10] offset_11[10..0] */
+    ARMV5TE_BL_2,           /* bl(2)   [111] H[11] offset_11[10..0] */
+    ARMV5TE_BLX_R,          /* blx(2)  [010001111] H2[6..6] rm[5..3] SBZ[000] */
+    ARMV5TE_BX,             /* bx      [010001110] H2[6..6] rm[5..3] SBZ[000] */
+    ARMV5TE_CMN,            /* cmn     [0100001011] rm[5..3] rd[2..0] */
+    ARMV5TE_CMP_RI8,        /* cmp(1)  [00101] rn[10..8] imm_8[7..0] */
+    ARMV5TE_CMP_RR,         /* cmp(2)  [0100001010] rm[5..3] rd[2..0] */
+    ARMV5TE_CMP_LH,         /* cmp(3)  [01000101] H12[01] rm[5..3] rd[2..0] */
+    ARMV5TE_CMP_HL,         /* cmp(3)  [01000110] H12[10] rm[5..3] rd[2..0] */
+    ARMV5TE_CMP_HH,         /* cmp(3)  [01000111] H12[11] rm[5..3] rd[2..0] */
+    ARMV5TE_EOR,            /* eor     [0100000001] rm[5..3] rd[2..0] */
+    ARMV5TE_LDMIA,          /* ldmia   [11001] rn[10..8] reglist [7..0] */
+    ARMV5TE_LDR_RRI5,       /* ldr(1)  [01101] imm_5[10..6] rn[5..3] rd[2..0] */
+    ARMV5TE_LDR_RRR,        /* ldr(2)  [0101100] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_LDR_PC_REL,     /* ldr(3)  [01001] rd[10..8] imm_8[7..0] */
+    ARMV5TE_LDR_SP_REL,     /* ldr(4)  [10011] rd[10..8] imm_8[7..0] */
+    ARMV5TE_LDRB_RRI5,      /* ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0] */
+    ARMV5TE_LDRB_RRR,       /* ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_LDRH_RRI5,      /* ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0] */
+    ARMV5TE_LDRH_RRR,       /* ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_LDRSB_RRR,      /* ldrsb   [0101011] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_LDRSH_RRR,      /* ldrsh   [0101111] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_LSL,            /* lsl(1)  [00000] imm_5[10..6] rm[5..3] rd[2..0] */
+    ARMV5TE_LSLV,           /* lsl(2)  [0100000010] rs[5..3] rd[2..0] */
+    ARMV5TE_LSR,            /* lsr(1)  [00001] imm_5[10..6] rm[5..3] rd[2..0] */
+    ARMV5TE_LSRV,           /* lsr(2)  [0100000011] rs[5..3] rd[2..0] */
+    ARMV5TE_MOV_IMM,        /* mov(1)  [00100] rd[10..8] imm_8[7..0] */
+    ARMV5TE_MOV_RR,         /* mov(2)  [0001110000] rn[5..3] rd[2..0] */
+    ARMV5TE_MOV_RR_HL,      /* mov(3)  [01000110] H12[10] rm[5..3] rd[2..0] */
+    ARMV5TE_MOV_RR_LH,      /* mov(3)  [01000101] H12[01] rm[5..3] rd[2..0] */
+    ARMV5TE_MOV_RR_HH,      /* mov(3)  [01000111] H12[11] rm[5..3] rd[2..0] */
+    ARMV5TE_MUL,            /* mul     [0100001101] rm[5..3] rd[2..0] */
+    ARMV5TE_MVN,            /* mvn     [0100001111] rm[5..3] rd[2..0] */
+    ARMV5TE_NEG,            /* neg     [0100001001] rm[5..3] rd[2..0] */
+    ARMV5TE_ORR,            /* orr     [0100001100] rm[5..3] rd[2..0] */
+    ARMV5TE_POP,            /* pop     [1011110] r[8..8] rl[7..0] */
+    ARMV5TE_PUSH,           /* push    [1011010] r[8..8] rl[7..0] */
+    ARMV5TE_ROR,            /* ror     [0100000111] rs[5..3] rd[2..0] */
+    ARMV5TE_SBC,            /* sbc     [0100000110] rm[5..3] rd[2..0] */
+    ARMV5TE_STMIA,          /* stmia   [11000] rn[10..8] reglist [7.. 0] */
+    ARMV5TE_STR_RRI5,       /* str(1)  [01100] imm_5[10..6] rn[5..3] rd[2..0] */
+    ARMV5TE_STR_RRR,        /* str(2)  [0101000] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_STR_SP_REL,     /* str(3)  [10010] rd[10..8] imm_8[7..0] */
+    ARMV5TE_STRB_RRI5,      /* strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0] */
+    ARMV5TE_STRB_RRR,       /* strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_STRH_RRI5,      /* strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0] */
+    ARMV5TE_STRH_RRR,       /* strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_SUB_RRI3,       /* sub(1)  [0001111] imm_3[8..6] rn[5..3] rd[2..0]*/
+    ARMV5TE_SUB_RI8,        /* sub(2)  [00111] rd[10..8] imm_8[7..0] */
+    ARMV5TE_SUB_RRR,        /* sub(3)  [0001101] rm[8..6] rn[5..3] rd[2..0] */
+    ARMV5TE_SUB_SPI7,       /* sub(4)  [101100001] imm_7[6..0] */
+    ARMV5TE_SWI,            /* swi     [11011111] imm_8[7..0] */
+    ARMV5TE_TST,            /* tst     [0100001000] rm[5..3] rn[2..0] */
+    ARMV5TE_LAST,
+} Armv5teOpCode;
+
+/* Struct used to define the snippet positions for each Thumb opcode */
+typedef struct Armv5teEncodingMap {
+    short skeleton;
+    struct {
+        int end;
+        int start;
+    } fieldLoc[3];
+    Armv5teOpCode opCode;
+    int operands;
+    char *name;
+    char* fmt;
+} Armv5teEncodingMap;
+
+extern Armv5teEncodingMap EncodingMap[ARMV5TE_LAST];
+
+/*
+ * Each instance of this struct holds a pseudo or real LIR instruction:
+ * - pseudo ones (e.g. labels and marks) will be discarded by the assembler.
+ * - real ones will be assembled into Thumb instructions.
+ */
+typedef struct Armv5teLIR {
+    LIR generic;
+    Armv5teOpCode opCode;
+    int operands[3]; /* dest, src1, src2 */
+} Armv5teLIR;
+
+/* Utility macros to traverse the LIR/Armv5teLIR list */
+#define NEXT_LIR(lir) ((Armv5teLIR *) lir->generic.next)
+#define PREV_LIR(lir) ((Armv5teLIR *) lir->generic.prev)
+
+#define NEXT_LIR_LVALUE(lir) (lir)->generic.next
+#define PREV_LIR_LVALUE(lir) (lir)->generic.prev
+
+#endif /* _DALVIK_VM_COMPILER_CODEGEN_ARMV5TE_H */
diff --git a/vm/compiler/codegen/armv5te/Assemble.c b/vm/compiler/codegen/armv5te/Assemble.c
new file mode 100644 (file)
index 0000000..14355cb
--- /dev/null
@@ -0,0 +1,499 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "libdex/OpCode.h"
+#include "dexdump/OpCodeNames.h"
+
+#include "../../CompilerInternals.h"
+#include "Armv5teLIR.h"
+#include <unistd.h>             /* for cacheflush */
+
+/*
+ * opcode: Armv5teOpCode enum
+ * skeleton: pre-designated bit-pattern for this opcode
+ * ds: dest start bit position
+ * de: dest end bit position
+ * s1s: src1 start bit position
+ * s1e: src1 end bit position
+ * s2s: src2 start bit position
+ * s2e: src2 end bit position
+ * operands: number of operands (for sanity check purposes)
+ * name: mnemonic name
+ * fmt: for pretty-printing
+ */
+#define ENCODING_MAP(opcode, skeleton, ds, de, s1s, s1e, s2s, s2e, operands, \
+                     name, fmt) \
+        {skeleton, {{ds, de}, {s1s, s1e}, {s2s, s2e}}, opcode, operands, name, \
+         fmt}
+
+/* Instruction dump string format keys: !pf, where "!" is the start
+ * of the key, "p" is which numeric operand to use and "f" is the
+ * print format.
+ *
+ * [p]ositions:
+ *     0 -> operands[0] (dest)
+ *     1 -> operands[1] (src1)
+ *     2 -> operands[2] (src2)
+ *
+ * [f]ormats:
+ *     h -> 4-digit hex
+ *     d -> decimal
+ *     D -> decimal+8 (used to convert 3-bit regnum field to high reg)
+ *     E -> decimal*4
+ *     F -> decimal*2
+ *     c -> branch condition (beq, bne, etc.)
+ *     t -> pc-relative target
+ *     u -> 1st half of bl[x] target
+ *     v -> 2nd half of bl[x] target
+ *     R -> register list
+ *
+ *  [!] escape.  To insert "!", use "!!"
+ */
+/* NOTE: must be kept in sync with enum Armv5teOpcode from Armv5teLIR.h */
+Armv5teEncodingMap EncodingMap[ARMV5TE_LAST] = {
+    ENCODING_MAP(ARMV5TE_16BIT_DATA,    0x0000, 15, 0, -1, -1, -1, -1,
+                 1, "data", "0x!0h(!0d)"),
+    ENCODING_MAP(ARMV5TE_ADC,           0x4140, 2, 0, 5, 3, -1, -1,
+                 2, "adc", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_ADD_RRI3,      0x1c00, 2, 0, 5, 3, 8, 6,
+                 3, "add", "r!0d, r!1d, #!2d"),
+    ENCODING_MAP(ARMV5TE_ADD_RI8,       0x3000, 10, 8, 7, 0, -1, -1,
+                 2, "add", "r!0d, r!0d, #!1d"),
+    ENCODING_MAP(ARMV5TE_ADD_RRR,       0x1800, 2, 0, 5, 3, 8, 6,
+                 3, "add", "r!0d, r!1d, r!2d"),
+    ENCODING_MAP(ARMV5TE_ADD_RR_LH,     0x4440, 2, 0, 5, 3, -1, -1,
+                 2, "add", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_ADD_RR_HL,     0x4480, 2, 0, 5, 3, -1, -1,
+                 2, "add", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_ADD_RR_HH,     0x44c0, 2, 0, 5, 3, -1, -1,
+                 2, "add", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_ADD_PC_REL,    0xa000, 10, 8, 7, 0, -1, -1,
+                 2, "add", "r!0d, pc, #!1E"),
+    ENCODING_MAP(ARMV5TE_ADD_SP_REL,    0xa800, 10, 8, 7, 0, -1, -1,
+                 2, "add", "r!0d, sp, #!1E"),
+    ENCODING_MAP(ARMV5TE_ADD_SPI7,      0xb000, 6, 0, -1, -1, -1, -1,
+                 1, "add", "sp, #!0d*4"),
+    ENCODING_MAP(ARMV5TE_AND_RR,        0x4000, 2, 0, 5, 3, -1, -1,
+                 2, "and", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_ASR,           0x1000, 2, 0, 5, 3, 10, 6,
+                 3, "asr", "r!0d, r!1d, #!2d"),
+    ENCODING_MAP(ARMV5TE_ASRV,          0x4100, 2, 0, 5, 3, -1, -1,
+                 2, "asr", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_B_COND,        0xd000, 7, 0, 11, 8, -1, -1,
+                 2, "!1c", "!0t"),
+    /* fmt "!0t" consumes operands[0], so the operand count is 1, not 0 */
+    ENCODING_MAP(ARMV5TE_B_UNCOND,      0xe000, 10, 0, -1, -1, -1, -1,
+                 1, "b", "!0t"),
+    ENCODING_MAP(ARMV5TE_BIC,           0x4380, 2, 0, 5, 3, -1, -1,
+                 2, "bic", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_BKPT,          0xbe00, 7, 0, -1, -1, -1, -1,
+                 1, "bkpt", "!0d"),
+    /* NOTE(review): blx_1/blx_2 declare 2 operands while bl_1/bl_2 declare
+     * 1 for the same split-target layout - confirm which is intended */
+    ENCODING_MAP(ARMV5TE_BLX_1,         0xf000, 10, 0, -1, -1, -1, -1,
+                 2, "blx_1", "!0u"),
+    ENCODING_MAP(ARMV5TE_BLX_2,         0xe800, 10, 0, -1, -1, -1, -1,
+                 2, "blx_2", "!0v"),
+    ENCODING_MAP(ARMV5TE_BL_1,          0xf000, 10, 0, -1, -1, -1, -1,
+                 1, "bl_1", "!0u"),
+    ENCODING_MAP(ARMV5TE_BL_2,          0xf800, 10, 0, -1, -1, -1, -1,
+                 1, "bl_2", "!0v"),
+    ENCODING_MAP(ARMV5TE_BLX_R,         0x4780, 6, 3, -1, -1, -1, -1,
+                 1, "blx", "r!0d"),
+    ENCODING_MAP(ARMV5TE_BX,            0x4700, 6, 3, -1, -1, -1, -1,
+                 1, "bx", "r!0d"),
+    ENCODING_MAP(ARMV5TE_CMN,           0x42c0, 2, 0, 5, 3, -1, -1,
+                 2, "cmn", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_CMP_RI8,       0x2800, 10, 8, 7, 0, -1, -1,
+                 2, "cmp", "r!0d, #!1d"),
+    ENCODING_MAP(ARMV5TE_CMP_RR,        0x4280, 2, 0, 5, 3, -1, -1,
+                 2, "cmp", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_CMP_LH,        0x4540, 2, 0, 5, 3, -1, -1,
+                 2, "cmp", "r!0d, r!1D"),
+    ENCODING_MAP(ARMV5TE_CMP_HL,        0x4580, 2, 0, 5, 3, -1, -1,
+                 2, "cmp", "r!0D, r!1d"),
+    ENCODING_MAP(ARMV5TE_CMP_HH,        0x45c0, 2, 0, 5, 3, -1, -1,
+                 2, "cmp", "r!0D, r!1D"),
+    ENCODING_MAP(ARMV5TE_EOR,           0x4040, 2, 0, 5, 3, -1, -1,
+                 2, "eor", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_LDMIA,         0xc800, 10, 8, 7, 0, -1, -1,
+                 2, "ldmia", "r!0d!!, <!1R>"),
+    ENCODING_MAP(ARMV5TE_LDR_RRI5,      0x6800, 2, 0, 5, 3, 10, 6,
+                 3, "ldr", "r!0d, [r!1d, #!2E]"),
+    ENCODING_MAP(ARMV5TE_LDR_RRR,       0x5800, 2, 0, 5, 3, 8, 6,
+                 3, "ldr", "r!0d, [r!1d, r!2d]"),
+    ENCODING_MAP(ARMV5TE_LDR_PC_REL,    0x4800, 10, 8, 7, 0, -1, -1,
+                 2, "ldr", "r!0d, [pc, #!1E]"),
+    ENCODING_MAP(ARMV5TE_LDR_SP_REL,    0x9800, 10, 8, 7, 0, -1, -1,
+                 2, "ldr", "r!0d, [sp, #!1E]"),
+    /* was "#2d": missing '!' made the offset print as literal text */
+    ENCODING_MAP(ARMV5TE_LDRB_RRI5,     0x7800, 2, 0, 5, 3, 10, 6,
+                 3, "ldrb", "r!0d, [r!1d, #!2d]"),
+    ENCODING_MAP(ARMV5TE_LDRB_RRR,      0x5c00, 2, 0, 5, 3, 8, 6,
+                 3, "ldrb", "r!0d, [r!1d, r!2d]"),
+    ENCODING_MAP(ARMV5TE_LDRH_RRI5,     0x8800, 2, 0, 5, 3, 10, 6,
+                 3, "ldrh", "r!0d, [r!1d, #!2F]"),
+    ENCODING_MAP(ARMV5TE_LDRH_RRR,      0x5a00, 2, 0, 5, 3, 8, 6,
+                 3, "ldrh", "r!0d, [r!1d, r!2d]"),
+    ENCODING_MAP(ARMV5TE_LDRSB_RRR,     0x5600, 2, 0, 5, 3, 8, 6,
+                 3, "ldrsb", "r!0d, [r!1d, r!2d]"),
+    ENCODING_MAP(ARMV5TE_LDRSH_RRR,     0x5e00, 2, 0, 5, 3, 8, 6,
+                 3, "ldrsh", "r!0d, [r!1d, r!2d]"),
+    ENCODING_MAP(ARMV5TE_LSL,           0x0000, 2, 0, 5, 3, 10, 6,
+                 3, "lsl", "r!0d, r!1d, #!2d"),
+    ENCODING_MAP(ARMV5TE_LSLV,          0x4080, 2, 0, 5, 3, -1, -1,
+                 2, "lsl", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_LSR,           0x0800, 2, 0, 5, 3, 10, 6,
+                 3, "lsr", "r!0d, r!1d, #!2d"),
+    ENCODING_MAP(ARMV5TE_LSRV,          0x40c0, 2, 0, 5, 3, -1, -1,
+                 2, "lsr", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_MOV_IMM,       0x2000, 10, 8, 7, 0, -1, -1,
+                 2, "mov", "r!0d, #!1d"),
+    ENCODING_MAP(ARMV5TE_MOV_RR,        0x1c00, 2, 0, 5, 3, -1, -1,
+                 2, "mov", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_MOV_RR_LH,     0x4640, 2, 0, 5, 3, -1, -1,
+                 2, "mov", "r!0D, r!1d"),
+    ENCODING_MAP(ARMV5TE_MOV_RR_HL,     0x4680, 2, 0, 5, 3, -1, -1,
+                 2, "mov", "r!0d, r!1D"),
+    ENCODING_MAP(ARMV5TE_MOV_RR_HH,     0x46c0, 2, 0, 5, 3, -1, -1,
+                 2, "mov", "r!0D, r!1D"),
+    ENCODING_MAP(ARMV5TE_MUL,           0x4340, 2, 0, 5, 3, -1, -1,
+                 2, "mul", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_MVN,           0x43c0, 2, 0, 5, 3, -1, -1,
+                 2, "mvn", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_NEG,           0x4240, 2, 0, 5, 3, -1, -1,
+                 2, "neg", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_ORR,           0x4300, 2, 0, 5, 3, -1, -1,
+                 2, "orr", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_POP,           0xbc00, 8, 0, -1, -1, -1, -1,
+                 1, "pop", "<!0R>"),
+    ENCODING_MAP(ARMV5TE_PUSH,          0xb400, 8, 0, -1, -1, -1, -1,
+                 1, "push", "<!0R>"),
+    ENCODING_MAP(ARMV5TE_ROR,           0x41c0, 2, 0, 5, 3, -1, -1,
+                 2, "ror", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_SBC,           0x4180, 2, 0, 5, 3, -1, -1,
+                 2, "sbc", "r!0d, r!1d"),
+    ENCODING_MAP(ARMV5TE_STMIA,         0xc000, 10, 8, 7, 0, -1, -1,
+                 2, "stmia", "r!0d!!, <!1R>"),
+    ENCODING_MAP(ARMV5TE_STR_RRI5,      0x6000, 2, 0, 5, 3, 10, 6,
+                 3, "str", "r!0d, [r!1d, #!2E]"),
+    ENCODING_MAP(ARMV5TE_STR_RRR,       0x5000, 2, 0, 5, 3, 8, 6,
+                 3, "str", "r!0d, [r!1d, r!2d]"),
+    ENCODING_MAP(ARMV5TE_STR_SP_REL,    0x9000, 10, 8, 7, 0, -1, -1,
+                 2, "str", "r!0d, [sp, #!1E]"),
+    ENCODING_MAP(ARMV5TE_STRB_RRI5,     0x7000, 2, 0, 5, 3, 10, 6,
+                 3, "strb", "r!0d, [r!1d, #!2d]"),
+    ENCODING_MAP(ARMV5TE_STRB_RRR,      0x5400, 2, 0, 5, 3, 8, 6,
+                 3, "strb", "r!0d, [r!1d, r!2d]"),
+    ENCODING_MAP(ARMV5TE_STRH_RRI5,     0x8000, 2, 0, 5, 3, 10, 6,
+                 3, "strh", "r!0d, [r!1d, #!2F]"),
+    ENCODING_MAP(ARMV5TE_STRH_RRR,      0x5200, 2, 0, 5, 3, 8, 6,
+                 3, "strh", "r!0d, [r!1d, r!2d]"),
+    /* was "... #!2d]": stray ']' in the format string */
+    ENCODING_MAP(ARMV5TE_SUB_RRI3,      0x1e00, 2, 0, 5, 3, 8, 6,
+                 3, "sub", "r!0d, r!1d, #!2d"),
+    ENCODING_MAP(ARMV5TE_SUB_RI8,       0x3800, 10, 8, 7, 0, -1, -1,
+                 2, "sub", "r!0d, #!1d"),
+    ENCODING_MAP(ARMV5TE_SUB_RRR,       0x1a00, 2, 0, 5, 3, 8, 6,
+                 3, "sub", "r!0d, r!1d, r!2d"),
+    ENCODING_MAP(ARMV5TE_SUB_SPI7,      0xb080, 6, 0, -1, -1, -1, -1,
+                 1, "sub", "sp, #!0d"),
+    ENCODING_MAP(ARMV5TE_SWI,           0xdf00, 7, 0, -1, -1, -1, -1,
+                 1, "swi", "!0d"),
+    /* fmt uses two register operands, so the operand count is 2, not 1 */
+    ENCODING_MAP(ARMV5TE_TST,           0x4200, 2, 0, 5, 3, -1, -1,
+                 2, "tst", "r!0d, r!1d"),
+};
+
+#define PADDING_MOV_R0_R0               0x1C00
+
+/* Write the numbers in the literal pool to the codegen stream */
+static void writeDataContent(CompilationUnit *cUnit)
+{
+    int *dataPtr = (int *) (cUnit->codeBuffer + cUnit->dataOffset);
+    Armv5teLIR *dataLIR = (Armv5teLIR *) cUnit->wordList;
+    while (dataLIR) {
+        *dataPtr++ = dataLIR->operands[0];
+        dataLIR = NEXT_LIR(dataLIR);
+    }
+}
+
+/*
+ * Assemble each LIR into Thumb machine code and write it to cUnit->codeBuffer.
+ * Return TRUE if the instruction stream had to be rearranged (an out-of-range
+ * conditional branch pulled its PC reconstruction cell inline), in which case
+ * all recorded offsets are stale and the caller must recompute them and call
+ * this function again.
+ */
+static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr)
+{
+    short *bufferAddr = (short *) cUnit->codeBuffer;
+    Armv5teLIR *lir;
+    bool retry = false;
+
+    for (lir = (Armv5teLIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
+        /* Negative opcodes are pseudo ops and emit no code ... */
+        if (lir->opCode < 0) {
+            if ((lir->opCode == ARMV5TE_PSEUDO_ALIGN4) &&
+                (lir->operands[0] == 1) &&
+                !retry) {
+                /* ... except ALIGN4, which may emit one 2-byte nop of padding */
+                *bufferAddr++ = PADDING_MOV_R0_R0;
+            }
+            continue;
+        }
+
+        if (lir->opCode == ARMV5TE_LDR_PC_REL ||
+            lir->opCode == ARMV5TE_ADD_PC_REL) {
+            Armv5teLIR *lirTarget = (Armv5teLIR *) lir->generic.target;
+            /* Thumb PC reads as insn offset + 4, word-aligned down */
+            intptr_t pc = (lir->generic.offset + 4) & ~3;
+            intptr_t target = lirTarget->generic.offset;
+            int delta = target - pc;
+            if (delta & 0x3) {
+                LOGE("PC-rel distance is not multiples of 4: %d\n", delta);
+                dvmAbort();
+            }
+            /* Encoded as a word (4-byte-unit) offset */
+            lir->operands[1] = delta >> 2;
+        } else if (lir->opCode == ARMV5TE_B_COND) {
+            Armv5teLIR *targetLIR = (Armv5teLIR *) lir->generic.target;
+            intptr_t pc = lir->generic.offset + 4;
+            intptr_t target = targetLIR->generic.offset;
+            int delta = target - pc;
+            /* Thumb conditional branch reaches only [-256, 254] bytes */
+            if (delta > 254 || delta < -256) {
+                /* Pull in the PC reconstruction code inline */
+                if (targetLIR->opCode == ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL){
+                    /*
+                     * The original code is:
+                     *
+                     * bxx targetLIR
+                     * origNextLir
+                     *       :
+                     *       :
+                     * targetLIR (a PC reconstruction cell)
+                     *       :
+                     * lastLIR (should be a unconditional branch)
+                     *
+                     * The distance from bxx to targetLIR is too far, so we want
+                     * to rearrange the code to be:
+                     *
+                     * bxx targetLIR
+                     * branchoverLIR to origNextLir
+                     * targetLIR (a PC reconstruction cell)
+                     *       :
+                     * lastLIR (should be a unconditional branch)
+                     * origNextLir
+                     *
+                     * Although doing so adds a unconditional branchover
+                     * instruction, it can be predicted for free by ARM so
+                     * the penalty should be minimal.
+                     */
+                    Armv5teLIR *pcrLIR = targetLIR;
+                    Armv5teLIR *lastLIR = pcrLIR;
+                    Armv5teLIR *origNextLIR = NEXT_LIR(lir);
+
+                    /*
+                     * Find out the last instruction in the PC reconstruction
+                     * cell
+                     */
+                    while (lastLIR->opCode != ARMV5TE_B_UNCOND) {
+                        lastLIR = NEXT_LIR(lastLIR);
+                    }
+
+                    /* Yank out the PCR code */
+                    PREV_LIR_LVALUE(NEXT_LIR(lastLIR)) =
+                        (LIR *) PREV_LIR(targetLIR);
+                    NEXT_LIR_LVALUE(PREV_LIR(targetLIR)) =
+                        (LIR *) NEXT_LIR(lastLIR);
+
+                    /* Create the branch over instruction */
+                    Armv5teLIR *branchoverLIR =
+                        dvmCompilerNew(sizeof(Armv5teLIR), true);
+                    branchoverLIR->opCode = ARMV5TE_B_UNCOND;
+                    branchoverLIR->generic.target = (LIR *) origNextLIR;
+
+                    /* Reconnect the instructions */
+                    NEXT_LIR_LVALUE(lir) = (LIR *) branchoverLIR;
+                    PREV_LIR_LVALUE(branchoverLIR) = (LIR *) lir;
+
+                    NEXT_LIR_LVALUE(branchoverLIR) = (LIR *) targetLIR;
+                    PREV_LIR_LVALUE(targetLIR) = (LIR *) branchoverLIR;
+
+                    NEXT_LIR_LVALUE(lastLIR) = (LIR *) origNextLIR;
+                    PREV_LIR_LVALUE(origNextLIR) = (LIR *) lastLIR;
+
+                    /* Offsets are now stale - caller must redo the layout */
+                    retry = true;
+                    continue;
+                } else {
+                    LOGE("Conditional branch distance out of range: %d\n",
+                         delta);
+                    dvmAbort();
+                }
+            }
+            lir->operands[0] = delta >> 1;
+        } else if (lir->opCode == ARMV5TE_B_UNCOND) {
+            Armv5teLIR *targetLIR = (Armv5teLIR *) lir->generic.target;
+            intptr_t pc = lir->generic.offset + 4;
+            intptr_t target = targetLIR->generic.offset;
+            int delta = target - pc;
+            /* Thumb unconditional branch reaches only [-2048, 2046] bytes */
+            if (delta > 2046 || delta < -2048) {
+                LOGE("Unconditional branch distance out of range: %d\n", delta);
+                dvmAbort();
+            }
+            lir->operands[0] = delta >> 1;
+        } else if (lir->opCode == ARMV5TE_BLX_1) {
+            assert(NEXT_LIR(lir)->opCode == ARMV5TE_BLX_2);
+            /* curPC is Thumb */
+            intptr_t curPC = (startAddr + lir->generic.offset + 4) & ~3;
+            intptr_t target = lir->operands[1];
+
+            /* Match bit[1] in target with base */
+            if (curPC & 0x2) {
+                target |= 0x2;
+            }
+            int delta = target - curPC;
+            assert((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+            /* Split the 22-bit offset across the two-instruction pair */
+            lir->operands[0] = (delta >> 12) & 0x7ff;
+            NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
+        }
+
+        /*
+         * The code offset will be recalculated, just continue to check if
+         * there are other places where code will be rescheduled and do not
+         * write to the output buffer
+         */
+        if (retry) {
+            continue;
+        }
+        Armv5teEncodingMap *encoder = &EncodingMap[lir->opCode];
+        short bits = encoder->skeleton;
+        int i;
+        /* Pack up to three operands into the instruction skeleton */
+        for (i = 0; i < 3; i++) {
+            short value;
+            if (encoder->fieldLoc[i].end != -1) {
+                value = (lir->operands[i] << encoder->fieldLoc[i].start) &
+                        ((1 << (encoder->fieldLoc[i].end + 1)) - 1);
+                bits |= value;
+
+            }
+        }
+        *bufferAddr++ = bits;
+    }
+    return retry;
+}
+
+/*
+ * Go over each instruction in the list and calculate the offset from the top
+ * before sending them off to the assembler. If out-of-range branch distance is
+ * seen rearrange the instructions a bit to correct it.
+ */
+void dvmCompilerAssembleLIR(CompilationUnit *cUnit)
+{
+    LIR *lir;
+    Armv5teLIR *armLIR;
+    int offset;
+    int i;  /* NOTE(review): 'i' appears to be unused in this function */
+
+retry:
+    /* Pass 1: assign a byte offset to every LIR (each Thumb insn = 2 bytes) */
+    for (armLIR = (Armv5teLIR *) cUnit->firstLIRInsn, offset = 0;
+         armLIR;
+         armLIR = NEXT_LIR(armLIR)) {
+        armLIR->generic.offset = offset;
+        if (armLIR->opCode >= 0) {
+            offset += 2;
+        } else if (armLIR->opCode == ARMV5TE_PSEUDO_ALIGN4) {
+            /* Record (in operands[0]) whether a 2-byte pad will be emitted */
+            if (offset & 0x2) {
+                offset += 2;
+                armLIR->operands[0] = 1;
+            } else {
+                armLIR->operands[0] = 0;
+            }
+        }
+        /* Pseudo opcodes don't consume space */
+    }
+
+    /* Const values have to be word aligned */
+    offset = ((offset + 3) >> 2) << 2;
+
+    cUnit->dataOffset = offset;
+
+    /* Lay out the literal pool after the code */
+    for (lir = cUnit->wordList; lir; lir = lir->next) {
+        lir->offset = offset;
+        offset += 4;
+    }
+
+    cUnit->totalSize = offset;
+
+    /* Bail out (leaving baseAddr NULL) if the code cache cannot fit this */
+    if (gDvmJit.codeCacheByteUsed + offset > CODE_CACHE_SIZE) {
+        gDvmJit.codeCacheFull = true;
+        cUnit->baseAddr = NULL;
+        return;
+    }
+    cUnit->codeBuffer = dvmCompilerNew(offset, true);
+    if (cUnit->codeBuffer == NULL) {
+        LOGE("Code buffer allocation failure\n");
+        cUnit->baseAddr = NULL;
+        return;
+    }
+
+    bool needRetry = assembleInstructions(
+        cUnit, (intptr_t) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed);
+
+    /* Instructions were rearranged - recompute all offsets and reassemble */
+    if (needRetry)
+        goto retry;
+
+    writeDataContent(cUnit);
+
+    cUnit->baseAddr = (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
+    gDvmJit.codeCacheByteUsed += offset;
+
+
+    /* Install the compilation */
+    memcpy(cUnit->baseAddr, cUnit->codeBuffer, offset);
+    gDvmJit.numCompilations++;
+
+    /* Flush dcache and invalidate the icache to maintain coherence */
+    cacheflush((intptr_t) cUnit->baseAddr,
+               (intptr_t) (cUnit->baseAddr + offset), 0);
+}
+
+/*
+ * Perform translation chain operation.
+ * For ARM, we'll use a pair of thumb instructions to generate
+ * an unconditional chaining branch of up to 4MB in distance.
+ * Use a BL, though we don't really need the link.  The format is
+ *     111HHooooooooooo
+ * Where HH is 10 for the 1st inst, and 11 for the second and
+ * the "o" field is each instruction's 11-bit contribution to the
+ * 22-bit branch offset.
+ * TUNING: use a single-instruction variant if it reaches.
+ */
+void* dvmJitChain(void* tgtAddr, u4* branchAddr)
+{
+    int baseAddr = (u4) branchAddr + 4;
+    int branchOffset = (int) tgtAddr - baseAddr;
+    u4 thumb1;
+    u4 thumb2;
+    u4 newInst;
+
+    assert((branchOffset >= -(1<<22)) && (branchOffset <= ((1<<22)-2)));
+
+    gDvmJit.translationChains++;
+
+    COMPILER_TRACE_CHAINING(
+        LOGD("Jit Runtime: chaining 0x%x to 0x%x\n",
+             (int) branchAddr, (int) tgtAddr & -2));
+    /* NOTE(review): bitwise '|' works for these booleans, but '||' is usual */
+    if ((branchOffset < -2048) | (branchOffset > 2046)) {
+        /* Out of short-branch range: use the two-instruction BL pair */
+        thumb1 =  (0xf000 | ((branchOffset>>12) & 0x7ff));
+        thumb2 =  (0xf800 | ((branchOffset>> 1) & 0x7ff));
+    } else {
+        /* Fits in one unconditional branch; pad the second halfword */
+        thumb1 =  (0xe000 | ((branchOffset>> 1) & 0x7ff));
+        thumb2 =  0x4300;  /* nop -> or r0, r0 */
+    }
+
+    /* Patch both halfwords with a single 32-bit store */
+    newInst = thumb2<<16 | thumb1;
+    *branchAddr = newInst;
+    cacheflush((intptr_t) branchAddr, (intptr_t) branchAddr + 4, 0);
+
+    return tgtAddr;
+}
diff --git a/vm/compiler/codegen/armv5te/Codegen.c b/vm/compiler/codegen/armv5te/Codegen.c
new file mode 100644 (file)
index 0000000..178e536
--- /dev/null
@@ -0,0 +1,2892 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "interp/InterpDefs.h"
+#include "libdex/OpCode.h"
+#include "dexdump/OpCodeNames.h"
+#include "vm/compiler/CompilerInternals.h"
+#include "Armv5teLIR.h"
+#include "vm/mterp/common/FindInterface.h"
+
+/* Create the TemplateOpcode enum */
+#define JIT_TEMPLATE(X) TEMPLATE_##X,
+typedef enum {
+#include "../../template/armv5te/TemplateOpList.h"
+/*
+ * For example,
+ *     TEMPLATE_CMP_LONG,
+ *     TEMPLATE_RETURN,
+ *     ...
+ */
+    TEMPLATE_LAST_MARK,    /* also serves as the template count */
+} TemplateOpCode;
+#undef JIT_TEMPLATE
+
+/* Array holding the entry offset of each template relative to the first one */
+static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK];
+
+/* Track exercised opcodes */
+static int opcodeCoverage[256];
+
+/*****************************************************************************/
+
+/*
+ * The following are building blocks to construct low-level IRs with 0 - 3
+ * operands.
+ */
+/* Allocate a 0-operand LIR (or pseudo op) and append it to the code stream */
+static Armv5teLIR *newLIR0(CompilationUnit *cUnit, Armv5teOpCode opCode)
+{
+    Armv5teLIR *insn = dvmCompilerNew(sizeof(Armv5teLIR), true);
+    assert(isPseudoOpCode(opCode) || EncodingMap[opCode].operands == 0);
+    insn->opCode = opCode;
+    dvmCompilerAppendLIR(cUnit, (LIR *) insn);
+    return insn;
+}
+
+/* Allocate a 1-operand LIR and append it to the code stream */
+static Armv5teLIR *newLIR1(CompilationUnit *cUnit, Armv5teOpCode opCode,
+                           int dest)
+{
+    Armv5teLIR *insn = dvmCompilerNew(sizeof(Armv5teLIR), true);
+    assert(isPseudoOpCode(opCode) || EncodingMap[opCode].operands == 1);
+    insn->opCode = opCode;
+    insn->operands[0] = dest;
+    dvmCompilerAppendLIR(cUnit, (LIR *) insn);
+    return insn;
+}
+
+/* Allocate a 2-operand LIR and append it to the code stream */
+static Armv5teLIR *newLIR2(CompilationUnit *cUnit, Armv5teOpCode opCode,
+                           int dest, int src1)
+{
+    Armv5teLIR *insn = dvmCompilerNew(sizeof(Armv5teLIR), true);
+    assert(isPseudoOpCode(opCode) || EncodingMap[opCode].operands == 2);
+    insn->opCode = opCode;
+    insn->operands[0] = dest;
+    insn->operands[1] = src1;
+    dvmCompilerAppendLIR(cUnit, (LIR *) insn);
+    return insn;
+}
+
+/* Allocate a 3-operand LIR and append it to the code stream */
+static Armv5teLIR *newLIR3(CompilationUnit *cUnit, Armv5teOpCode opCode,
+                           int dest, int src1, int src2)
+{
+    Armv5teLIR *insn = dvmCompilerNew(sizeof(Armv5teLIR), true);
+    assert(isPseudoOpCode(opCode) || EncodingMap[opCode].operands == 3);
+    insn->opCode = opCode;
+    insn->operands[0] = dest;
+    insn->operands[1] = src1;
+    insn->operands[2] = src2;
+    dvmCompilerAppendLIR(cUnit, (LIR *) insn);
+    return insn;
+}
+
+/*
+ * Emit opCode with srcdest as both source and destination, picking the
+ * 2- or 3-operand form based on the opcode's encoding.
+ */
+static Armv5teLIR *newLIR23(CompilationUnit *cUnit, Armv5teOpCode opCode,
+                            int srcdest, int src2)
+{
+    assert(!isPseudoOpCode(opCode));
+    if (EncodingMap[opCode].operands==2)
+        return newLIR2(cUnit, opCode, srcdest, src2);
+    else
+        return newLIR3(cUnit, opCode, srcdest, srcdest, src2);
+}
+
+/*****************************************************************************/
+
+/*
+ * The following are building blocks to insert constants into the pool or
+ * instruction streams.
+ */
+
+/*
+ * Add a 32-bit constant either in the constant pool or mixed with code.
+ * Returns the pool entry when !inPlace, NULL when emitted inline as two
+ * 16-bit data halfwords.
+ */
+static Armv5teLIR *addWordData(CompilationUnit *cUnit, int value, bool inPlace)
+{
+    /* Add the constant to the literal pool */
+    if (!inPlace) {
+        Armv5teLIR *newValue = dvmCompilerNew(sizeof(Armv5teLIR), true);
+        newValue->operands[0] = value;
+        newValue->generic.next = cUnit->wordList;
+        cUnit->wordList = (LIR *) newValue;
+        return newValue;
+    } else {
+        /* Add the constant in the middle of code stream */
+        newLIR1(cUnit, ARMV5TE_16BIT_DATA, (value & 0xffff));
+        newLIR1(cUnit, ARMV5TE_16BIT_DATA, (value >> 16));
+    }
+    return NULL;
+}
+
+/*
+ * Search the existing constants in the literal pool for an exact or close match
+ * within specified delta (greater or equal to 0).
+ */
+static Armv5teLIR *scanLiteralPool(CompilationUnit *cUnit, int value,
+                                   unsigned int delta)
+{
+    LIR *dataTarget = cUnit->wordList;
+    while (dataTarget) {
+        /* Unsigned compare accepts entries in [value-delta .. value] range */
+        if (((unsigned) (value - ((Armv5teLIR *) dataTarget)->operands[0])) <=
+            delta)
+            return (Armv5teLIR *) dataTarget;
+        dataTarget = dataTarget->next;
+    }
+    return NULL;
+}
+
+/*
+ * Load a immediate using a shortcut if possible; otherwise
+ * grab from the per-translation literal pool
+ */
+void loadConstant(CompilationUnit *cUnit, int rDest, int value)
+{
+    /* See if the value can be constructed cheaply */
+    if ((value >= 0) && (value <= 255)) {
+        /* Fits in a MOV immediate */
+        newLIR2(cUnit, ARMV5TE_MOV_IMM, rDest, value);
+        return;
+    } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) {
+        /* [-256, -1]: materialize the complement, then invert it */
+        newLIR2(cUnit, ARMV5TE_MOV_IMM, rDest, ~value);
+        newLIR2(cUnit, ARMV5TE_MVN, rDest, rDest);
+        return;
+    }
+    /* No shortcut - go ahead and use literal pool */
+    Armv5teLIR *dataTarget = scanLiteralPool(cUnit, value, 255);
+    if (dataTarget == NULL) {
+        dataTarget = addWordData(cUnit, value, false);
+    }
+    Armv5teLIR *loadPcRel = dvmCompilerNew(sizeof(Armv5teLIR), true);
+    loadPcRel->opCode = ARMV5TE_LDR_PC_REL;
+    loadPcRel->generic.target = (LIR *) dataTarget;
+    loadPcRel->operands[0] = rDest;
+    dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel);
+
+    /*
+     * To save space in the constant pool, we use the ADD_RRI8 instruction to
+     * add up to 255 to an existing constant value.
+     */
+    if (dataTarget->operands[0] != value) {
+        newLIR2(cUnit, ARMV5TE_ADD_RI8, rDest, value - dataTarget->operands[0]);
+    }
+}
+
+/*
+ * Export the Dalvik PC associated with an instruction to the StackSave area.
+ * rDPC and rAddr are used as scratch registers.
+ */
+static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr)
+{
+    int offset = offsetof(StackSaveArea, xtra.currentPc);
+    loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
+    /* Compute &StackSaveArea.xtra.currentPc, which sits below rFP */
+    newLIR2(cUnit, ARMV5TE_MOV_RR, rAddr, rFP);
+    newLIR2(cUnit, ARMV5TE_SUB_RI8, rAddr, sizeof(StackSaveArea) - offset);
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, rDPC, rAddr, 0);
+}
+
+/* Generate a conditional branch to target with the given condition code */
+static void genConditionalBranch(CompilationUnit *cUnit,
+                                 Armv5teConditionCode cond,
+                                 Armv5teLIR *target)
+{
+    Armv5teLIR *branch = newLIR2(cUnit, ARMV5TE_B_COND, 0, cond);
+    branch->generic.target = (LIR *) target;
+}
+
+/* Generate an unconditional branch to target */
+static void genUnconditionalBranch(CompilationUnit *cUnit, Armv5teLIR *target)
+{
+    Armv5teLIR *branch = newLIR0(cUnit, ARMV5TE_B_UNCOND);
+    branch->generic.target = (LIR *) target;
+}
+
+#define USE_IN_CACHE_HANDLER 1
+
+/*
+ * Jump to the out-of-line handler in ARM mode to finish executing the
+ * remaining of more complex instructions.
+ */
+static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode)
+{
+#if USE_IN_CACHE_HANDLER
+    /*
+     * NOTE - In practice BLX only needs one operand, but since the assembler
+     * may abort itself and retry due to other out-of-range conditions we
+     * cannot really use operand[0] to store the absolute target address since
+     * it may get clobbered by the final relative offset. Therefore,
+     * we fake BLX_1 is a two operand instruction and the absolute target
+     * address is stored in operand[1].
+     */
+    newLIR2(cUnit, ARMV5TE_BLX_1,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
+    newLIR2(cUnit, ARMV5TE_BLX_2,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
+#else
+    /*
+     * In case we want to access the statically compiled handlers for
+     * debugging purposes, define USE_IN_CACHE_HANDLER to 0
+     */
+    void *templatePtr;
+
+#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
+#include "../../template/armv5te/TemplateOpList.h"
+#undef JIT_TEMPLATE
+    switch (opCode) {
+#define JIT_TEMPLATE(X) \
+        case TEMPLATE_##X: { templatePtr = dvmCompiler_TEMPLATE_##X; break; }
+#include "../../template/armv5te/TemplateOpList.h"
+#undef JIT_TEMPLATE
+        default: templatePtr = NULL;
+    }
+    /* Indirect call through r7 to the statically linked handler */
+    loadConstant(cUnit, r7, (int) templatePtr);
+    newLIR1(cUnit, ARMV5TE_BLX_R, r7);
+#endif
+}
+
+/* Perform the actual operation for OP_RETURN_* */
+static void genReturnCommon(CompilationUnit *cUnit, MIR *mir)
+{
+    genDispatchToHandler(cUnit, TEMPLATE_RETURN);
+#if defined(INVOKE_STATS)
+    gDvmJit.jitReturn++;
+#endif
+    int dPC = (int) (cUnit->method->insns + mir->offset);
+    Armv5teLIR *branch = newLIR0(cUnit, ARMV5TE_B_UNCOND);
+    /* Set up the place holder to reconstruct this Dalvik PC */
+    Armv5teLIR *pcrLabel = dvmCompilerNew(sizeof(Armv5teLIR), true);
+    pcrLabel->opCode = ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL;
+    pcrLabel->operands[0] = dPC;
+    pcrLabel->operands[1] = mir->offset;
+    /* Insert the place holder to the growable list */
+    dvmInsertGrowableList(&cUnit->pcReconstructionList, pcrLabel);
+    /* Branch to the PC reconstruction code */
+    branch->generic.target = (LIR *) pcrLabel;
+}
+
+/*
+ * Load a pair of values of rFP[src..src+1] and store them into rDestLo and
+ * rDestHi
+ */
+static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
+                          int rDestHi)
+{
+    /* Use reg + imm5*4 to load the values if possible */
+    if (vSrc <= 30) {
+        newLIR3(cUnit, ARMV5TE_LDR_RRI5, rDestLo, rFP, vSrc);
+        newLIR3(cUnit, ARMV5TE_LDR_RRI5, rDestHi, rFP, vSrc+1);
+    } else {
+        if (vSrc <= 64) {
+            /* Sneak 4 into the base address first */
+            newLIR3(cUnit, ARMV5TE_ADD_RRI3, rDestLo, rFP, 4);
+            /*
+             * BUGFIX: add the remaining displacement to the base register
+             * (rDestLo) rather than the uninitialized rDestHi - matches the
+             * pattern used by storeValuePair/loadValueAddress.
+             */
+            newLIR2(cUnit, ARMV5TE_ADD_RI8, rDestLo, (vSrc-1)*4);
+        } else {
+            /* Offset too far from rFP */
+            loadConstant(cUnit, rDestLo, vSrc*4);
+            newLIR3(cUnit, ARMV5TE_ADD_RRR, rDestLo, rFP, rDestLo);
+        }
+        assert(rDestLo != rDestHi);
+        /* rDestLo serves as the LDMIA base and is overwritten by the load */
+        newLIR2(cUnit, ARMV5TE_LDMIA, rDestLo, (1<<rDestLo) | (1<<(rDestHi)));
+    }
+}
+
+/*
+ * Store the pair of values in rSrcLo and rSrcHi into rFP[vDest] and
+ * rFP[vDest+1], using rScratch for address computation when needed.
+ */
+static void storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
+                           int vDest, int rScratch)
+{
+    /* Use reg + imm5*4 to store the values if possible */
+    if (vDest <= 30) {
+        newLIR3(cUnit, ARMV5TE_STR_RRI5, rSrcLo, rFP, vDest);
+        newLIR3(cUnit, ARMV5TE_STR_RRI5, rSrcHi, rFP, vDest+1);
+    } else {
+        if (vDest <= 64) {
+            /* Sneak 4 into the base address first */
+            newLIR3(cUnit, ARMV5TE_ADD_RRI3, rScratch, rFP, 4);
+            newLIR2(cUnit, ARMV5TE_ADD_RI8, rScratch, (vDest-1)*4);
+        } else {
+            /* Offset too far from rFP */
+            loadConstant(cUnit, rScratch, vDest*4);
+            newLIR3(cUnit, ARMV5TE_ADD_RRR, rScratch, rFP, rScratch);
+        }
+        assert(rSrcLo != rSrcHi);
+        newLIR2(cUnit, ARMV5TE_STMIA, rScratch, (1<<rSrcLo) | (1 << (rSrcHi)));
+    }
+}
+
+/* Compute the address of Dalvik register vSrc (rFP + vSrc*4) into rDest */
+static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest)
+{
+    /* RRI3 can add up to 7 */
+    if (vSrc <= 1) {
+        newLIR3(cUnit, ARMV5TE_ADD_RRI3, rDest, rFP, vSrc*4);
+    } else if (vSrc <= 64) {
+        /* Sneak 4 into the base address first */
+        newLIR3(cUnit, ARMV5TE_ADD_RRI3, rDest, rFP, 4);
+        newLIR2(cUnit, ARMV5TE_ADD_RI8, rDest, (vSrc-1)*4);
+    } else {
+        /* Offset too large for an immediate - go through a constant load */
+        loadConstant(cUnit, rDest, vSrc*4);
+        newLIR3(cUnit, ARMV5TE_ADD_RRR, rDest, rFP, rDest);
+    }
+}
+
+
+/* Load a single value from rFP[vSrc] into rDest */
+static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest)
+{
+    /* Use reg + imm5*4 to load the value if possible */
+    if (vSrc <= 31) {
+        newLIR3(cUnit, ARMV5TE_LDR_RRI5, rDest, rFP, vSrc);
+    } else {
+        /* Out of imm5 range - compute the byte offset in rDest first */
+        loadConstant(cUnit, rDest, vSrc*4);
+        newLIR3(cUnit, ARMV5TE_LDR_RRR, rDest, rFP, rDest);
+    }
+}
+
+/* Store rSrc into rFP[vDest], using rScratch for address math when needed */
+static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest,
+                       int rScratch)
+{
+    /* Use reg + imm5*4 to store the value if possible */
+    if (vDest <= 31) {
+        newLIR3(cUnit, ARMV5TE_STR_RRI5, rSrc, rFP, vDest);
+    } else {
+        loadConstant(cUnit, rScratch, vDest*4);
+        newLIR3(cUnit, ARMV5TE_STR_RRR, rSrc, rFP, rScratch);
+    }
+}
+
+/* Calculate the address of rFP+vSrc*4 (same strategy as loadValueAddress) */
+static void calculateValueAddress(CompilationUnit *cUnit, int vSrc, int rDest)
+{
+    /* Use add rd, rs, imm_3 */
+    if (vSrc <= 1) {
+        newLIR3(cUnit, ARMV5TE_ADD_RRI3, rDest, rFP, vSrc*4);
+    } else if (vSrc <= 64) {
+        /* Use add rd, imm_8 */
+        /* Sneak in 4 above rFP to cover one more register offset (ie v64) */
+        newLIR3(cUnit, ARMV5TE_ADD_RRI3, rDest, rFP, 4);
+        newLIR2(cUnit, ARMV5TE_ADD_RI8, rDest, (vSrc-1)*4);
+    } else {
+        /* Load offset from the constant pool */
+        loadConstant(cUnit, rDest, vSrc*4);
+        newLIR3(cUnit, ARMV5TE_ADD_RRR, rDest, rFP, rDest);
+    }
+}
+
+/*
+ * Perform a binary operation on 64-bit operands and leave the results in the
+ * r0/r1 pair.  preinst operates on the low words (r0 op r2) and inst on the
+ * high words (r1 op r3); r2 is reused as the store scratch register.
+ */
+static void genBinaryOpWide(CompilationUnit *cUnit, int vDest,
+                            Armv5teOpCode preinst, Armv5teOpCode inst)
+{
+    newLIR23(cUnit, preinst, r0, r2);
+    newLIR23(cUnit, inst, r1, r3);
+    storeValuePair(cUnit, r0, r1, vDest, r2);
+}
+
+/* Perform a binary operation on 32-bit operands (r0 op r1 -> rFP[vDest]) */
+static void genBinaryOp(CompilationUnit *cUnit, int vDest, Armv5teOpCode inst)
+{
+    newLIR23(cUnit, inst, r0, r1);
+    storeValue(cUnit, r0, vDest, r1);
+}
+
+/*
+ * Create the PC reconstruction slot if not already done, then point the
+ * given branch at it.  Returns the (possibly newly created) pcrLabel so
+ * callers can reuse it for subsequent checks at the same Dalvik PC.
+ */
+static inline Armv5teLIR *genCheckCommon(CompilationUnit *cUnit, int dOffset,
+                                         Armv5teLIR *branch,
+                                         Armv5teLIR *pcrLabel)
+{
+    /* Set up the place holder to reconstruct this Dalvik PC */
+    if (pcrLabel == NULL) {
+        int dPC = (int) (cUnit->method->insns + dOffset);
+        pcrLabel = dvmCompilerNew(sizeof(Armv5teLIR), true);
+        pcrLabel->opCode = ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL;
+        pcrLabel->operands[0] = dPC;
+        pcrLabel->operands[1] = dOffset;
+        /* Insert the place holder to the growable list */
+        dvmInsertGrowableList(&cUnit->pcReconstructionList, pcrLabel);
+    }
+    /* Branch to the PC reconstruction code */
+    branch->generic.target = (LIR *) pcrLabel;
+    return pcrLabel;
+}
+
+/*
+ * Perform a "reg cmp imm" operation and jump to the PCR region if condition
+ * satisfies.
+ */
+static inline Armv5teLIR *genRegImmCheck(CompilationUnit *cUnit,
+                                         Armv5teConditionCode cond, int reg,
+                                         int checkValue, int dOffset,
+                                         Armv5teLIR *pcrLabel)
+{
+    newLIR2(cUnit, ARMV5TE_CMP_RI8, reg, checkValue);
+    Armv5teLIR *branch = newLIR2(cUnit, ARMV5TE_B_COND, 0, cond);
+    return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
+}
+
+/*
+ * Perform a "reg cmp reg" operation and jump to the PCR region if condition
+ * satisfies.
+ * NOTE(review): name looks like a typo for "insertRegRegCheck".
+ */
+static inline Armv5teLIR *inertRegRegCheck(CompilationUnit *cUnit,
+                                           Armv5teConditionCode cond,
+                                           int reg1, int reg2, int dOffset,
+                                           Armv5teLIR *pcrLabel)
+{
+    newLIR2(cUnit, ARMV5TE_CMP_RR, reg1, reg2);
+    Armv5teLIR *branch = newLIR2(cUnit, ARMV5TE_B_COND, 0, cond);
+    return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
+}
+
+/* Perform null-check on a register: branch to the PCR cell if reg == 0 */
+static Armv5teLIR *genNullCheck(CompilationUnit *cUnit, int reg, int dOffset,
+                                Armv5teLIR *pcrLabel)
+{
+    return genRegImmCheck(cUnit, ARM_COND_EQ, reg, 0, dOffset, pcrLabel);
+}
+
+/* Bounds check: branch to the PCR cell if rIndex >= rBound (unsigned, CS) */
+static Armv5teLIR *genBoundsCheck(CompilationUnit *cUnit, int rIndex,
+                                  int rBound, int dOffset, Armv5teLIR *pcrLabel)
+{
+    return inertRegRegCheck(cUnit, ARM_COND_CS, rIndex, rBound, dOffset,
+                            pcrLabel);
+}
+
+/* Generate an unconditional branch to the PCR cell (back to the interpreter) */
+static inline Armv5teLIR *genTrap(CompilationUnit *cUnit, int dOffset,
+                                  Armv5teLIR *pcrLabel)
+{
+    Armv5teLIR *branch = newLIR0(cUnit, ARMV5TE_B_UNCOND);
+    return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
+}
+
+/* Load a wide (64-bit) field from an object instance into vA/vA+1 */
+static void genIGetWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+
+    loadValue(cUnit, dInsn->vB, r2);
+    loadConstant(cUnit, r3, fieldOffset);
+    genNullCheck(cUnit, r2, mir->offset, NULL); /* null object? */
+    newLIR3(cUnit, ARMV5TE_ADD_RRR, r2, r2, r3);
+    /* Load both words of the field with a single LDMIA into r0/r1 */
+    newLIR2(cUnit, ARMV5TE_LDMIA, r2, (1<<r0 | 1<<r1));
+    storeValuePair(cUnit, r0, r1, dInsn->vA, r3);
+}
+
+/* Store a wide (64-bit) value from vA/vA+1 into an object instance field */
+static void genIPutWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+
+    loadValue(cUnit, dInsn->vB, r2);
+    loadValuePair(cUnit, dInsn->vA, r0, r1);
+    loadConstant(cUnit, r3, fieldOffset);
+    genNullCheck(cUnit, r2, mir->offset, NULL); /* null object? */
+    newLIR3(cUnit, ARMV5TE_ADD_RRR, r2, r2, r3);
+    /* Store both words with a single STMIA from r0/r1 */
+    newLIR2(cUnit, ARMV5TE_STMIA, r2, (1<<r0 | 1<<r1));
+}
+
+/*
+ * Load a field from an object instance
+ *
+ * Inst should be one of:
+ *      ARMV5TE_LDR_RRR
+ *      ARMV5TE_LDRB_RRR
+ *      ARMV5TE_LDRH_RRR
+ *      ARMV5TE_LDRSB_RRR
+ *      ARMV5TE_LDRSH_RRR
+ */
+static void genIGet(CompilationUnit *cUnit, MIR *mir, Armv5teOpCode inst,
+                    int fieldOffset)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+
+    /* TUNING: write a utility routine to load via base + constant offset */
+    loadValue(cUnit, dInsn->vB, r0);
+    loadConstant(cUnit, r1, fieldOffset);
+    genNullCheck(cUnit, r0, mir->offset, NULL); /* null object? */
+    newLIR3(cUnit, inst, r0, r0, r1);
+    storeValue(cUnit, r0, dInsn->vA, r1);
+}
+
+/*
+ * Store a field to an object instance
+ *
+ * Inst should be one of:
+ *      ARMV5TE_STR_RRR
+ *      ARMV5TE_STRB_RRR
+ *      ARMV5TE_STRH_RRR
+ */
+static void genIPut(CompilationUnit *cUnit, MIR *mir, Armv5teOpCode inst,
+                    int fieldOffset)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+
+    /* TUNING: write a utility routine to load via base + constant offset */
+    loadValue(cUnit, dInsn->vB, r2);
+    loadConstant(cUnit, r1, fieldOffset);
+    loadValue(cUnit, dInsn->vA, r0);
+    genNullCheck(cUnit, r2, mir->offset, NULL); /* null object? */
+    newLIR3(cUnit, inst, r0, r2, r1);
+}
+
+
+/* TODO: This should probably be done as an out-of-line instruction handler. */
+
+/*
+ * Generate array load
+ *
+ * Inst should be one of:
+ *      ARMV5TE_LDR_RRR
+ *      ARMV5TE_LDRB_RRR
+ *      ARMV5TE_LDRH_RRR
+ *      ARMV5TE_LDRSB_RRR
+ *      ARMV5TE_LDRSH_RRR
+ *
+ * scale is log2 of the element size; scale==3 means a 64-bit element
+ * loaded as two words into a register pair.
+ */
+static void genArrayGet(CompilationUnit *cUnit, MIR *mir, Armv5teOpCode inst,
+                        int vArray, int vIndex, int vDest, int scale)
+{
+    int lenOffset = offsetof(ArrayObject, length);
+    int dataOffset = offsetof(ArrayObject, contents);
+
+    loadValue(cUnit, vArray, r2);
+    loadValue(cUnit, vIndex, r3);
+
+    /* null object? */
+    Armv5teLIR * pcrLabel = genNullCheck(cUnit, r2, mir->offset, NULL);
+    newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r2, lenOffset >> 2);  /* Get len */
+    newLIR2(cUnit, ARMV5TE_ADD_RI8, r2, dataOffset); /* r2 -> array data */
+    genBoundsCheck(cUnit, r3, r0, mir->offset, pcrLabel);
+    /* Scale the index into a byte offset */
+    if (scale) {
+        newLIR3(cUnit, ARMV5TE_LSL, r3, r3, scale);
+    }
+    if (scale==3) {
+        /* 64-bit element: load low word then high word (base + 4) */
+        newLIR3(cUnit, inst, r0, r2, r3);
+        newLIR2(cUnit, ARMV5TE_ADD_RI8, r2, 4);
+        newLIR3(cUnit, inst, r1, r2, r3);
+        storeValuePair(cUnit, r0, r1, vDest, r3);
+    } else {
+        newLIR3(cUnit, inst, r0, r2, r3);
+        storeValue(cUnit, r0, vDest, r3);
+    }
+}
+
+/* TODO: This should probably be done as an out-of-line instruction handler. */
+
+/*
+ * Generate array store
+ *
+ * Inst should be one of:
+ *      ARMV5TE_STR_RRR
+ *      ARMV5TE_STRB_RRR
+ *      ARMV5TE_STRH_RRR
+ *
+ * NOTE(review): unlike genArrayGet, the pcrLabel returned by the null check
+ * is not passed to genBoundsCheck here, so a second reconstruction cell is
+ * created for the same Dalvik PC - confirm whether that is intentional.
+ */
+static void genArrayPut(CompilationUnit *cUnit, MIR *mir, Armv5teOpCode inst,
+                        int vArray, int vIndex, int vSrc, int scale)
+{
+    int lenOffset = offsetof(ArrayObject, length);
+    int dataOffset = offsetof(ArrayObject, contents);
+
+    loadValue(cUnit, vArray, r2);
+    loadValue(cUnit, vIndex, r3);
+    genNullCheck(cUnit, r2, mir->offset, NULL); /* null object? */
+    newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r2, lenOffset >> 2);  /* Get len */
+    newLIR2(cUnit, ARMV5TE_ADD_RI8, r2, dataOffset); /* r2 -> array data */
+    genBoundsCheck(cUnit, r3, r0, mir->offset, NULL);
+    /* at this point, r2 points to array, r3 is unscaled index */
+    if (scale==3) {
+        loadValuePair(cUnit, vSrc, r0, r1);
+    } else {
+        loadValue(cUnit, vSrc, r0);
+    }
+    if (scale) {
+        newLIR3(cUnit, ARMV5TE_LSL, r3, r3, scale);
+    }
+    /*
+     * at this point, r2 points to array, r3 is scaled index, and r0[r1] is
+     * data
+     */
+    if (scale==3) {
+        /* 64-bit element: store low word then high word (base + 4) */
+        newLIR3(cUnit, inst, r0, r2, r3);
+        newLIR2(cUnit, ARMV5TE_ADD_RI8, r2, 4);
+        newLIR3(cUnit, inst, r1, r2, r3);
+    } else {
+        newLIR3(cUnit, inst, r0, r2, r3);
+    }
+}
+
+/*
+ * Generate a 64-bit shift (shl/shr/ushr) by dispatching to the matching
+ * out-of-line template.  Operands go in r0/r1 (value) and r2 (shift count);
+ * the result comes back in r0/r1.  Returns true for an unhandled opcode.
+ */
+static bool genShiftOpLong(CompilationUnit *cUnit, MIR *mir, int vDest,
+                           int vSrc1, int vShift)
+{
+     loadValuePair(cUnit, vSrc1, r0, r1);
+     loadValue(cUnit, vShift, r2);
+     switch( mir->dalvikInsn.opCode) {
+         case OP_SHL_LONG:
+         case OP_SHL_LONG_2ADDR:
+             genDispatchToHandler(cUnit, TEMPLATE_SHL_LONG);
+             break;
+         case OP_SHR_LONG:
+         case OP_SHR_LONG_2ADDR:
+             genDispatchToHandler(cUnit, TEMPLATE_SHR_LONG);
+             break;
+         case OP_USHR_LONG:
+         case OP_USHR_LONG_2ADDR:
+             genDispatchToHandler(cUnit, TEMPLATE_USHR_LONG);
+             break;
+         default:
+             return true;
+     }
+     storeValuePair(cUnit, r0, r1, vDest, r2);
+     return false;
+}
+
+/*
+ * Generate a single-precision float arithmetic op by calling the matching
+ * AEABI softfloat helper (args in r0/r1, result in r0).  Returns true for
+ * an unhandled opcode.
+ */
+static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
+                            int vSrc1, int vSrc2)
+{
+    void* funct;
+    /* TODO: use a proper include file to define these */
+    float __aeabi_fadd(float a, float b);
+    float __aeabi_fsub(float a, float b);
+    float __aeabi_fdiv(float a, float b);
+    float __aeabi_fmul(float a, float b);
+    float fmodf(float a, float b);
+
+    switch (mir->dalvikInsn.opCode) {
+        case OP_ADD_FLOAT_2ADDR:
+        case OP_ADD_FLOAT:
+            funct = (void*) __aeabi_fadd;
+            break;
+        case OP_SUB_FLOAT_2ADDR:
+        case OP_SUB_FLOAT:
+            funct = (void*) __aeabi_fsub;
+            break;
+        case OP_DIV_FLOAT_2ADDR:
+        case OP_DIV_FLOAT:
+            funct = (void*) __aeabi_fdiv;
+            break;
+        case OP_MUL_FLOAT_2ADDR:
+        case OP_MUL_FLOAT:
+            funct = (void*) __aeabi_fmul;
+            break;
+        case OP_REM_FLOAT_2ADDR:
+        case OP_REM_FLOAT:
+            funct = (void*) fmodf;
+            break;
+        case OP_NEG_FLOAT: {
+            /* Flip the IEEE sign bit: adding 0x80000000 only toggles bit 31 */
+            loadValue(cUnit, vSrc2, r0);
+            loadConstant(cUnit, r1, 0x80000000);
+            newLIR3(cUnit, ARMV5TE_ADD_RRR, r0, r0, r1);
+            storeValue(cUnit, r0, vDest, r1);
+            return false;
+        }
+        default:
+            return true;
+    }
+    /* Marshal operands, call the helper indirectly, and store the result */
+    loadConstant(cUnit, r2, (int)funct);
+    loadValue(cUnit, vSrc1, r0);
+    loadValue(cUnit, vSrc2, r1);
+    newLIR1(cUnit, ARMV5TE_BLX_R, r2);
+    storeValue(cUnit, r0, vDest, r1);
+    return false;
+}
+
+static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest,
+                             int vSrc1, int vSrc2)
+{
+    void* funct;
+    /* TODO: use a proper include file to define these */
+    double __aeabi_dadd(double a, double b);
+    double __aeabi_dsub(double a, double b);
+    double __aeabi_ddiv(double a, double b);
+    double __aeabi_dmul(double a, double b);
+    double fmod(double a, double b);
+
+    switch (mir->dalvikInsn.opCode) {
+        case OP_ADD_DOUBLE_2ADDR:
+        case OP_ADD_DOUBLE:
+            funct = (void*) __aeabi_dadd;
+            break;
+        case OP_SUB_DOUBLE_2ADDR:
+        case OP_SUB_DOUBLE:
+            funct = (void*) __aeabi_dsub;
+            break;
+        case OP_DIV_DOUBLE_2ADDR:
+        case OP_DIV_DOUBLE:
+            funct = (void*) __aeabi_ddiv;
+            break;
+        case OP_MUL_DOUBLE_2ADDR:
+        case OP_MUL_DOUBLE:
+            funct = (void*) __aeabi_dmul;
+            break;
+        case OP_REM_DOUBLE_2ADDR:
+        case OP_REM_DOUBLE:
+            funct = (void*) fmod;
+            break;
+        case OP_NEG_DOUBLE: {
+            loadValuePair(cUnit, vSrc2, r0, r1);
+            loadConstant(cUnit, r2, 0x80000000);
+            newLIR3(cUnit, ARMV5TE_ADD_RRR, r1, r1, r2);
+            storeValuePair(cUnit, r0, r1, vDest, r2);
+            return false;
+        }
+        default:
+            return true;
+    }
+    loadConstant(cUnit, r4PC, (int)funct);
+    loadValuePair(cUnit, vSrc1, r0, r1);
+    loadValuePair(cUnit, vSrc2, r2, r3);
+    newLIR1(cUnit, ARMV5TE_BLX_R, r4PC);
+    storeValuePair(cUnit, r0, r1, vDest, r2);
+    return false;
+}
+
/*
 * Generate code for long (64-bit) arithmetic.  Simple ops are emitted inline
 * as a low-word/high-word instruction pair (firstOp/secondOp); multiply
 * dispatches to a handler template; div/rem call the EABI runtime helper.
 * Returns true if the opcode is not handled here.
 */
static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, int vDest,
                           int vSrc1, int vSrc2)
{
    int firstOp = ARMV5TE_BKPT;     /* instruction for the low words */
    int secondOp = ARMV5TE_BKPT;    /* instruction for the high words */
    bool callOut = false;           /* true => call a runtime helper */
    void *callTgt;                  /* helper entry point when callOut */
    int retReg = r0;                /* first register of the 64-bit result */
    /* TODO - find proper .h file to declare these */
    long long __aeabi_ldivmod(long long op1, long long op2);

    switch (mir->dalvikInsn.opCode) {
        case OP_NOT_LONG:
            firstOp = ARMV5TE_MVN;
            secondOp = ARMV5TE_MVN;
            break;
        case OP_ADD_LONG:
        case OP_ADD_LONG_2ADDR:
            /* add low words, then add-with-carry the high words */
            firstOp = ARMV5TE_ADD_RRR;
            secondOp = ARMV5TE_ADC;
            break;
        case OP_SUB_LONG:
        case OP_SUB_LONG_2ADDR:
            firstOp = ARMV5TE_SUB_RRR;
            secondOp = ARMV5TE_SBC;
            break;
        case OP_MUL_LONG:
        case OP_MUL_LONG_2ADDR:
            /* fully handled by the out-of-line template */
            loadValuePair(cUnit, vSrc1, r0, r1);
            loadValuePair(cUnit, vSrc2, r2, r3);
            genDispatchToHandler(cUnit, TEMPLATE_MUL_LONG);
            storeValuePair(cUnit, r0, r1, vDest, r2);
            return false;
            break;
        case OP_DIV_LONG:
        case OP_DIV_LONG_2ADDR:
            callOut = true;
            retReg = r0;
            callTgt = (void*)__aeabi_ldivmod;
            break;
        /* NOTE - result is in r2/r3 instead of r0/r1 */
        case OP_REM_LONG:
        case OP_REM_LONG_2ADDR:
            callOut = true;
            callTgt = (void*)__aeabi_ldivmod;
            retReg = r2;
            break;
        case OP_AND_LONG:
        case OP_AND_LONG_2ADDR:
            firstOp = ARMV5TE_AND_RR;
            secondOp = ARMV5TE_AND_RR;
            break;
        case OP_OR_LONG:
        case OP_OR_LONG_2ADDR:
            firstOp = ARMV5TE_ORR;
            secondOp = ARMV5TE_ORR;
            break;
        case OP_XOR_LONG:
        case OP_XOR_LONG_2ADDR:
            firstOp = ARMV5TE_EOR;
            secondOp = ARMV5TE_EOR;
            break;
        case OP_NEG_LONG:
            /* result = 0 - src: SUB for the low word, SBC for the high */
            loadValuePair(cUnit, vSrc2, r2, r3);
            loadConstant(cUnit, r1, 0);
            newLIR3(cUnit, ARMV5TE_SUB_RRR, r0, r1, r2);
            newLIR2(cUnit, ARMV5TE_SBC, r1, r3);
            storeValuePair(cUnit, r0, r1, vDest, r2);
            return false;
        default:
            LOGE("Invalid long arith op");
            dvmAbort();     /* does not return */
    }
    if (!callOut) {
        loadValuePair(cUnit, vSrc1, r0, r1);
        loadValuePair(cUnit, vSrc2, r2, r3);
        genBinaryOpWide(cUnit, vDest, firstOp, secondOp);
    } else {
        /* EABI calling convention: op1 in r0/r1, op2 in r2/r3 */
        loadValuePair(cUnit, vSrc2, r2, r3);
        loadConstant(cUnit, r4PC, (int) callTgt);
        loadValuePair(cUnit, vSrc1, r0, r1);
        newLIR1(cUnit, ARMV5TE_BLX_R, r4PC);
        /* relies on retReg+1 being the next register in the enum */
        storeValuePair(cUnit, retReg, retReg+1, vDest, r4PC);
    }
    return false;
}
+
+static bool genArithOpInt(CompilationUnit *cUnit, MIR *mir, int vDest,
+                          int vSrc1, int vSrc2)
+{
+    int armOp = ARMV5TE_BKPT;
+    bool callOut = false;
+    bool checkZero = false;
+    int retReg = r0;
+    void *callTgt;
+
+    /* TODO - find proper .h file to declare these */
+    int __aeabi_idivmod(int op1, int op2);
+    int __aeabi_idiv(int op1, int op2);
+
+    switch (mir->dalvikInsn.opCode) {
+        case OP_NEG_INT:
+            armOp = ARMV5TE_NEG;
+            break;
+        case OP_NOT_INT:
+            armOp = ARMV5TE_MVN;
+            break;
+        case OP_ADD_INT:
+        case OP_ADD_INT_2ADDR:
+            armOp = ARMV5TE_ADD_RRR;
+            break;
+        case OP_SUB_INT:
+        case OP_SUB_INT_2ADDR:
+            armOp = ARMV5TE_SUB_RRR;
+            break;
+        case OP_MUL_INT:
+        case OP_MUL_INT_2ADDR:
+            armOp = ARMV5TE_MUL;
+            break;
+        case OP_DIV_INT:
+        case OP_DIV_INT_2ADDR:
+            callOut = true;
+            checkZero = true;
+            callTgt = __aeabi_idiv;
+            retReg = r0;
+            break;
+        /* NOTE: returns in r1 */
+        case OP_REM_INT:
+        case OP_REM_INT_2ADDR:
+            callOut = true;
+            checkZero = true;
+            callTgt = __aeabi_idivmod;
+            retReg = r1;
+            break;
+        case OP_AND_INT:
+        case OP_AND_INT_2ADDR:
+            armOp = ARMV5TE_AND_RR;
+            break;
+        case OP_OR_INT:
+        case OP_OR_INT_2ADDR:
+            armOp = ARMV5TE_ORR;
+            break;
+        case OP_XOR_INT:
+        case OP_XOR_INT_2ADDR:
+            armOp = ARMV5TE_EOR;
+            break;
+        case OP_SHL_INT:
+        case OP_SHL_INT_2ADDR:
+            armOp = ARMV5TE_LSLV;
+            break;
+        case OP_SHR_INT:
+        case OP_SHR_INT_2ADDR:
+            armOp = ARMV5TE_ASRV;
+            break;
+        case OP_USHR_INT:
+        case OP_USHR_INT_2ADDR:
+            armOp = ARMV5TE_LSRV;
+            break;
+        default:
+            LOGE("Invalid word arith op: 0x%x(%d)",
+                 mir->dalvikInsn.opCode, mir->dalvikInsn.opCode);
+            dvmAbort();
+    }
+    if (!callOut) {
+        loadValue(cUnit, vSrc1, r0);
+        loadValue(cUnit, vSrc2, r1);
+        genBinaryOp(cUnit, vDest, armOp);
+    } else {
+        loadValue(cUnit, vSrc2, r1);
+        loadConstant(cUnit, r2, (int) callTgt);
+        loadValue(cUnit, vSrc1, r0);
+        if (checkZero) {
+            genNullCheck(cUnit, r1, mir->offset, NULL);
+        }
+        newLIR1(cUnit, ARMV5TE_BLX_R, r2);
+        storeValue(cUnit, retReg, vDest, r2);
+    }
+    return false;
+}
+
+static bool genArithOp(CompilationUnit *cUnit, MIR *mir)
+{
+    OpCode opCode = mir->dalvikInsn.opCode;
+    int vA = mir->dalvikInsn.vA;
+    int vB = mir->dalvikInsn.vB;
+    int vC = mir->dalvikInsn.vC;
+
+    if ((opCode >= OP_ADD_LONG_2ADDR) && (opCode <= OP_XOR_LONG_2ADDR)) {
+        return genArithOpLong(cUnit,mir, vA, vA, vB);
+    }
+    if ((opCode >= OP_ADD_LONG) && (opCode <= OP_XOR_LONG)) {
+        return genArithOpLong(cUnit,mir, vA, vB, vC);
+    }
+    if ((opCode >= OP_SHL_LONG_2ADDR) && (opCode <= OP_USHR_LONG_2ADDR)) {
+        return genShiftOpLong(cUnit,mir, vA, vA, vB);
+    }
+    if ((opCode >= OP_SHL_LONG) && (opCode <= OP_USHR_LONG)) {
+        return genShiftOpLong(cUnit,mir, vA, vB, vC);
+    }
+    if ((opCode >= OP_ADD_INT_2ADDR) && (opCode <= OP_USHR_INT_2ADDR)) {
+        return genArithOpInt(cUnit,mir, vA, vA, vB);
+    }
+    if ((opCode >= OP_ADD_INT) && (opCode <= OP_USHR_INT)) {
+        return genArithOpInt(cUnit,mir, vA, vB, vC);
+    }
+    if ((opCode >= OP_ADD_FLOAT_2ADDR) && (opCode <= OP_REM_FLOAT_2ADDR)) {
+        return genArithOpFloat(cUnit,mir, vA, vA, vB);
+    }
+    if ((opCode >= OP_ADD_FLOAT) && (opCode <= OP_REM_FLOAT)) {
+        return genArithOpFloat(cUnit,mir, vA, vB, vC);
+    }
+    if ((opCode >= OP_ADD_DOUBLE_2ADDR) && (opCode <= OP_REM_DOUBLE_2ADDR)) {
+        return genArithOpDouble(cUnit,mir, vA, vA, vB);
+    }
+    if ((opCode >= OP_ADD_DOUBLE) && (opCode <= OP_REM_DOUBLE)) {
+        return genArithOpDouble(cUnit,mir, vA, vB, vC);
+    }
+    return true;
+}
+
+static bool genConversion(CompilationUnit *cUnit, MIR *mir, void *funct,
+                          int srcSize, int tgtSize)
+{
+    loadConstant(cUnit, r2, (int)funct);
+    if (srcSize == 1) {
+        loadValue(cUnit, mir->dalvikInsn.vB, r0);
+    } else {
+        loadValuePair(cUnit, mir->dalvikInsn.vB, r0, r1);
+    }
+    newLIR1(cUnit, ARMV5TE_BLX_R, r2);
+    if (tgtSize == 1) {
+        storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
+    } else {
+        storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2);
+    }
+    return false;
+}
+
+/* Experimental example of completely inlining a native replacement */
+static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir)
+{
+    int offset = (int) &((InterpState *) NULL)->retval;
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    assert(dInsn->vA == 1);
+    loadValue(cUnit, dInsn->arg[0], r0);
+    loadConstant(cUnit, r1, gDvm.offJavaLangString_count);
+    genNullCheck(cUnit, r0, mir->offset, NULL);
+    newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r0, r1);
+    newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rGLUE, offset >> 2);
+    return false;
+}
+
/*
 * Copy the (up to 5) invoke arguments from the caller's Dalvik frame into
 * the callee's outgoing argument area just below rFP - sizeof(StackSaveArea).
 * When pcrLabel is non-NULL, also emits a null check on "this" (r0) and
 * returns the check's LIR through *pcrLabel.
 */
static void genProcessArgsNoRange(CompilationUnit *cUnit, MIR *mir,
                                  DecodedInstruction *dInsn,
                                  Armv5teLIR **pcrLabel)
{
    unsigned int i;
    unsigned int regMask = 0;

    /* Load arguments to r0..r4 */
    for (i = 0; i < dInsn->vA; i++) {
        regMask |= 1 << i;
        /* NOTE: the loop index doubles as the target register number */
        loadValue(cUnit, dInsn->arg[i], i);
    }
    if (regMask) {
        /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */
        newLIR2(cUnit, ARMV5TE_MOV_RR, r7, rFP);
        newLIR2(cUnit, ARMV5TE_SUB_RI8, r7,
                sizeof(StackSaveArea) + (dInsn->vA << 2));
        /* generate null check */
        if (pcrLabel) {
            *pcrLabel = genNullCheck(cUnit, r0, mir->offset, NULL);
        }
        /* store all loaded argument registers in one go */
        newLIR2(cUnit, ARMV5TE_STMIA, r7, regMask);
    }
}
+
/*
 * Copy the invoke/range arguments rFP[vC .. vC+vA-1] into the callee's
 * outgoing argument area, four registers at a time via LDMIA/STMIA.
 * When pcrLabel is non-NULL, also emits a null check on "this" (r0).
 */
static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir,
                                DecodedInstruction *dInsn,
                                Armv5teLIR **pcrLabel)
{
    int srcOffset = dInsn->vC << 2;
    int numArgs = dInsn->vA;
    int regMask;
    /*
     * r4PC     : &rFP[vC]
     * r7: &newFP[0]
     */
    if (srcOffset < 8) {
        /* small offset fits the 3-bit immediate form */
        newLIR3(cUnit, ARMV5TE_ADD_RRI3, r4PC, rFP, srcOffset);
    } else {
        loadConstant(cUnit, r4PC, srcOffset);
        newLIR3(cUnit, ARMV5TE_ADD_RRR, r4PC, rFP, r4PC);
    }
    /* load [r0 .. min(numArgs,4)] */
    regMask = (1 << ((numArgs < 4) ? numArgs : 4)) - 1;
    newLIR2(cUnit, ARMV5TE_LDMIA, r4PC, regMask);

    /* compute r7 = rFP - (save area + arg space) */
    if (sizeof(StackSaveArea) + (numArgs << 2) < 256) {
        newLIR2(cUnit, ARMV5TE_MOV_RR, r7, rFP);
        newLIR2(cUnit, ARMV5TE_SUB_RI8, r7,
                sizeof(StackSaveArea) + (numArgs << 2));
    } else {
        /* total too large for the 8-bit immediate form */
        loadConstant(cUnit, r7, sizeof(StackSaveArea) + (numArgs << 2));
        newLIR3(cUnit, ARMV5TE_SUB_RRR, r7, rFP, r7);
    }

    /* generate null check */
    if (pcrLabel) {
        *pcrLabel = genNullCheck(cUnit, r0, mir->offset, NULL);
    }

    /*
     * Handle remaining 4n arguments:
     * store previously loaded 4 values and load the next 4 values
     */
    if (numArgs >= 8) {
        Armv5teLIR *loopLabel = NULL;
        /*
         * r0 contains "this" and it will be used later, so push it to the stack
         * first. Pushing r5 is just for stack alignment purposes.
         */
        /* NOTE(review): the literal 5 below appears to denote register r5 -
         * confirm against the register enum in Armv5teLIR.h */
        newLIR1(cUnit, ARMV5TE_PUSH, 1 << r0 | 1 << 5);
        /* No need to generate the loop structure if numArgs <= 11 */
        if (numArgs > 11) {
            /* r5 = remaining arg count, rounded down to a multiple of 4 */
            loadConstant(cUnit, 5, ((numArgs - 4) >> 2) << 2);
            loopLabel = newLIR0(cUnit, ARMV5TE_PSEUDO_TARGET_LABEL);
        }
        newLIR2(cUnit, ARMV5TE_STMIA, r7, regMask);
        newLIR2(cUnit, ARMV5TE_LDMIA, r4PC, regMask);
        /* No need to generate the loop structure if numArgs <= 11 */
        if (numArgs > 11) {
            /* decrement the counter by 4 and loop while non-zero */
            newLIR2(cUnit, ARMV5TE_SUB_RI8, 5, 4);
            genConditionalBranch(cUnit, ARM_COND_NE, loopLabel);
        }
    }

    /* Save the last batch of loaded values */
    newLIR2(cUnit, ARMV5TE_STMIA, r7, regMask);

    /* Generate the loop epilogue - don't use r0 */
    if ((numArgs > 4) && (numArgs % 4)) {
        /* load the trailing (numArgs mod 4) args into r1.. */
        regMask = ((1 << (numArgs & 0x3)) - 1) << 1;
        newLIR2(cUnit, ARMV5TE_LDMIA, r4PC, regMask);
    }
    if (numArgs >= 8)
        newLIR1(cUnit, ARMV5TE_POP, 1 << r0 | 1 << 5);

    /* Save the modulo 4 arguments */
    if ((numArgs > 4) && (numArgs % 4)) {
        newLIR2(cUnit, ARMV5TE_STMIA, r7, regMask);
    }
}
+
/*
 * Common tail of invoke codegen: set up the chaining-cell pointer and the
 * Dalvik callsite PC, then dispatch to the appropriate invoke template.
 * Native methods use the no-opt path; others attempt chaining and branch to
 * the taken block on return.  Expects r0 to already hold calleeMethod.
 */
static void genInvokeCommon(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb,
                            Armv5teLIR *labelList, Armv5teLIR *pcrLabel,
                            const Method *calleeMethod)
{
    Armv5teLIR *retChainingCell = &labelList[bb->fallThrough->id];

    /* r1 = &retChainingCell */
    Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL,
                                           r1, 0);
    /* r4PC = dalvikCallsite */
    loadConstant(cUnit, r4PC,
                 (int) (cUnit->method->insns + mir->offset));
    /* back-patch the PC-relative add to point at the chaining cell */
    addrRetChain->generic.target = (LIR *) retChainingCell;
    /*
     * r0 = calleeMethod (loaded upon calling genInvokeCommon)
     * r1 = &ChainingCell
     * r4PC = callsiteDPC
     */
    if (dvmIsNativeMethod(calleeMethod)) {
        genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT);
#if defined(INVOKE_STATS)
        gDvmJit.invokeNoOpt++;
#endif
    } else {
        genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_CHAIN);
#if defined(INVOKE_STATS)
        gDvmJit.invokeChain++;
#endif
        /* the chained invoke returns here; continue at the taken block */
        genUnconditionalBranch(cUnit, &labelList[bb->taken->id]);
    }
    /* Handle exceptions using the interpreter */
    genTrap(cUnit, mir->offset, pcrLabel);
}
+
/* Generate a branch to go back to the interpreter */
static void genPuntToInterp(CompilationUnit *cUnit, unsigned int offset)
{
    /* r0 = dalvik pc */
    loadConstant(cUnit, r0, (int) (cUnit->method->insns + offset));
    /* fetch the punt entry point from the glue structure and call it */
    newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, rGLUE,
            offsetof(InterpState, jitToInterpEntries.dvmJitToInterpPunt) >> 2);
    newLIR1(cUnit, ARMV5TE_BLX_R, r1);
}
+
/*
 * Attempt to single step one instruction using the interpreter and return
 * to the compiled code for the next Dalvik instruction
 */
static void genInterpSingleStep(CompilationUnit *cUnit, MIR *mir)
{
    int flags = dexGetInstrFlags(gDvm.instrFlags, mir->dalvikInsn.opCode);
    int flagsToCheck = kInstrCanBranch | kInstrCanSwitch | kInstrCanReturn |
                       kInstrCanThrow;
    /*
     * Can't single-step past a control-transfer instruction or the last
     * instruction of the trace - punt back to the interpreter instead.
     */
    if ((mir->next == NULL) || (flags & flagsToCheck)) {
       genPuntToInterp(cUnit, mir->offset);
       return;
    }
    int entryAddr = offsetof(InterpState,
                             jitToInterpEntries.dvmJitToInterpSingleStep);
    newLIR3(cUnit, ARMV5TE_LDR_RRI5, r2, rGLUE, entryAddr >> 2);
    /* r0 = dalvik pc */
    loadConstant(cUnit, r0, (int) (cUnit->method->insns + mir->offset));
    /* r1 = dalvik pc of following instruction */
    loadConstant(cUnit, r1, (int) (cUnit->method->insns + mir->next->offset));
    newLIR1(cUnit, ARMV5TE_BLX_R, r2);
}
+
+
+/*****************************************************************************/
+/*
+ * The following are the first-level codegen routines that analyze the format
+ * of each bytecode then either dispatch special purpose codegen routines
+ * or produce corresponding Thumb instructions directly.
+ */
+
+static bool handleFmt10t_Fmt20t_Fmt30t(CompilationUnit *cUnit, MIR *mir,
+                                       BasicBlock *bb, Armv5teLIR *labelList)
+{
+    /* For OP_GOTO, OP_GOTO_16, and OP_GOTO_32 */
+    genUnconditionalBranch(cUnit, &labelList[bb->taken->id]);
+    return false;
+}
+
+static bool handleFmt10x(CompilationUnit *cUnit, MIR *mir)
+{
+    OpCode dalvikOpCode = mir->dalvikInsn.opCode;
+    if (((dalvikOpCode >= OP_UNUSED_3E) && (dalvikOpCode <= OP_UNUSED_43)) ||
+        ((dalvikOpCode >= OP_UNUSED_E3) && (dalvikOpCode <= OP_UNUSED_EC))) {
+        LOGE("Codegen: got unused opcode 0x%x\n",dalvikOpCode);
+        return true;
+    }
+    switch (dalvikOpCode) {
+        case OP_RETURN_VOID:
+            genReturnCommon(cUnit,mir);
+            break;
+        case OP_UNUSED_73:
+        case OP_UNUSED_79:
+        case OP_UNUSED_7A:
+            LOGE("Codegen: got unused opcode 0x%x\n",dalvikOpCode);
+            return true;
+        case OP_NOP:
+            break;
+        default:
+            return true;
+    }
+    return false;
+}
+
+static bool handleFmt11n_Fmt31i(CompilationUnit *cUnit, MIR *mir)
+{
+    switch (mir->dalvikInsn.opCode) {
+        case OP_CONST:
+        case OP_CONST_4:
+            loadConstant(cUnit, r0, mir->dalvikInsn.vB);
+            storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
+            break;
+        case OP_CONST_WIDE_32:
+            loadConstant(cUnit, r0, mir->dalvikInsn.vB);
+            newLIR3(cUnit, ARMV5TE_ASR, r1, r0, 31);
+            storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2);
+            break;
+        default:
+            return true;
+    }
+    return false;
+}
+
+static bool handleFmt21h(CompilationUnit *cUnit, MIR *mir)
+{
+    switch (mir->dalvikInsn.opCode) {
+        case OP_CONST_HIGH16:
+            loadConstant(cUnit, r0, mir->dalvikInsn.vB << 16);
+            storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
+            break;
+        case OP_CONST_WIDE_HIGH16:
+            loadConstant(cUnit, r1, mir->dalvikInsn.vB << 16);
+            loadConstant(cUnit, r0, 0);
+            storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2);
+            break;
+        default:
+            return true;
+    }
+    return false;
+}
+
/*
 * Format 20bc: only OP_THROW_VERIFICATION_ERROR uses it.  It always throws,
 * so hand the instruction back to the interpreter.
 */
static bool handleFmt20bc(CompilationUnit *cUnit, MIR *mir)
{
    /* For OP_THROW_VERIFICATION_ERROR */
    genInterpSingleStep(cUnit, mir);
    return false;
}
+
/*
 * Formats 21c/31c: instructions that reference a constant-pool index.
 * Because the trace has already executed at least once in the interpreter,
 * resolved pointers are burned directly into the generated code as
 * immediates.  Returns true on unhandled opcodes.
 */
static bool handleFmt21c_Fmt31c(CompilationUnit *cUnit, MIR *mir)
{
    switch (mir->dalvikInsn.opCode) {
        /*
         * TODO: Verify that we can ignore the resolution check here because
         * it will have already successfully been interpreted once
         */
        case OP_CONST_STRING_JUMBO:
        case OP_CONST_STRING: {
            void *strPtr = (void*)
              (cUnit->method->clazz->pDvmDex->pResStrings[mir->dalvikInsn.vB]);
            assert(strPtr != NULL);
            loadConstant(cUnit, r0, (int) strPtr );
            storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
            break;
        }
        /*
         * TODO: Verify that we can ignore the resolution check here because
         * it will have already successfully been interpreted once
         */
        case OP_CONST_CLASS: {
            void *classPtr = (void*)
              (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
            assert(classPtr != NULL);
            loadConstant(cUnit, r0, (int) classPtr );
            storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
            break;
        }
        case OP_SGET_OBJECT:
        case OP_SGET_BOOLEAN:
        case OP_SGET_CHAR:
        case OP_SGET_BYTE:
        case OP_SGET_SHORT:
        case OP_SGET: {
            /* offset of the value slot within the resolved StaticField */
            int valOffset = (int)&((struct StaticField*)NULL)->value;
            void *fieldPtr = (void*)
              (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
            assert(fieldPtr != NULL);
            loadConstant(cUnit, r0,  (int) fieldPtr + valOffset);
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, 0);
            storeValue(cUnit, r0, mir->dalvikInsn.vA, r2);
            break;
        }
        case OP_SGET_WIDE: {
            int valOffset = (int)&((struct StaticField*)NULL)->value;
            void *fieldPtr = (void*)
              (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
            assert(fieldPtr != NULL);
            loadConstant(cUnit, r2,  (int) fieldPtr + valOffset);
            /* load both 32-bit halves with a single LDMIA */
            newLIR2(cUnit, ARMV5TE_LDMIA, r2, (1<<r0 | 1<<r1));
            storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2);
            break;
        }
        case OP_SPUT_OBJECT:
        case OP_SPUT_BOOLEAN:
        case OP_SPUT_CHAR:
        case OP_SPUT_BYTE:
        case OP_SPUT_SHORT:
        case OP_SPUT: {
            int valOffset = (int)&((struct StaticField*)NULL)->value;
            void *fieldPtr = (void*)
              (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
            assert(fieldPtr != NULL);
            loadValue(cUnit, mir->dalvikInsn.vA, r0);
            loadConstant(cUnit, r1,  (int) fieldPtr + valOffset);
            newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, r1, 0);
            break;
        }
        case OP_SPUT_WIDE: {
            int valOffset = (int)&((struct StaticField*)NULL)->value;
            void *fieldPtr = (void*)
              (cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
            assert(fieldPtr != NULL);
            loadValuePair(cUnit, mir->dalvikInsn.vA, r0, r1);
            loadConstant(cUnit, r2,  (int) fieldPtr + valOffset);
            /* store both 32-bit halves with a single STMIA */
            newLIR2(cUnit, ARMV5TE_STMIA, r2, (1<<r0 | 1<<r1));
            break;
        }
        case OP_NEW_INSTANCE: {
            ClassObject *classPtr = (void*)
              (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
            assert(classPtr != NULL);
            assert(classPtr->status & CLASS_INITIALIZED);
            if ((classPtr->accessFlags & (ACC_INTERFACE|ACC_ABSTRACT)) != 0) {
                /* It's going to throw, just let the interp. deal with it. */
                genInterpSingleStep(cUnit, mir);
                return false;
            }
            /* call dvmAllocObject(classPtr, ALLOC_DONT_TRACK) */
            loadConstant(cUnit, r0, (int) classPtr);
            loadConstant(cUnit, r4PC, (int)dvmAllocObject);
            genExportPC(cUnit, mir, r2, r3 );
            loadConstant(cUnit, r1, ALLOC_DONT_TRACK);
            newLIR1(cUnit, ARMV5TE_BLX_R, r4PC);
            /*
             * TODO: As coded, we'll bail and reinterpret on alloc failure.
             * Need a general mechanism to bail to thrown exception code.
             */
            genNullCheck(cUnit, r0, mir->offset, NULL);
            storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
            break;
        }
        case OP_CHECK_CAST: {
            ClassObject *classPtr =
              (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
            loadConstant(cUnit, r1, (int) classPtr );
            loadValue(cUnit, mir->dalvikInsn.vA, r0);  /* Ref */
            /*
             * TODO - in theory classPtr should be resolved by the time this
             * instruction made into a trace, but we are seeing NULL at runtime
             * so this check is temporarily used as a workaround.
             */
            Armv5teLIR * pcrLabel = genNullCheck(cUnit, r1, mir->offset, NULL);
            newLIR2(cUnit, ARMV5TE_CMP_RI8, r0, 0);    /* Null? */
            /* a null reference trivially passes the cast - skip ahead */
            Armv5teLIR *branch1 =
                newLIR2(cUnit, ARMV5TE_B_COND, 4, ARM_COND_EQ);
            /* r0 now contains object->clazz */
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
                    offsetof(Object, clazz) >> 2);
            loadConstant(cUnit, r4PC, (int)dvmInstanceofNonTrivial);
            /* exact class match also passes without calling out */
            newLIR2(cUnit, ARMV5TE_CMP_RR, r0, r1);
            Armv5teLIR *branch2 =
                newLIR2(cUnit, ARMV5TE_B_COND, 2, ARM_COND_EQ);
            newLIR1(cUnit, ARMV5TE_BLX_R, r4PC);
            /* check cast failed - punt to the interpreter */
            genNullCheck(cUnit, r0, mir->offset, pcrLabel);
            /* check cast passed - branch target here */
            Armv5teLIR *target = newLIR0(cUnit, ARMV5TE_PSEUDO_TARGET_LABEL);
            branch1->generic.target = (LIR *)target;
            branch2->generic.target = (LIR *)target;
            break;
        }
        default:
            return true;
    }
    return false;
}
+
/*
 * Format 11x: single-register instructions (moves of results/exceptions,
 * returns, monitor enter/exit, throw).  Returns true on unhandled opcodes.
 */
static bool handleFmt11x(CompilationUnit *cUnit, MIR *mir)
{
    OpCode dalvikOpCode = mir->dalvikInsn.opCode;
    switch (dalvikOpCode) {
        case OP_MOVE_EXCEPTION: {
            /* vA <- self->exception (self reached through the glue struct) */
            int offset = offsetof(InterpState, self);
            int exOffset = offsetof(Thread, exception);
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, offset >> 2);
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, r0, exOffset >> 2);
            storeValue(cUnit, r1, mir->dalvikInsn.vA, r0);
           break;
        }
        case OP_MOVE_RESULT:
        case OP_MOVE_RESULT_OBJECT: {
            /* vA <- glue->retval (32-bit) */
            int offset = offsetof(InterpState, retval);
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, offset >> 2);
            storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
            break;
        }
        case OP_MOVE_RESULT_WIDE: {
            /* vA/vA+1 <- glue->retval (64-bit, two adjacent words) */
            int offset = offsetof(InterpState, retval);
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, offset >> 2);
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, rGLUE, (offset >> 2)+1);
            storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2);
            break;
        }
        case OP_RETURN_WIDE: {
            /* glue->retval <- vA/vA+1, then common return sequence */
            loadValuePair(cUnit, mir->dalvikInsn.vA, r0, r1);
            int offset = offsetof(InterpState, retval);
            newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rGLUE, offset >> 2);
            newLIR3(cUnit, ARMV5TE_STR_RRI5, r1, rGLUE, (offset >> 2)+1);
            genReturnCommon(cUnit,mir);
            break;
        }
        case OP_RETURN:
        case OP_RETURN_OBJECT: {
            /* glue->retval <- vA, then common return sequence */
            loadValue(cUnit, mir->dalvikInsn.vA, r0);
            int offset = offsetof(InterpState, retval);
            newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rGLUE, offset >> 2);
            genReturnCommon(cUnit,mir);
            break;
        }
        /*
         * TODO-VERIFY: May be playing a bit fast and loose here.  As coded,
         * a failure on lock/unlock will cause us to revert to the interpreter
         * to try again. This means we essentially ignore the first failure on
         * the assumption that the interpreter will correctly handle the 2nd.
         */
        case OP_MONITOR_ENTER:
        case OP_MONITOR_EXIT: {
            int offset = offsetof(InterpState, self);
            loadValue(cUnit, mir->dalvikInsn.vA, r1);
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE, offset >> 2);
            if (dalvikOpCode == OP_MONITOR_ENTER) {
                loadConstant(cUnit, r2, (int)dvmLockObject);
            } else {
                loadConstant(cUnit, r2, (int)dvmUnlockObject);
            }
          /*
           * TODO-VERIFY: Note that we're not doing an EXPORT_PC, as
           * Lock/unlock won't throw, and this code does not support
           * DEADLOCK_PREDICTION or MONITOR_TRACKING.  Should it?
           */
            genNullCheck(cUnit, r1, mir->offset, NULL);
            /* Do the call */
            newLIR1(cUnit, ARMV5TE_BLX_R, r2);
            break;
        }
        case OP_THROW: {
            /* always transfers control - let the interpreter handle it */
            genInterpSingleStep(cUnit, mir);
            break;
        }
        default:
            return true;
    }
    return false;
}
+
/*
 * Format 12x: two-register instructions - 2ADDR arithmetic, primitive
 * conversions, moves, truncations, and array-length.  Returns true on
 * unhandled opcodes.
 */
static bool handleFmt12x(CompilationUnit *cUnit, MIR *mir)
{
    OpCode opCode = mir->dalvikInsn.opCode;
    int vSrc1Dest = mir->dalvikInsn.vA;
    int vSrc2 = mir->dalvikInsn.vB;

    /* TODO - find the proper include file to declare these */
    float  __aeabi_i2f(  int op1 );
    int    __aeabi_f2iz( float op1 );
    float  __aeabi_d2f(  double op1 );
    double __aeabi_f2d(  float op1 );
    double __aeabi_i2d(  int op1 );
    int    __aeabi_d2iz( double op1 );
    long   __aeabi_f2lz( float op1 );
    float  __aeabi_l2f(  long op1 );
    long   __aeabi_d2lz( double op1 );
    double __aeabi_l2d(  long op1 );

    /* All 2ADDR arithmetic goes through the common dispatcher */
    if ( (opCode >= OP_ADD_INT_2ADDR) && (opCode <= OP_REM_DOUBLE_2ADDR)) {
        return genArithOp( cUnit, mir );
    }

    switch (opCode) {
        /* conversions: sizes below are in 32-bit words (src, tgt) */
        case OP_INT_TO_FLOAT:
            return genConversion(cUnit, mir, (void*)__aeabi_i2f, 1, 1);
        case OP_FLOAT_TO_INT:
            return genConversion(cUnit, mir, (void*)__aeabi_f2iz, 1, 1);
        case OP_DOUBLE_TO_FLOAT:
            return genConversion(cUnit, mir, (void*)__aeabi_d2f, 2, 1);
        case OP_FLOAT_TO_DOUBLE:
            return genConversion(cUnit, mir, (void*)__aeabi_f2d, 1, 2);
        case OP_INT_TO_DOUBLE:
            return genConversion(cUnit, mir, (void*)__aeabi_i2d, 1, 2);
        case OP_DOUBLE_TO_INT:
            return genConversion(cUnit, mir, (void*)__aeabi_d2iz, 2, 1);
        case OP_FLOAT_TO_LONG:
            return genConversion(cUnit, mir, (void*)__aeabi_f2lz, 1, 2);
        case OP_LONG_TO_FLOAT:
            return genConversion(cUnit, mir, (void*)__aeabi_l2f, 2, 1);
        case OP_DOUBLE_TO_LONG:
            return genConversion(cUnit, mir, (void*)__aeabi_d2lz, 2, 2);
        case OP_LONG_TO_DOUBLE:
            return genConversion(cUnit, mir, (void*)__aeabi_l2d, 2, 2);
        case OP_NEG_INT:
        case OP_NOT_INT:
            return genArithOpInt(cUnit, mir, vSrc1Dest, vSrc1Dest, vSrc2);
        case OP_NEG_LONG:
        case OP_NOT_LONG:
            return genArithOpLong(cUnit,mir, vSrc1Dest, vSrc1Dest, vSrc2);
        case OP_NEG_FLOAT:
            return genArithOpFloat(cUnit,mir,vSrc1Dest,vSrc1Dest,vSrc2);
        case OP_NEG_DOUBLE:
            return genArithOpDouble(cUnit,mir,vSrc1Dest,vSrc1Dest,vSrc2);
        case OP_MOVE_WIDE:
            loadValuePair(cUnit, mir->dalvikInsn.vB, r0, r1);
            storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2);
            break;
        case OP_INT_TO_LONG:
            /* sign-extend into the high word */
            loadValue(cUnit, mir->dalvikInsn.vB, r0);
            newLIR3(cUnit, ARMV5TE_ASR, r1, r0, 31);
            storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2);
            break;
        case OP_MOVE:
        case OP_MOVE_OBJECT:
        case OP_LONG_TO_INT:
            /* long-to-int is just a move of the low word */
            loadValue(cUnit, vSrc2, r0);
            storeValue(cUnit, r0, vSrc1Dest, r1);
            break;
        case OP_INT_TO_BYTE:
            /* sign-extending truncation: shift up then arithmetic shift down */
            loadValue(cUnit, vSrc2, r0);
            newLIR3(cUnit, ARMV5TE_LSL, r0, r0, 24);
            newLIR3(cUnit, ARMV5TE_ASR, r0, r0, 24);
            storeValue(cUnit, r0, vSrc1Dest, r1);
            break;
        case OP_INT_TO_SHORT:
            loadValue(cUnit, vSrc2, r0);
            newLIR3(cUnit, ARMV5TE_LSL, r0, r0, 16);
            newLIR3(cUnit, ARMV5TE_ASR, r0, r0, 16);
            storeValue(cUnit, r0, vSrc1Dest, r1);
            break;
        case OP_INT_TO_CHAR:
            /* char is unsigned - zero-extending truncation (LSR, not ASR) */
            loadValue(cUnit, vSrc2, r0);
            newLIR3(cUnit, ARMV5TE_LSL, r0, r0, 16);
            newLIR3(cUnit, ARMV5TE_LSR, r0, r0, 16);
            storeValue(cUnit, r0, vSrc1Dest, r1);
            break;
        case OP_ARRAY_LENGTH: {
            int lenOffset = offsetof(ArrayObject, length);
            loadValue(cUnit, vSrc2, r0);
            genNullCheck(cUnit, r0, mir->offset, NULL);  /* null array? */
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, lenOffset >> 2);
            storeValue(cUnit, r0, vSrc1Dest, r1);
            break;
        }
        default:
            return true;
    }
    return false;
}
+
/*
 * Handle format 21s instructions (16-bit signed immediate): OP_CONST_16 and
 * OP_CONST_WIDE_16.  Returns false on success, true if the opcode is not
 * handled here.
 */
static bool handleFmt21s(CompilationUnit *cUnit, MIR *mir)
{
    OpCode dalvikOpCode = mir->dalvikInsn.opCode;
    /* It takes only a few instructions to handle OP_CONST_WIDE_16 inline */
    if (dalvikOpCode == OP_CONST_WIDE_16) {
        int rDest = mir->dalvikInsn.vA;   /* first reg of the 64-bit pair */
        int BBBB = mir->dalvikInsn.vB;    /* sign-extended 16-bit literal */
        int rLow = r0, rHigh = r1;
        if (BBBB == 0) {
            /* Both halves are zero; reuse one register for both stores */
            newLIR2(cUnit, ARMV5TE_MOV_IMM, rLow, 0);
            rHigh = rLow;
        } else if (BBBB > 0 && BBBB <= 255) {
            /* rLow = ssssBBBB (positive, so the high half is zero) */
            newLIR2(cUnit, ARMV5TE_MOV_IMM, rLow, BBBB);
            /* rHigh = 0 */
            newLIR2(cUnit, ARMV5TE_MOV_IMM, rHigh, 0);
        } else {
            loadConstant(cUnit, rLow, BBBB);
            /*
             * Arithmetic-shift-right 32 bits to get the sign-extended high
             * half of the long [63..32].  NOTE: an immediate of 0 in the
             * Thumb ASR encoding denotes a shift amount of 32.
             */
            newLIR3(cUnit, ARMV5TE_ASR, rHigh, rLow, 0);
        }

        /* Save the long values to the specified Dalvik register pair */
        /*
         * If rDest is no greater than 30, use two "str rd, [rFP + immed_5]"
         * instructions to store the results. Effective address is
         * rFP + immed_5 << 2.
         */
        if (rDest < 31) {
            newLIR3(cUnit, ARMV5TE_STR_RRI5, rLow, rFP, rDest);
            newLIR3(cUnit, ARMV5TE_STR_RRI5, rHigh, rFP, rDest+1);
        } else {
            /*
             * Otherwise just load the frame offset from the constant pool and
             * add it to rFP.  Then use stmia to store the results to the
             * specified register pair.
             */
            /* STMIA needs both r0 and r1 live: replicate r0 into r1 */
            if (rLow == rHigh) {
                newLIR3(cUnit, ARMV5TE_ADD_RRI3, rLow+1, rLow, 0);
            }
            /* load the rFP offset into r2 */
            loadConstant(cUnit, r2, rDest*4);
            newLIR3(cUnit, ARMV5TE_ADD_RRR, r2, rFP, r2);
            newLIR2(cUnit, ARMV5TE_STMIA, r2, (1<<r0 | 1 << r1));
        }
    } else if (dalvikOpCode == OP_CONST_16) {
        int rDest = mir->dalvikInsn.vA;
        int BBBB = mir->dalvikInsn.vB;
        if (BBBB >= 0 && BBBB <= 255) {
            /* Literal fits the Thumb 8-bit move-immediate */
            /* r0 = BBBB */
            newLIR2(cUnit, ARMV5TE_MOV_IMM, r0, BBBB);
        } else {
            loadConstant(cUnit, r0, BBBB);
        }

        /* Save the constant to the specified Dalvik register */
        /*
         * If rDest is no greater than 31, effective address is
         * rFP + immed_5 << 2.
         */
        if (rDest < 32) {
            newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, rFP, rDest);
        } else {
            /*
             * Otherwise just load the frame offset from the constant pool and
             * add it to rFP, then store through the computed address.
             */
            /* load the rFP offset into r2 */
            loadConstant(cUnit, r2, rDest*4);
            newLIR3(cUnit, ARMV5TE_ADD_RRR, r2, rFP, r2);
            newLIR3(cUnit, ARMV5TE_STR_RRI5, r0, r2, 0);
        }
    } else {
        return true;
    }
    return false;
}
+
+/* Compare agaist zero */
+static bool handleFmt21t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb,
+                         Armv5teLIR *labelList)
+{
+    OpCode dalvikOpCode = mir->dalvikInsn.opCode;
+    Armv5teConditionCode cond;
+
+    loadValue(cUnit, mir->dalvikInsn.vA, r0);
+    newLIR2(cUnit, ARMV5TE_CMP_RI8, r0, 0);
+
+    switch (dalvikOpCode) {
+        case OP_IF_EQZ:
+            cond = ARM_COND_EQ;
+            break;
+        case OP_IF_NEZ:
+            cond = ARM_COND_NE;
+            break;
+        case OP_IF_LTZ:
+            cond = ARM_COND_LT;
+            break;
+        case OP_IF_GEZ:
+            cond = ARM_COND_GE;
+            break;
+        case OP_IF_GTZ:
+            cond = ARM_COND_GT;
+            break;
+        case OP_IF_LEZ:
+            cond = ARM_COND_LE;
+            break;
+        default:
+            cond = 0;
+            LOGE("Unexpected opcode (%d) for Fmt21t\n", dalvikOpCode);
+            dvmAbort();
+    }
+    genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]);
+    /* This mostly likely will be optimized away in a later phase */
+    genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]);
+    return false;
+}
+
/*
 * Handle binary operations with an 8-bit (Fmt22b) or 16-bit (Fmt22s) literal
 * operand: vDest = vSrc <op> lit.  Returns false on success, true if the
 * opcode must be handled elsewhere.
 */
static bool handleFmt22b_Fmt22s(CompilationUnit *cUnit, MIR *mir)
{
    OpCode dalvikOpCode = mir->dalvikInsn.opCode;
    int vSrc = mir->dalvikInsn.vB;      /* source Dalvik register */
    int vDest = mir->dalvikInsn.vA;     /* destination Dalvik register */
    int lit = mir->dalvikInsn.vC;       /* sign-extended literal */
    int armOp;

    /* TODO: find the proper .h file to declare these */
    int __aeabi_idivmod(int op1, int op2);
    int __aeabi_idiv(int op1, int op2);

    switch (dalvikOpCode) {
        case OP_ADD_INT_LIT8:
        case OP_ADD_INT_LIT16:
            loadValue(cUnit, vSrc, r0);
            if (lit <= 255 && lit >= 0) {
                /* Literal fits the Thumb 8-bit add-immediate */
                newLIR2(cUnit, ARMV5TE_ADD_RI8, r0, lit);
                storeValue(cUnit, r0, vDest, r1);
            } else if (lit >= -255 && lit <= 0) {
                /* Convert to a small constant subtraction */
                newLIR2(cUnit, ARMV5TE_SUB_RI8, r0, -lit);
                storeValue(cUnit, r0, vDest, r1);
            } else {
                /* Materialize the literal and fall back to a register add */
                loadConstant(cUnit, r1, lit);
                genBinaryOp(cUnit, vDest, ARMV5TE_ADD_RRR);
            }
            break;

        case OP_RSUB_INT_LIT8:
        case OP_RSUB_INT:
            /* Reverse subtract (vDest = lit - vSrc): literal goes in r0 */
            loadValue(cUnit, vSrc, r1);
            loadConstant(cUnit, r0, lit);
            genBinaryOp(cUnit, vDest, ARMV5TE_SUB_RRR);
            break;

        /* Commutative/logical ops: r0 = vSrc, r1 = lit, pick the ARM op */
        case OP_MUL_INT_LIT8:
        case OP_MUL_INT_LIT16:
        case OP_AND_INT_LIT8:
        case OP_AND_INT_LIT16:
        case OP_OR_INT_LIT8:
        case OP_OR_INT_LIT16:
        case OP_XOR_INT_LIT8:
        case OP_XOR_INT_LIT16:
            loadValue(cUnit, vSrc, r0);
            loadConstant(cUnit, r1, lit);
            switch (dalvikOpCode) {
                case OP_MUL_INT_LIT8:
                case OP_MUL_INT_LIT16:
                    armOp = ARMV5TE_MUL;
                    break;
                case OP_AND_INT_LIT8:
                case OP_AND_INT_LIT16:
                    armOp = ARMV5TE_AND_RR;
                    break;
                case OP_OR_INT_LIT8:
                case OP_OR_INT_LIT16:
                    armOp = ARMV5TE_ORR;
                    break;
                case OP_XOR_INT_LIT8:
                case OP_XOR_INT_LIT16:
                    armOp = ARMV5TE_EOR;
                    break;
                default:
                    dvmAbort();
            }
            genBinaryOp(cUnit, vDest, armOp);
            break;

        /* Shifts take the literal directly as an immediate operand */
        case OP_SHL_INT_LIT8:
        case OP_SHR_INT_LIT8:
        case OP_USHR_INT_LIT8:
            loadValue(cUnit, vSrc, r0);
            switch (dalvikOpCode) {
                case OP_SHL_INT_LIT8:
                    armOp = ARMV5TE_LSL;
                    break;
                case OP_SHR_INT_LIT8:
                    armOp = ARMV5TE_ASR;
                    break;
                case OP_USHR_INT_LIT8:
                    armOp = ARMV5TE_LSR;
                    break;
                default: dvmAbort();
            }
            newLIR3(cUnit, armOp, r0, r0, lit);
            storeValue(cUnit, r0, vDest, r1);
            break;

        case OP_DIV_INT_LIT8:
        case OP_DIV_INT_LIT16:
            if (lit == 0) {
                /* Let the interpreter deal with div by 0 */
                genInterpSingleStep(cUnit, mir);
                return false;
            }
            loadConstant(cUnit, r2, (int)__aeabi_idiv);
            loadConstant(cUnit, r1, lit);
            loadValue(cUnit, vSrc, r0);
            newLIR1(cUnit, ARMV5TE_BLX_R, r2);
            /* Quotient is returned in r0 */
            storeValue(cUnit, r0, vDest, r2);
            break;

        case OP_REM_INT_LIT8:
        case OP_REM_INT_LIT16:
            if (lit == 0) {
                /* Let the interpreter deal with div by 0 */
                genInterpSingleStep(cUnit, mir);
                return false;
            }
            loadConstant(cUnit, r2, (int)__aeabi_idivmod);
            loadConstant(cUnit, r1, lit);
            loadValue(cUnit, vSrc, r0);
            newLIR1(cUnit, ARMV5TE_BLX_R, r2);
            /* Remainder is returned in r1 (AEABI __aeabi_idivmod) */
            storeValue(cUnit, r1, vDest, r2);
            break;
        default:
            return true;
    }
    return false;
}
+
+static bool handleFmt22c(CompilationUnit *cUnit, MIR *mir)
+{
+    OpCode dalvikOpCode = mir->dalvikInsn.opCode;
+    int fieldOffset;
+
+    if (dalvikOpCode >= OP_IGET && dalvikOpCode <= OP_IPUT_SHORT) {
+        InstField *pInstField = (InstField *)
+            cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
+        int fieldOffset;
+
+        assert(pInstField != NULL);
+        fieldOffset = pInstField->byteOffset;
+    } else {
+        /* To make the compiler happy */
+        fieldOffset = 0;
+    }
+    switch (dalvikOpCode) {
+        /*
+         * TODO: I may be assuming too much here.
+         * Verify what is known at JIT time.
+         */
+        case OP_NEW_ARRAY: {
+            void *classPtr = (void*)
+              (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]);
+            assert(classPtr != NULL);
+            loadValue(cUnit, mir->dalvikInsn.vB, r1);  /* Len */
+            loadConstant(cUnit, r0, (int) classPtr );
+            loadConstant(cUnit, r4PC, (int)dvmAllocArrayByClass);
+            Armv5teLIR *pcrLabel =
+                genRegImmCheck(cUnit, ARM_COND_MI, r1, 0, mir->offset, NULL);
+            genExportPC(cUnit, mir, r2, r3 );
+            newLIR2(cUnit, ARMV5TE_MOV_IMM,r2,ALLOC_DONT_TRACK);
+            newLIR1(cUnit, ARMV5TE_BLX_R, r4PC);
+            /*
+             * TODO: As coded, we'll bail and reinterpret on alloc failure.
+             * Need a general mechanism to bail to thrown exception code.
+             */
+            genNullCheck(cUnit, r0, mir->offset, pcrLabel);
+            storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
+            break;
+        }
+        /*
+         * TODO: I may be assuming too much here.
+         * Verify what is known at JIT time.
+         */
+        case OP_INSTANCE_OF: {
+            ClassObject *classPtr =
+              (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]);
+            assert(classPtr != NULL);
+            loadValue(cUnit, mir->dalvikInsn.vB, r1);  /* Ref */
+            loadConstant(cUnit, r2, (int) classPtr );
+            loadConstant(cUnit, r0, 1);                /* Assume true */
+            newLIR2(cUnit, ARMV5TE_CMP_RI8, r1, 0);    /* Null? */
+            Armv5teLIR *branch1 = newLIR2(cUnit, ARMV5TE_B_COND, 4,
+                                          ARM_COND_EQ);
+            /* r1 now contains object->clazz */
+            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, r1,
+                    offsetof(Object, clazz) >> 2);
+            loadConstant(cUnit, r4PC, (int)dvmInstanceofNonTrivial);
+            newLIR2(cUnit, ARMV5TE_CMP_RR, r1, r2);
+            Armv5teLIR *branch2 = newLIR2(cUnit, ARMV5TE_B_COND, 2,
+                                          ARM_COND_EQ);
+            newLIR2(cUnit, ARMV5TE_MOV_RR, r0, r1);
+            newLIR2(cUnit, ARMV5TE_MOV_RR, r1, r2);
+            newLIR1(cUnit, ARMV5TE_BLX_R, r4PC);
+            /* branch target here */
+            Armv5teLIR *target = newLIR0(cUnit, ARMV5TE_PSEUDO_TARGET_LABEL);
+            storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
+            branch1->generic.target = (LIR *)target;
+            branch2->generic.target = (LIR *)target;
+            break;
+        }
+        case OP_IGET_WIDE:
+            genIGetWide(cUnit, mir, fieldOffset);
+            break;
+        case OP_IGET:
+        case OP_IGET_OBJECT:
+            genIGet(cUnit, mir, ARMV5TE_LDR_RRR, fieldOffset);
+            break;
+        case OP_IGET_BOOLEAN:
+            genIGet(cUnit, mir, ARMV5TE_LDRB_RRR, fieldOffset);
+            break;
+        case OP_IGET_BYTE:
+            genIGet(cUnit, mir, ARMV5TE_LDRSB_RRR, fieldOffset);
+            break;
+        case OP_IGET_CHAR:
+            genIGet(cUnit, mir, ARMV5TE_LDRH_RRR, fieldOffset);
+            break;
+        case OP_IGET_SHORT:
+            genIGet(cUnit, mir, ARMV5TE_LDRSH_RRR, fieldOffset);
+            break;
+        case OP_IPUT_WIDE:
+            genIPutWide(cUnit, mir, fieldOffset);
+            break;
+        case OP_IPUT:
+        case OP_IPUT_OBJECT:
+            genIPut(cUnit, mir, ARMV5TE_STR_RRR, fieldOffset);
+            break;
+        case OP_IPUT_SHORT:
+        case OP_IPUT_CHAR:
+            genIPut(cUnit, mir, ARMV5TE_STRH_RRR, fieldOffset);
+            break;
+        case OP_IPUT_BYTE:
+        case OP_IPUT_BOOLEAN:
+            genIPut(cUnit, mir, ARMV5TE_STRB_RRR, fieldOffset);
+            break;
+        default:
+            return true;
+    }
+    return false;
+}
+
+static bool handleFmt22cs(CompilationUnit *cUnit, MIR *mir)
+{
+    OpCode dalvikOpCode = mir->dalvikInsn.opCode;
+    int fieldOffset =  mir->dalvikInsn.vC;
+    switch (dalvikOpCode) {
+        case OP_IGET_QUICK:
+        case OP_IGET_OBJECT_QUICK:
+            genIGet(cUnit, mir, ARMV5TE_LDR_RRR, fieldOffset);
+            break;
+        case OP_IPUT_QUICK:
+        case OP_IPUT_OBJECT_QUICK:
+            genIPut(cUnit, mir, ARMV5TE_STR_RRR, fieldOffset);
+            break;
+        case OP_IGET_WIDE_QUICK:
+            genIGetWide(cUnit, mir, fieldOffset);
+            break;
+        case OP_IPUT_WIDE_QUICK:
+            genIPutWide(cUnit, mir, fieldOffset);
+            break;
+        default:
+            return true;
+    }
+    return false;
+
+}
+
+/* Compare agaist zero */
+static bool handleFmt22t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb,
+                         Armv5teLIR *labelList)
+{
+    OpCode dalvikOpCode = mir->dalvikInsn.opCode;
+    Armv5teConditionCode cond;
+
+    loadValue(cUnit, mir->dalvikInsn.vA, r0);
+    loadValue(cUnit, mir->dalvikInsn.vB, r1);
+    newLIR2(cUnit, ARMV5TE_CMP_RR, r0, r1);
+
+    switch (dalvikOpCode) {
+        case OP_IF_EQ:
+            cond = ARM_COND_EQ;
+            break;
+        case OP_IF_NE:
+            cond = ARM_COND_NE;
+            break;
+        case OP_IF_LT:
+            cond = ARM_COND_LT;
+            break;
+        case OP_IF_GE:
+            cond = ARM_COND_GE;
+            break;
+        case OP_IF_GT:
+            cond = ARM_COND_GT;
+            break;
+        case OP_IF_LE:
+            cond = ARM_COND_LE;
+            break;
+        default:
+            cond = 0;
+            LOGE("Unexpected opcode (%d) for Fmt22t\n", dalvikOpCode);
+            dvmAbort();
+    }
+    genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]);
+    /* This mostly likely will be optimized away in a later phase */
+    genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]);
+    return false;
+}
+
+static bool handleFmt22x_Fmt32x(CompilationUnit *cUnit, MIR *mir)
+{
+    OpCode opCode = mir->dalvikInsn.opCode;
+    int vSrc1Dest = mir->dalvikInsn.vA;
+    int vSrc2 = mir->dalvikInsn.vB;
+
+    switch (opCode) {
+        case OP_MOVE_16:
+        case OP_MOVE_OBJECT_16:
+        case OP_MOVE_FROM16:
+        case OP_MOVE_OBJECT_FROM16:
+            loadValue(cUnit, vSrc2, r0);
+            storeValue(cUnit, r0, vSrc1Dest, r1);
+            break;
+        case OP_MOVE_WIDE_16:
+        case OP_MOVE_WIDE_FROM16:
+            loadValuePair(cUnit, vSrc2, r0, r1);
+            storeValuePair(cUnit, r0, r1, vSrc1Dest, r2);
+            break;
+        default:
+            return true;
+    }
+    return false;
+}
+
/*
 * Handle format 23x (three 8-bit registers): long/float/double compares and
 * array get/put.  vA = destination (or source for aput), vB/vC = operands.
 * Returns false on success, true if the opcode must be handled elsewhere.
 */
static bool handleFmt23x(CompilationUnit *cUnit, MIR *mir)
{
    OpCode opCode = mir->dalvikInsn.opCode;
    int vA = mir->dalvikInsn.vA;
    int vB = mir->dalvikInsn.vB;
    int vC = mir->dalvikInsn.vC;

    /* Binary arithmetic shares a common generator */
    if ( (opCode >= OP_ADD_INT) && (opCode <= OP_REM_DOUBLE)) {
        return genArithOp( cUnit, mir );
    }

    switch (opCode) {
        case OP_CMP_LONG:
            /* Operands in r0/r1 and r2/r3; template returns result in r0 */
            loadValuePair(cUnit,vB, r0, r1);
            loadValuePair(cUnit, vC, r2, r3);
            genDispatchToHandler(cUnit, TEMPLATE_CMP_LONG);
            storeValue(cUnit, r0, vA, r1);
            break;
        case OP_CMPL_FLOAT:
            loadValue(cUnit, vB, r0);
            loadValue(cUnit, vC, r1);
            genDispatchToHandler(cUnit, TEMPLATE_CMPL_FLOAT);
            storeValue(cUnit, r0, vA, r1);
            break;
        case OP_CMPG_FLOAT:
            loadValue(cUnit, vB, r0);
            loadValue(cUnit, vC, r1);
            genDispatchToHandler(cUnit, TEMPLATE_CMPG_FLOAT);
            storeValue(cUnit, r0, vA, r1);
            break;
        case OP_CMPL_DOUBLE:
            /* Doubles are passed by address into the template */
            loadValueAddress(cUnit, vB, r0);
            loadValueAddress(cUnit, vC, r1);
            genDispatchToHandler(cUnit, TEMPLATE_CMPL_DOUBLE);
            storeValue(cUnit, r0, vA, r1);
            break;
        case OP_CMPG_DOUBLE:
            loadValueAddress(cUnit, vB, r0);
            loadValueAddress(cUnit, vC, r1);
            genDispatchToHandler(cUnit, TEMPLATE_CMPG_DOUBLE);
            storeValue(cUnit, r0, vA, r1);
            break;
        /*
         * Array accesses: (array=vB, index=vC, value=vA).  The last argument
         * is the log2 element size used as the index scale: 3 = 8-byte,
         * 2 = 4-byte, 1 = 2-byte, 0 = 1-byte.
         */
        case OP_AGET_WIDE:
            genArrayGet(cUnit, mir, ARMV5TE_LDR_RRR, vB, vC, vA, 3);
            break;
        case OP_AGET:
        case OP_AGET_OBJECT:
            genArrayGet(cUnit, mir, ARMV5TE_LDR_RRR, vB, vC, vA, 2);
            break;
        case OP_AGET_BOOLEAN:
            genArrayGet(cUnit, mir, ARMV5TE_LDRB_RRR, vB, vC, vA, 0);
            break;
        case OP_AGET_BYTE:
            genArrayGet(cUnit, mir, ARMV5TE_LDRSB_RRR, vB, vC, vA, 0);
            break;
        case OP_AGET_CHAR:
            genArrayGet(cUnit, mir, ARMV5TE_LDRH_RRR, vB, vC, vA, 1);
            break;
        case OP_AGET_SHORT:
            genArrayGet(cUnit, mir, ARMV5TE_LDRSH_RRR, vB, vC, vA, 1);
            break;
        case OP_APUT_WIDE:
            genArrayPut(cUnit, mir, ARMV5TE_STR_RRR, vB, vC, vA, 3);
            break;
        case OP_APUT:
        case OP_APUT_OBJECT:
            genArrayPut(cUnit, mir, ARMV5TE_STR_RRR, vB, vC, vA, 2);
            break;
        case OP_APUT_SHORT:
        case OP_APUT_CHAR:
            genArrayPut(cUnit, mir, ARMV5TE_STRH_RRR, vB, vC, vA, 1);
            break;
        case OP_APUT_BYTE:
        case OP_APUT_BOOLEAN:
            genArrayPut(cUnit, mir, ARMV5TE_STRB_RRR, vB, vC, vA, 0);
            break;
        default:
            return true;
    }
    return false;
}
+
/*
 * Handle format 31t instructions (register + 32-bit branch offset):
 * fill-array-data, packed-switch, and sparse-switch, all implemented by
 * calling back into the interpreter's helper routines.
 */
static bool handleFmt31t(CompilationUnit *cUnit, MIR *mir)
{
    OpCode dalvikOpCode = mir->dalvikInsn.opCode;
    switch (dalvikOpCode) {
        case OP_FILL_ARRAY_DATA: {
            loadConstant(cUnit, r4PC, (int)dvmInterpHandleFillArrayData);
            /* r0 = array object, r1 = pointer to the embedded data table */
            loadValue(cUnit, mir->dalvikInsn.vA, r0);
            loadConstant(cUnit, r1, (mir->dalvikInsn.vB << 1) +
                 (int) (cUnit->method->insns + mir->offset));
            genExportPC(cUnit, mir, r2, r3 );
            newLIR1(cUnit, ARMV5TE_BLX_R, r4PC);
            /* A zero return means the helper failed; bail to interpreter */
            genNullCheck(cUnit, r0, mir->offset, NULL);
            break;
        }
        /*
         * TODO
         * - Add a 1 to 3-entry per-location cache here to completely
         *   bypass the dvmInterpHandle[Packed/Sparse]Switch call w/ chaining
         * - Use out-of-line handlers for both of these
         */
        case OP_PACKED_SWITCH:
        case OP_SPARSE_SWITCH: {
            if (dalvikOpCode == OP_PACKED_SWITCH) {
                loadConstant(cUnit, r4PC, (int)dvmInterpHandlePackedSwitch);
            } else {
                loadConstant(cUnit, r4PC, (int)dvmInterpHandleSparseSwitch);
            }
            /* r1 = test value, r0 = pointer to the switch data table */
            loadValue(cUnit, mir->dalvikInsn.vA, r1);
            loadConstant(cUnit, r0, (mir->dalvikInsn.vB << 1) +
                 (int) (cUnit->method->insns + mir->offset));
            newLIR1(cUnit, ARMV5TE_BLX_R, r4PC);
            /* Helper returned the branch offset (in code units) in r0 */
            loadConstant(cUnit, r1, (int)(cUnit->method->insns + mir->offset));
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r2, rGLUE,
                offsetof(InterpState, jitToInterpEntries.dvmJitToInterpNoChain)
                    >> 2);
            /* Double the offset (code units -> bytes), add the base PC */
            newLIR3(cUnit, ARMV5TE_ADD_RRR, r0, r0, r0);
            newLIR3(cUnit, ARMV5TE_ADD_RRR, r4PC, r0, r1);
            /* Resume in the interpreter at the target without chaining */
            newLIR1(cUnit, ARMV5TE_BLX_R, r2);
            break;
        }
        default:
            return true;
    }
    return false;
}
+
/*
 * Handle the resolved invoke formats (Fmt35c / Fmt3rc): virtual, super,
 * direct, static, and interface invokes plus filled-new-array.  Each case
 * marshals the arguments, materializes the callee Method*, and dispatches
 * to the invoke template.  Returns false on success.
 */
static bool handleFmt35c_3rc(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb,
                             Armv5teLIR *labelList)
{
    /* Where execution resumes after the callee returns */
    Armv5teLIR *retChainingCell = &labelList[bb->fallThrough->id];
    Armv5teLIR *pcrLabel = NULL;

    DecodedInstruction *dInsn = &mir->dalvikInsn;
    switch (mir->dalvikInsn.opCode) {
        /*
         * calleeMethod = this->clazz->vtable[
         *     method->clazz->pDvmDex->pResMethods[BBBB]->methodIndex
         * ]
         */
        case OP_INVOKE_VIRTUAL:
        case OP_INVOKE_VIRTUAL_RANGE: {
            int methodIndex =
                cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB]->
                methodIndex;

            if (mir->dalvikInsn.opCode == OP_INVOKE_VIRTUAL)
                genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel);
            else
                genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);

            /* r0 now contains this->clazz */
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
                    offsetof(Object, clazz) >> 2);
            /* r1 = &retChainingCell */
            Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL,
                                                   r1, 0);
            /* r4PC = dalvikCallsite */
            loadConstant(cUnit, r4PC,
                         (int) (cUnit->method->insns + mir->offset));

            /* r0 now contains this->clazz->vtable */
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
                    offsetof(ClassObject, vtable) >> 2);
            addrRetChain->generic.target = (LIR *) retChainingCell;

            /* Small vtable indices fit the 5-bit LDR immediate */
            if (methodIndex < 32) {
                newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, methodIndex);
            } else {
                loadConstant(cUnit, r7, methodIndex<<2);
                newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r0, r7);
            }

            /*
             * r0 = calleeMethod,
             * r1 = &ChainingCell,
             * r4PC = callsiteDPC,
             */
            genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT);
#if defined(INVOKE_STATS)
            gDvmJit.invokeNoOpt++;
#endif
            /* Handle exceptions using the interpreter */
            genTrap(cUnit, mir->offset, pcrLabel);
            break;
        }
        /*
         * calleeMethod = method->clazz->super->vtable[method->clazz->pDvmDex
         *                ->pResMethods[BBBB]->methodIndex]
         */
        /* TODO - not exercised in RunPerf.jar */
        case OP_INVOKE_SUPER:
        case OP_INVOKE_SUPER_RANGE: {
            /* Super target is known at JIT time; resolve it now */
            int mIndex = cUnit->method->clazz->pDvmDex->
                pResMethods[dInsn->vB]->methodIndex;
            const Method *calleeMethod =
                cUnit->method->clazz->super->vtable[mIndex];

            if (mir->dalvikInsn.opCode == OP_INVOKE_SUPER)
                genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel);
            else
                genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);

            /* r0 = calleeMethod */
            loadConstant(cUnit, r0, (int) calleeMethod);

            genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel,
                            calleeMethod);
            break;
        }
        /* calleeMethod = method->clazz->pDvmDex->pResMethods[BBBB] */
        case OP_INVOKE_DIRECT:
        case OP_INVOKE_DIRECT_RANGE: {
            const Method *calleeMethod =
                cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB];

            if (mir->dalvikInsn.opCode == OP_INVOKE_DIRECT)
                genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel);
            else
                genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);

            /* r0 = calleeMethod */
            loadConstant(cUnit, r0, (int) calleeMethod);

            genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel,
                            calleeMethod);
            break;
        }
        /* calleeMethod = method->clazz->pDvmDex->pResMethods[BBBB] */
        case OP_INVOKE_STATIC:
        case OP_INVOKE_STATIC_RANGE: {
            const Method *calleeMethod =
                cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB];

            /* Static invokes have no "this" to null-check */
            if (mir->dalvikInsn.opCode == OP_INVOKE_STATIC)
                genProcessArgsNoRange(cUnit, mir, dInsn,
                                      NULL /* no null check */);
            else
                genProcessArgsRange(cUnit, mir, dInsn,
                                    NULL /* no null check */);

            /* r0 = calleeMethod */
            loadConstant(cUnit, r0, (int) calleeMethod);

            genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel,
                            calleeMethod);
            break;
        }
        /*
         * calleeMethod = dvmFindInterfaceMethodInCache(this->clazz,
         *                    BBBB, method, method->clazz->pDvmDex)
         */
        case OP_INVOKE_INTERFACE:
        case OP_INVOKE_INTERFACE_RANGE: {
            int methodIndex = dInsn->vB;

            if (mir->dalvikInsn.opCode == OP_INVOKE_INTERFACE)
                genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel);
            else
                genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);

            /* r0 now contains this->clazz */
            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
                    offsetof(Object, clazz) >> 2);

            /* r1 = BBBB */
            loadConstant(cUnit, r1, dInsn->vB);

            /* r2 = method (caller) */
            loadConstant(cUnit, r2, (int) cUnit->method);

            /* r3 = pDvmDex */
            loadConstant(cUnit, r3, (int) cUnit->method->clazz->pDvmDex);

            loadConstant(cUnit, r7,
                         (intptr_t) dvmFindInterfaceMethodInCache);
            newLIR1(cUnit, ARMV5TE_BLX_R, r7);

            /* r0 = calleeMethod (returned from the cache lookup above) */

            /* r1 = &retChainingCell */
            Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL,
                                               r1, 0);
            /* r4PC = dalvikCallsite */
            loadConstant(cUnit, r4PC,
                         (int) (cUnit->method->insns + mir->offset));

            addrRetChain->generic.target = (LIR *) retChainingCell;
            /*
             * r0 = calleeMethod,
             * r1 = &ChainingCell,
             * r4PC = callsiteDPC,
             */
            genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT);
#if defined(INVOKE_STATS)
            gDvmJit.invokeNoOpt++;
#endif
            /* Handle exceptions using the interpreter */
            genTrap(cUnit, mir->offset, pcrLabel);
            break;
        }
        /* NOP */
        case OP_INVOKE_DIRECT_EMPTY: {
            return false;
        }
        case OP_FILLED_NEW_ARRAY:
        case OP_FILLED_NEW_ARRAY_RANGE: {
            /* Just let the interpreter deal with these */
            genInterpSingleStep(cUnit, mir);
            break;
        }
        default:
            return true;
    }
    return false;
}
+
+static bool handleFmt35ms_3rms(CompilationUnit *cUnit, MIR *mir,
+                               BasicBlock *bb, Armv5teLIR *labelList)
+{
+    Armv5teLIR *retChainingCell = &labelList[bb->fallThrough->id];
+    Armv5teLIR *pcrLabel = NULL;
+
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    switch (mir->dalvikInsn.opCode) {
+        /* calleeMethod = this->clazz->vtable[BBBB] */
+        case OP_INVOKE_VIRTUAL_QUICK_RANGE:
+        case OP_INVOKE_VIRTUAL_QUICK: {
+            int methodIndex = dInsn->vB;
+            if (mir->dalvikInsn.opCode == OP_INVOKE_VIRTUAL_QUICK)
+                genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel);
+            else
+                genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);
+
+            /* r0 now contains this->clazz */
+            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
+                    offsetof(Object, clazz) >> 2);
+            /* r1 = &retChainingCell */
+            Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL,
+                                               r1, 0);
+            /* r4PC = dalvikCallsite */
+            loadConstant(cUnit, r4PC,
+                         (int) (cUnit->method->insns + mir->offset));
+
+            /* r0 now contains this->clazz->vtable */
+            newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
+                    offsetof(ClassObject, vtable) >> 2);
+            addrRetChain->generic.target = (LIR *) retChainingCell;
+
+            if (methodIndex < 32) {
+                newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, methodIndex);
+            } else {
+                loadConstant(cUnit, r7, methodIndex<<2);
+                newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r0, r7);
+            }
+
+            /*
+             * r0 = calleeMethod,
+             * r1 = &ChainingCell,
+             * r4PC = callsiteDPC,
+             */
+            genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT);
+#if defined(INVOKE_STATS)
+            gDvmJit.invokeNoOpt++;
+#endif
+            break;
+        }
+        /* calleeMethod = method->clazz->super->vtable[BBBB] */
+        case OP_INVOKE_SUPER_QUICK:
+        case OP_INVOKE_SUPER_QUICK_RANGE: {
+            const Method *calleeMethod =
+                cUnit->method->clazz->super->vtable[dInsn->vB];
+
+            if (mir->dalvikInsn.opCode == OP_INVOKE_SUPER_QUICK)
+                genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel);
+            else
+                genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);
+
+            /* r0 = calleeMethod */
+            loadConstant(cUnit, r0, (int) calleeMethod);
+
+            genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel,
+                            calleeMethod);
+            break;
+        }
+        /* All other invoke variants are punted back to the interpreter */
+        default:
+            return true;
+    }
+    /* Handle exceptions using the interpreter */
+    genTrap(cUnit, mir->offset, pcrLabel);
+    return false;
+}
+
+/*
+ * Generate code for an execute-inline (format 3inline) instruction by
+ * calling the native inline routine directly from compiled code.
+ *
+ * NOTE: We assume here that the special native inline routines
+ * are side-effect free.  By making this assumption, we can safely
+ * re-execute the routine from the interpreter if it decides it
+ * wants to throw an exception. We still need to EXPORT_PC(), though.
+ *
+ * Returns true if the opcode is not handled here (caller must punt),
+ * false on successful code generation.
+ */
+static bool handleFmt3inline(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    switch( mir->dalvikInsn.opCode) {
+        case OP_EXECUTE_INLINE: {
+            unsigned int i;
+            const InlineOperation* inLineTable = dvmGetInlineOpsTable();
+            /* Byte offset of InterpState.retval (offsetof-style idiom) */
+            int offset = (int) &((InterpState *) NULL)->retval;
+            int operation = dInsn->vB;
+
+            /* Special-case String.length() with a fully inlined body */
+            if (!strcmp(inLineTable[operation].classDescriptor,
+                        "Ljava/lang/String;") &&
+                !strcmp(inLineTable[operation].methodName,
+                        "length") &&
+                !strcmp(inLineTable[operation].methodSignature,
+                        "()I")) {
+                return genInlinedStringLength(cUnit,mir);
+            }
+
+            /* Materialize pointer to retval & push */
+            newLIR2(cUnit, ARMV5TE_MOV_RR, r4PC, rGLUE);
+            newLIR2(cUnit, ARMV5TE_ADD_RI8, r4PC, offset);
+            /* Push r4PC (&retval) and rFP (the latter just to take up space) */
+            newLIR1(cUnit, ARMV5TE_PUSH, (1<<r4PC | 1<<rFP));
+
+            /* Get code pointer to inline routine */
+            loadConstant(cUnit, r4PC, (int)inLineTable[operation].func);
+
+            /* Export PC so the interpreter can re-execute on exception */
+            genExportPC(cUnit, mir, r0, r1 );
+
+            /* Load arguments to r0 through r3 as applicable */
+            for (i=0; i < dInsn->vA; i++) {
+                loadValue(cUnit, dInsn->arg[i], i);
+            }
+            /* Call inline routine */
+            newLIR1(cUnit, ARMV5TE_BLX_R, r4PC);
+
+            /* Strip frame (pop the two words pushed above) */
+            newLIR1(cUnit, ARMV5TE_ADD_SPI7, 2);
+
+            /* Did we throw? If so, redo under interpreter*/
+            genNullCheck(cUnit, r0, mir->offset, NULL);
+
+            break;
+        }
+        default:
+            return true;
+    }
+    return false;
+}
+
+/*
+ * Format 51l: materialize the 64-bit literal in vB_wide into the Dalvik
+ * register pair vA/vA+1 (low word through r0, high word through r1).
+ * Returns false (always handled).
+ */
+static bool handleFmt51l(CompilationUnit *cUnit, MIR *mir)
+{
+    loadConstant(cUnit, r0, mir->dalvikInsn.vB_wide & 0xFFFFFFFFUL);
+    loadConstant(cUnit, r1, (mir->dalvikInsn.vB_wide>>32) & 0xFFFFFFFFUL);
+    storeValuePair(cUnit, r0, r1, mir->dalvikInsn.vA, r2);
+    return false;
+}
+
+/*****************************************************************************/
+/*
+ * The following are special processing routines that handle transfer of
+ * controls between compiled code and the interpreter. Certain VM states like
+ * Dalvik PC and special-purpose registers are reconstructed here.
+ */
+
+/*
+ * Chaining cell for normal-ending compiles (e.g. branches).
+ *
+ * Emits a call through the dvmJitToInterpNormal entry in the glue's
+ * jitToInterpEntries table, followed by the Dalvik PC of the branch
+ * target embedded as word data.
+ */
+static void handleGenericChainingCell(CompilationUnit *cUnit,
+                                      unsigned int offset)
+{
+    newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE,
+        offsetof(InterpState, jitToInterpEntries.dvmJitToInterpNormal) >> 2);
+    newLIR1(cUnit, ARMV5TE_BLX_R, r0);
+    addWordData(cUnit, (int) (cUnit->method->insns + offset), true);
+}
+
+/*
+ * Chaining cell for instructions that immediately follow a method
+ * invocation.
+ *
+ * Emits a call through the dvmJitToTraceSelect entry in the glue's
+ * jitToInterpEntries table, followed by the Dalvik PC of the
+ * post-invoke instruction embedded as word data.
+ */
+static void handlePostInvokeChainingCell(CompilationUnit *cUnit,
+                                         unsigned int offset)
+{
+    newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE,
+        offsetof(InterpState, jitToInterpEntries.dvmJitToTraceSelect) >> 2);
+    newLIR1(cUnit, ARMV5TE_BLX_R, r0);
+    addWordData(cUnit, (int) (cUnit->method->insns + offset), true);
+}
+
+/*
+ * Chaining cell for monomorphic method invocations.
+ *
+ * Same structure as the post-invoke cell, but the embedded word is the
+ * first instruction of the (statically known) callee.
+ */
+static void handleInvokeChainingCell(CompilationUnit *cUnit,
+                                     const Method *callee)
+{
+    newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE,
+        offsetof(InterpState, jitToInterpEntries.dvmJitToTraceSelect) >> 2);
+    newLIR1(cUnit, ARMV5TE_BLX_R, r0);
+    addWordData(cUnit, (int) (callee->insns), true);
+}
+
+/*
+ * Load the Dalvik PC into r0 and jump to the specified target.
+ *
+ * For each recorded PC-reconstruction point: append its label LIR,
+ * load the corresponding Dalvik PC constant into r0, and branch
+ * unconditionally to targetLabel (the exception-handling block).
+ */
+static void handlePCReconstruction(CompilationUnit *cUnit,
+                                   Armv5teLIR *targetLabel)
+{
+    Armv5teLIR **pcrLabel =
+        (Armv5teLIR **) cUnit->pcReconstructionList.elemList;
+    int numElems = cUnit->pcReconstructionList.numUsed;
+    int i;
+    for (i = 0; i < numElems; i++) {
+        dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
+        /* r0 = dalvik PC */
+        loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
+        genUnconditionalBranch(cUnit, targetLabel);
+    }
+}
+
+/*
+ * Entry function to invoke the backend of the JIT compiler.
+ *
+ * Walks every basic block of the compilation unit: emits a label LIR per
+ * block, lowers each Dalvik MIR instruction according to its instruction
+ * format, and finally emits the chaining cells (generic / invoke /
+ * post-invoke) grouped by type at the end of the code stream.
+ */
+void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
+{
+    /* Used to hold the labels of each block */
+    Armv5teLIR *labelList =
+        dvmCompilerNew(sizeof(Armv5teLIR) * cUnit->numBlocks, true);
+    GrowableList chainingListByType[CHAINING_CELL_LAST];
+    int i;
+
+    /*
+     * Initialize the three chaining lists for generic, post-invoke, and invoke
+     * chains.
+     */
+    for (i = 0; i < CHAINING_CELL_LAST; i++) {
+        dvmInitGrowableList(&chainingListByType[i], 2);
+    }
+
+    BasicBlock **blockList = cUnit->blockList;
+
+    /* Handle the content in each basic block */
+    for (i = 0; i < cUnit->numBlocks; i++) {
+        blockList[i]->visited = true;
+        MIR *mir;
+
+        labelList[i].operands[0] = blockList[i]->startOffset;
+
+        if (blockList[i]->blockType >= CHAINING_CELL_LAST) {
+            /*
+             * Append the label pseudo LIR first. Chaining cells will be handled
+             * separately afterwards.
+             */
+            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
+        }
+
+        if (blockList[i]->blockType == DALVIK_BYTECODE) {
+            labelList[i].opCode = ARMV5TE_PSEUDO_NORMAL_BLOCK_LABEL;
+        } else {
+            switch (blockList[i]->blockType) {
+                case CHAINING_CELL_GENERIC:
+                    labelList[i].opCode = ARMV5TE_PSEUDO_CHAINING_CELL_GENERIC;
+                    /* handle the codegen later; record the block id */
+                    dvmInsertGrowableList(
+                        &chainingListByType[CHAINING_CELL_GENERIC], (void *) i);
+                    break;
+                case CHAINING_CELL_INVOKE:
+                    labelList[i].opCode = ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE;
+                    labelList[i].operands[0] =
+                        (int) blockList[i]->containingMethod;
+                    /* handle the codegen later */
+                    dvmInsertGrowableList(
+                        &chainingListByType[CHAINING_CELL_INVOKE], (void *) i);
+                    break;
+                case CHAINING_CELL_POST_INVOKE:
+                    labelList[i].opCode =
+                        ARMV5TE_PSEUDO_CHAINING_CELL_POST_INVOKE;
+                    /* handle the codegen later */
+                    dvmInsertGrowableList(
+                        &chainingListByType[CHAINING_CELL_POST_INVOKE],
+                        (void *) i);
+                    break;
+                case PC_RECONSTRUCTION:
+                    /* Make sure exception handling block is next */
+                    labelList[i].opCode =
+                        ARMV5TE_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
+                    assert (i == cUnit->numBlocks - 2);
+                    handlePCReconstruction(cUnit, &labelList[i+1]);
+                    break;
+                case EXCEPTION_HANDLING:
+                    /* Only needed if some PC reconstruction points exist */
+                    labelList[i].opCode = ARMV5TE_PSEUDO_EH_BLOCK_LABEL;
+                    if (cUnit->pcReconstructionList.numUsed) {
+                        newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, rGLUE,
+                            offsetof(InterpState,
+                                     jitToInterpEntries.dvmJitToInterpPunt)
+                            >> 2);
+                        newLIR1(cUnit, ARMV5TE_BLX_R, r1);
+                    }
+                    break;
+                default:
+                    break;
+            }
+            continue;
+        }
+        /* Lower each MIR instruction in this Dalvik bytecode block */
+        for (mir = blockList[i]->firstMIRInsn; mir; mir = mir->next) {
+            OpCode dalvikOpCode = mir->dalvikInsn.opCode;
+            InstructionFormat dalvikFormat =
+                dexGetInstrFormat(gDvm.instrFormat, dalvikOpCode);
+            newLIR2(cUnit, ARMV5TE_PSEUDO_DALVIK_BYTECODE_BOUNDARY,
+                    mir->offset,dalvikOpCode);
+            bool notHandled;
+            /*
+             * Debugging: screen the opcode first to see if it is in the
+             * do[-not]-compile list
+             */
+            bool singleStepMe =
+                gDvmJit.includeSelectedOp !=
+                ((gDvmJit.opList[dalvikOpCode >> 3] &
+                  (1 << (dalvikOpCode & 0x7))) !=
+                 0);
+            if (singleStepMe || cUnit->allSingleStep) {
+                notHandled = false;
+                genInterpSingleStep(cUnit, mir);
+            } else {
+                opcodeCoverage[dalvikOpCode]++;
+                /* Dispatch to the per-format lowering routine */
+                switch (dalvikFormat) {
+                    case kFmt10t:
+                    case kFmt20t:
+                    case kFmt30t:
+                        notHandled = handleFmt10t_Fmt20t_Fmt30t(cUnit,
+                                  mir, blockList[i], labelList);
+                        break;
+                    case kFmt10x:
+                        notHandled = handleFmt10x(cUnit, mir);
+                        break;
+                    case kFmt11n:
+                    case kFmt31i:
+                        notHandled = handleFmt11n_Fmt31i(cUnit, mir);
+                        break;
+                    case kFmt11x:
+                        notHandled = handleFmt11x(cUnit, mir);
+                        break;
+                    case kFmt12x:
+                        notHandled = handleFmt12x(cUnit, mir);
+                        break;
+                    case kFmt20bc:
+                        notHandled = handleFmt20bc(cUnit, mir);
+                        break;
+                    case kFmt21c:
+                    case kFmt31c:
+                        notHandled = handleFmt21c_Fmt31c(cUnit, mir);
+                        break;
+                    case kFmt21h:
+                        notHandled = handleFmt21h(cUnit, mir);
+                        break;
+                    case kFmt21s:
+                        notHandled = handleFmt21s(cUnit, mir);
+                        break;
+                    case kFmt21t:
+                        notHandled = handleFmt21t(cUnit, mir, blockList[i],
+                                                  labelList);
+                        break;
+                    case kFmt22b:
+                    case kFmt22s:
+                        notHandled = handleFmt22b_Fmt22s(cUnit, mir);
+                        break;
+                    case kFmt22c:
+                        notHandled = handleFmt22c(cUnit, mir);
+                        break;
+                    case kFmt22cs:
+                        notHandled = handleFmt22cs(cUnit, mir);
+                        break;
+                    case kFmt22t:
+                        notHandled = handleFmt22t(cUnit, mir, blockList[i],
+                                                  labelList);
+                        break;
+                    case kFmt22x:
+                    case kFmt32x:
+                        notHandled = handleFmt22x_Fmt32x(cUnit, mir);
+                        break;
+                    case kFmt23x:
+                        notHandled = handleFmt23x(cUnit, mir);
+                        break;
+                    case kFmt31t:
+                        notHandled = handleFmt31t(cUnit, mir);
+                        break;
+                    case kFmt3rc:
+                    case kFmt35c:
+                        notHandled = handleFmt35c_3rc(cUnit, mir, blockList[i],
+                                                      labelList);
+                        break;
+                    case kFmt3rms:
+                    case kFmt35ms:
+                        notHandled = handleFmt35ms_3rms(cUnit, mir,blockList[i],
+                                                        labelList);
+                        break;
+                    case kFmt3inline:
+                        notHandled = handleFmt3inline(cUnit, mir);
+                        break;
+                    case kFmt51l:
+                        notHandled = handleFmt51l(cUnit, mir);
+                        break;
+                    default:
+                        notHandled = true;
+                        break;
+                }
+            }
+            /* Lowering failure is fatal — dump diagnostics and abort */
+            if (notHandled) {
+                LOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled\n",
+                     mir->offset,
+                     dalvikOpCode, getOpcodeName(dalvikOpCode),
+                     dalvikFormat);
+                dvmAbort();
+                break;
+            } else {
+              gDvmJit.opHistogram[dalvikOpCode]++;
+            }
+        }
+    }
+
+    /* Handle the codegen in predefined order (grouped by cell type) */
+    for (i = 0; i < CHAINING_CELL_LAST; i++) {
+        size_t j;
+        int *blockIdList = (int *) chainingListByType[i].elemList;
+
+        cUnit->numChainingCells[i] = chainingListByType[i].numUsed;
+
+        /* No chaining cells of this type */
+        if (cUnit->numChainingCells[i] == 0)
+            continue;
+
+        /* Record the first LIR for a new type of chaining cell */
+        cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
+
+        for (j = 0; j < chainingListByType[i].numUsed; j++) {
+            int blockId = blockIdList[j];
+
+            /* Align this chaining cell first */
+            newLIR0(cUnit, ARMV5TE_PSEUDO_ALIGN4);
+
+            /* Insert the pseudo chaining instruction */
+            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);
+
+
+            switch (blockList[blockId]->blockType) {
+                case CHAINING_CELL_GENERIC:
+                    handleGenericChainingCell(cUnit,
+                      blockList[blockId]->startOffset);
+                    break;
+                case CHAINING_CELL_INVOKE:
+                    handleInvokeChainingCell(cUnit,
+                        blockList[blockId]->containingMethod);
+                    break;
+                case CHAINING_CELL_POST_INVOKE:
+                    handlePostInvokeChainingCell(cUnit,
+                        blockList[blockId]->startOffset);
+                    break;
+                default:
+                    dvmAbort();
+                    break;
+            }
+        }
+    }
+}
+
+/*
+ * Accept the work and start compiling.
+ *
+ * Dispatches a compiler work order to the method or trace compiler.
+ * Returns the compilation result (NULL if the code cache is full).
+ */
+void *dvmCompilerDoWork(CompilerWorkOrder *work)
+{
+   void *res;
+
+   /* Once the code cache fills up, no further compilation is attempted */
+   if (gDvmJit.codeCacheFull) {
+       return NULL;
+   }
+
+   switch (work->kind) {
+       case kWorkOrderMethod:
+           res = dvmCompileMethod(work->info);
+           break;
+       case kWorkOrderTrace:
+           res = dvmCompileTrace(work->info);
+           break;
+       default:
+           res = NULL;
+           dvmAbort();
+   }
+   return res;
+}
+
+/*
+ * Architecture-specific initializations and checks go here.
+ *
+ * Populates templateEntryOffsets for each JIT template and asserts the
+ * struct-layout assumptions the Thumb codegen depends on (5-bit scaled
+ * immediate offsets, 8-bit immediates). Always returns true.
+ */
+bool dvmCompilerArchInit(void)
+{
+    /* First, declare dvmCompiler_TEMPLATE_XXX for each template */
+#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
+#include "../../template/armv5te/TemplateOpList.h"
+#undef JIT_TEMPLATE
+
+    int i = 0;
+    extern void dvmCompilerTemplateStart(void);
+
+    /*
+     * Then, populate the templateEntryOffsets array with the offsets from
+     * the dvmCompilerTemplateStart symbol for each template.
+     */
+#define JIT_TEMPLATE(X) templateEntryOffsets[i++] = \
+    (intptr_t) dvmCompiler_TEMPLATE_##X - (intptr_t) dvmCompilerTemplateStart;
+#include "../../template/armv5te/TemplateOpList.h"
+#undef JIT_TEMPLATE
+
+    /* Codegen-specific assumptions */
+    assert(offsetof(ClassObject, vtable) < 128 &&
+           (offsetof(ClassObject, vtable) & 0x3) == 0);
+    assert(offsetof(ArrayObject, length) < 128 &&
+           (offsetof(ArrayObject, length) & 0x3) == 0);
+    assert(offsetof(ArrayObject, contents) < 256);
+
+    /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */
+    assert(sizeof(StackSaveArea) < 236);
+
+    /*
+     * EA is calculated by doing "Rn + imm5 << 2", and there are 5 entry points
+     * that codegen may access, make sure that the offset from the top of the
+     * struct is less than 108.
+     */
+    assert(offsetof(InterpState, jitToInterpEntries) < 108);
+    return true;
+}
+
+/* Architecture-specific debugging helpers go here */
+void dvmCompilerArchDump(void)
+{
+    /*
+     * Print the opcodes compiled in this VM instance as a compact list of
+     * hex ranges (e.g. "1a,2b-2f,...") suitable for dalvik.vm.jitop.
+     */
+    int i, start, streak;
+    char buf[1024];
+
+    streak = i = 0;
+    buf[0] = 0;
+    /*
+     * The bounds check must come before the array access; the original
+     * order read opcodeCoverage[256] (out of bounds) when no opcode had
+     * been compiled.
+     */
+    while (i < 256 && opcodeCoverage[i] == 0) {
+        i++;
+    }
+    if (i == 256) {
+        return;
+    }
+    for (start = i++, streak = 1; i < 256; i++) {
+        if (opcodeCoverage[i]) {
+            streak++;
+        } else {
+            /* End of a run — emit either a single opcode or a range */
+            if (streak == 1) {
+                sprintf(buf+strlen(buf), "%x,", start);
+            } else {
+                sprintf(buf+strlen(buf), "%x-%x,", start, start + streak - 1);
+            }
+            streak = 0;
+            /* Skip the gap to the next compiled opcode (bounds check first) */
+            while (i < 256 && opcodeCoverage[i] == 0) {
+                i++;
+            }
+            if (i < 256) {
+                streak = 1;
+                start = i;
+            }
+        }
+    }
+    /* Flush the final run (no trailing comma) */
+    if (streak) {
+        if (streak == 1) {
+            sprintf(buf+strlen(buf), "%x", start);
+        } else {
+            sprintf(buf+strlen(buf), "%x-%x", start, start + streak - 1);
+        }
+    }
+    if (strlen(buf)) {
+        LOGD("dalvik.vm.jitop = %s", buf);
+    }
+}
diff --git a/vm/compiler/template/Makefile-template b/vm/compiler/template/Makefile-template
new file mode 100644 (file)
index 0000000..9203183
--- /dev/null
@@ -0,0 +1,49 @@
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Makefile for the Dalvik JIT compiler templates.  This is not currently
+# integrated into the build system.
+#
+
+SHELL := /bin/sh
+
+# Build system has TARGET_ARCH=arm, but we need the exact architecture.
+# The base assumption for an ARM platform is ARMv5TE, but we may want to
+# support older ARMv4 devices, or use special features from ARMv6 or VFP.
+# The simulator build is "desktop".
+#
+# To generate sources for all targets:
+# for arch in desktop armv5te; do TARGET_ARCH_EXT=$arch make -f Makefile-template; done
+#
+#TARGET_ARCH_EXT := armv5te
+
+OUTPUT_DIR := out
+
+# Accumulate all possible dependencies for the generated files in a very
+# conservative fashion.  If it's not one of the generated files in "out",
+# assume it's a dependency.
+SOURCE_DEPS := \
+       $(shell find . -path ./$(OUTPUT_DIR) -prune -o -type f -print)
+
+# Source files generated by the script.  Currently only the assembly
+# file is produced here.
+GEN_SOURCES := \
+       $(OUTPUT_DIR)/CompilerTemplateAsm-$(TARGET_ARCH_EXT).S
+
+target: $(GEN_SOURCES)
+
+$(GEN_SOURCES): $(SOURCE_DEPS)
+       @mkdir -p out
+       ./gen-template.py $(TARGET_ARCH_EXT) $(OUTPUT_DIR)
diff --git a/vm/compiler/template/README.txt b/vm/compiler/template/README.txt
new file mode 100644 (file)
index 0000000..fced412
--- /dev/null
@@ -0,0 +1 @@
+See README.txt under dalvik/vm/mterp for details.
diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPG_DOUBLE.S b/vm/compiler/template/armv5te/TEMPLATE_CMPG_DOUBLE.S
new file mode 100644 (file)
index 0000000..f18f6d3
--- /dev/null
@@ -0,0 +1 @@
+%include "armv5te/TEMPLATE_CMPL_DOUBLE.S" { "naninst":"mov     r0, #1" }
diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT.S b/vm/compiler/template/armv5te/TEMPLATE_CMPG_FLOAT.S
new file mode 100644 (file)
index 0000000..02887e5
--- /dev/null
@@ -0,0 +1 @@
+%include "armv5te/TEMPLATE_CMPL_FLOAT.S" { "naninst":"mov     r0, #1" }
diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPL_DOUBLE.S b/vm/compiler/template/armv5te/TEMPLATE_CMPL_DOUBLE.S
new file mode 100644 (file)
index 0000000..dfafd2c
--- /dev/null
@@ -0,0 +1,39 @@
+%default { "naninst":"mvn     r0, #0" }
+    /*
+     * For the JIT: incoming arguments are pointers to the arguments in r0/r1
+     *              result in r0
+     *
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into r0 depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * See OP_CMPL_FLOAT for an explanation.
+     *
+     * For: cmpl-double, cmpg-double
+     */
+    /* op vAA, vBB, vCC */
+    mov     r4, lr                      @ save return address
+    mov     r9, r0                      @ save copy of &arg1
+    mov     r10, r1                     @ save copy of &arg2
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r10, {r2-r3}                @ r2/r3<- vCC/vCC+1
+    LDR_PC_LR ".L__aeabi_cdcmple"       @ PIC way of "bl __aeabi_cdcmple"
+    bhi     .L${opcode}_gt_or_nan       @ C set and Z clear, disambiguate
+    mvncc   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
+    bx      r4
+
+    @ Test for NaN with a second comparison.  EABI forbids testing bit
+    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
+    @ make the library call.
+.L${opcode}_gt_or_nan:
+    ldmia   r10, {r0-r1}                @ reverse order
+    ldmia   r9, {r2-r3}
+    LDR_PC_LR ".L__aeabi_cdcmple"       @ r0<- Z set if eq, C clear if <
+    movcc   r0, #1                      @ (greater than) r0<- 1
+    bxcc    r4
+    $naninst                            @ r0<- 1 or -1 for NaN
+    bx      r4
+
diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT.S b/vm/compiler/template/armv5te/TEMPLATE_CMPL_FLOAT.S
new file mode 100644 (file)
index 0000000..31d4cd8
--- /dev/null
@@ -0,0 +1,56 @@
+%default { "naninst":"mvn     r0, #0" }
+    /*
+     * For the JIT: incoming arguments in r0, r1
+     *              result in r0
+     *
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into r0 depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * The operation we're implementing is:
+     *   if (x == y)
+     *     return 0;
+     *   else if (x < y)
+     *     return -1;
+     *   else if (x > y)
+     *     return 1;
+     *   else
+     *     return {-1,1};  // one or both operands was NaN
+     *
+     * The straightforward implementation requires 3 calls to functions
+     * that return a result in r0.  We can do it with two calls if our
+     * EABI library supports __aeabi_cfcmple (only one if we want to check
+     * for NaN directly):
+     *   check x <= y
+     *     if <, return -1
+     *     if ==, return 0
+     *   check y <= x
+     *     if <, return 1
+     *   return {-1,1}
+     *
+     * for: cmpl-float, cmpg-float
+     */
+    /* op vAA, vBB, vCC */
+    mov     r4, lr                      @ save return address
+    mov     r9, r0                      @ Save copies - we may need to redo
+    mov     r10, r1
+    LDR_PC_LR ".L__aeabi_cfcmple"       @ cmp <=: C clear if <, Z set if eq
+    bhi     .L${opcode}_gt_or_nan       @ C set and Z clear, disambiguate
+    mvncc   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
+    bx      r4
+    @ Test for NaN with a second comparison.  EABI forbids testing bit
+    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
+    @ make the library call.
+.L${opcode}_gt_or_nan:
+    mov     r1, r9                      @ reverse order
+    mov     r0, r10
+    LDR_PC_LR ".L__aeabi_cfcmple"       @ r0<- Z set if eq, C clear if <
+    movcc   r0, #1                      @ (greater than) r0<- 1
+    bxcc    r4
+    $naninst                            @ r0<- 1 or -1 for NaN
+    bx      r4
+
+
diff --git a/vm/compiler/template/armv5te/TEMPLATE_CMP_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_CMP_LONG.S
new file mode 100644 (file)
index 0000000..5f1e16b
--- /dev/null
@@ -0,0 +1,34 @@
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     *
+     * We load the full values with LDM, but in practice many values could
+     * be resolved by only looking at the high word.  This could be made
+     * faster or slower by splitting the LDM into a pair of LDRs.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
+     */
+    /* cmp-long vAA, vBB, vCC */
+    @ On entry: r0/r1 = vBB (lo/hi), r2/r3 = vCC (lo/hi); result in r0
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .L${opcode}_less            @ signed compare on high part
+    bgt     .L${opcode}_greater
+    subs    r0, r0, r2                  @ r0<- r0 - r2
+    bxeq     lr
+    bhi     .L${opcode}_greater         @ unsigned compare on low part
+.L${opcode}_less:
+    mvn     r0, #0                      @ r0<- -1
+    bx      lr
+.L${opcode}_greater:
+    mov     r0, #1                      @ r0<- 1
+    bx      lr
+
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
new file mode 100644 (file)
index 0000000..6994f26
--- /dev/null
@@ -0,0 +1,54 @@
+    /*
+     * For monomorphic callsite, setup the Dalvik frame and return to the
+     * Thumb code through the link register to transfer control to the callee
+     * method through a dedicated chaining cell.
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ Thumb addr is odd
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    add     r12, lr, #2                 @ setup the punt-to-interp address
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    r12                         @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0
+    bxne    r12                         @ bail to the interpreter
+    tst     r10, #ACC_NATIVE
+    bne     .LinvokeNative
+    /*
+     * If we want to punt to the interpreter for native call, swap the bne with
+     * the following
+     * bxne    r12
+     */
+
+
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
+
+    @ Update "glue" values for the new method
+    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
+    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
+
+    bx      lr                              @ return to the callee-chaining cell
+
+
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
new file mode 100644 (file)
index 0000000..003459d
--- /dev/null
@@ -0,0 +1,53 @@
+    /*
+     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
+     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
+     * runtime-resolved callee.
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ r3<- return addr with Thumb bit set (Thumb addr is odd)
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    lr                          @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0
+    bxne    lr                          @ bail to the interpreter
+    tst     r10, #ACC_NATIVE
+    bne     .LinvokeNative
+    /*
+     * If we want to punt to the interpreter for native call, swap the bne with
+     * the following
+     * bxne    lr
+     */
+
+
+    ldr     r10, .LdvmJitToInterpNoChain
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
+
+    @ Update "glue" values for the new method
+    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
+    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
+
+    @ Start executing the callee
+    mov     pc, r10                         @ dvmJitToInterpNoChain
diff --git a/vm/compiler/template/armv5te/TEMPLATE_MUL_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_MUL_LONG.S
new file mode 100644 (file)
index 0000000..8a9b115
--- /dev/null
@@ -0,0 +1,28 @@
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
+    /* mul-long vAA, vBB, vCC */
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- lo/hi of ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    add     r10, r2, r10                @  r10<- hi(ZxX) + (YxX + ZxW) = result hi
+    mov     r0,r9                       @  r0<- result lo
+    mov     r1,r10                      @  r1<- result hi
+    bx      lr
diff --git a/vm/compiler/template/armv5te/TEMPLATE_RETURN.S b/vm/compiler/template/armv5te/TEMPLATE_RETURN.S
new file mode 100644 (file)
index 0000000..f0a4623
--- /dev/null
@@ -0,0 +1,38 @@
+    /*
+     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
+     * If the stored value in returnAddr
+     * is non-zero, the caller is compiled by the JIT thus return to the
+     * address in the code cache following the invoke instruction. Otherwise
+     * return to the special dvmJitToInterpNoChain entry point.
+     */
+    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
+    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
+    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
+    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
+                                        @ r2<- method we're returning to
+    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
+    cmp     r2, #0                      @ break frame?
+    beq     1f                          @ bail to interpreter
+    ldr     r0, .LdvmJitToInterpNoChain @ defined in footer.S
+    mov     rFP, r10                    @ publish new FP
+    ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz ("ne" always true here)
+    ldr     r8, [r8]                    @ r8<- suspendCount
+
+    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
+    ldr     r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex
+    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
+    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
+    str     r1, [rGLUE, #offGlue_methodClassDex]
+    cmp     r8, #0                      @ check the suspendCount
+    movne   r9, #0                      @ suspend pending: force interp path below
+    cmp     r9, #0                      @ chaining cell exists?
+    blxne   r9                          @ jump to the chaining cell
+    mov     pc, r0                      @ callsite is interpreted
+1:
+    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
+    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
+    mov     r1, #0                      @ changeInterp = false
+    mov     r0, rGLUE                   @ Expecting rGLUE in r0
+    blx     r2                          @ exit the interpreter
diff --git a/vm/compiler/template/armv5te/TEMPLATE_SHL_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_SHL_LONG.S
new file mode 100644 (file)
index 0000000..532f8a4
--- /dev/null
@@ -0,0 +1,15 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     */
+    /* shl-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    bx      lr
diff --git a/vm/compiler/template/armv5te/TEMPLATE_SHR_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_SHR_LONG.S
new file mode 100644 (file)
index 0000000..ca7545a
--- /dev/null
@@ -0,0 +1,16 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     */
+    /* shr-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2 (logical, low word)
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2 (arithmetic, high word)
+    bx      lr
+
diff --git a/vm/compiler/template/armv5te/TEMPLATE_USHR_LONG.S b/vm/compiler/template/armv5te/TEMPLATE_USHR_LONG.S
new file mode 100644 (file)
index 0000000..d7c71d9
--- /dev/null
@@ -0,0 +1,16 @@
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     */
+    /* ushr-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    bx      lr
+
diff --git a/vm/compiler/template/armv5te/TemplateOpList.h b/vm/compiler/template/armv5te/TemplateOpList.h
new file mode 100644 (file)
index 0000000..6428ccf
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Dalvik opcode list that uses additional templates to complete JIT execution.
+ */
+#ifndef JIT_TEMPLATE
+#define JIT_TEMPLATE(X)
+#endif
+
+JIT_TEMPLATE(CMP_LONG)
+JIT_TEMPLATE(RETURN)
+JIT_TEMPLATE(INVOKE_METHOD_NO_OPT)
+JIT_TEMPLATE(INVOKE_METHOD_CHAIN)
+JIT_TEMPLATE(CMPG_DOUBLE)
+JIT_TEMPLATE(CMPL_DOUBLE)
+JIT_TEMPLATE(CMPG_FLOAT)
+JIT_TEMPLATE(CMPL_FLOAT)
+JIT_TEMPLATE(MUL_LONG)
+JIT_TEMPLATE(SHL_LONG)
+JIT_TEMPLATE(SHR_LONG)
+JIT_TEMPLATE(USHR_LONG)
diff --git a/vm/compiler/template/armv5te/footer.S b/vm/compiler/template/armv5te/footer.S
new file mode 100644 (file)
index 0000000..e961e29
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+.LinvokeNative:
+    @ Prep for the native call
+    @ r1 = newFP, r0 = methodToCall
+    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
+    ldr     r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
+    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
+    str     r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
+                                        @ newFp->localRefTop=refNext
+    mov     r9, r3                      @ r9<- glue->self (preserve)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
+
+    mov     r2, r0                      @ r2<- methodToCall
+    mov     r0, r1                      @ r0<- newFP
+    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
+
+    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+
+    @ native return; r9=self, r10=newSaveArea
+    @ equivalent to dvmPopJniLocals
+    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+    ldr     r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
+    ldr     r1, [r9, #offThread_exception] @ check for exception
+    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
+    cmp     r1, #0                      @ null?
+    str     r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0
+    bne     .LhandleException             @ exception pending, handle it
+    bx      r2                          @ no exception: return via chaining cell
+
+/* FIXME - untested */
+.LhandleException:
+    ldr     rIBASE, .LdvmAsmInstructionStart
+    ldr     rPC, [r10, #offStackSaveArea_savedPc] @ reload rPC
+    b       dvmMterpCommonExceptionThrown
+
+    .align  2
+.LdvmAsmInstructionStart:
+    .word   dvmAsmInstructionStart
+.LdvmJitToInterpNoChain:
+    .word   dvmJitToInterpNoChain
+.LdvmMterpStdBail:
+    .word   dvmMterpStdBail
+.L__aeabi_cdcmple:
+    .word   __aeabi_cdcmple
+.L__aeabi_cfcmple:
+    .word   __aeabi_cfcmple
+
+    .global dmvCompilerTemplateEnd      @ NOTE(review): "dmv" looks like a typo for "dvm" -- confirm C-side references before renaming
+dmvCompilerTemplateEnd:
+
+#endif /* WITH_JIT */
diff --git a/vm/compiler/template/armv5te/header.S b/vm/compiler/template/armv5te/header.S
new file mode 100644 (file)
index 0000000..65daf8d
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(WITH_JIT)
+
+/*
+ * ARMv5 definitions and declarations.
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+JIT and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rGLUE     MterpGlue pointer
+
+The following registers have fixed assignments in mterp but are scratch
+registers in compiled code
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r7  rIBASE    interpreted instruction base pointer, used for computed goto
+  r8  rINST     first 16-bit code unit of current instruction
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/* single-purpose registers, given names for clarity */
+#define rPC     r4
+#define rFP     r5
+#define rGLUE   r6
+#define rIBASE  r7
+#define rINST   r8
+
+/*
+ * Given a frame pointer, find the stack save area.
+ *
+ * In C this is "((StackSaveArea*)(_fp) -1)".
+ */
+#define SAVEAREA_FROM_FP(_reg, _fpreg) \
+    sub     _reg, _fpreg, #sizeofStackSaveArea
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "../../../mterp/common/asm-constants.h"
+
diff --git a/vm/compiler/template/armv5te/platform.S b/vm/compiler/template/armv5te/platform.S
new file mode 100644 (file)
index 0000000..b960a93
--- /dev/null
@@ -0,0 +1,16 @@
+/*
+ * ===========================================================================
+ *  CPU-version-specific defines
+ * ===========================================================================
+ */
+
+/*
+ * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
+ * Jump to subroutine.
+ *
+ * May modify IP and LR.
+ */
+.macro  LDR_PC_LR source
+    mov     lr, pc
+    ldr     pc, \source
+.endm
diff --git a/vm/compiler/template/config-armv5te b/vm/compiler/template/config-armv5te
new file mode 100644 (file)
index 0000000..668df1b
--- /dev/null
@@ -0,0 +1,45 @@
+# Copyright (C) 2009 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for ARMv5TE architecture targets.
+#
+
+# file header and basic definitions
+#import c/header.c
+import armv5te/header.S
+
+# C pre-processor defines for stub C instructions
+#import cstubs/stubdefs.c
+
+# highly-platform-specific defs
+import armv5te/platform.S
+
+# common defs for the C helpers; include this before the instruction handlers
+#import c/opcommon.c
+
+# opcode list; argument to op-start is default directory
+op-start armv5te
+
+op-end
+
+# "helper" code for C; include if you use any of the C stubs (this generates
+# object code, so it's normally excluded)
+##import c/gotoTargets.c
+
+# end of defs; include this when cstubs/stubdefs.c is included
+#import cstubs/enddefs.c
+
+# common subroutines for asm
+import armv5te/footer.S
diff --git a/vm/compiler/template/gen-template.py b/vm/compiler/template/gen-template.py
new file mode 100755 (executable)
index 0000000..8a1ba0c
--- /dev/null
@@ -0,0 +1,422 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Using instructions from an architecture-specific config file, generate C
+# and assembly source files for the Dalvik JIT.
+#
+
+import sys, string, re, time
+from string import Template
+
+interp_defs_file = "TemplateOpList.h" # need opcode list
+
+handler_size_bits = -1000
+handler_size_bytes = -1000
+in_op_start = 0             # 0=not started, 1=started, 2=ended
+default_op_dir = None
+opcode_locations = {}
+asm_stub_text = []
+label_prefix = ".L"         # use ".L" to hide labels from gdb
+
+
+# Exception class.
+class DataParseError(SyntaxError):
+    "Failure when parsing data file"
+
+#
+# Set any omnipresent substitution values.
+#
+def getGlobalSubDict():
+    return { "handler_size_bits":handler_size_bits,
+             "handler_size_bytes":handler_size_bytes }
+
+#
+# Parse arch config file --
+# Set handler_size_bytes to the value of tokens[1], and handler_size_bits to
+# log2(handler_size_bytes).  Throws an exception if "bytes" is not a power
+# of two.
+#
+def setHandlerSize(tokens):
+    global handler_size_bits, handler_size_bytes
+    if len(tokens) != 2:
+        raise DataParseError("handler-size requires one argument")
+    if handler_size_bits != -1000:
+        raise DataParseError("handler-size may only be set once")
+
+    # compute log2(n), and make sure n is a power of 2
+    handler_size_bytes = bytes = int(tokens[1])
+    bits = -1
+    while bytes > 0:
+        bytes //= 2     # halve with truncating division
+        bits += 1
+
+    if handler_size_bytes == 0 or handler_size_bytes != (1 << bits):
+        raise DataParseError("handler-size (%d) must be power of 2 and > 0" \
+                % orig_bytes)
+    handler_size_bits = bits
+
+#
+# Parse arch config file --
+# Copy a file in to the C or asm output file.
+#
+def importFile(tokens):
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    source = tokens[1]
+    if source.endswith(".S"):
+        appendSourceFile(tokens[1], getGlobalSubDict(), asm_fp, None)
+    else:
+        raise DataParseError("don't know how to import %s (expecting .c/.S)"
+                % source)
+
+#
+# Parse arch config file --
+# Copy a file in to the C or asm output file.
+#
+def setAsmStub(tokens):
+    global asm_stub_text
+    if len(tokens) != 2:
+        raise DataParseError("import requires one argument")
+    try:
+        stub_fp = open(tokens[1])
+        asm_stub_text = stub_fp.readlines()
+    except IOError, err:
+        stub_fp.close()
+        raise DataParseError("unable to load asm-stub: %s" % str(err))
+    stub_fp.close()
+
+#
+# Parse arch config file --
+# Start of opcode list.
+#
+def opStart(tokens):
+    global in_op_start
+    global default_op_dir
+    if len(tokens) != 2:
+        raise DataParseError("opStart takes a directory name argument")
+    if in_op_start != 0:
+        raise DataParseError("opStart can only be specified once")
+    default_op_dir = tokens[1]
+    in_op_start = 1
+
+#
+# Parse arch config file --
+# Set location of a single opcode's source file.
+#
+def opEntry(tokens):
+    #global opcode_locations
+    if len(tokens) != 3:
+        raise DataParseError("op requires exactly two arguments")
+    if in_op_start != 1:
+        raise DataParseError("op statements must be between opStart/opEnd")
+    try:
+        index = opcodes.index(tokens[1])
+    except ValueError:
+        raise DataParseError("unknown opcode %s" % tokens[1])
+    opcode_locations[tokens[1]] = tokens[2]
+
+#
+# Parse arch config file --
+# End of opcode list; emit instruction blocks.
+#
+def opEnd(tokens):
+    global in_op_start
+    if len(tokens) != 1:
+        raise DataParseError("opEnd takes no arguments")
+    if in_op_start != 1:
+        raise DataParseError("opEnd must follow opStart, and only appear once")
+    in_op_start = 2
+
+    loadAndEmitOpcodes()
+
+
+#
+# Extract an ordered list of instructions from the VM sources.  We use the
+# "goto table" definition macro, which has exactly 256 entries.
+#
+def getOpcodeList():
+    opcodes = []
+    opcode_fp = open("%s/%s" % (target_arch, interp_defs_file))
+    opcode_re = re.compile(r"^JIT_TEMPLATE\((\w+)\)", re.DOTALL)
+    for line in opcode_fp:
+        match = opcode_re.match(line)
+        if not match:
+            continue
+        opcodes.append("TEMPLATE_" + match.group(1))
+    opcode_fp.close()
+
+    return opcodes
+
+
+#
+# Load and emit opcodes for all 256 instructions.
+#
+def loadAndEmitOpcodes():
+    sister_list = []
+
+    # point dvmAsmInstructionStart at the first handler or stub
+    asm_fp.write("\n    .global dvmCompilerTemplateStart\n")
+    asm_fp.write("    .type   dvmCompilerTemplateStart, %function\n")
+    asm_fp.write("    .text\n\n")
+    asm_fp.write("dvmCompilerTemplateStart:\n\n")
+
+    for i in xrange(len(opcodes)):
+        op = opcodes[i]
+
+        if opcode_locations.has_key(op):
+            location = opcode_locations[op]
+        else:
+            location = default_op_dir
+
+        loadAndEmitAsm(location, i, sister_list)
+
+    # Use variable sized handlers now
+    # asm_fp.write("\n    .balign %d\n" % handler_size_bytes)
+    asm_fp.write("    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart\n")
+
+#
+# Load an assembly fragment and emit it.
+#
+def loadAndEmitAsm(location, opindex, sister_list):
+    op = opcodes[opindex]
+    source = "%s/%s.S" % (location, op)
+    dict = getGlobalSubDict()
+    dict.update({ "opcode":op, "opnum":opindex })
+    print " emit %s --> asm" % source
+
+    emitAsmHeader(asm_fp, dict)
+    appendSourceFile(source, dict, asm_fp, sister_list)
+
+#
+# Output the alignment directive and label for an assembly piece.
+#
+def emitAsmHeader(outfp, dict):
+    outfp.write("/* ------------------------------ */\n")
+    # The alignment directive ensures that the handler occupies
+    # at least the correct amount of space.  We don't try to deal
+    # with overflow here.
+    outfp.write("    .balign 4\n")
+    # Emit a label so that gdb will say the right thing.  We prepend an
+    # underscore so the symbol name doesn't clash with the OpCode enum.
+    template_name = "dvmCompiler_%(opcode)s" % dict
+    outfp.write("    .global %s\n" % template_name);
+    outfp.write("%s:\n" % template_name);
+
+#
+# Output a generic instruction stub that updates the "glue" struct and
+# calls the C implementation.
+#
+def emitAsmStub(outfp, dict):
+    emitAsmHeader(outfp, dict)
+    for line in asm_stub_text:
+        templ = Template(line)
+        outfp.write(templ.substitute(dict))
+
+#
+# Append the file specified by "source" to the open "outfp".  Each line will
+# be template-replaced using the substitution dictionary "dict".
+#
+# If the first line of the file starts with "%" it is taken as a directive.
+# A "%include" line contains a filename and, optionally, a Python-style
+# dictionary declaration with substitution strings.  (This is implemented
+# with recursion.)
+#
+# If "sister_list" is provided, and we find a line that contains only "&",
+# all subsequent lines from the file will be appended to sister_list instead
+# of copied to the output.
+#
+# This may modify "dict".
+#
+def appendSourceFile(source, dict, outfp, sister_list):
+    outfp.write("/* File: %s */\n" % source)
+    infp = open(source, "r")
+    in_sister = False
+    for line in infp:
+        if line.startswith("%include"):
+            # Parse the "include" line
+            tokens = line.strip().split(' ', 2)
+            if len(tokens) < 2:
+                raise DataParseError("malformed %%include in %s" % source)
+
+            alt_source = tokens[1].strip("\"")
+            if alt_source == source:
+                raise DataParseError("self-referential %%include in %s"
+                        % source)
+
+            new_dict = dict.copy()
+            if len(tokens) == 3:
+                new_dict.update(eval(tokens[2]))
+            #print " including src=%s dict=%s" % (alt_source, new_dict)
+            appendSourceFile(alt_source, new_dict, outfp, sister_list)
+            continue
+
+        elif line.startswith("%default"):
+            # copy keywords into dictionary
+            tokens = line.strip().split(' ', 1)
+            if len(tokens) < 2:
+                raise DataParseError("malformed %%default in %s" % source)
+            defaultValues = eval(tokens[1])
+            for entry in defaultValues:
+                dict.setdefault(entry, defaultValues[entry])
+            continue
+
+        elif line.startswith("%verify"):
+            # more to come, someday
+            continue
+
+        elif line.startswith("%break") and sister_list != None:
+            # allow more than one %break, ignoring all following the first
+            if not in_sister:
+                in_sister = True
+                sister_list.append("\n/* continuation for %(opcode)s */\n"%dict)
+            continue
+
+        # perform keyword substitution if a dictionary was provided
+        if dict != None:
+            templ = Template(line)
+            try:
+                subline = templ.substitute(dict)
+            except KeyError, err:
+                raise DataParseError("keyword substitution failed in %s: %s"
+                        % (source, str(err)))
+            except:
+                print "ERROR: substitution failed: " + line
+                raise
+        else:
+            subline = line
+
+        # write output to appropriate file
+        if in_sister:
+            sister_list.append(subline)
+        else:
+            outfp.write(subline)
+    outfp.write("\n")
+    infp.close()
+
+#
+# Emit a C-style section header comment.
+#
+def emitSectionComment(str, fp):
+    equals = "========================================" \
+             "==================================="
+
+    fp.write("\n/*\n * %s\n *  %s\n * %s\n */\n" %
+        (equals, str, equals))
+
+
+#
+# ===========================================================================
+# "main" code
+#
+
+#
+# Check args.
+#
+if len(sys.argv) != 3:
+    print "Usage: %s target-arch output-dir" % sys.argv[0]
+    sys.exit(2)
+
+target_arch = sys.argv[1]
+output_dir = sys.argv[2]
+
+#
+# Extract opcode list.
+#
+opcodes = getOpcodeList()
+#for op in opcodes:
+#    print "  %s" % op
+
+#
+# Open config file.
+#
+try:
+    config_fp = open("config-%s" % target_arch)
+except:
+    print "Unable to open config file 'config-%s'" % target_arch
+    sys.exit(1)
+
+#
+# Open and prepare output files.
+#
+try:
+    asm_fp = open("%s/CompilerTemplateAsm-%s.S" % (output_dir, target_arch), "w")
+except:
+    print "Unable to open output files"
+    print "Make sure directory '%s' exists and existing files are writable" \
+            % output_dir
+    # Ideally we'd remove the files to avoid confusing "make", but if they
+    # failed to open we probably won't be able to remove them either.
+    sys.exit(1)
+
+print "Generating %s" % (asm_fp.name)
+
+file_header = """/*
+ * This file was generated automatically by gen-template.py for '%s'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+""" % (target_arch)
+
+asm_fp.write(file_header)
+
+#
+# Process the config file.
+#
+failed = False
+try:
+    for line in config_fp:
+        line = line.strip()         # remove CRLF, leading spaces
+        tokens = line.split(' ')    # tokenize
+        #print "%d: %s" % (len(tokens), tokens)
+        if len(tokens[0]) == 0:
+            #print "  blank"
+            pass
+        elif tokens[0][0] == '#':
+            #print "  comment"
+            pass
+        else:
+            if tokens[0] == "handler-size":
+                setHandlerSize(tokens)
+            elif tokens[0] == "import":
+                importFile(tokens)
+            elif tokens[0] == "asm-stub":
+                setAsmStub(tokens)
+            elif tokens[0] == "op-start":
+                opStart(tokens)
+            elif tokens[0] == "op-end":
+                opEnd(tokens)
+            elif tokens[0] == "op":
+                opEntry(tokens)
+            else:
+                raise DataParseError, "unrecognized command '%s'" % tokens[0]
+except DataParseError, err:
+    print "Failed: " + str(err)
+    # TODO: remove output files so "make" doesn't get confused
+    failed = True
+    asm_fp.close()
+    c_fp = asm_fp = None
+
+config_fp.close()
+
+#
+# Done!
+#
+if asm_fp:
+    asm_fp.close()
+
+sys.exit(failed)
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
new file mode 100644 (file)
index 0000000..59aa790
--- /dev/null
@@ -0,0 +1,703 @@
+/*
+ * This file was generated automatically by gen-template.py for 'armv5te'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: armv5te/header.S */
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(WITH_JIT)
+
+/*
+ * ARMv5 definitions and declarations.
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+JIT and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rGLUE     MterpGlue pointer
+
+The following registers have fixed assignments in mterp but are scratch
+registers in compiled code
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r7  rIBASE    interpreted instruction base pointer, used for computed goto
+  r8  rINST     first 16-bit code unit of current instruction
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/* single-purpose registers, given names for clarity */
+#define rPC     r4  /* interpreted program counter */
+#define rFP     r5  /* interpreted frame pointer */
+#define rGLUE   r6  /* MterpGlue pointer */
+#define rIBASE  r7  /* interpreted instruction base pointer */
+#define rINST   r8  /* first 16-bit code unit of current instruction */
+
+/*
+ * Given a frame pointer, find the stack save area.
+ *
+ * In C this is "((StackSaveArea*)(_fp) -1)".
+ */
+#define SAVEAREA_FROM_FP(_reg, _fpreg) \
+    sub     _reg, _fpreg, #sizeofStackSaveArea
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "../../../mterp/common/asm-constants.h"
+
+
+/* File: armv5te/platform.S */
+/*
+ * ===========================================================================
+ *  CPU-version-specific defines
+ * ===========================================================================
+ */
+
+/*
+ * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
+ * Jump to subroutine.
+ *
+ * May modify IP and LR.
+ */
+.macro  LDR_PC_LR source
+    mov     lr, pc              @ pc reads as instr+8: lr<- addr after the ldr
+    ldr     pc, \source         @ jump to subroutine; callee returns via lr
+.endm
+
+
+    .global dvmCompilerTemplateStart
+    .type   dvmCompilerTemplateStart, %function
+    .text
+
+dvmCompilerTemplateStart:
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMP_LONG
+dvmCompiler_TEMPLATE_CMP_LONG:
+/* File: armv5te/TEMPLATE_CMP_LONG.S */
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     *
+     * We load the full values with LDM, but in practice many values could
+     * be resolved by only looking at the high word.  This could be made
+     * faster or slower by splitting the LDM into a pair of LDRs.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
+     */
+    /* cmp-long vAA, vBB, vCC */
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
+    bgt     .LTEMPLATE_CMP_LONG_greater @ high words differ: result known
+    subs    r0, r0, r2                  @ r0<- r0 - r2
+    bxeq     lr                         @ low words equal too: return 0 in r0
+    bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
+.LTEMPLATE_CMP_LONG_less:
+    mvn     r0, #0                      @ r0<- -1
+    bx      lr
+.LTEMPLATE_CMP_LONG_greater:
+    mov     r0, #1                      @ r0<- 1
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_RETURN
+dvmCompiler_TEMPLATE_RETURN:
+/* File: armv5te/TEMPLATE_RETURN.S */
+    /*
+     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
+     * If the stored value in returnAddr
+     * is non-zero, the caller is compiled by the JIT thus return to the
+     * address in the code cache following the invoke instruction. Otherwise
+     * return to the special dvmJitToInterpNoChain entry point.
+     */
+    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
+    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
+    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
+    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
+                                        @ r2<- method we're returning to
+    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
+    cmp     r2, #0                      @ break frame?
+    beq     1f                          @ bail to interpreter
+    ldr     r0, .LdvmJitToInterpNoChain @ defined in footer.S
+    mov     rFP, r10                    @ publish new FP
+    ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz
+    ldr     r8, [r8]                    @ r8<- suspendCount
+
+    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
+    ldr     r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex
+    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
+    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
+    str     r1, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = r1
+    cmp     r8, #0                      @ check the suspendCount
+    movne   r9, #0                      @ clear the chaining cell address
+    cmp     r9, #0                      @ chaining cell exists?
+    blxne   r9                          @ jump to the chaining cell
+    mov     pc, r0                      @ callsite is interpreted
+1:                                      @ break frame: bail to the interpreter
+    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
+    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
+    mov     r1, #0                      @ changeInterp = false
+    mov     r0, rGLUE                   @ Expecting rGLUE in r0
+    blx     r2                          @ exit the interpreter
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
+dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
+    /*
+     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
+     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
+     * runtime-resolved callee.
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ Thumb addr is odd
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    lr                          @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0
+    bxne    lr                          @ bail to the interpreter
+    tst     r10, #ACC_NATIVE
+    bne     .LinvokeNative
+    /*
+     * If we want to punt to the interpreter for native call, swap the bne with
+     * the following
+     * bxne    lr
+     */
+
+
+    ldr     r10, .LdvmJitToInterpNoChain
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
+
+    @ Update "glue" values for the new method
+    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
+    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
+
+    @ Start executing the callee
+    mov     pc, r10                         @ dvmJitToInterpNoChain
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
+dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
+    /*
+     * For monomorphic callsite, setup the Dalvik frame and return to the
+     * Thumb code through the link register to transfer control to the callee
+     * method through a dedicated chaining cell.
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ Thumb addr is odd
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    add     r12, lr, #2                 @ setup the punt-to-interp address
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    r12                         @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0
+    bxne    r12                         @ bail to the interpreter
+    tst     r10, #ACC_NATIVE
+    bne     .LinvokeNative
+    /*
+     * If we want to punt to the interpreter for native call, swap the bne with
+     * the following
+     * bxne    r12
+     */
+
+
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
+
+    @ Update "glue" values for the new method
+    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
+    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
+
+    bx      lr                              @ return to the callee-chaining cell
+
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE
+dvmCompiler_TEMPLATE_CMPG_DOUBLE:
+/* File: armv5te/TEMPLATE_CMPG_DOUBLE.S */
+/* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */
+    /*
+     * For the JIT: incoming arguments are pointers to the arguments in r0/r1
+     *              result in r0
+     *
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into r0 depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * See OP_CMPL_FLOAT for an explanation.
+     *
+     * For: cmpl-double, cmpg-double
+     */
+    /* op vAA, vBB, vCC */
+    mov     r4, lr                      @ save return address
+    mov     r9, r0                      @ save copy of &arg1
+    mov     r10, r1                     @ save copy of &arg2
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r10, {r2-r3}                @ r2/r3<- vCC/vCC+1
+    LDR_PC_LR ".L__aeabi_cdcmple"       @ PIC way of "bl __aeabi_cdcmple"
+    bhi     .LTEMPLATE_CMPG_DOUBLE_gt_or_nan       @ C set and Z clear, disambiguate
+    mvncc   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
+    bx      r4
+
+    @ Test for NaN with a second comparison.  EABI forbids testing bit
+    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
+    @ make the library call.
+.LTEMPLATE_CMPG_DOUBLE_gt_or_nan:
+    ldmia   r10, {r0-r1}                @ reverse order
+    ldmia   r9, {r2-r3}
+    LDR_PC_LR ".L__aeabi_cdcmple"       @ r0<- Z set if eq, C clear if <
+    movcc   r0, #1                      @ (greater than) r0<- 1
+    bxcc    r4
+    mov     r0, #1                            @ r0<- 1 for NaN (cmpg variant)
+    bx      r4
+
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE
+dvmCompiler_TEMPLATE_CMPL_DOUBLE:
+/* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */
+    /*
+     * For the JIT: incoming arguments are pointers to the arguments in r0/r1
+     *              result in r0
+     *
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into r0 depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * See OP_CMPL_FLOAT for an explanation.
+     *
+     * For: cmpl-double, cmpg-double
+     */
+    /* op vAA, vBB, vCC */
+    mov     r4, lr                      @ save return address
+    mov     r9, r0                      @ save copy of &arg1
+    mov     r10, r1                     @ save copy of &arg2
+    ldmia   r9, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    ldmia   r10, {r2-r3}                @ r2/r3<- vCC/vCC+1
+    LDR_PC_LR ".L__aeabi_cdcmple"       @ PIC way of "bl __aeabi_cdcmple"
+    bhi     .LTEMPLATE_CMPL_DOUBLE_gt_or_nan       @ C set and Z clear, disambiguate
+    mvncc   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
+    bx      r4
+
+    @ Test for NaN with a second comparison.  EABI forbids testing bit
+    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
+    @ make the library call.
+.LTEMPLATE_CMPL_DOUBLE_gt_or_nan:
+    ldmia   r10, {r0-r1}                @ reverse order
+    ldmia   r9, {r2-r3}
+    LDR_PC_LR ".L__aeabi_cdcmple"       @ r0<- Z set if eq, C clear if <
+    movcc   r0, #1                      @ (greater than) r0<- 1
+    bxcc    r4
+    mvn     r0, #0                            @ r0<- -1 for NaN (cmpl variant)
+    bx      r4
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPG_FLOAT
+dvmCompiler_TEMPLATE_CMPG_FLOAT:
+/* File: armv5te/TEMPLATE_CMPG_FLOAT.S */
+/* File: armv5te/TEMPLATE_CMPL_FLOAT.S */
+    /*
+     * For the JIT: incoming arguments in r0, r1
+     *              result in r0
+     *
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into r0 depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * The operation we're implementing is:
+     *   if (x == y)
+     *     return 0;
+     *   else if (x < y)
+     *     return -1;
+     *   else if (x > y)
+     *     return 1;
+     *   else
+     *     return {-1,1};  // one or both operands was NaN
+     *
+     * The straightforward implementation requires 3 calls to functions
+     * that return a result in r0.  We can do it with two calls if our
+     * EABI library supports __aeabi_cfcmple (only one if we want to check
+     * for NaN directly):
+     *   check x <= y
+     *     if <, return -1
+     *     if ==, return 0
+     *   check y <= x
+     *     if <, return 1
+     *   return {-1,1}
+     *
+     * for: cmpl-float, cmpg-float
+     */
+    /* op vAA, vBB, vCC */
+    mov     r4, lr                      @ save return address
+    mov     r9, r0                      @ Save copies - we may need to redo
+    mov     r10, r1
+    LDR_PC_LR ".L__aeabi_cfcmple"       @ cmp <=: C clear if <, Z set if eq
+    bhi     .LTEMPLATE_CMPG_FLOAT_gt_or_nan       @ C set and Z clear, disambiguate
+    mvncc   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
+    bx      r4
+    @ Test for NaN with a second comparison.  EABI forbids testing bit
+    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
+    @ make the library call.
+.LTEMPLATE_CMPG_FLOAT_gt_or_nan:
+    mov     r1, r9                      @ reverse order
+    mov     r0, r10
+    LDR_PC_LR ".L__aeabi_cfcmple"       @ r0<- Z set if eq, C clear if <
+    movcc   r0, #1                      @ (greater than) r0<- 1
+    bxcc    r4
+    mov     r0, #1                            @ r0<- 1 for NaN (cmpg variant)
+    bx      r4
+
+
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPL_FLOAT
+dvmCompiler_TEMPLATE_CMPL_FLOAT:
+/* File: armv5te/TEMPLATE_CMPL_FLOAT.S */
+    /*
+     * For the JIT: incoming arguments in r0, r1
+     *              result in r0
+     *
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * Provide a "naninst" instruction that puts 1 or -1 into r0 depending
+     * on what value we'd like to return when one of the operands is NaN.
+     *
+     * The operation we're implementing is:
+     *   if (x == y)
+     *     return 0;
+     *   else if (x < y)
+     *     return -1;
+     *   else if (x > y)
+     *     return 1;
+     *   else
+     *     return {-1,1};  // one or both operands was NaN
+     *
+     * The straightforward implementation requires 3 calls to functions
+     * that return a result in r0.  We can do it with two calls if our
+     * EABI library supports __aeabi_cfcmple (only one if we want to check
+     * for NaN directly):
+     *   check x <= y
+     *     if <, return -1
+     *     if ==, return 0
+     *   check y <= x
+     *     if <, return 1
+     *   return {-1,1}
+     *
+     * for: cmpl-float, cmpg-float
+     */
+    /* op vAA, vBB, vCC */
+    mov     r4, lr                      @ save return address
+    mov     r9, r0                      @ Save copies - we may need to redo
+    mov     r10, r1
+    LDR_PC_LR ".L__aeabi_cfcmple"       @ cmp <=: C clear if <, Z set if eq
+    bhi     .LTEMPLATE_CMPL_FLOAT_gt_or_nan       @ C set and Z clear, disambiguate
+    mvncc   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
+    bx      r4
+    @ Test for NaN with a second comparison.  EABI forbids testing bit
+    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
+    @ make the library call.
+.LTEMPLATE_CMPL_FLOAT_gt_or_nan:
+    mov     r1, r9                      @ reverse order
+    mov     r0, r10
+    LDR_PC_LR ".L__aeabi_cfcmple"       @ r0<- Z set if eq, C clear if <
+    movcc   r0, #1                      @ (greater than) r0<- 1
+    bxcc    r4
+    mvn     r0, #0                            @ r0<- -1 for NaN (cmpl variant)
+    bx      r4
+
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MUL_LONG
+dvmCompiler_TEMPLATE_MUL_LONG:
+/* File: armv5te/TEMPLATE_MUL_LONG.S */
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
+    /* mul-long vAA, vBB, vCC */
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    mov     r0,r9                       @  r0<- low word of the result
+    mov     r1,r10                      @  r1<- high word of the result
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SHL_LONG
+dvmCompiler_TEMPLATE_SHL_LONG:
+/* File: armv5te/TEMPLATE_SHL_LONG.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     */
+    /* shl-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SHR_LONG
+dvmCompiler_TEMPLATE_SHR_LONG:
+/* File: armv5te/TEMPLATE_SHR_LONG.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     */
+    /* shr-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_USHR_LONG
+dvmCompiler_TEMPLATE_USHR_LONG:
+/* File: armv5te/TEMPLATE_USHR_LONG.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     */
+    /* ushr-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    bx      lr
+
+
+    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
+/* File: armv5te/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+.LinvokeNative:
+    @ Prep for the native call
+    @ r1 = newFP, r0 = methodToCall
+    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
+    ldr     r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
+    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
+    str     r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
+                                        @ newFp->localRefTop=refNext
+    mov     r9, r3                      @ r9<- glue->self (preserve)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
+
+    mov     r2, r0                      @ r2<- methodToCall
+    mov     r0, r1                      @ r0<- newFP
+    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
+
+    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+
+    @ native return; r9=self, r10=newSaveArea
+    @ equivalent to dvmPopJniLocals
+    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+    ldr     r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
+    ldr     r1, [r9, #offThread_exception] @ check for exception
+    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
+    cmp     r1, #0                      @ null?
+    str     r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0
+    bne     .LhandleException             @ non-null: handle exception
+    bx      r2                          @ continue at chaining cell ret addr
+
+/* FIXME - untested */
+.LhandleException:
+    ldr     rIBASE, .LdvmAsmInstructionStart
+    ldr     rPC, [r10, #offStackSaveArea_savedPc] @ reload rPC
+    b       dvmMterpCommonExceptionThrown
+
+    .align  2
+.LdvmAsmInstructionStart:
+    .word   dvmAsmInstructionStart
+.LdvmJitToInterpNoChain:
+    .word   dvmJitToInterpNoChain
+.LdvmMterpStdBail:
+    .word   dvmMterpStdBail
+.L__aeabi_cdcmple:
+    .word   __aeabi_cdcmple
+.L__aeabi_cfcmple:
+    .word   __aeabi_cfcmple
+
+    .global dmvCompilerTemplateEnd
+dmvCompilerTemplateEnd: @ NOTE(review): "dmv" typo; exported name, fix in tandem with C refs
+
+#endif /* WITH_JIT */
+
+
diff --git a/vm/compiler/template/rebuild.sh b/vm/compiler/template/rebuild.sh
new file mode 100755 (executable)
index 0000000..07d9516
--- /dev/null
@@ -0,0 +1,23 @@
+#!/bin/sh
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Rebuild for all known targets.  Necessary until the stuff in "out" gets
+# generated as part of the build.
+#
+set -e
+for arch in armv5te; do TARGET_ARCH_EXT=$arch make -f Makefile-template; done
+
index de3d676..db86e78 100644 (file)
@@ -789,7 +789,6 @@ void dvmThrowVerificationError(const Method* method, int kind, int ref)
     free(msg);
 }
 
-
 /*
  * Main interpreter loop entry point.  Select "standard" or "debug"
  * interpreter and switch between them as required.
@@ -805,6 +804,27 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult)
 {
     InterpState interpState;
     bool change;
+#if defined(WITH_JIT)
+    /* Interpreter entry points from compiled code */
+    extern void dvmJitToInterpNormal();
+    extern void dvmJitToInterpNoChain();
+    extern void dvmJitToInterpPunt();
+    extern void dvmJitToInterpSingleStep();
+    extern void dvmJitToTraceSelect();
+
+    /* 
+     * Reserve a static entity here to quickly setup runtime contents as
+     * gcc will issue block copy instructions.
+     */
+    static struct JitToInterpEntries jitToInterpEntries = {
+        dvmJitToInterpNormal,
+        dvmJitToInterpNoChain,
+        dvmJitToInterpPunt,
+        dvmJitToInterpSingleStep,
+        dvmJitToTraceSelect,
+    };
+#endif
+
 
 #if defined(WITH_TRACKREF_CHECKS)
     interpState.debugTrackedRefStart =
@@ -813,6 +833,12 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult)
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
     interpState.debugIsMethodEntry = true;
 #endif
+#if defined(WITH_JIT)
+    interpState.jitState = gDvmJit.pJitEntryTable ? kJitNormal : kJitOff;
+
+    /* Setup the Jit-to-interpreter entry points */
+    interpState.jitToInterpEntries = jitToInterpEntries;
+#endif
 
     /*
      * Initialize working state.
@@ -848,6 +874,14 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult)
     Interpreter stdInterp;
     if (gDvm.executionMode == kExecutionModeInterpFast)
         stdInterp = dvmMterpStd;
+#if defined(WITH_JIT)
+    else if (gDvm.executionMode == kExecutionModeJit)
+/* If profiling overhead can be kept low enough, we can use a profiling
+ * mterp fast for both Jit and "fast" modes.  If overhead is too high,
+ * create a specialized profiling interpreter.
+ */
+        stdInterp = dvmMterpStd;
+#endif
     else
         stdInterp = dvmInterpretStd;
 
@@ -858,7 +892,7 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult)
             LOGVV("threadid=%d: interp STD\n", self->threadId);
             change = (*stdInterp)(self, &interpState);
             break;
-#if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
+#if defined(WITH_PROFILER) || defined(WITH_DEBUGGER) || defined(WITH_JIT)
         case INTERP_DBG:
             LOGVV("threadid=%d: interp DBG\n", self->threadId);
             change = dvmInterpretDbg(self, &interpState);
@@ -871,4 +905,3 @@ void dvmInterpret(Thread* self, const Method* method, JValue* pResult)
 
     *pResult = interpState.retval;
 }
-
index 856c2f5..6c67cb5 100644 (file)
@@ -32,8 +32,46 @@ typedef enum InterpEntry {
     kInterpEntryInstr = 0,      // continue to next instruction
     kInterpEntryReturn = 1,     // jump to method return
     kInterpEntryThrow = 2,      // jump to exception throw
+#if defined(WITH_JIT)
+    kInterpEntryResume = 3,     // Resume after single-step
+#endif
 } InterpEntry;
 
+#if defined(WITH_JIT)
+/*
+ * There are five entry points from the compiled code to the interpreter:
+ * 1) dvmJitToInterpNormal: find if there is a corresponding compilation for
+ *    the new dalvik PC. If so, chain the originating compilation with the
+ *    target then jump to it.
+ * 2) dvmJitToInterpNoChain: similar to 1) but don't chain. This is
+ *    for handling 1-to-many mappings like virtual method call and
+ *    packed switch.
+ * 3) dvmJitToInterpPunt: use the fast interpreter to execute the next
+ *    instruction(s) and stay there as long as it is appropriate to return 
+ *    to the compiled land. This is used when the jit'ed code is about to
+ *    throw an exception.
+ * 4) dvmJitToInterpSingleStep: use the portable interpreter to execute the
+ *    next instruction only and return to pre-specified location in the
+ *    compiled code to resume execution. This is mainly used as debugging
+ *    feature to bypass problematic opcode implementations without
+ *    disturbing the trace formation.
+ * 5) dvmJitToTraceSelect: if there is a single exit from a translation that
+ *    has already gone hot enough to be translated, we should assume that
+ *    the exit point should also be translated (this is a common case for
+ *    invokes).  This trace exit will first check for a chaining
+ *    opportunity, and if none is available will switch to the debug
+ *    interpreter immediately for trace selection (as if threshold had
+ *    just been reached).
+ */
+struct JitToInterpEntries {
+    void *dvmJitToInterpNormal;
+    void *dvmJitToInterpNoChain;
+    void *dvmJitToInterpPunt;
+    void *dvmJitToInterpSingleStep;
+    void *dvmJitToTraceSelect;
+};
+#endif
+
 /*
  * Interpreter context, used when switching from one interpreter to
  * another.  We also tuck "mterp" state in here.
@@ -78,8 +116,18 @@ typedef struct InterpState {
      * Interpreter switching.
      */
     InterpEntry entryPoint;             // what to do when we start
-    int         nextMode;               // INTERP_STD or INTERP_DBG
+    int         nextMode;               // INTERP_STD, INTERP_DBG
 
+#if defined(WITH_JIT)
+    /*
+     * Local copies of field from gDvm placed here for fast access
+     */
+    struct JitEntry*   pJitTable;
+    unsigned char*     pJitProfTable;
+    JitState           jitState;
+    void*              jitResume;
+    u2*                jitResumePC;
+#endif
 
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
     bool        debugIsMethodEntry;     // used for method entry event triggers
@@ -88,6 +136,17 @@ typedef struct InterpState {
     int         debugTrackedRefStart;   // tracked refs from prior invocations
 #endif
 
+#if defined(WITH_JIT)
+    struct JitToInterpEntries jitToInterpEntries;
+
+    int currTraceRun;
+    int totalTraceLen;        // Number of Dalvik insts in trace
+    const u2* currTraceHead;        // Start of the trace we're building
+    const u2* currRunHead;          // Start of run we're building
+    int currRunLen;           // Length of run in 16-bit words
+    JitTraceRun trace[MAX_JIT_RUN_LEN];
+#endif
+
 } InterpState;
 
 /*
@@ -123,7 +182,7 @@ s4 dvmInterpHandleSparseSwitch(const u2* switchData, s4 testVal);
 /*
  * Process fill-array-data.
  */
-bool dvmInterpHandleFillArrayData(ArrayObject* arrayObject, 
+bool dvmInterpHandleFillArrayData(ArrayObject* arrayObject,
                                   const u2* arrayData);
 
 /*
@@ -145,4 +204,19 @@ static inline bool dvmDebuggerOrProfilerActive(void)
         ;
 }
 
+#if defined(WITH_JIT)
+/*
+ * Determine if the jit, debugger or profiler is currently active.  Used when
+ * selecting which interpreter to switch to.
+ */
+static inline bool dvmJitDebuggerOrProfilerActive(int jitState)
+{
+    return jitState != kJitOff
+#if defined(WITH_PROFILER)
+        || gDvm.activeProfilers != 0
+#endif
+        ||gDvm.debuggerActive;
+}
+#endif
+
 #endif /*_DALVIK_INTERP_DEFS*/
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
new file mode 100644 (file)
index 0000000..e23361b
--- /dev/null
@@ -0,0 +1,567 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifdef WITH_JIT
+
+/*
+ * Target independent portion of Android's Jit
+ */
+
+#include "Dalvik.h"
+#include "Jit.h"
+
+
+#include "dexdump/OpCodeNames.h"
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/time.h>
+#include <signal.h>
+#include "compiler/Compiler.h"
+#include <errno.h>
+
+/*
+ * Reset profile counts.  Note that we could easily lose
+ * one or more of these writes because of threading.  Because these
+ * counts are considered hints, absolute correctness is not a
+ * problem and the cost of synchronizing would be prohibitive.
+ * NOTE: Experimental - 5/21/09.  Keep rough track of the last
+ * time the counts were reset to allow trace builder to ignore
+ * stale thresholds.  This is just a hint, and the only penalty
+ * for getting it wrong is a slight performance hit (far less than
+ * the cost of synchronization).
+ */
+static u8 lastProfileResetTimeUsec;
+static void resetProfileCounts() {
+    int i;
+    unsigned char *pJitProfTable = gDvmJit.pProfTable;
+    lastProfileResetTimeUsec = dvmGetRelativeTimeUsec();
+    if (pJitProfTable != NULL) {
+        for (i=0; i < JIT_PROF_SIZE; i++) {
+           pJitProfTable[i] = gDvmJit.threshold;
+        }
+    }
+}
+
+int dvmJitStartup(void)
+{
+    unsigned int i;
+    bool res = true;  /* Assume success */
+
+    // Create the compiler thread and set up miscellaneous chores
+    res &= dvmCompilerStartup();
+
+    dvmInitMutex(&gDvmJit.tableLock);
+    if (res && gDvm.executionMode == kExecutionModeJit) {
+        struct JitEntry *pJitTable = NULL;
+        int tableSize = sizeof(*pJitTable) * gDvmJit.maxTableEntries;
+        unsigned char *pJitProfTable = NULL;
+        dvmLockMutex(&gDvmJit.tableLock);
+        assert(sizeof(*pJitTable) == 12);
+        pJitTable = (struct JitEntry*)malloc(tableSize);
+        if (!pJitTable) {
+            LOGE("jit table allocation failed\n");
+            res = false;
+            goto done;
+        }
+        memset(pJitTable,0,tableSize);
+        /*
+         * NOTE: the profile table must only be allocated once, globally.
+         * Profiling is turned on and off by nulling out gDvm.pJitProfTable
+         * and then restoring its original value.  However, this action
+         * is not synchronized for speed so threads may continue to hold
+         * and update the profile table after profiling has been turned
+         * off by nulling the global pointer.  Be aware.
+         */
+        pJitProfTable = (unsigned char *)malloc(JIT_PROF_SIZE);
+        if (!pJitProfTable) {
+            LOGE("jit prof table allocation failed\n");
+            res = false;
+            goto done;
+        }
+        memset(pJitProfTable,0,JIT_PROF_SIZE);
+        for (i=0; i < gDvmJit.maxTableEntries; i++) {
+           pJitTable[i].chain = gDvmJit.maxTableEntries;
+        }
+        /* Is chain field wide enough for termination pattern? */
+        assert(pJitTable[0].chain == gDvm.maxJitTableEntries);
+        resetProfileCounts();
+
+done:
+        gDvmJit.pJitEntryTable = pJitTable;
+        gDvmJit.pProfTableCopy = gDvmJit.pProfTable = pJitProfTable;
+        dvmUnlockMutex(&gDvmJit.tableLock);
+    }
+    return res;
+}
+
+/*
+ * If one of our fixed tables or the translation buffer fills up,
+ * call this routine to avoid wasting cycles on future translation requests.
+ */
+void dvmJitStopTranslationRequests()
+{
+    /*
+     * Note 1: This won't necessarily stop all translation requests, and
+     * operates on a delayed mechanism.  Running threads look to the copy
+     * of this value in their private InterpState structures and won't see
+     * this change until it is refreshed (which happens on interpreter
+     * entry).
+     * Note 2: This is a one-shot memory leak on this table. Because this is a
+     * permanent off switch for Jit profiling, it is a one-time leak of 1K
+     * bytes, and no further attempt will be made to re-allocate it.  Can't
+     * free it because some thread may be holding a reference.
+     */
+    gDvmJit.pProfTable = gDvmJit.pProfTableCopy = NULL;
+}
+
+#if defined(EXIT_STATS)
+/* Convenience function to increment counter from assembly code */
+void dvmBumpNoChain()
+{
+    gDvm.jitNoChainExit++;
+}
+
+/* Convenience function to increment counter from assembly code */
+void dvmBumpNormal()
+{
+    gDvm.jitNormalExit++;
+}
+
+/* Convenience function to increment counter from assembly code */
+void dvmBumpPunt(int from)
+{
+    gDvm.jitPuntExit++;
+}
+#endif
+
+/* Dumps debugging & tuning stats to the log */
+void dvmJitStats()
+{
+    int i;
+    int hit;
+    int not_hit;
+    int chains;
+    if (gDvmJit.pJitEntryTable) {
+        for (i=0, chains=hit=not_hit=0;
+             i < (int) gDvmJit.maxTableEntries;
+             i++) {
+            if (gDvmJit.pJitEntryTable[i].dPC != 0)
+                hit++;
+            else
+                not_hit++;
+            if (gDvmJit.pJitEntryTable[i].chain != gDvmJit.maxTableEntries)
+                chains++;
+        }
+        LOGD(
+         "JIT: %d traces, %d slots, %d chains, %d maxQ, %d thresh, %s",
+         hit, not_hit + hit, chains, gDvmJit.compilerMaxQueued,
+         gDvmJit.threshold, gDvmJit.blockingMode ? "Blocking" : "Non-blocking");
+#if defined(EXIT_STATS)
+        LOGD(
+         "JIT: Lookups: %d hits, %d misses; %d NoChain, %d normal, %d punt",
+         gDvmJit.addrLookupsFound, gDvmJit.addrLookupsNotFound,
+         gDvmJit.noChainExit, gDvmJit.normalExit, gDvmJit.puntExit);
+#endif
+        LOGD("JIT: %d Translation chains", gDvmJit.translationChains);
+#if defined(INVOKE_STATS)
+        LOGD("JIT: Invoke: %d noOpt, %d chainable, %d return",
+          gDvmJit.invokeNoOpt, gDvmJit.invokeChain, gDvmJit.returnOp);
+#endif
+    }
+}
+
+/*
+ * Final JIT shutdown.  Only do this once, and do not attempt to restart
+ * the JIT later.
+ */
+void dvmJitShutdown(void)
+{
+    /* Shutdown the compiler thread */
+    dvmCompilerShutdown();
+
+    dvmCompilerDumpStats();
+
+    dvmDestroyMutex(&gDvmJit.tableLock);
+
+    if (gDvmJit.pJitEntryTable) {
+        free(gDvmJit.pJitEntryTable);
+        gDvmJit.pJitEntryTable = NULL;
+    }
+
+    if (gDvmJit.pProfTable) {
+        free(gDvmJit.pProfTable);
+        gDvmJit.pProfTable = NULL;
+    }
+}
+
+/* Returns the signed branch displacement of a Dalvik instruction. */
+int dvmGetBranchDisplacement( DecodedInstruction* decInsn )
+{
+    int res = 0;
+    switch (dexGetInstrFormat(gDvm.instrFormat, decInsn->opCode)) {
+        case kFmt22t:
+            res = decInsn->vC;
+            break;
+        case kFmt20t:
+        case kFmt21t:
+            res = decInsn->vB;
+            break;
+        case kFmt10t:
+        case kFmt30t:
+            res = decInsn->vA;
+            break;
+        default:
+            dvmAbort();
+    }
+    return res;
+}
+
+/*
+ * Adds to the current trace request one instruction at a time, just
+ * before that instruction is interpreted.  This is the primary trace
+ * selection function.  NOTE: return instructions are handled a little
+ * differently.  In general, instructions are "proposed" to be added
+ * to the current trace prior to interpretation.  If the interpreter
+ * then successfully completes the instruction, it will be considered
+ * part of the request.  This allows us to examine machine state prior
+ * to interpretation, and also abort the trace request if the instruction
+ * throws or does something unexpected.  However, return instructions
+ * will cause an immediate end to the translation request - which will
+ * be passed to the compiler before the return completes.  This is done
+ * in response to special handling of returns by the interpreter (and
+ * because returns cannot throw in a way that causes problems for the
+ * translated code).
+ */
+#define MAX_TRACE_LEN 100
+int dvmCheckJit(const u2* pc, Thread* self, InterpState* interpState)
+{
+    int flags,i,len;
+    int switchInterp = false;
+    int debugOrProfile = (gDvm.debuggerActive || self->suspendCount
+#if defined(WITH_PROFILER)
+                          || gDvm.activeProfilers
+#endif
+            );
+
+    switch (interpState->jitState) {
+        char* nopStr;
+        int target;
+        int offset;
+        DecodedInstruction decInsn;
+        case kJitTSelect:
+            dexDecodeInstruction(gDvm.instrFormat, pc, &decInsn);
+#if defined(SHOW_TRACE)
+            LOGD("TraceGen: adding %s",getOpcodeName(decInsn.opCode));
+#endif
+            flags = dexGetInstrFlags(gDvm.instrFlags, decInsn.opCode);
+            len = dexGetInstrOrTableWidthAbs(gDvm.instrWidth, pc);
+            offset = pc - interpState->method->insns;
+            if ((flags & kInstrNoJit) == kInstrNoJit) {
+                interpState->jitState = kJitTSelectEnd;
+                break;
+            } else {
+                if (pc != interpState->currRunHead + interpState->currRunLen) {
+                    int currTraceRun;
+                    /* We need to start a new trace run */
+                    currTraceRun = ++interpState->currTraceRun;
+                    interpState->currRunLen = 0;
+                    interpState->currRunHead = (u2*)pc;
+                    interpState->trace[currTraceRun].frag.startOffset = offset;
+                    interpState->trace[currTraceRun].frag.numInsts = 0;
+                    interpState->trace[currTraceRun].frag.runEnd = false;
+                    interpState->trace[currTraceRun].frag.hint = kJitHintNone;
+                }
+                interpState->trace[interpState->currTraceRun].frag.numInsts++;
+                interpState->totalTraceLen++;
+                interpState->currRunLen += len;
+                if (  ((flags & kInstrUnconditional) == 0) &&
+                      ((flags & (kInstrCanBranch |
+                                 kInstrCanSwitch |
+                                 kInstrCanReturn |
+                                 kInstrInvoke)) != 0)) {
+                        interpState->jitState = kJitTSelectEnd;
+#if defined(SHOW_TRACE)
+                LOGD("TraceGen: ending on %s, basic block end",
+                     getOpcodeName(decInsn.opCode));
+#endif
+                }
+                if (decInsn.opCode == OP_THROW) {
+                    interpState->jitState = kJitTSelectEnd;
+                }
+                if (interpState->totalTraceLen >= MAX_TRACE_LEN) {
+                    interpState->jitState = kJitTSelectEnd;
+                }
+                if (debugOrProfile) {
+                    interpState->jitState = kJitTSelectAbort;
+                    switchInterp = !debugOrProfile;
+                    break;
+                }
+                if ((flags & kInstrCanReturn) != kInstrCanReturn) {
+                    break;
+                }
+            }
+            /* NOTE: intentional fallthrough for returns */
+        case kJitTSelectEnd:
+            {
+                if (interpState->totalTraceLen == 0) {
+                    switchInterp = !debugOrProfile;
+                    break;
+                }
+                JitTraceDescription* desc =
+                   (JitTraceDescription*)malloc(sizeof(JitTraceDescription) +
+                     sizeof(JitTraceRun) * (interpState->currTraceRun+1));
+                if (desc == NULL) {
+                    LOGE("Out of memory in trace selection");
+                    dvmJitStopTranslationRequests();
+                    interpState->jitState = kJitTSelectAbort;
+                    switchInterp = !debugOrProfile;
+                    break;
+                }
+                interpState->trace[interpState->currTraceRun].frag.runEnd =
+                     true;
+                interpState->jitState = kJitNormal;
+                desc->method = interpState->method;
+                memcpy((char*)&(desc->trace[0]),
+                    (char*)&(interpState->trace[0]),
+                    sizeof(JitTraceRun) * (interpState->currTraceRun+1));
+#if defined(SHOW_TRACE)
+                LOGD("TraceGen:  trace done, adding to queue");
+#endif
+                dvmCompilerWorkEnqueue(
+                       interpState->currTraceHead,kWorkOrderTrace,desc);
+                if (gDvmJit.blockingMode) {
+                    dvmCompilerDrainQueue();
+                }
+                switchInterp = !debugOrProfile;
+            }
+            break;
+        case kJitSingleStep:
+            interpState->jitState = kJitSingleStepEnd;
+            break;
+        case kJitSingleStepEnd:
+            interpState->entryPoint = kInterpEntryResume;
+            switchInterp = !debugOrProfile;
+            break;
+        case kJitTSelectAbort:
+#if defined(SHOW_TRACE)
+            LOGD("TraceGen:  trace abort");
+#endif
+            interpState->jitState = kJitNormal;
+            switchInterp = !debugOrProfile;
+            break;
+        case kJitNormal:
+            break;
+        default:
+            dvmAbort();
+    }
+    return switchInterp;
+}
+
+static inline struct JitEntry *findJitEntry(const u2* pc)
+{
+    int idx = dvmJitHash(pc);
+
+    /* Expect a high hit rate on 1st shot */
+    if (gDvmJit.pJitEntryTable[idx].dPC == pc)
+        return &gDvmJit.pJitEntryTable[idx];
+    else {
+        int chainEndMarker = gDvmJit.maxTableEntries;
+        while (gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) {
+            idx = gDvmJit.pJitEntryTable[idx].chain;
+            if (gDvmJit.pJitEntryTable[idx].dPC == pc)
+                return &gDvmJit.pJitEntryTable[idx];
+        }
+    }
+    return NULL;
+}
+
+/*
+ * If a translated code address exists for the Dalvik byte code
+ * pointer, return it.  This routine needs to be fast.
+ */
+void* dvmJitGetCodeAddr(const u2* dPC)
+{
+    int idx = dvmJitHash(dPC);
+
+    /* Expect a high hit rate on 1st shot */
+    if (gDvmJit.pJitEntryTable[idx].dPC == dPC) {
+#if defined(EXIT_STATS)
+        gDvmJit.addrLookupsFound++;
+#endif
+        return gDvmJit.pJitEntryTable[idx].codeAddress;
+    } else {
+        int chainEndMarker = gDvmJit.maxTableEntries;
+        while (gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) {
+            idx = gDvmJit.pJitEntryTable[idx].chain;
+            if (gDvmJit.pJitEntryTable[idx].dPC == dPC) {
+#if defined(EXIT_STATS)
+                gDvmJit.addrLookupsFound++;
+#endif
+                return gDvmJit.pJitEntryTable[idx].codeAddress;
+            }
+        }
+    }
+#if defined(EXIT_STATS)
+    gDvmJit.addrLookupsNotFound++;
+#endif
+    return NULL;
+}
+
+/*
+ * Register the translated code pointer into the JitTable.
+ * NOTE: Once a codeAddress field transitions from NULL to
+ * JIT'd code, it must not be altered without first halting all
+ * threads.
+ */
+void dvmJitSetCodeAddr(const u2* dPC, void *nPC) {
+    struct JitEntry *jitEntry = findJitEntry(dPC);
+    assert(jitEntry);
+    /* Thumb code has odd PC */
+    jitEntry->codeAddress = (void *) ((intptr_t) nPC |1);
+}
+
+/*
+ * Determine if a valid trace-building request is active.  Return true
+ * if we need to abort and switch back to the fast interpreter, false
+ * otherwise.  NOTE: may be called even when trace selection is not being
+ * requested.
+ */
+
+#define PROFILE_STALENESS_THRESHOLD 250000LL
+bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState)
+{
+    bool res = false;    /* Assume success */
+    if (gDvmJit.pJitEntryTable != NULL) {
+        u8 delta = dvmGetRelativeTimeUsec() - lastProfileResetTimeUsec;
+        /*
+         * If the compiler is backlogged, or if a debugger or profiler is
+         * active, cancel any JIT actions
+         */
+        if ( (gDvmJit.compilerQueueLength >= gDvmJit.compilerHighWater) ||
+              gDvm.debuggerActive || self->suspendCount
+#if defined(WITH_PROFILER)
+                 || gDvm.activeProfilers
+#endif
+                                             ) {
+            if (interpState->jitState != kJitOff) {
+                interpState->jitState = kJitNormal;
+            }
+        } else if (delta > PROFILE_STALENESS_THRESHOLD) {
+            resetProfileCounts();
+            res = true;   /* Stale profile - abort */
+        } else if (interpState->jitState == kJitTSelectRequest) {
+            u4 chainEndMarker = gDvmJit.maxTableEntries;
+            u4 idx = dvmJitHash(interpState->pc);
+
+            /* Walk the bucket chain to find an exact match for our PC */
+            while ((gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) &&
+                   (gDvmJit.pJitEntryTable[idx].dPC != interpState->pc)) {
+                idx = gDvmJit.pJitEntryTable[idx].chain;
+            }
+
+            if (gDvmJit.pJitEntryTable[idx].dPC == interpState->pc) {
+                /*
+                 * Got a match.  This means a trace has already
+                 * been requested for this address.  Bail back to
+                 * mterp, which will check if the translation is ready
+                 * for execution
+                 */
+                interpState->jitState = kJitTSelectAbort;
+            } else {
+               /*
+                * No match.  Acquire gDvmJit.tableLock and find the last
+                * slot in the chain. Possibly continue the chain walk in case
+                * some other thread allocated the slot we were looking
+                * at previously
+                */
+                dvmLockMutex(&gDvmJit.tableLock);
+                /*
+                 * At this point, if .dPC is NULL, then the slot we're
+                 * looking at is the target slot from the primary hash
+                 * (the simple, and expected case).  Otherwise we're going
+                 * to have to find a free slot and chain it.
+                 */
+                MEM_BARRIER();
+                if (gDvmJit.pJitEntryTable[idx].dPC != NULL) {
+                    u4 prev;
+                    while (gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) {
+                        idx = gDvmJit.pJitEntryTable[idx].chain;
+                    }
+                    /* Here, idx should be pointing to the last cell of an
+                     * active chain whose last member contains a valid dPC */
+                    assert(gDvmJit.pJitEntryTable[idx].dPC != NULL);
+                    /* Now, do a linear walk to find a free cell and add it to
+                     * end of this chain */
+                    prev = idx;
+                    while (true) {
+                        idx++;
+                        if (idx == chainEndMarker)
+                            idx = 0;  /* Wraparound */
+                        if ((gDvmJit.pJitEntryTable[idx].dPC == NULL) ||
+                            (idx == prev))
+                            break;
+                    }
+                    if (idx != prev) {
+                        /* Got it - chain */
+                        gDvmJit.pJitEntryTable[prev].chain = idx;
+                    }
+                }
+                if (gDvmJit.pJitEntryTable[idx].dPC == NULL) {
+                   /* Allocate the slot */
+                    gDvmJit.pJitEntryTable[idx].dPC = interpState->pc;
+                } else {
+                   /*
+                    * Table is full.  We could resize it, but that would
+                    * be better handled by the translator thread.  It
+                    * will be aware of how full the table is getting.
+                    * Disable further profiling and continue.
+                    */
+                   interpState->jitState = kJitTSelectAbort;
+                   LOGD("JIT: JitTable full, disabling profiling");
+                   dvmJitStopTranslationRequests();
+                }
+                dvmUnlockMutex(&gDvmJit.tableLock);
+            }
+        }
+        switch (interpState->jitState) {
+            case kJitTSelectRequest:
+                 interpState->jitState = kJitTSelect;
+                 interpState->currTraceHead = interpState->pc;
+                 interpState->currTraceRun = 0;
+                 interpState->totalTraceLen = 0;
+                 interpState->currRunHead = interpState->pc;
+                 interpState->currRunLen = 0;
+                 interpState->trace[0].frag.startOffset =
+                       interpState->pc - interpState->method->insns;
+                 interpState->trace[0].frag.numInsts = 0;
+                 interpState->trace[0].frag.runEnd = false;
+                 interpState->trace[0].frag.hint = kJitHintNone;
+                 break;
+            case kJitTSelect:
+            case kJitTSelectAbort:
+                 res = true;
+            case kJitSingleStep:
+            case kJitSingleStepEnd:
+            case kJitOff:
+            case kJitNormal:
+                break;
+            default:
+                dvmAbort();
+        }
+    }
+    return res;
+}
+
+#endif /* WITH_JIT */
diff --git a/vm/interp/Jit.h b/vm/interp/Jit.h
new file mode 100644 (file)
index 0000000..2b2cb8e
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Jit control
+ */
+#ifndef _DALVIK_INTERP_JIT
+#define _DALVIK_INTERP_JIT
+
+#include "InterpDefs.h"
+
+#define JIT_PROF_SIZE 512
+/*
+ * JitTable hash function.
+ */
+static inline u4 dvmJitHash( const u2* p ) {
+    /*
+     * TODO - Might make sense to keep "maxTableEntries - 1" as its own
+     * variable for speed reasons.
+     */
+    return ((((u4)p>>12)^(u4)p)>>1) & (gDvmJit.maxTableEntries-1);
+}
+
+/*
+ * Entries in the JIT's address lookup hash table.
+ * with assembly hash function in mterp.
+ * TODO: rework this structure now that the profile counts have
+ * moved into their own table.
+ */
+typedef struct JitEntry {
+    u2                unused;             /* was execution count */
+    u2                chain;              /* Index of next in chain */
+    const u2*         dPC;                /* Dalvik code address */
+    void*             codeAddress;        /* Code address of native translation */
+} JitEntry;
+
+int dvmJitStartup(void);
+void dvmJitShutdown(void);
+int dvmCheckJit(const u2* pc, Thread* self, InterpState* interpState);
+void* dvmJitGetCodeAddr(const u2* dPC);
+void dvmJitSetCodeAddr(const u2* dPC, void *nPC);
+bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState);
+void* dvmJitChain(void* tgtAddr, u4* branchAddr);
+void dvmJitStopTranslationRequests();
+void dvmJitStats();
+
+#endif /*_DALVIK_INTERP_JIT*/
index 1b28d49..f2a481b 100644 (file)
@@ -146,6 +146,8 @@ struct StackSaveArea {
         const u2*   currentPc;
     } xtra;
 
+    /* Native return pointer for JIT, or 0 if interpreted */
+    const u2* returnAddr;
 #ifdef PAD_SAVE_AREA
     u4          pad3, pad4, pad5;
 #endif
index 53ddeb4..80a7b2b 100644 (file)
@@ -77,6 +77,10 @@ bool dvmMterpStd(Thread* self, InterpState* glue)
 
     glue->interpStackEnd = self->interpStackEnd;
     glue->pSelfSuspendCount = &self->suspendCount;
+#if defined(WITH_JIT)
+    glue->pJitTable = gDvmJit.pJitEntryTable;
+    glue->pJitProfTable = gDvmJit.pProfTable;
+#endif
 #if defined(WITH_DEBUGGER)
     glue->pDebuggerActive = &gDvm.debuggerActive;
 #endif
@@ -111,4 +115,3 @@ bool dvmMterpStd(Thread* self, InterpState* glue)
         return true;
     }
 }
-
index ae2d207..8b3f7b4 100644 (file)
@@ -22,6 +22,9 @@
 
 #include "Dalvik.h"
 #include "interp/InterpDefs.h"
+#if defined(WITH_JIT)
+#include "interp/Jit.h"
+#endif
 
 /*
  * Interpreter state, passed into C functions from assembly stubs.  The
index 3433a73..26f0c8f 100644 (file)
     movs    r9, r0, asr #24             @ r9<- ssssssAA (sign-extended)
     mov     r9, r9, lsl #1              @ r9<- byte offset
     bmi     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
-
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
index 479438e..f738a98 100644 (file)
     FETCH_S(r0, 1)                      @ r0<- ssssAAAA (sign-extended)
     movs    r9, r0, asl #1              @ r9<- byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
index 617b8ba..17780b9 100644 (file)
     orrs    r0, r0, r1, lsl #16         @ r0<- AAAAaaaa, check sign
     mov     r9, r0, asl #1              @ r9<- byte offset
     ble     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
-
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
index 6fde05b..72e742a 100644 (file)
     movs    r9, r0, asl #1              @ r9<- branch byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
     beq     common_backwardBranch       @ (want to use BLE but V is unknown)
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
index 9b574a3..1f43918 100644 (file)
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
index 5b2cde0..f9e01a3 100644 (file)
@@ -66,10 +66,21 @@ dvmMterpStdRun:
     cmp     r1, #kInterpEntryInstr      @ usual case?
     bne     .Lnot_instr                 @ no, handle it
 
+#if defined(WITH_JIT)
+.Lno_singleStep:
+    /* Entry is always a possible trace start */
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_INST()
+    cmp    r0,#0
+    bne    common_updateProfile
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+#else
     /* start executing the instruction at rPC */
     FETCH_INST()                        @ load rINST from rPC
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 .Lnot_instr:
     cmp     r1, #kInterpEntryReturn     @ were we returning from a method?
@@ -79,6 +90,22 @@ dvmMterpStdRun:
     cmp     r1, #kInterpEntryThrow      @ were we throwing an exception?
     beq     common_exceptionThrown
 
+#if defined(WITH_JIT)
+.Lnot_throw:
+    ldr     r0,[rGLUE, #offGlue_jitResume]
+    ldr     r2,[rGLUE, #offGlue_jitResumePC]
+    cmp     r1, #kInterpEntryResume     @ resuming after Jit single-step?
+    bne     .Lbad_arg
+    cmp     rPC,r2
+    bne     .Lno_singleStep             @ must have branched, don't resume
+    mov     r1, #kInterpEntryInstr
+    strb    r1, [rGLUE, #offGlue_entryPoint]
+    ldr     rINST, .LdvmCompilerTemplate
+    bx      r0                          @ re-enter the translation
+.LdvmCompilerTemplate:
+    .word   dvmCompilerTemplateStart
+#endif
+
 .Lbad_arg:
     ldr     r0, strBadEntryPoint
     @ r1 holds value of entryPoint
index 22ad65a..aaab70e 100644 (file)
+
 /*
  * ===========================================================================
  *  Common subroutines and data
  * ===========================================================================
  */
 
+
+
     .text
     .align  2
 
+#if defined(WITH_JIT)
+/*
+ * Return from the translation cache to the interpreter when the compiler is
+ * having issues translating/executing a Dalvik instruction. We have to skip
+ * the code cache lookup otherwise it is possible to indefinitely bouce
+ * between the interpreter and the code cache if the instruction that fails
+ * to be compiled happens to be at a trace start.
+ */
+    .global dvmJitToInterpPunt
+dvmJitToInterpPunt:
+    mov    rPC, r0
+#ifdef EXIT_STATS
+    mov    r0,lr
+    bl     dvmBumpPunt;
+#endif
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    FETCH_INST()
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+
+/*
+ * Return to the interpreter to handle a single instruction.
+ * On entry:
+ *    r0 <= PC
+ *    r1 <= PC of resume instruction
+ *    lr <= resume point in translation
+ */
+    .global dvmJitToInterpSingleStep
+dvmJitToInterpSingleStep:
+    str    lr,[rGLUE,#offGlue_jitResume]
+    str    r1,[rGLUE,#offGlue_jitResumePC]
+    mov    r1,#kInterpEntryInstr
+    @ enum is 4 bytes in AAPCS EABI
+    str    r1, [rGLUE, #offGlue_entryPoint]
+    mov    rPC,r0
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    mov    r2,#kJitSingleStep     @ Ask for single step and then revert
+    str    r2,[rGLUE,#offGlue_jitState]
+    mov    r1,#1                  @ set changeInterp to bail to debug interp
+    b      common_gotoBail
+
+
+/*
+ * Return from the translation cache and immediately request
+ * a translation for the exit target.  Commonly used following
+ * invokes.
+ */
+    .global dvmJitToTraceSelect
+dvmJitToTraceSelect:
+    ldr    rPC,[r14, #-1]           @ get our target PC
+    add    rINST,r14,#-5            @ save start of chain branch
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    beq    2f
+    mov    r1,rINST
+    bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    bx     r0                       @ continue native execution
+
+/* No translation, so request one if profiling isn't disabled */
+2:
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_INST()
+    cmp    r0, #0
+    bne    common_selectTrace
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+
+/*
+ * Return from the translation cache to the interpreter.
+ * The return was done with a BLX from thumb mode, and
+ * the following 32-bit word contains the target rPC value.
+ * Note that lr (r14) will have its low-order bit set to denote
+ * its thumb-mode origin.
+ *
+ * We'll need to stash our lr origin away, recover the new
+ * target and then check to see if there is a translation available
+ * for our new target.  If so, we do a translation chain and
+ * go back to native execution.  Otherwise, it's back to the
+ * interpreter (after treating this entry as a potential
+ * trace start).
+ */
+    .global dvmJitToInterpNormal
+dvmJitToInterpNormal:
+    ldr    rPC,[r14, #-1]           @ get our target PC
+    add    rINST,r14,#-5            @ save start of chain branch
+#ifdef EXIT_STATS
+    bl     dvmBumpNormal
+#endif
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    beq    1f                       @ go if not, otherwise do chain
+    mov    r1,rINST
+    bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    bx     r0                       @ continue native execution
+
+/*
+ * Return from the translation cache to the interpreter to do method invocation.
+ * Check if translation exists for the callee, but don't chain to it.
+ */
+    .global dvmJitToInterpNoChain
+dvmJitToInterpNoChain:
+#ifdef EXIT_STATS
+    bl     dvmBumpNoChain
+#endif
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    bxne   r0                       @ continue native execution if so
+
+/*
+ * No translation, restore interpreter regs and start interpreting.
+ * rGLUE & rFP were preserved in the translated code, and rPC has
+ * already been restored by the time we get here.  We'll need to set
+ * up rIBASE & rINST, and load the address of the JitTable into r0.
+ */
+1:
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    FETCH_INST()
+    GET_JIT_PROF_TABLE(r0)
+    @ NOTE: intended fallthrough
+/*
+ * Common code to update potential trace start counter, and initiate
+ * a trace-build if appropriate.  On entry, rPC should point to the
+ * next instruction to execute, and rINST should be already loaded with
+ * the next opcode word, and r0 holds a pointer to the jit profile
+ * table (pJitProfTable).
+ */
+common_testUpdateProfile:
+    cmp     r0,#0
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE_IFEQ(ip)       @ if not profiling, jump to next instruction; otherwise fall through
+
+common_updateProfile:
+    eor     r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
+    lsl     r3,r3,#23          @ shift out excess 511
+    ldrb    r1,[r0,r3,lsr #23] @ get counter
+    GET_INST_OPCODE(ip)
+    subs    r1,r1,#1           @ decrement counter
+    strb    r1,[r0,r3,lsr #23] @ and store it
+    GOTO_OPCODE_IFNE(ip)       @ if counter hasn't hit zero, jump to next instruction; otherwise fall through
+
+/*
+ * Here, we switch to the debug interpreter to request
+ * trace selection.  First, though, check to see if there
+ * is already a native translation in place (and, if so,
+ * jump to it now).
+ */
+    mov     r1,#255
+    strb    r1,[r0,r3,lsr #23] @ reset counter
+    EXPORT_PC()
+    mov     r0,rPC
+    bl      dvmJitGetCodeAddr           @ r0<- dvmJitGetCodeAddr(rPC)
+    cmp     r0,#0
+    ldrne   rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    beq     common_selectTrace
+    bxne    r0                          @ jump to the translation
+common_selectTrace:
+    mov     r2,#kJitTSelectRequest      @ ask for trace selection
+    str     r2,[rGLUE,#offGlue_jitState]
+    mov     r1,#1                       @ set changeInterp
+    b       common_gotoBail
+
+.LdvmCompilerTemplateStart:
+    .word   dvmCompilerTemplateStart
+
+#endif
+
 /*
  * Common code when a backward branch is taken.
  *
 common_backwardBranch:
     mov     r0, #kInterpEntryInstr
     bl      common_periodicChecks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /*
@@ -192,6 +378,10 @@ common_invokeMethodNoRange:
 #endif
     str     rFP, [r10, #offStackSaveArea_prevFrame]
     str     rPC, [r10, #offStackSaveArea_savedPc]
+#if defined(WITH_JIT)
+    mov     r9, #0
+    str     r9, [r10, #offStackSaveArea_returnAddr]
+#endif
     str     r0, [r10, #offStackSaveArea_method]
     tst     r3, #ACC_NATIVE
     bne     .LinvokeNative
@@ -220,11 +410,22 @@ common_invokeMethodNoRange:
     @ r0=methodToCall, r1=newFp, r2=self, r3=newMethodClass, r9=newINST
     str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
     str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     mov     rFP, r1                         @ fp = newFp
     GET_PREFETCHED_OPCODE(ip, r9)           @ extract prefetched opcode from r9
     mov     rINST, r9                       @ publish new rINST
     str     r1, [r2, #offThread_curFrame]   @ self->curFrame = newFp
+    cmp     r0,#0
+    bne     common_updateProfile
     GOTO_OPCODE(ip)                         @ jump to next instruction
+#else
+    mov     rFP, r1                         @ fp = newFp
+    GET_PREFETCHED_OPCODE(ip, r9)           @ extract prefetched opcode from r9
+    mov     rINST, r9                       @ publish new rINST
+    str     r1, [r2, #offThread_curFrame]   @ self->curFrame = newFp
+    GOTO_OPCODE(ip)                         @ jump to next instruction
+#endif
 
 .LinvokeNative:
     @ Prep for the native call
@@ -330,10 +531,26 @@ common_returnFromMethod:
     str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
     ldr     r1, [r10, #offClassObject_pDvmDex]   @ r1<- method->clazz->pDvmDex
     str     rFP, [r3, #offThread_curFrame]  @ self->curFrame = fp
+#if defined(WITH_JIT)
+    ldr     r3, [r0, #offStackSaveArea_returnAddr] @ r3 = saveArea->returnAddr
+    GET_JIT_PROF_TABLE(r0)
+    mov     rPC, r9                     @ publish new rPC
+    str     r1, [rGLUE, #offGlue_methodClassDex]
+    cmp     r3, #0                      @ caller is compiled code
+    bne     1f
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+1:
+    ldr     rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    blx     r3
+#else
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     mov     rPC, r9                     @ publish new rPC
     str     r1, [rGLUE, #offGlue_methodClassDex]
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
     /*
      * Return handling, calls through "glue code".
@@ -356,12 +573,19 @@ common_returnFromMethod:
  *
  * This does not return.
  */
+     .global dvmMterpCommonExceptionThrown
+dvmMterpCommonExceptionThrown:
 common_exceptionThrown:
 .LexceptionNew:
     mov     r0, #kInterpEntryThrow
     mov     r9, #0
     bl      common_periodicChecks
 
+#if defined(WITH_JIT)
+    mov     r2,#kJitTSelectAbort        @ abandon trace selection in progress
+    str     r2,[rGLUE,#offGlue_jitState]
+#endif
+
     ldr     r10, [rGLUE, #offGlue_self] @ r10<- glue->self
     ldr     r9, [r10, #offThread_exception] @ r9<- self->exception
     mov     r1, r10                     @ r1<- self
index 586418f..c256f40 100644 (file)
@@ -168,6 +168,8 @@ unspecified registers or condition codes.
  * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
  */
 #define GOTO_OPCODE(_reg)       add     pc, rIBASE, _reg, lsl #${handler_size_bits}
+#define GOTO_OPCODE_IFEQ(_reg)  addeq   pc, rIBASE, _reg, lsl #${handler_size_bits}
+#define GOTO_OPCODE_IFNE(_reg)  addne   pc, rIBASE, _reg, lsl #${handler_size_bits}
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
@@ -175,6 +177,14 @@ unspecified registers or condition codes.
 #define GET_VREG(_reg, _vreg)   ldr     _reg, [rFP, _vreg, lsl #2]
 #define SET_VREG(_reg, _vreg)   str     _reg, [rFP, _vreg, lsl #2]
 
+#if defined(WITH_JIT)
+/*
+ * Macros to fetch the JIT table pointers from the glue structure
+ */
+#define GET_JIT_TABLE(_reg)         ldr     _reg,[rGLUE,#offGlue_pJitTable]
+#define GET_JIT_PROF_TABLE(_reg)    ldr     _reg,[rGLUE,#offGlue_pJitProfTable]
+#endif
+
 /*
  * Convert a virtual register index into an address.
  */
index 7942632..861ca5b 100644 (file)
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
index f52e3f0..37eaa20 100644 (file)
@@ -836,6 +836,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
 #endif
         newSaveArea->prevFrame = fp;
         newSaveArea->savedPc = pc;
+#if defined(WITH_JIT)
+        newSaveArea->returnAddr = 0;
+#endif
         newSaveArea->method = methodToCall;
 
         if (!dvmIsNativeMethod(methodToCall)) {
@@ -929,4 +932,3 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
     }
     assert(false);      // should not get here
 GOTO_TARGET_END
-
index d2fca9c..174c226 100644 (file)
@@ -19,6 +19,7 @@
 #include "interp/InterpDefs.h"
 #include "mterp/Mterp.h"
 #include <math.h>                   // needed for fmod, fmodf
+#include "mterp/common/FindInterface.h"
 
 /*
  * Configuration defines.  These affect the C implementations, i.e. the
@@ -327,29 +328,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
  * If we're building without debug and profiling support, we never switch.
  */
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
+#if defined(WITH_JIT)
+# define NEED_INTERP_SWITCH(_current) (                                     \
+    (_current == INTERP_STD) ?                                              \
+        dvmJitDebuggerOrProfilerActive(interpState->jitState) :             \
+        !dvmJitDebuggerOrProfilerActive(interpState->jitState) )
+#else
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
         dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() )
+#endif
 #else
 # define NEED_INTERP_SWITCH(_current) (false)
 #endif
 
 /*
- * Look up an interface on a class using the cache.
- */
-INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
-    u4 methodIdx, const Method* method, DvmDex* methodClassDex)
-{
-#define ATOMIC_CACHE_CALC \
-    dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex)
-
-    return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache,
-                DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx);
-
-#undef ATOMIC_CACHE_CALC
-}
-
-/*
  * Check to see if "obj" is NULL.  If so, throw an exception.  Assumes the
  * pc has already been exported to the stack.
  *
@@ -412,4 +405,3 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
 #endif
     return true;
 }
-
diff --git a/vm/mterp/common/FindInterface.h b/vm/mterp/common/FindInterface.h
new file mode 100644 (file)
index 0000000..021ed65
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* common includes */
+#include "Dalvik.h"
+
+/*
+ * Look up an interface on a class using the cache.
+ *
+ * This function used to be defined in mterp/c/header.c, but it is now used by
+ * the JIT compiler as well so it is separated into its own header file to
+ * avoid potential out-of-sync changes in the future.
+ */
+INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
+    u4 methodIdx, const Method* method, DvmDex* methodClassDex)
+{
+#define ATOMIC_CACHE_CALC \
+    dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex)
+
+    return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache,
+                DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx);
+
+#undef ATOMIC_CACHE_CALC
+}
index 73292a9..b4bb1c2 100644 (file)
@@ -101,14 +101,42 @@ MTERP_OFFSET(offGlue_pSelfSuspendCount, MterpGlue, pSelfSuspendCount, 36)
 MTERP_OFFSET(offGlue_pDebuggerActive,   MterpGlue, pDebuggerActive, 40)
 MTERP_OFFSET(offGlue_pActiveProfilers,  MterpGlue, pActiveProfilers, 44)
 MTERP_OFFSET(offGlue_entryPoint,        MterpGlue, entryPoint, 48)
+#if defined(WITH_JIT)
+MTERP_OFFSET(offGlue_pJitTable,         MterpGlue, pJitTable, 56)
+MTERP_OFFSET(offGlue_pJitProfTable,     MterpGlue, pJitProfTable, 60)
+MTERP_OFFSET(offGlue_jitState,          MterpGlue, jitState, 64)
+MTERP_OFFSET(offGlue_jitResume,         MterpGlue, jitResume, 68)
+MTERP_OFFSET(offGlue_jitResumePC,       MterpGlue, jitResumePC, 72)
+#endif
 #elif defined(WITH_DEBUGGER)
 MTERP_OFFSET(offGlue_pDebuggerActive,   MterpGlue, pDebuggerActive, 40)
 MTERP_OFFSET(offGlue_entryPoint,        MterpGlue, entryPoint, 44)
+#if defined(WITH_JIT)
+MTERP_OFFSET(offGlue_pJitTable,         MterpGlue, pJitTable, 52)
+MTERP_OFFSET(offGlue_pJitProfTable,     MterpGlue, pJitProfTable, 56)
+MTERP_OFFSET(offGlue_jitState,          MterpGlue, jitState, 60)
+MTERP_OFFSET(offGlue_jitResume,         MterpGlue, jitResume, 64)
+MTERP_OFFSET(offGlue_jitResumePC,       MterpGlue, jitResumePC, 68)
+#endif
 #elif defined(WITH_PROFILER)
 MTERP_OFFSET(offGlue_pActiveProfilers,  MterpGlue, pActiveProfilers, 40)
 MTERP_OFFSET(offGlue_entryPoint,        MterpGlue, entryPoint, 44)
+#if defined(WITH_JIT)
+MTERP_OFFSET(offGlue_pJitTable,         MterpGlue, pJitTable, 52)
+MTERP_OFFSET(offGlue_pJitProfTable,     MterpGlue, pJitProfTable, 56)
+MTERP_OFFSET(offGlue_jitState,          MterpGlue, jitState, 60)
+MTERP_OFFSET(offGlue_jitResume,         MterpGlue, jitResume, 64)
+MTERP_OFFSET(offGlue_jitResumePC,       MterpGlue, jitResumePC, 68)
+#endif
 #else
 MTERP_OFFSET(offGlue_entryPoint,        MterpGlue, entryPoint, 40)
+#if defined(WITH_JIT)
+MTERP_OFFSET(offGlue_pJitTable,         MterpGlue, pJitTable, 48)
+MTERP_OFFSET(offGlue_pJitProfTable,     MterpGlue, pJitProfTable, 52)
+MTERP_OFFSET(offGlue_jitState,          MterpGlue, jitState, 56)
+MTERP_OFFSET(offGlue_jitResume,         MterpGlue, jitResume, 60)
+MTERP_OFFSET(offGlue_jitResumePC,       MterpGlue, jitResumePC, 64)
+#endif
 #endif
 /* make sure all JValue union members are stored at the same offset */
 MTERP_OFFSET(offGlue_retval_z,          MterpGlue, retval.z, 8)
@@ -131,14 +159,16 @@ MTERP_OFFSET(offStackSaveArea_savedPc,  StackSaveArea, savedPc, 8)
 MTERP_OFFSET(offStackSaveArea_method,   StackSaveArea, method, 12)
 MTERP_OFFSET(offStackSaveArea_currentPc, StackSaveArea, xtra.currentPc, 16)
 MTERP_OFFSET(offStackSaveArea_localRefTop, StackSaveArea, xtra.localRefTop, 16)
-MTERP_SIZEOF(sizeofStackSaveArea,       StackSaveArea, 20)
+MTERP_OFFSET(offStackSaveArea_returnAddr, StackSaveArea, returnAddr, 20)
+MTERP_SIZEOF(sizeofStackSaveArea,       StackSaveArea, 24)
 #else
 MTERP_OFFSET(offStackSaveArea_prevFrame, StackSaveArea, prevFrame, 0)
 MTERP_OFFSET(offStackSaveArea_savedPc,  StackSaveArea, savedPc, 4)
 MTERP_OFFSET(offStackSaveArea_method,   StackSaveArea, method, 8)
 MTERP_OFFSET(offStackSaveArea_currentPc, StackSaveArea, xtra.currentPc, 12)
 MTERP_OFFSET(offStackSaveArea_localRefTop, StackSaveArea, xtra.localRefTop, 12)
-MTERP_SIZEOF(sizeofStackSaveArea,       StackSaveArea, 16)
+MTERP_OFFSET(offStackSaveArea_returnAddr, StackSaveArea, returnAddr, 16)
+MTERP_SIZEOF(sizeofStackSaveArea,       StackSaveArea, 20)
 #endif
 
 /* InstField fields */
@@ -200,6 +230,20 @@ MTERP_SIZEOF(sizeofClassStatus,         InterpEntry, MTERP_SMALL_ENUM)
 MTERP_CONSTANT(kInterpEntryInstr,   0)
 MTERP_CONSTANT(kInterpEntryReturn,  1)
 MTERP_CONSTANT(kInterpEntryThrow,   2)
+#if defined(WITH_JIT)
+MTERP_CONSTANT(kInterpEntryResume,  3)
+#endif
+
+#if defined(WITH_JIT)
+MTERP_CONSTANT(kJitOff,             0)
+MTERP_CONSTANT(kJitNormal,          1)
+MTERP_CONSTANT(kJitTSelectRequest,  2)
+MTERP_CONSTANT(kJitTSelect,         3)
+MTERP_CONSTANT(kJitTSelectAbort,    4)
+MTERP_CONSTANT(kJitTSelectEnd,      5)
+MTERP_CONSTANT(kJitSingleStep,      6)
+MTERP_CONSTANT(kJitSingleStepEnd,   7)
+#endif
 
 /* ClassStatus enumeration */
 MTERP_SIZEOF(sizeofClassStatus,         ClassStatus, MTERP_SMALL_ENUM)
index a348e31..61fc16b 100644 (file)
@@ -175,6 +175,8 @@ unspecified registers or condition codes.
  * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
  */
 #define GOTO_OPCODE(_reg)       add     pc, rIBASE, _reg, lsl #6
+#define GOTO_OPCODE_IFEQ(_reg)  addeq   pc, rIBASE, _reg, lsl #6
+#define GOTO_OPCODE_IFNE(_reg)  addne   pc, rIBASE, _reg, lsl #6
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
@@ -182,6 +184,14 @@ unspecified registers or condition codes.
 #define GET_VREG(_reg, _vreg)   ldr     _reg, [rFP, _vreg, lsl #2]
 #define SET_VREG(_reg, _vreg)   str     _reg, [rFP, _vreg, lsl #2]
 
+#if defined(WITH_JIT)
+/*
+ * Macros to fetch the JIT table pointers from the glue structure
+ */
+#define GET_JIT_TABLE(_reg)         ldr     _reg,[rGLUE,#offGlue_pJitTable]
+#define GET_JIT_PROF_TABLE(_reg)    ldr     _reg,[rGLUE,#offGlue_pJitProfTable]
+#endif
+
 /*
  * Convert a virtual register index into an address.
  */
@@ -302,10 +312,21 @@ dvmMterpStdRun:
     cmp     r1, #kInterpEntryInstr      @ usual case?
     bne     .Lnot_instr                 @ no, handle it
 
+#if defined(WITH_JIT)
+.Lno_singleStep:
+    /* Entry is always a possible trace start */
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_INST()
+    cmp    r0,#0
+    bne    common_updateProfile
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+#else
     /* start executing the instruction at rPC */
     FETCH_INST()                        @ load rINST from rPC
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 .Lnot_instr:
     cmp     r1, #kInterpEntryReturn     @ were we returning from a method?
@@ -315,6 +336,22 @@ dvmMterpStdRun:
     cmp     r1, #kInterpEntryThrow      @ were we throwing an exception?
     beq     common_exceptionThrown
 
+#if defined(WITH_JIT)
+.Lnot_throw:
+    ldr     r0,[rGLUE, #offGlue_jitResume]
+    ldr     r2,[rGLUE, #offGlue_jitResumePC]
+    cmp     r1, #kInterpEntryResume     @ resuming after Jit single-step?
+    bne     .Lbad_arg
+    cmp     rPC,r2
+    bne     .Lno_singleStep             @ must have branched, don't resume
+    mov     r1, #kInterpEntryInstr
+    strb    r1, [rGLUE, #offGlue_entryPoint]
+    ldr     rINST, .LdvmCompilerTemplate
+    bx      r0                          @ re-enter the translation
+.LdvmCompilerTemplate:
+    .word   dvmCompilerTemplateStart
+#endif
+
 .Lbad_arg:
     ldr     r0, strBadEntryPoint
     @ r1 holds value of entryPoint
@@ -1111,10 +1148,18 @@ dalvik_inst:
     movs    r9, r0, asr #24             @ r9<- ssssssAA (sign-extended)
     mov     r9, r9, lsl #1              @ r9<- byte offset
     bmi     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
-
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 64
@@ -1130,9 +1175,18 @@ dalvik_inst:
     FETCH_S(r0, 1)                      @ r0<- ssssAAAA (sign-extended)
     movs    r9, r0, asl #1              @ r9<- byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /* ------------------------------ */
@@ -1157,10 +1211,18 @@ dalvik_inst:
     orrs    r0, r0, r1, lsl #16         @ r0<- AAAAaaaa, check sign
     mov     r9, r0, asl #1              @ r9<- byte offset
     ble     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
-
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 64
@@ -1186,9 +1248,18 @@ dalvik_inst:
     movs    r9, r0, asl #1              @ r9<- branch byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
     beq     common_backwardBranch       @ (want to use BLE but V is unknown)
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /* ------------------------------ */
@@ -1216,9 +1287,18 @@ dalvik_inst:
     movs    r9, r0, asl #1              @ r9<- branch byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
     beq     common_backwardBranch       @ (want to use BLE but V is unknown)
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1465,9 +1545,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1495,9 +1582,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1525,9 +1619,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1555,9 +1656,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1585,9 +1693,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1615,9 +1730,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1642,9 +1764,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1669,9 +1801,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1696,9 +1838,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1723,9 +1875,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1750,9 +1912,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1777,9 +1949,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -9298,15 +9480,192 @@ d2l_doconv:
 dvmAsmSisterEnd:
 
 /* File: armv5te/footer.S */
+
 /*
  * ===========================================================================
  *  Common subroutines and data
  * ===========================================================================
  */
 
+
+
     .text
     .align  2
 
+#if defined(WITH_JIT)
+/*
+ * Return from the translation cache to the interpreter when the compiler is
+ * having issues translating/executing a Dalvik instruction. We have to skip
+ * the code cache lookup otherwise it is possible to indefinitely bouce
+ * between the interpreter and the code cache if the instruction that fails
+ * to be compiled happens to be at a trace start.
+ */
+    .global dvmJitToInterpPunt
+dvmJitToInterpPunt:
+    mov    rPC, r0
+#ifdef EXIT_STATS
+    mov    r0,lr
+    bl     dvmBumpPunt;
+#endif
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    FETCH_INST()
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+
+/*
+ * Return to the interpreter to handle a single instruction.
+ * On entry:
+ *    r0 <= PC
+ *    r1 <= PC of resume instruction
+ *    lr <= resume point in translation
+ */
+    .global dvmJitToInterpSingleStep
+dvmJitToInterpSingleStep:
+    str    lr,[rGLUE,#offGlue_jitResume]
+    str    r1,[rGLUE,#offGlue_jitResumePC]
+    mov    r1,#kInterpEntryInstr
+    @ enum is 4 byte in aapcs-EABI
+    str    r1, [rGLUE, #offGlue_entryPoint]
+    mov    rPC,r0
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    mov    r2,#kJitSingleStep     @ Ask for single step and then revert
+    str    r2,[rGLUE,#offGlue_jitState]
+    mov    r1,#1                  @ set changeInterp to bail to debug interp
+    b      common_gotoBail
+
+
+/*
+ * Return from the translation cache and immediately request
+ * a translation for the exit target.  Commonly used following
+ * invokes.
+ */
+    .global dvmJitToTraceSelect
+dvmJitToTraceSelect:
+    ldr    rPC,[r14, #-1]           @ get our target PC
+    add    rINST,r14,#-5            @ save start of chain branch
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    beq    2f
+    mov    r1,rINST
+    bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    bx     r0                       @ continue native execution
+
+/* No translation, so request one if profiling isn't disabled*/
+2:
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_INST()
+    cmp    r0, #0
+    bne    common_selectTrace
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+
+/*
+ * Return from the translation cache to the interpreter.
+ * The return was done with a BLX from thumb mode, and
+ * the following 32-bit word contains the target rPC value.
+ * Note that lr (r14) will have its low-order bit set to denote
+ * its thumb-mode origin.
+ *
+ * We'll need to stash our lr origin away, recover the new
+ * target and then check to see if there is a translation available
+ * for our new target.  If so, we do a translation chain and
+ * go back to native execution.  Otherwise, it's back to the
+ * interpreter (after treating this entry as a potential
+ * trace start).
+ */
+    .global dvmJitToInterpNormal
+dvmJitToInterpNormal:
+    ldr    rPC,[r14, #-1]           @ get our target PC
+    add    rINST,r14,#-5            @ save start of chain branch
+#ifdef EXIT_STATS
+    bl     dvmBumpNormal
+#endif
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    beq    1f                       @ go if not, otherwise do chain
+    mov    r1,rINST
+    bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    bx     r0                       @ continue native execution
+
+/*
+ * Return from the translation cache to the interpreter to do method invocation.
+ * Check if translation exists for the callee, but don't chain to it.
+ */
+    .global dvmJitToInterpNoChain
+dvmJitToInterpNoChain:
+#ifdef EXIT_STATS
+    bl     dvmBumpNoChain
+#endif
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    bxne   r0                       @ continue native execution if so
+
+/*
+ * No translation, restore interpreter regs and start interpreting.
+ * rGLUE & rFP were preserved in the translated code, and rPC has
+ * already been restored by the time we get here.  We'll need to set
+ * up rIBASE & rINST, and load the address of the JitTable into r0.
+ */
+1:
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    FETCH_INST()
+    GET_JIT_PROF_TABLE(r0)
+    @ NOTE: intended fallthrough
+/*
+ * Common code to update potential trace start counter, and initiate
+ * a trace-build if appropriate.  On entry, rPC should point to the
+ * next instruction to execute, and rINST should be already loaded with
+ * the next opcode word, and r0 holds a pointer to the jit profile
+ * table (pJitProfTable).
+ */
+common_testUpdateProfile:
+    cmp     r0,#0
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE_IFEQ(ip)       @ if not profiling, fallthrough otherwise */
+
+common_updateProfile:
+    eor     r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
+    lsl     r3,r3,#23          @ shift out excess 511
+    ldrb    r1,[r0,r3,lsr #23] @ get counter
+    GET_INST_OPCODE(ip)
+    subs    r1,r1,#1           @ decrement counter
+    strb    r1,[r0,r3,lsr #23] @ and store it
+    GOTO_OPCODE_IFNE(ip)       @ if not threshold, fallthrough otherwise */
+
+/*
+ * Here, we switch to the debug interpreter to request
+ * trace selection.  First, though, check to see if there
+ * is already a native translation in place (and, if so,
+ * jump to it now).
+ */
+    mov     r1,#255
+    strb    r1,[r0,r3,lsr #23] @ reset counter
+    EXPORT_PC()
+    mov     r0,rPC
+    bl      dvmJitGetCodeAddr           @ r0<- dvmJitGetCodeAddr(rPC)
+    cmp     r0,#0
+    ldrne   rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    beq     common_selectTrace
+    bxne    r0                          @ jump to the translation
+common_selectTrace:
+    mov     r2,#kJitTSelectRequest      @ ask for trace selection
+    str     r2,[rGLUE,#offGlue_jitState]
+    mov     r1,#1                       @ set changeInterp
+    b       common_gotoBail
+
+.LdvmCompilerTemplateStart:
+    .word   dvmCompilerTemplateStart
+
+#endif
+
 /*
  * Common code when a backward branch is taken.
  *
@@ -9316,9 +9675,18 @@ dvmAsmSisterEnd:
 common_backwardBranch:
     mov     r0, #kInterpEntryInstr
     bl      common_periodicChecks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /*
@@ -9492,6 +9860,10 @@ common_invokeMethodNoRange:
 #endif
     str     rFP, [r10, #offStackSaveArea_prevFrame]
     str     rPC, [r10, #offStackSaveArea_savedPc]
+#if defined(WITH_JIT)
+    mov     r9, #0
+    str     r9, [r10, #offStackSaveArea_returnAddr]
+#endif
     str     r0, [r10, #offStackSaveArea_method]
     tst     r3, #ACC_NATIVE
     bne     .LinvokeNative
@@ -9520,11 +9892,22 @@ common_invokeMethodNoRange:
     @ r0=methodToCall, r1=newFp, r2=self, r3=newMethodClass, r9=newINST
     str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
     str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     mov     rFP, r1                         @ fp = newFp
     GET_PREFETCHED_OPCODE(ip, r9)           @ extract prefetched opcode from r9
     mov     rINST, r9                       @ publish new rINST
     str     r1, [r2, #offThread_curFrame]   @ self->curFrame = newFp
+    cmp     r0,#0
+    bne     common_updateProfile
     GOTO_OPCODE(ip)                         @ jump to next instruction
+#else
+    mov     rFP, r1                         @ fp = newFp
+    GET_PREFETCHED_OPCODE(ip, r9)           @ extract prefetched opcode from r9
+    mov     rINST, r9                       @ publish new rINST
+    str     r1, [r2, #offThread_curFrame]   @ self->curFrame = newFp
+    GOTO_OPCODE(ip)                         @ jump to next instruction
+#endif
 
 .LinvokeNative:
     @ Prep for the native call
@@ -9630,10 +10013,26 @@ common_returnFromMethod:
     str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
     ldr     r1, [r10, #offClassObject_pDvmDex]   @ r1<- method->clazz->pDvmDex
     str     rFP, [r3, #offThread_curFrame]  @ self->curFrame = fp
+#if defined(WITH_JIT)
+    ldr     r3, [r0, #offStackSaveArea_returnAddr] @ r3 = saveArea->returnAddr
+    GET_JIT_PROF_TABLE(r0)
+    mov     rPC, r9                     @ publish new rPC
+    str     r1, [rGLUE, #offGlue_methodClassDex]
+    cmp     r3, #0                      @ caller is compiled code
+    bne     1f
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+1:
+    ldr     rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    blx     r3
+#else
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     mov     rPC, r9                     @ publish new rPC
     str     r1, [rGLUE, #offGlue_methodClassDex]
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
     /*
      * Return handling, calls through "glue code".
@@ -9656,12 +10055,19 @@ common_returnFromMethod:
  *
  * This does not return.
  */
+     .global dvmMterpCommonExceptionThrown
+dvmMterpCommonExceptionThrown:
 common_exceptionThrown:
 .LexceptionNew:
     mov     r0, #kInterpEntryThrow
     mov     r9, #0
     bl      common_periodicChecks
 
+#if defined(WITH_JIT)
+    mov     r2,#kJitTSelectAbort        @ abandon trace selection in progress
+    str     r2,[rGLUE,#offGlue_jitState]
+#endif
+
     ldr     r10, [rGLUE, #offGlue_self] @ r10<- glue->self
     ldr     r9, [r10, #offThread_exception] @ r9<- self->exception
     mov     r1, r10                     @ r1<- self
index 77f6213..3e55700 100644 (file)
@@ -175,6 +175,8 @@ unspecified registers or condition codes.
  * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
  */
 #define GOTO_OPCODE(_reg)       add     pc, rIBASE, _reg, lsl #6
+#define GOTO_OPCODE_IFEQ(_reg)  addeq   pc, rIBASE, _reg, lsl #6
+#define GOTO_OPCODE_IFNE(_reg)  addne   pc, rIBASE, _reg, lsl #6
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
@@ -182,6 +184,14 @@ unspecified registers or condition codes.
 #define GET_VREG(_reg, _vreg)   ldr     _reg, [rFP, _vreg, lsl #2]
 #define SET_VREG(_reg, _vreg)   str     _reg, [rFP, _vreg, lsl #2]
 
+#if defined(WITH_JIT)
+/*
+ * Null definition for overhead measuring purposes
+ */
+#define GET_JIT_TABLE(_reg)         ldr     _reg,[rGLUE,#offGlue_pJitTable]
+#define GET_JIT_PROF_TABLE(_reg)    ldr     _reg,[rGLUE,#offGlue_pJitProfTable]
+#endif
+
 /*
  * Convert a virtual register index into an address.
  */
@@ -302,10 +312,21 @@ dvmMterpStdRun:
     cmp     r1, #kInterpEntryInstr      @ usual case?
     bne     .Lnot_instr                 @ no, handle it
 
+#if defined(WITH_JIT)
+.Lno_singleStep:
+    /* Entry is always a possible trace start */
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_INST()
+    cmp    r0,#0
+    bne    common_updateProfile
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+#else
     /* start executing the instruction at rPC */
     FETCH_INST()                        @ load rINST from rPC
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 .Lnot_instr:
     cmp     r1, #kInterpEntryReturn     @ were we returning from a method?
@@ -315,6 +336,22 @@ dvmMterpStdRun:
     cmp     r1, #kInterpEntryThrow      @ were we throwing an exception?
     beq     common_exceptionThrown
 
+#if defined(WITH_JIT)
+.Lnot_throw:
+    ldr     r0,[rGLUE, #offGlue_jitResume]
+    ldr     r2,[rGLUE, #offGlue_jitResumePC]
+    cmp     r1, #kInterpEntryResume     @ resuming after Jit single-step?
+    bne     .Lbad_arg
+    cmp     rPC,r2
+    bne     .Lno_singleStep             @ must have branched, don't resume
+    mov     r1, #kInterpEntryInstr
+    strb    r1, [rGLUE, #offGlue_entryPoint]
+    ldr     rINST, .LdvmCompilerTemplate
+    bx      r0                          @ re-enter the translation
+.LdvmCompilerTemplate:
+    .word   dvmCompilerTemplateStart
+#endif
+
 .Lbad_arg:
     ldr     r0, strBadEntryPoint
     @ r1 holds value of entryPoint
@@ -1111,10 +1148,18 @@ dalvik_inst:
     movs    r9, r0, asr #24             @ r9<- ssssssAA (sign-extended)
     mov     r9, r9, lsl #1              @ r9<- byte offset
     bmi     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
-
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 64
@@ -1130,9 +1175,18 @@ dalvik_inst:
     FETCH_S(r0, 1)                      @ r0<- ssssAAAA (sign-extended)
     movs    r9, r0, asl #1              @ r9<- byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /* ------------------------------ */
@@ -1157,10 +1211,18 @@ dalvik_inst:
     orrs    r0, r0, r1, lsl #16         @ r0<- AAAAaaaa, check sign
     mov     r9, r0, asl #1              @ r9<- byte offset
     ble     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
-
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 64
@@ -1186,9 +1248,18 @@ dalvik_inst:
     movs    r9, r0, asl #1              @ r9<- branch byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
     beq     common_backwardBranch       @ (want to use BLE but V is unknown)
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /* ------------------------------ */
@@ -1216,9 +1287,18 @@ dalvik_inst:
     movs    r9, r0, asl #1              @ r9<- branch byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
     beq     common_backwardBranch       @ (want to use BLE but V is unknown)
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1443,9 +1523,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1473,9 +1560,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1503,9 +1597,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1533,9 +1634,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1563,9 +1671,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1593,9 +1708,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1620,9 +1742,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1647,9 +1779,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1674,9 +1816,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1701,9 +1853,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1728,9 +1890,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1755,9 +1927,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -8812,15 +8994,192 @@ d2l_doconv:
 dvmAsmSisterEnd:
 
 /* File: armv5te/footer.S */
+
 /*
  * ===========================================================================
  *  Common subroutines and data
  * ===========================================================================
  */
 
+
+
     .text
     .align  2
 
+#if defined(WITH_JIT)
+/*
+ * Return from the translation cache to the interpreter when the compiler is
+ * having issues translating/executing a Dalvik instruction. We have to skip
+ * the code cache lookup, otherwise it is possible to indefinitely bounce
+ * between the interpreter and the code cache if the instruction that fails
+ * to be compiled happens to be at a trace start.
+ */
+    .global dvmJitToInterpPunt
+dvmJitToInterpPunt:
+    mov    rPC, r0
+#ifdef EXIT_STATS
+    mov    r0,lr
+    bl     dvmBumpPunt;
+#endif
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    FETCH_INST()
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+
+/*
+ * Return to the interpreter to handle a single instruction.
+ * On entry:
+ *    r0 <= PC
+ *    r1 <= PC of resume instruction
+ *    lr <= resume point in translation
+ */
+    .global dvmJitToInterpSingleStep
+dvmJitToInterpSingleStep:
+    str    lr,[rGLUE,#offGlue_jitResume]
+    str    r1,[rGLUE,#offGlue_jitResumePC]
+    mov    r1,#kInterpEntryInstr
+    @ enum is 4 bytes in aapcs-EABI
+    str    r1, [rGLUE, #offGlue_entryPoint]
+    mov    rPC,r0
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    mov    r2,#kJitSingleStep     @ Ask for single step and then revert
+    str    r2,[rGLUE,#offGlue_jitState]
+    mov    r1,#1                  @ set changeInterp to bail to debug interp
+    b      common_gotoBail
+
+
+/*
+ * Return from the translation cache and immediately request
+ * a translation for the exit target.  Commonly used following
+ * invokes.
+ */
+    .global dvmJitToTraceSelect
+dvmJitToTraceSelect:
+    ldr    rPC,[r14, #-1]           @ get our target PC
+    add    rINST,r14,#-5            @ save start of chain branch
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    beq    2f
+    mov    r1,rINST
+    bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    bx     r0                       @ continue native execution
+
+/* No translation, so request one if profiling isn't disabled */
+2:
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_INST()
+    cmp    r0, #0
+    bne    common_selectTrace
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+
+/*
+ * Return from the translation cache to the interpreter.
+ * The return was done with a BLX from thumb mode, and
+ * the following 32-bit word contains the target rPC value.
+ * Note that lr (r14) will have its low-order bit set to denote
+ * its thumb-mode origin.
+ *
+ * We'll need to stash our lr origin away, recover the new
+ * target and then check to see if there is a translation available
+ * for our new target.  If so, we do a translation chain and
+ * go back to native execution.  Otherwise, it's back to the
+ * interpreter (after treating this entry as a potential
+ * trace start).
+ */
+    .global dvmJitToInterpNormal
+dvmJitToInterpNormal:
+    ldr    rPC,[r14, #-1]           @ get our target PC
+    add    rINST,r14,#-5            @ save start of chain branch
+#ifdef EXIT_STATS
+    bl     dvmBumpNormal
+#endif
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    beq    1f                       @ go if not, otherwise do chain
+    mov    r1,rINST
+    bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    bx     r0                       @ continue native execution
+
+/*
+ * Return from the translation cache to the interpreter to do method invocation.
+ * Check if translation exists for the callee, but don't chain to it.
+ */
+    .global dvmJitToInterpNoChain
+dvmJitToInterpNoChain:
+#ifdef EXIT_STATS
+    bl     dvmBumpNoChain
+#endif
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    bxne   r0                       @ continue native execution if so
+
+/*
+ * No translation, restore interpreter regs and start interpreting.
+ * rGLUE & rFP were preserved in the translated code, and rPC has
+ * already been restored by the time we get here.  We'll need to set
+ * up rIBASE & rINST, and load the address of the JitTable into r0.
+ */
+1:
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    FETCH_INST()
+    GET_JIT_PROF_TABLE(r0)
+    @ NOTE: intended fallthrough
+/*
+ * Common code to update potential trace start counter, and initiate
+ * a trace-build if appropriate.  On entry, rPC should point to the
+ * next instruction to execute, and rINST should be already loaded with
+ * the next opcode word, and r0 holds a pointer to the jit profile
+ * table (pJitProfTable).
+ */
+common_testUpdateProfile:
+    cmp     r0,#0
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE_IFEQ(ip)       @ if not profiling, fallthrough otherwise */
+
+common_updateProfile:
+    eor     r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
+    lsl     r3,r3,#23          @ shift out excess 511
+    ldrb    r1,[r0,r3,lsr #23] @ get counter
+    GET_INST_OPCODE(ip)
+    subs    r1,r1,#1           @ decrement counter
+    strb    r1,[r0,r3,lsr #23] @ and store it
+    GOTO_OPCODE_IFNE(ip)       @ if not threshold, fallthrough otherwise */
+
+/*
+ * Here, we switch to the debug interpreter to request
+ * trace selection.  First, though, check to see if there
+ * is already a native translation in place (and, if so,
+ * jump to it now).
+ */
+    mov     r1,#255
+    strb    r1,[r0,r3,lsr #23] @ reset counter
+    EXPORT_PC()
+    mov     r0,rPC
+    bl      dvmJitGetCodeAddr           @ r0<- dvmJitGetCodeAddr(rPC)
+    cmp     r0,#0
+    ldrne   rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    beq     common_selectTrace
+    bxne    r0                          @ jump to the translation
+common_selectTrace:
+    mov     r2,#kJitTSelectRequest      @ ask for trace selection
+    str     r2,[rGLUE,#offGlue_jitState]
+    mov     r1,#1                       @ set changeInterp
+    b       common_gotoBail
+
+.LdvmCompilerTemplateStart:
+    .word   dvmCompilerTemplateStart
+
+#endif
+
 /*
  * Common code when a backward branch is taken.
  *
@@ -8830,9 +9189,18 @@ dvmAsmSisterEnd:
 common_backwardBranch:
     mov     r0, #kInterpEntryInstr
     bl      common_periodicChecks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /*
@@ -9006,6 +9374,10 @@ common_invokeMethodNoRange:
 #endif
     str     rFP, [r10, #offStackSaveArea_prevFrame]
     str     rPC, [r10, #offStackSaveArea_savedPc]
+#if defined(WITH_JIT)
+    mov     r9, #0
+    str     r9, [r10, #offStackSaveArea_returnAddr]
+#endif
     str     r0, [r10, #offStackSaveArea_method]
     tst     r3, #ACC_NATIVE
     bne     .LinvokeNative
@@ -9034,11 +9406,22 @@ common_invokeMethodNoRange:
     @ r0=methodToCall, r1=newFp, r2=self, r3=newMethodClass, r9=newINST
     str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
     str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     mov     rFP, r1                         @ fp = newFp
     GET_PREFETCHED_OPCODE(ip, r9)           @ extract prefetched opcode from r9
     mov     rINST, r9                       @ publish new rINST
     str     r1, [r2, #offThread_curFrame]   @ self->curFrame = newFp
+    cmp     r0,#0
+    bne     common_updateProfile
     GOTO_OPCODE(ip)                         @ jump to next instruction
+#else
+    mov     rFP, r1                         @ fp = newFp
+    GET_PREFETCHED_OPCODE(ip, r9)           @ extract prefetched opcode from r9
+    mov     rINST, r9                       @ publish new rINST
+    str     r1, [r2, #offThread_curFrame]   @ self->curFrame = newFp
+    GOTO_OPCODE(ip)                         @ jump to next instruction
+#endif
 
 .LinvokeNative:
     @ Prep for the native call
@@ -9144,10 +9527,26 @@ common_returnFromMethod:
     str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
     ldr     r1, [r10, #offClassObject_pDvmDex]   @ r1<- method->clazz->pDvmDex
     str     rFP, [r3, #offThread_curFrame]  @ self->curFrame = fp
+#if defined(WITH_JIT)
+    ldr     r3, [r0, #offStackSaveArea_returnAddr] @ r3 = saveArea->returnAddr
+    GET_JIT_PROF_TABLE(r0)
+    mov     rPC, r9                     @ publish new rPC
+    str     r1, [rGLUE, #offGlue_methodClassDex]
+    cmp     r3, #0                      @ caller is compiled code
+    bne     1f
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+1:
+    ldr     rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    blx     r3
+#else
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     mov     rPC, r9                     @ publish new rPC
     str     r1, [rGLUE, #offGlue_methodClassDex]
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
     /*
      * Return handling, calls through "glue code".
@@ -9170,12 +9569,19 @@ common_returnFromMethod:
  *
  * This does not return.
  */
+     .global dvmMterpCommonExceptionThrown
+dvmMterpCommonExceptionThrown:
 common_exceptionThrown:
 .LexceptionNew:
     mov     r0, #kInterpEntryThrow
     mov     r9, #0
     bl      common_periodicChecks
 
+#if defined(WITH_JIT)
+    mov     r2,#kJitTSelectAbort        @ abandon trace selection in progress
+    str     r2,[rGLUE,#offGlue_jitState]
+#endif
+
     ldr     r10, [rGLUE, #offGlue_self] @ r10<- glue->self
     ldr     r9, [r10, #offThread_exception] @ r9<- self->exception
     mov     r1, r10                     @ r1<- self
index 8deb4aa..9c380ea 100644 (file)
@@ -175,6 +175,8 @@ unspecified registers or condition codes.
  * interpreter, we don't have to worry about pre-ARMv5 THUMB interwork.
  */
 #define GOTO_OPCODE(_reg)       add     pc, rIBASE, _reg, lsl #6
+#define GOTO_OPCODE_IFEQ(_reg)  addeq   pc, rIBASE, _reg, lsl #6
+#define GOTO_OPCODE_IFNE(_reg)  addne   pc, rIBASE, _reg, lsl #6
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
@@ -182,6 +184,14 @@ unspecified registers or condition codes.
 #define GET_VREG(_reg, _vreg)   ldr     _reg, [rFP, _vreg, lsl #2]
 #define SET_VREG(_reg, _vreg)   str     _reg, [rFP, _vreg, lsl #2]
 
+#if defined(WITH_JIT)
+/*
+ * Null definition for overhead measuring purposes
+ */
+#define GET_JIT_TABLE(_reg)         ldr     _reg,[rGLUE,#offGlue_pJitTable]
+#define GET_JIT_PROF_TABLE(_reg)    ldr     _reg,[rGLUE,#offGlue_pJitProfTable]
+#endif
+
 /*
  * Convert a virtual register index into an address.
  */
@@ -302,10 +312,21 @@ dvmMterpStdRun:
     cmp     r1, #kInterpEntryInstr      @ usual case?
     bne     .Lnot_instr                 @ no, handle it
 
+#if defined(WITH_JIT)
+.Lno_singleStep:
+    /* Entry is always a possible trace start */
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_INST()
+    cmp    r0,#0
+    bne    common_updateProfile
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+#else
     /* start executing the instruction at rPC */
     FETCH_INST()                        @ load rINST from rPC
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 .Lnot_instr:
     cmp     r1, #kInterpEntryReturn     @ were we returning from a method?
@@ -315,6 +336,22 @@ dvmMterpStdRun:
     cmp     r1, #kInterpEntryThrow      @ were we throwing an exception?
     beq     common_exceptionThrown
 
+#if defined(WITH_JIT)
+.Lnot_throw:
+    ldr     r0,[rGLUE, #offGlue_jitResume]
+    ldr     r2,[rGLUE, #offGlue_jitResumePC]
+    cmp     r1, #kInterpEntryResume     @ resuming after Jit single-step?
+    bne     .Lbad_arg
+    cmp     rPC,r2
+    bne     .Lno_singleStep             @ must have branched, don't resume
+    mov     r1, #kInterpEntryInstr
+    strb    r1, [rGLUE, #offGlue_entryPoint]
+    ldr     rINST, .LdvmCompilerTemplate
+    bx      r0                          @ re-enter the translation
+.LdvmCompilerTemplate:
+    .word   dvmCompilerTemplateStart
+#endif
+
 .Lbad_arg:
     ldr     r0, strBadEntryPoint
     @ r1 holds value of entryPoint
@@ -1111,10 +1148,18 @@ dalvik_inst:
     movs    r9, r0, asr #24             @ r9<- ssssssAA (sign-extended)
     mov     r9, r9, lsl #1              @ r9<- byte offset
     bmi     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
-
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 64
@@ -1130,9 +1175,18 @@ dalvik_inst:
     FETCH_S(r0, 1)                      @ r0<- ssssAAAA (sign-extended)
     movs    r9, r0, asl #1              @ r9<- byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /* ------------------------------ */
@@ -1157,10 +1211,18 @@ dalvik_inst:
     orrs    r0, r0, r1, lsl #16         @ r0<- AAAAaaaa, check sign
     mov     r9, r0, asl #1              @ r9<- byte offset
     ble     common_backwardBranch       @ backward branch, do periodic checks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
-
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 64
@@ -1186,9 +1248,18 @@ dalvik_inst:
     movs    r9, r0, asl #1              @ r9<- branch byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
     beq     common_backwardBranch       @ (want to use BLE but V is unknown)
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /* ------------------------------ */
@@ -1216,9 +1287,18 @@ dalvik_inst:
     movs    r9, r0, asl #1              @ r9<- branch byte offset, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
     beq     common_backwardBranch       @ (want to use BLE but V is unknown)
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1465,9 +1545,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1495,9 +1582,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1525,9 +1619,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1555,9 +1656,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1585,9 +1693,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1615,9 +1730,16 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ yes, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    b        common_testUpdateProfile
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1642,9 +1764,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1669,9 +1801,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1696,9 +1838,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1723,9 +1875,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1750,9 +1912,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -1777,9 +1949,19 @@ dalvik_inst:
     FETCH_S(r9, 1)                      @ r9<- branch offset, in code units
     movs    r9, r9, asl #1              @ convert to bytes, check sign
     bmi     common_backwardBranch       @ backward branch, do periodic checks
-1:  FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+1:
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#else
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 
@@ -9292,15 +9474,192 @@ d2l_doconv:
 dvmAsmSisterEnd:
 
 /* File: armv5te/footer.S */
+
 /*
  * ===========================================================================
  *  Common subroutines and data
  * ===========================================================================
  */
 
+
+
     .text
     .align  2
 
+#if defined(WITH_JIT)
+/*
+ * Return from the translation cache to the interpreter when the compiler is
+ * having issues translating/executing a Dalvik instruction. We have to skip
+ * the code cache lookup, otherwise it is possible to indefinitely bounce
+ * between the interpreter and the code cache if the instruction that fails
+ * to be compiled happens to be at a trace start.
+ */
+    .global dvmJitToInterpPunt
+dvmJitToInterpPunt:
+    mov    rPC, r0
+#ifdef EXIT_STATS
+    mov    r0,lr
+    bl     dvmBumpPunt;
+#endif
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    FETCH_INST()
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+
+/*
+ * Return to the interpreter to handle a single instruction.
+ * On entry:
+ *    r0 <= PC
+ *    r1 <= PC of resume instruction
+ *    lr <= resume point in translation
+ */
+    .global dvmJitToInterpSingleStep
+dvmJitToInterpSingleStep:
+    str    lr,[rGLUE,#offGlue_jitResume]
+    str    r1,[rGLUE,#offGlue_jitResumePC]
+    mov    r1,#kInterpEntryInstr
+    @ enum is 4 bytes in aapcs-EABI
+    str    r1, [rGLUE, #offGlue_entryPoint]
+    mov    rPC,r0
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    mov    r2,#kJitSingleStep     @ Ask for single step and then revert
+    str    r2,[rGLUE,#offGlue_jitState]
+    mov    r1,#1                  @ set changeInterp to bail to debug interp
+    b      common_gotoBail
+
+
+/*
+ * Return from the translation cache and immediately request
+ * a translation for the exit target.  Commonly used following
+ * invokes.
+ */
+    .global dvmJitToTraceSelect
+dvmJitToTraceSelect:
+    ldr    rPC,[r14, #-1]           @ get our target PC
+    add    rINST,r14,#-5            @ save start of chain branch
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    beq    2f
+    mov    r1,rINST
+    bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    bx     r0                       @ continue native execution
+
+/* No translation, so request one if profiling isn't disabled */
+2:
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_INST()
+    cmp    r0, #0
+    bne    common_selectTrace
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+
+/*
+ * Return from the translation cache to the interpreter.
+ * The return was done with a BLX from thumb mode, and
+ * the following 32-bit word contains the target rPC value.
+ * Note that lr (r14) will have its low-order bit set to denote
+ * its thumb-mode origin.
+ *
+ * We'll need to stash our lr origin away, recover the new
+ * target and then check to see if there is a translation available
+ * for our new target.  If so, we do a translation chain and
+ * go back to native execution.  Otherwise, it's back to the
+ * interpreter (after treating this entry as a potential
+ * trace start).
+ */
+    .global dvmJitToInterpNormal
+dvmJitToInterpNormal:
+    ldr    rPC,[r14, #-1]           @ get our target PC
+    add    rINST,r14,#-5            @ save start of chain branch
+#ifdef EXIT_STATS
+    bl     dvmBumpNormal
+#endif
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    beq    1f                       @ go if not, otherwise do chain
+    mov    r1,rINST
+    bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    bx     r0                       @ continue native execution
+
+/*
+ * Return from the translation cache to the interpreter to do method invocation.
+ * Check if translation exists for the callee, but don't chain to it.
+ */
+    .global dvmJitToInterpNoChain
+dvmJitToInterpNoChain:
+#ifdef EXIT_STATS
+    bl     dvmBumpNoChain
+#endif
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    cmp    r0,#0
+    bxne   r0                       @ continue native execution if so
+
+/*
+ * No translation, restore interpreter regs and start interpreting.
+ * rGLUE & rFP were preserved in the translated code, and rPC has
+ * already been restored by the time we get here.  We'll need to set
+ * up rIBASE & rINST, and load the address of the JitTable into r0.
+ */
+1:
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    FETCH_INST()
+    GET_JIT_PROF_TABLE(r0)
+    @ NOTE: intended fallthrough
+/*
+ * Common code to update potential trace start counter, and initiate
+ * a trace-build if appropriate.  On entry, rPC should point to the
+ * next instruction to execute, and rINST should be already loaded with
+ * the next opcode word, and r0 holds a pointer to the jit profile
+ * table (pJitProfTable).
+ */
+common_testUpdateProfile:
+    cmp     r0,#0
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE_IFEQ(ip)       @ if not profiling, fallthrough otherwise */
+
+common_updateProfile:
+    eor     r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
+    lsl     r3,r3,#23          @ shift out excess 511
+    ldrb    r1,[r0,r3,lsr #23] @ get counter
+    GET_INST_OPCODE(ip)
+    subs    r1,r1,#1           @ decrement counter
+    strb    r1,[r0,r3,lsr #23] @ and store it
+    GOTO_OPCODE_IFNE(ip)       @ if not threshold, fallthrough otherwise */
+
+/*
+ * Here, we switch to the debug interpreter to request
+ * trace selection.  First, though, check to see if there
+ * is already a native translation in place (and, if so,
+ * jump to it now).
+ */
+    mov     r1,#255
+    strb    r1,[r0,r3,lsr #23] @ reset counter
+    EXPORT_PC()
+    mov     r0,rPC
+    bl      dvmJitGetCodeAddr           @ r0<- dvmJitGetCodeAddr(rPC)
+    cmp     r0,#0
+    ldrne   rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    beq     common_selectTrace
+    bxne    r0                          @ jump to the translation
+common_selectTrace:
+    mov     r2,#kJitTSelectRequest      @ ask for trace selection
+    str     r2,[rGLUE,#offGlue_jitState]
+    mov     r1,#1                       @ set changeInterp
+    b       common_gotoBail
+
+.LdvmCompilerTemplateStart:
+    .word   dvmCompilerTemplateStart
+
+#endif
+
 /*
  * Common code when a backward branch is taken.
  *
@@ -9310,9 +9669,18 @@ dvmAsmSisterEnd:
 common_backwardBranch:
     mov     r0, #kInterpEntryInstr
     bl      common_periodicChecks
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
+    FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+#else
     FETCH_ADVANCE_INST_RB(r9)           @ update rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
 
 /*
@@ -9486,6 +9854,10 @@ common_invokeMethodNoRange:
 #endif
     str     rFP, [r10, #offStackSaveArea_prevFrame]
     str     rPC, [r10, #offStackSaveArea_savedPc]
+#if defined(WITH_JIT)
+    mov     r9, #0
+    str     r9, [r10, #offStackSaveArea_returnAddr]
+#endif
     str     r0, [r10, #offStackSaveArea_method]
     tst     r3, #ACC_NATIVE
     bne     .LinvokeNative
@@ -9514,11 +9886,22 @@ common_invokeMethodNoRange:
     @ r0=methodToCall, r1=newFp, r2=self, r3=newMethodClass, r9=newINST
     str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
     str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+#if defined(WITH_JIT)
+    GET_JIT_PROF_TABLE(r0)
     mov     rFP, r1                         @ fp = newFp
     GET_PREFETCHED_OPCODE(ip, r9)           @ extract prefetched opcode from r9
     mov     rINST, r9                       @ publish new rINST
     str     r1, [r2, #offThread_curFrame]   @ self->curFrame = newFp
+    cmp     r0,#0
+    bne     common_updateProfile
     GOTO_OPCODE(ip)                         @ jump to next instruction
+#else
+    mov     rFP, r1                         @ fp = newFp
+    GET_PREFETCHED_OPCODE(ip, r9)           @ extract prefetched opcode from r9
+    mov     rINST, r9                       @ publish new rINST
+    str     r1, [r2, #offThread_curFrame]   @ self->curFrame = newFp
+    GOTO_OPCODE(ip)                         @ jump to next instruction
+#endif
 
 .LinvokeNative:
     @ Prep for the native call
@@ -9624,10 +10007,26 @@ common_returnFromMethod:
     str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
     ldr     r1, [r10, #offClassObject_pDvmDex]   @ r1<- method->clazz->pDvmDex
     str     rFP, [r3, #offThread_curFrame]  @ self->curFrame = fp
+#if defined(WITH_JIT)
+    ldr     r3, [r0, #offStackSaveArea_returnAddr] @ r3 = saveArea->returnAddr
+    GET_JIT_PROF_TABLE(r0)
+    mov     rPC, r9                     @ publish new rPC
+    str     r1, [rGLUE, #offGlue_methodClassDex]
+    cmp     r3, #0                      @ caller is compiled code
+    bne     1f
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    cmp     r0,#0
+    bne     common_updateProfile
+    GOTO_OPCODE(ip)                     @ jump to next instruction
+1:
+    ldr     rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
+    blx     r3
+#else
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     mov     rPC, r9                     @ publish new rPC
     str     r1, [rGLUE, #offGlue_methodClassDex]
     GOTO_OPCODE(ip)                     @ jump to next instruction
+#endif
 
     /*
      * Return handling, calls through "glue code".
@@ -9650,12 +10049,19 @@ common_returnFromMethod:
  *
  * This does not return.
  */
+     .global dvmMterpCommonExceptionThrown
+dvmMterpCommonExceptionThrown:
 common_exceptionThrown:
 .LexceptionNew:
     mov     r0, #kInterpEntryThrow
     mov     r9, #0
     bl      common_periodicChecks
 
+#if defined(WITH_JIT)
+    mov     r2,#kJitTSelectAbort        @ abandon trace selection in progress
+    str     r2,[rGLUE,#offGlue_jitState]
+#endif
+
     ldr     r10, [rGLUE, #offGlue_self] @ r10<- glue->self
     ldr     r9, [r10, #offThread_exception] @ r9<- self->exception
     mov     r1, r10                     @ r1<- self
index 420873e..0b70c9e 100644 (file)
@@ -26,6 +26,7 @@
 #include "interp/InterpDefs.h"
 #include "mterp/Mterp.h"
 #include <math.h>                   // needed for fmod, fmodf
+#include "mterp/common/FindInterface.h"
 
 /*
  * Configuration defines.  These affect the C implementations, i.e. the
@@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
  * If we're building without debug and profiling support, we never switch.
  */
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
+#if defined(WITH_JIT)
+# define NEED_INTERP_SWITCH(_current) (                                     \
+    (_current == INTERP_STD) ?                                              \
+        dvmJitDebuggerOrProfilerActive(interpState->jitState) :             \
+        !dvmJitDebuggerOrProfilerActive(interpState->jitState) )
+#else
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
         dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() )
+#endif
 #else
 # define NEED_INTERP_SWITCH(_current) (false)
 #endif
 
 /*
- * Look up an interface on a class using the cache.
- */
-INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
-    u4 methodIdx, const Method* method, DvmDex* methodClassDex)
-{
-#define ATOMIC_CACHE_CALC \
-    dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex)
-
-    return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache,
-                DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx);
-
-#undef ATOMIC_CACHE_CALC
-}
-
-/*
  * Check to see if "obj" is NULL.  If so, throw an exception.  Assumes the
  * pc has already been exported to the stack.
  *
@@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
     return true;
 }
 
-
 /* File: cstubs/stubdefs.c */
 /* this is a standard (no debug support) interpreter */
 #define INTERP_TYPE INTERP_STD
@@ -3877,6 +3869,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
 #endif
         newSaveArea->prevFrame = fp;
         newSaveArea->savedPc = pc;
+#if defined(WITH_JIT)
+        newSaveArea->returnAddr = 0;
+#endif
         newSaveArea->method = methodToCall;
 
         if (!dvmIsNativeMethod(methodToCall)) {
@@ -3971,7 +3966,6 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
     assert(false);      // should not get here
 GOTO_TARGET_END
 
-
 /* File: cstubs/enddefs.c */
 
 /* undefine "magic" name remapping */
index 2f4b2e8..80e87cf 100644 (file)
@@ -26,6 +26,7 @@
 #include "interp/InterpDefs.h"
 #include "mterp/Mterp.h"
 #include <math.h>                   // needed for fmod, fmodf
+#include "mterp/common/FindInterface.h"
 
 /*
  * Configuration defines.  These affect the C implementations, i.e. the
@@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
  * If we're building without debug and profiling support, we never switch.
  */
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
+#if defined(WITH_JIT)
+# define NEED_INTERP_SWITCH(_current) (                                     \
+    (_current == INTERP_STD) ?                                              \
+        dvmJitDebuggerOrProfilerActive(interpState->jitState) :             \
+        !dvmJitDebuggerOrProfilerActive(interpState->jitState) )
+#else
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
         dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() )
+#endif
 #else
 # define NEED_INTERP_SWITCH(_current) (false)
 #endif
 
 /*
- * Look up an interface on a class using the cache.
- */
-INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
-    u4 methodIdx, const Method* method, DvmDex* methodClassDex)
-{
-#define ATOMIC_CACHE_CALC \
-    dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex)
-
-    return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache,
-                DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx);
-
-#undef ATOMIC_CACHE_CALC
-}
-
-/*
  * Check to see if "obj" is NULL.  If so, throw an exception.  Assumes the
  * pc has already been exported to the stack.
  *
@@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
     return true;
 }
 
-
 /* File: cstubs/stubdefs.c */
 /* this is a standard (no debug support) interpreter */
 #define INTERP_TYPE INTERP_STD
index 92c29fb..326ba3d 100644 (file)
@@ -26,6 +26,7 @@
 #include "interp/InterpDefs.h"
 #include "mterp/Mterp.h"
 #include <math.h>                   // needed for fmod, fmodf
+#include "mterp/common/FindInterface.h"
 
 /*
  * Configuration defines.  These affect the C implementations, i.e. the
@@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
  * If we're building without debug and profiling support, we never switch.
  */
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
+#if defined(WITH_JIT)
+# define NEED_INTERP_SWITCH(_current) (                                     \
+    (_current == INTERP_STD) ?                                              \
+        dvmJitDebuggerOrProfilerActive(interpState->jitState) :             \
+        !dvmJitDebuggerOrProfilerActive(interpState->jitState) )
+#else
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
         dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() )
+#endif
 #else
 # define NEED_INTERP_SWITCH(_current) (false)
 #endif
 
 /*
- * Look up an interface on a class using the cache.
- */
-INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
-    u4 methodIdx, const Method* method, DvmDex* methodClassDex)
-{
-#define ATOMIC_CACHE_CALC \
-    dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex)
-
-    return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache,
-                DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx);
-
-#undef ATOMIC_CACHE_CALC
-}
-
-/*
  * Check to see if "obj" is NULL.  If so, throw an exception.  Assumes the
  * pc has already been exported to the stack.
  *
@@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
     return true;
 }
 
-
 /* File: cstubs/stubdefs.c */
 /* this is a standard (no debug support) interpreter */
 #define INTERP_TYPE INTERP_STD
index 70794cc..9cf7881 100644 (file)
@@ -26,6 +26,7 @@
 #include "interp/InterpDefs.h"
 #include "mterp/Mterp.h"
 #include <math.h>                   // needed for fmod, fmodf
+#include "mterp/common/FindInterface.h"
 
 /*
  * Configuration defines.  These affect the C implementations, i.e. the
@@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
  * If we're building without debug and profiling support, we never switch.
  */
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
+#if defined(WITH_JIT)
+# define NEED_INTERP_SWITCH(_current) (                                     \
+    (_current == INTERP_STD) ?                                              \
+        dvmJitDebuggerOrProfilerActive(interpState->jitState) :             \
+        !dvmJitDebuggerOrProfilerActive(interpState->jitState) )
+#else
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
         dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() )
+#endif
 #else
 # define NEED_INTERP_SWITCH(_current) (false)
 #endif
 
 /*
- * Look up an interface on a class using the cache.
- */
-INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
-    u4 methodIdx, const Method* method, DvmDex* methodClassDex)
-{
-#define ATOMIC_CACHE_CALC \
-    dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex)
-
-    return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache,
-                DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx);
-
-#undef ATOMIC_CACHE_CALC
-}
-
-/*
  * Check to see if "obj" is NULL.  If so, throw an exception.  Assumes the
  * pc has already been exported to the stack.
  *
@@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
     return true;
 }
 
-
 /* File: cstubs/stubdefs.c */
 /* this is a standard (no debug support) interpreter */
 #define INTERP_TYPE INTERP_STD
index 188639c..c8f428c 100644 (file)
@@ -26,6 +26,7 @@
 #include "interp/InterpDefs.h"
 #include "mterp/Mterp.h"
 #include <math.h>                   // needed for fmod, fmodf
+#include "mterp/common/FindInterface.h"
 
 /*
  * Configuration defines.  These affect the C implementations, i.e. the
@@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
  * If we're building without debug and profiling support, we never switch.
  */
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
+#if defined(WITH_JIT)
+# define NEED_INTERP_SWITCH(_current) (                                     \
+    (_current == INTERP_STD) ?                                              \
+        dvmJitDebuggerOrProfilerActive(interpState->jitState) :             \
+        !dvmJitDebuggerOrProfilerActive(interpState->jitState) )
+#else
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
         dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() )
+#endif
 #else
 # define NEED_INTERP_SWITCH(_current) (false)
 #endif
 
 /*
- * Look up an interface on a class using the cache.
- */
-INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
-    u4 methodIdx, const Method* method, DvmDex* methodClassDex)
-{
-#define ATOMIC_CACHE_CALC \
-    dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex)
-
-    return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache,
-                DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx);
-
-#undef ATOMIC_CACHE_CALC
-}
-
-/*
  * Check to see if "obj" is NULL.  If so, throw an exception.  Assumes the
  * pc has already been exported to the stack.
  *
@@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
     return true;
 }
 
-
 /* File: portable/portdbg.c */
 #define INTERP_FUNC_NAME dvmInterpretDbg
 #define INTERP_TYPE INTERP_DBG
@@ -428,6 +420,14 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
 #define CHECK_DEBUG_AND_PROF() \
     checkDebugAndProf(pc, fp, self, curMethod, &debugIsMethodEntry)
 
+#if defined(WITH_JIT)
+#define CHECK_JIT() \
+    if (dvmCheckJit(pc, self, interpState)) GOTO_bail_switch()
+#else
+#define CHECK_JIT() \
+    ((void)0)
+#endif
+
 /* File: portable/stubdefs.c */
 /*
  * In the C mterp stubs, "goto" is a function call followed immediately
@@ -459,6 +459,7 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
         inst = FETCH(0);                                                    \
         CHECK_DEBUG_AND_PROF();                                             \
         CHECK_TRACKED_REFS();                                               \
+        CHECK_JIT();                                                        \
         goto *handlerTable[INST_INST(inst)];                                \
     }
 #else
@@ -1479,11 +1480,32 @@ bool INTERP_FUNC_NAME(Thread* self, InterpState* interpState)
     const Method* methodToCall;
     bool methodCallRange;
 
+
 #if defined(THREADED_INTERP)
     /* static computed goto table */
     DEFINE_GOTO_TABLE(handlerTable);
 #endif
 
+#if defined(WITH_JIT)
+#if 0
+    LOGD("*DebugInterp - entrypoint is %d, tgt is 0x%x, %s\n",
+         interpState->entryPoint,
+         interpState->pc,
+         interpState->method->name);
+#endif
+
+#if INTERP_TYPE == INTERP_DBG
+    /* Check to see if we've got a trace selection request.  If we do,
+     * but something is amiss, revert to the fast interpreter.
+     */
+    if (dvmJitCheckTraceRequest(self,interpState)) {
+        interpState->nextMode = INTERP_STD;
+        //LOGD("** something wrong, exiting\n");
+        return true;
+    }
+#endif
+#endif
+
     /* copy state in */
     curMethod = interpState->method;
     pc = interpState->pc;
@@ -4138,6 +4160,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
 #endif
         newSaveArea->prevFrame = fp;
         newSaveArea->savedPc = pc;
+#if defined(WITH_JIT)
+        newSaveArea->returnAddr = 0;
+#endif
         newSaveArea->method = methodToCall;
 
         if (!dvmIsNativeMethod(methodToCall)) {
@@ -4232,7 +4257,6 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
     assert(false);      // should not get here
 GOTO_TARGET_END
 
-
 /* File: portable/enddefs.c */
 /*--- end of opcodes ---*/
 
index 90d4ab4..baf7a86 100644 (file)
@@ -26,6 +26,7 @@
 #include "interp/InterpDefs.h"
 #include "mterp/Mterp.h"
 #include <math.h>                   // needed for fmod, fmodf
+#include "mterp/common/FindInterface.h"
 
 /*
  * Configuration defines.  These affect the C implementations, i.e. the
@@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
  * If we're building without debug and profiling support, we never switch.
  */
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
+#if defined(WITH_JIT)
+# define NEED_INTERP_SWITCH(_current) (                                     \
+    (_current == INTERP_STD) ?                                              \
+        dvmJitDebuggerOrProfilerActive(interpState->jitState) :             \
+        !dvmJitDebuggerOrProfilerActive(interpState->jitState) )
+#else
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
         dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() )
+#endif
 #else
 # define NEED_INTERP_SWITCH(_current) (false)
 #endif
 
 /*
- * Look up an interface on a class using the cache.
- */
-INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
-    u4 methodIdx, const Method* method, DvmDex* methodClassDex)
-{
-#define ATOMIC_CACHE_CALC \
-    dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex)
-
-    return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache,
-                DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx);
-
-#undef ATOMIC_CACHE_CALC
-}
-
-/*
  * Check to see if "obj" is NULL.  If so, throw an exception.  Assumes the
  * pc has already been exported to the stack.
  *
@@ -420,13 +413,14 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
     return true;
 }
 
-
 /* File: portable/portstd.c */
 #define INTERP_FUNC_NAME dvmInterpretStd
 #define INTERP_TYPE INTERP_STD
 
 #define CHECK_DEBUG_AND_PROF() ((void)0)
 
+#define CHECK_JIT() ((void)0)
+
 /* File: portable/stubdefs.c */
 /*
  * In the C mterp stubs, "goto" is a function call followed immediately
@@ -458,6 +452,7 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
         inst = FETCH(0);                                                    \
         CHECK_DEBUG_AND_PROF();                                             \
         CHECK_TRACKED_REFS();                                               \
+        CHECK_JIT();                                                        \
         goto *handlerTable[INST_INST(inst)];                                \
     }
 #else
@@ -1199,11 +1194,32 @@ bool INTERP_FUNC_NAME(Thread* self, InterpState* interpState)
     const Method* methodToCall;
     bool methodCallRange;
 
+
 #if defined(THREADED_INTERP)
     /* static computed goto table */
     DEFINE_GOTO_TABLE(handlerTable);
 #endif
 
+#if defined(WITH_JIT)
+#if 0
+    LOGD("*DebugInterp - entrypoint is %d, tgt is 0x%x, %s\n",
+         interpState->entryPoint,
+         interpState->pc,
+         interpState->method->name);
+#endif
+
+#if INTERP_TYPE == INTERP_DBG
+    /* Check to see if we've got a trace selection request.  If we do,
+     * but something is amiss, revert to the fast interpreter.
+     */
+    if (dvmJitCheckTraceRequest(self,interpState)) {
+        interpState->nextMode = INTERP_STD;
+        //LOGD("** something wrong, exiting\n");
+        return true;
+    }
+#endif
+#endif
+
     /* copy state in */
     curMethod = interpState->method;
     pc = interpState->pc;
@@ -3858,6 +3874,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
 #endif
         newSaveArea->prevFrame = fp;
         newSaveArea->savedPc = pc;
+#if defined(WITH_JIT)
+        newSaveArea->returnAddr = 0;
+#endif
         newSaveArea->method = methodToCall;
 
         if (!dvmIsNativeMethod(methodToCall)) {
@@ -3952,7 +3971,6 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
     assert(false);      // should not get here
 GOTO_TARGET_END
 
-
 /* File: portable/enddefs.c */
 /*--- end of opcodes ---*/
 
index 469b690..b17e530 100644 (file)
@@ -26,6 +26,7 @@
 #include "interp/InterpDefs.h"
 #include "mterp/Mterp.h"
 #include <math.h>                   // needed for fmod, fmodf
+#include "mterp/common/FindInterface.h"
 
 /*
  * Configuration defines.  These affect the C implementations, i.e. the
@@ -334,29 +335,21 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
  * If we're building without debug and profiling support, we never switch.
  */
 #if defined(WITH_PROFILER) || defined(WITH_DEBUGGER)
+#if defined(WITH_JIT)
+# define NEED_INTERP_SWITCH(_current) (                                     \
+    (_current == INTERP_STD) ?                                              \
+        dvmJitDebuggerOrProfilerActive(interpState->jitState) :             \
+        !dvmJitDebuggerOrProfilerActive(interpState->jitState) )
+#else
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
         dvmDebuggerOrProfilerActive() : !dvmDebuggerOrProfilerActive() )
+#endif
 #else
 # define NEED_INTERP_SWITCH(_current) (false)
 #endif
 
 /*
- * Look up an interface on a class using the cache.
- */
-INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
-    u4 methodIdx, const Method* method, DvmDex* methodClassDex)
-{
-#define ATOMIC_CACHE_CALC \
-    dvmInterpFindInterfaceMethod(thisClass, methodIdx, method, methodClassDex)
-
-    return (Method*) ATOMIC_CACHE_LOOKUP(methodClassDex->pInterfaceCache,
-                DEX_INTERFACE_CACHE_SIZE, thisClass, methodIdx);
-
-#undef ATOMIC_CACHE_CALC
-}
-
-/*
  * Check to see if "obj" is NULL.  If so, throw an exception.  Assumes the
  * pc has already been exported to the stack.
  *
@@ -420,7 +413,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
     return true;
 }
 
-
 /* File: cstubs/stubdefs.c */
 /* this is a standard (no debug support) interpreter */
 #define INTERP_TYPE INTERP_STD
@@ -2041,6 +2033,9 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
 #endif
         newSaveArea->prevFrame = fp;
         newSaveArea->savedPc = pc;
+#if defined(WITH_JIT)
+        newSaveArea->returnAddr = 0;
+#endif
         newSaveArea->method = methodToCall;
 
         if (!dvmIsNativeMethod(methodToCall)) {
@@ -2135,7 +2130,6 @@ GOTO_TARGET(invokeMethod, bool methodCallRange, const Method* _methodToCall,
     assert(false);      // should not get here
 GOTO_TARGET_END
 
-
 /* File: cstubs/enddefs.c */
 
 /* undefine "magic" name remapping */
index 6698959..9c7c2d6 100644 (file)
@@ -29,11 +29,32 @@ bool INTERP_FUNC_NAME(Thread* self, InterpState* interpState)
     const Method* methodToCall;
     bool methodCallRange;
 
+
 #if defined(THREADED_INTERP)
     /* static computed goto table */
     DEFINE_GOTO_TABLE(handlerTable);
 #endif
 
+#if defined(WITH_JIT)
+#if 0
+    LOGD("*DebugInterp - entrypoint is %d, tgt is 0x%x, %s\n",
+         interpState->entryPoint,
+         interpState->pc,
+         interpState->method->name);
+#endif
+
+#if INTERP_TYPE == INTERP_DBG
+    /* Check to see if we've got a trace selection request.  If we do,
+     * but something is amiss, revert to the fast interpreter.
+     */
+    if (dvmJitCheckTraceRequest(self,interpState)) {
+        interpState->nextMode = INTERP_STD;
+        //LOGD("** something wrong, exiting\n");
+        return true;
+    }
+#endif
+#endif
+
     /* copy state in */
     curMethod = interpState->method;
     pc = interpState->pc;
index a657f09..04132cb 100644 (file)
@@ -3,3 +3,11 @@
 
 #define CHECK_DEBUG_AND_PROF() \
     checkDebugAndProf(pc, fp, self, curMethod, &debugIsMethodEntry)
+
+#if defined(WITH_JIT)
+#define CHECK_JIT() \
+    if (dvmCheckJit(pc, self, interpState)) GOTO_bail_switch()
+#else
+#define CHECK_JIT() \
+    ((void)0)
+#endif
index 01fbda1..f55e8e7 100644 (file)
@@ -2,3 +2,5 @@
 #define INTERP_TYPE INTERP_STD
 
 #define CHECK_DEBUG_AND_PROF() ((void)0)
+
+#define CHECK_JIT() ((void)0)
index b809caf..305aebb 100644 (file)
@@ -28,6 +28,7 @@
         inst = FETCH(0);                                                    \
         CHECK_DEBUG_AND_PROF();                                             \
         CHECK_TRACKED_REFS();                                               \
+        CHECK_JIT();                                                        \
         goto *handlerTable[INST_INST(inst)];                                \
     }
 #else