
am de241376: am 91027c67: Merge "Remove unused compiler templates for armv7-a*"
[android-x86/dalvik.git] vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
index 9fb8892..4b2c15c 100644
@@ -62,7 +62,7 @@ The following registers have fixed assignments:
 
   reg nick      purpose
   r5  rFP       interpreted frame pointer, used for accessing locals and args
-  r6  rGLUE     MterpGlue pointer
+  r6  rSELF     thread pointer
 
 The following registers have fixed assignments in mterp but are scratch
 registers in compiled code
@@ -80,7 +80,7 @@ unspecified registers or condition codes.
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
 #define rFP     r5
-#define rGLUE   r6
+#define rSELF   r6
 #define rINST   r7
 #define rIBASE  r8
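
The rGLUE to rSELF switch above means the templates now index per-thread
interpreter state directly off r6: the offThread_* constants used throughout
this file are byte offsets into that state, so "ldr r0, [rSELF, #offThread_curFrame]"
is the assembly spelling of self->curFrame. A minimal C sketch of the idea
(the struct layout and field names here are illustrative, not the real Thread
definition):

#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for the per-thread interpreter state; the real
 * Thread struct in the Dalvik sources has many more fields. */
typedef struct Thread {
    const unsigned short *pc;   /* saved Dalvik PC ("stmia rSELF, {rPC, rFP}") */
    unsigned int *curFrame;     /* Dalvik frame pointer                        */
} Thread;

int main(void) {
    /* An offset-generation step would emit assembler constants such as
     * "#define offThread_curFrame <offsetof(Thread, curFrame)>". */
    printf("offThread_curFrame = %zu\n", offsetof(Thread, curFrame));
    return 0;
}
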
 
@@ -108,21 +108,10 @@ unspecified registers or condition codes.
  * ===========================================================================
  */
 
-/*
- * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
- * Jump to subroutine.
- *
- * May modify IP and LR.
- */
-.macro  LDR_PC_LR source
-    mov     lr, pc
-    ldr     pc, \source
-.endm
-
 
     .global dvmCompilerTemplateStart
     .type   dvmCompilerTemplateStart, %function
-    .text
+    .section .data.rel.ro
 
 dvmCompilerTemplateStart:
 
@@ -177,25 +166,25 @@ dvmCompiler_TEMPLATE_RETURN:
      * address in the code cache following the invoke instruction. Otherwise
      * return to the special dvmJitToInterpNoChain entry point.
      */
-#if defined(WITH_INLINE_PROFILING)
+#if defined(TEMPLATE_INLINE_PROFILING)
     stmfd   sp!, {r0-r2,lr}             @ preserve live registers
     mov     r0, r6
-    @ r0=rGlue
-    LDR_PC_LR ".LdvmFastJavaMethodTraceExit"
+    @ r0=rSELF
+    ldr     ip, .LdvmFastMethodTraceExit
+    blx     ip
     ldmfd   sp!, {r0-r2,lr}             @ restore live registers
 #endif
     SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
     ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
-    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    ldrb    r8, [rSELF, #offThread_breakFlags] @ r8<- breakFlags
     ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
+    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
 #if !defined(WITH_SELF_VERIFICATION)
     ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
 #else
     mov     r9, #0                      @ disable chaining
 #endif
-    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
                                         @ r2<- method we're returning to
-    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
     cmp     r2, #0                      @ break frame?
 #if !defined(WITH_SELF_VERIFICATION)
     beq     1f                          @ bail to interpreter
@@ -205,16 +194,15 @@ dvmCompiler_TEMPLATE_RETURN:
     ldr     r1, .LdvmJitToInterpNoChainNoProfile @ defined in footer.S
     mov     rFP, r10                    @ publish new FP
     ldr     r10, [r2, #offMethod_clazz] @ r10<- method->clazz
-    ldr     r8, [r8]                    @ r8<- suspendCount
 
-    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
+    str     r2, [rSELF, #offThread_method]@ self->method = newSave->method
     ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
-    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
+    str     rFP, [rSELF, #offThread_curFrame] @ curFrame = fp
     add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
-    str     r0, [rGLUE, #offGlue_methodClassDex]
-    cmp     r8, #0                      @ check the suspendCount
+    str     r0, [rSELF, #offThread_methodClassDex]
+    cmp     r8, #0                      @ check the break flags
     movne   r9, #0                      @ clear the chaining cell address
-    str     r9, [r3, #offThread_inJitCodeCache] @ in code cache or not
+    str     r9, [rSELF, #offThread_inJitCodeCache] @ in code cache or not
     cmp     r9, #0                      @ chaining cell exists?
     blxne   r9                          @ jump to the chaining cell
 #if defined(WITH_JIT_TUNING)
@@ -222,10 +210,11 @@ dvmCompiler_TEMPLATE_RETURN:
 #endif
     mov     pc, r1                      @ callsite is interpreted
 1:
-    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
+    mov     r0, #0
+    str     r0, [rSELF, #offThread_inJitCodeCache] @ reset inJitCodeCache
+    stmia   rSELF, {rPC, rFP}           @ SAVE_PC_FP_TO_SELF()
     ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
-    mov     r1, #0                      @ changeInterp = false
-    mov     r0, rGLUE                   @ Expecting rGLUE in r0
+    mov     r0, rSELF                   @ Expecting rSELF in r0
     blx     r2                          @ exit the interpreter
 
 /* ------------------------------ */
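
The reworked TEMPLATE_RETURN above reduces to a small decision: if the thread
has break flags pending, the chaining-cell return address is dropped and
control goes back to the interpreter; otherwise execution jumps straight back
into the code cache. A hedged C paraphrase (types and names are invented for
illustration; the assembly is the authoritative version):

#include <stddef.h>

typedef struct ThreadSketch {
    unsigned char breakFlags;   /* ldrb r8, [rSELF, #offThread_breakFlags]     */
    void *inJitCodeCache;       /* str  r9, [rSELF, #offThread_inJitCodeCache] */
} ThreadSketch;

/* Returns the address execution continues at: the caller's chaining cell in
 * the code cache, or the dvmJitToInterpNoChainNoProfile handler. */
static void *select_return_target(ThreadSketch *self,
                                  void *chainingCellRet,   /* r9 */
                                  void *interpNoChain)     /* r1 */
{
    if (self->breakFlags != 0)              /* cmp r8, #0 / movne r9, #0 */
        chainingCellRet = NULL;
    self->inJitCodeCache = chainingCellRet; /* str r9, [rSELF, ...]      */
    return chainingCellRet != NULL ? chainingCellRet  /* blxne r9        */
                                   : interpNoChain;   /* mov  pc, r1     */
}
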
@@ -241,14 +230,13 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
     ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
     ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
-    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
-    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
+    ldrb    r8, [rSELF, #offThread_breakFlags] @ r8<- breakFlags
     add     r3, r1, #1  @ Thumb addr is odd
     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
     SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
     sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
-    ldr     r8, [r8]                    @ r8<- suspendCount (int)
     cmp     r10, r9                     @ bottom < interpStackEnd?
     bxlo    lr                          @ return to raise stack overflow excep.
     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
@@ -263,7 +251,7 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
-    cmp     r8, #0                      @ suspendCount != 0
+    cmp     r8, #0                      @ breakFlags != 0
     bxne    lr                          @ bail to the interpreter
     tst     r10, #ACC_NATIVE
 #if !defined(WITH_SELF_VERIFICATION)
@@ -274,18 +262,18 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
 
     ldr     r10, .LdvmJitToInterpTraceSelectNoChain
     ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
-    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
 
-    @ Update "glue" values for the new method
-    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
-    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    @ Update "thread" values for the new method
+    str     r0, [rSELF, #offThread_method]    @ self->method = methodToCall
+    str     r3, [rSELF, #offThread_methodClassDex] @ self->methodClassDex = ...
     mov     rFP, r1                         @ fp = newFp
-    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
-#if defined(WITH_INLINE_PROFILING)
+    str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = newFp
+#if defined(TEMPLATE_INLINE_PROFILING)
     stmfd   sp!, {r0-r3}                    @ preserve r0-r3
     mov     r1, r6
-    @ r0=methodToCall, r1=rGlue
-    LDR_PC_LR ".LdvmFastMethodTraceEnter"
+    @ r0=methodToCall, r1=rSELF
+    ldr     ip, .LdvmFastMethodTraceEnter
+    blx     ip
     ldmfd   sp!, {r0-r3}                    @ restore r0-r3
 #endif
 
@@ -293,7 +281,7 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
 #if defined(WITH_JIT_TUNING)
     mov     r0, #kInlineCacheMiss
 #endif
-    mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain
+    bx      r10                         @ dvmJitToInterpTraceSelectNoChain
 
 /* ------------------------------ */
     .balign 4
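
TEMPLATE_INVOKE_METHOD_NO_OPT above, like the chained and native invoke
variants that follow, opens with the same frame-allocation check: carve the
callee's frame below the caller's save area and bail to the interpreter if
the outs region would dip under the stack limit. A C sketch of that
arithmetic, assuming SAVEAREA_FROM_FP simply subtracts sizeofStackSaveArea
from the frame pointer:

#include <stdbool.h>
#include <stdint.h>

static bool callee_frame_fits(uint8_t *oldSaveArea,     /* SAVEAREA_FROM_FP(r1, rFP) */
                              uint8_t *interpStackEnd,  /* offThread_interpStackEnd  */
                              uint32_t regsSize,        /* r7 */
                              uint32_t outsSize,        /* r2 */
                              uint32_t sizeofStackSaveArea)
{
    uint8_t *newFp       = oldSaveArea - regsSize * 4;   /* sub r1, r1, r7, lsl #2    */
    uint8_t *newSaveArea = newFp - sizeofStackSaveArea;  /* SAVEAREA_FROM_FP(r10, r1) */
    uint8_t *bottom      = newSaveArea - outsSize * 4;   /* sub r10, r10, r2, lsl #2  */
    return bottom >= interpStackEnd;                     /* cmp r10, r9 / bxlo lr     */
}
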
@@ -309,15 +297,14 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
     @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
     @ methodToCall is guaranteed to be non-native
 .LinvokeChain:
-    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
-    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
+    ldrb    r8, [rSELF, #offThread_breakFlags]        @ r8<- breakFlags
     add     r3, r1, #1  @ Thumb addr is odd
     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
     SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
     add     r12, lr, #2                 @ setup the punt-to-interp address
     sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
-    ldr     r8, [r8]                    @ r8<- suspendCount (int)
     cmp     r10, r9                     @ bottom < interpStackEnd?
     bxlo    r12                         @ return to raise stack overflow excep.
     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
@@ -329,22 +316,22 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
-    cmp     r8, #0                      @ suspendCount != 0
+    cmp     r8, #0                      @ breakFlags != 0
     bxne    r12                         @ bail to the interpreter
 
     ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
-    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
 
-    @ Update "glue" values for the new method
-    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
-    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    @ Update "thread" values for the new method
+    str     r0, [rSELF, #offThread_method]    @ self->method = methodToCall
+    str     r3, [rSELF, #offThread_methodClassDex] @ self->methodClassDex = ...
     mov     rFP, r1                         @ fp = newFp
-    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
-#if defined(WITH_INLINE_PROFILING)
+    str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = newFp
+#if defined(TEMPLATE_INLINE_PROFILING)
     stmfd   sp!, {r0-r2,lr}             @ preserve clobbered live registers
     mov     r1, r6
-    @ r0=methodToCall, r1=rGlue
-    LDR_PC_LR ".LdvmFastMethodTraceEnter"
+    @ r0=methodToCall, r1=rSELF
+    ldr     ip, .LdvmFastMethodTraceEnter
+    blx     ip
     ldmfd   sp!, {r0-r2,lr}             @ restore registers
 #endif
 
@@ -382,7 +369,7 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
     ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
     ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
     ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
-    ldr     r9, [rGLUE, #offGlue_icRechainCount]   @ r1 <- shared rechainCount
+    ldr     r9, [rSELF, #offThread_icRechainCount] @ r1 <- shared rechainCount
     cmp     r3, r8          @ predicted class == actual class?
 #if defined(WITH_JIT_TUNING)
     ldr     r7, .LdvmICHitCount
@@ -404,7 +391,7 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
     cmp     r8, #0          @ initialized class or not
     moveq   r1, #0
     subne   r1, r9, #1      @ count--
-    strne   r1, [rGLUE, #offGlue_icRechainCount]   @ write back to InterpState
+    strne   r1, [rSELF, #offThread_icRechainCount]  @ write back to thread
     add     lr, lr, #4      @ return to fully-resolve landing pad
     /*
      * r1 <- count
@@ -422,13 +409,12 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
     @ r7 = methodToCall->registersSize
-    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
-    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
+    ldrb    r8, [rSELF, #offThread_breakFlags]        @ r8<- breakFlags
     add     r3, r1, #1  @ Thumb addr is odd
     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
     SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
-    ldr     r8, [r8]                    @ r3<- suspendCount (int)
     cmp     r10, r9                     @ bottom < interpStackEnd?
     bxlo    lr                          @ return to raise stack overflow excep.
     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
@@ -438,9 +424,8 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
-    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
-    cmp     r8, #0                      @ suspendCount != 0
+    cmp     r8, #0                      @ breakFlags != 0
     ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
 #if !defined(WITH_SELF_VERIFICATION)
     bxne    lr                          @ bail to the interpreter
@@ -449,50 +434,52 @@ dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
 #endif
 
     @ go ahead and transfer control to the native code
-    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
+    ldr     r9, [rSELF, #offThread_jniLocal_topCookie]@r9<-thread->localRef->...
     mov     r2, #0
-    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
-    str     r2, [r3, #offThread_inJitCodeCache] @ not in the jit code cache
+    str     r1, [rSELF, #offThread_curFrame]   @ curFrame = newFp
+    str     r2, [rSELF, #offThread_inJitCodeCache] @ not in the jit code cache
     str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
                                         @ newFp->localRefCookie=top
-    mov     r9, r3                      @ r9<- glue->self (preserve)
     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
 
-    mov     r2, r0                      @ r2<- methodToCall
-    mov     r0, r1                      @ r0<- newFP
-    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
-#if defined(WITH_INLINE_PROFILING)
-    @ r2=methodToCall, r6=rGLUE
+    mov     r2, r0                        @ arg2<- methodToCall
+    mov     r0, r1                        @ arg0<- newFP
+    add     r1, rSELF, #offThread_retval  @ arg1<- &retval
+    mov     r3, rSELF                     @ arg3<- self
+#if defined(TEMPLATE_INLINE_PROFILING)
+    @ r2=methodToCall, r6=rSELF
     stmfd   sp!, {r2,r6}                @ to be consumed after JNI return
     stmfd   sp!, {r0-r3}                @ preserve r0-r3
     mov     r0, r2
     mov     r1, r6
-    @ r0=JNIMethod, r1=rGlue
-    LDR_PC_LR ".LdvmFastMethodTraceEnter"
+    @ r0=JNIMethod, r1=rSELF
+    ldr     ip, .LdvmFastMethodTraceEnter
+    blx     ip
     ldmfd   sp!, {r0-r3}                @ restore r0-r3
 #endif
 
     blx     r8                          @ off to the native code
 
-#if defined(WITH_INLINE_PROFILING)
+#if defined(TEMPLATE_INLINE_PROFILING)
     ldmfd   sp!, {r0-r1}                @ restore r2 and r6
-    @ r0=JNIMethod, r1=rGlue
-    LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+    @ r0=JNIMethod, r1=rSELF
+    ldr     ip, .LdvmFastNativeMethodTraceExit
+    blx     ip
 #endif
-    @ native return; r9=self, r10=newSaveArea
+    @ native return; r10=newSaveArea
     @ equivalent to dvmPopJniLocals
     ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
     ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
-    ldr     r1, [r9, #offThread_exception] @ check for exception
-    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
+    ldr     r1, [rSELF, #offThread_exception] @ check for exception
+    str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = fp
     cmp     r1, #0                      @ null?
-    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
+    str     r0, [rSELF, #offThread_jniLocal_topCookie] @ new top <- old top
     ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
 
     @ r0 = dalvikCallsitePC
     bne     .LhandleException           @ no, handle exception
 
-    str     r2, [r9, #offThread_inJitCodeCache] @ set the mode properly
+    str     r2, [rSELF, #offThread_inJitCodeCache] @ set the mode properly
     cmp     r2, #0                      @ return chaining cell still exists?
     bxne    r2                          @ yes - go ahead
 
@@ -533,9 +520,8 @@ dvmCompiler_TEMPLATE_MUL_LONG:
     mul     ip, r2, r1                  @  ip<- ZxW
     umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
-    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
-    mov     r0,r9
-    mov     r1,r10
+    mov     r0, r9
+    add     r1, r2, r10                 @  r1<- r10 + low(ZxW + (YxX))
     bx      lr
 
 /* ------------------------------ */
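
The MUL_LONG cleanup above keeps the usual 32x32 decomposition of a 64-bit
multiply: the low word comes from the unsigned low-half product, and the high
word adds the two cross products to its carry-out. A standalone C check of
the same arithmetic (illustrative only):

#include <assert.h>
#include <stdint.h>

static uint64_t mul64_low_from_halves(uint64_t a, uint64_t b) {
    uint32_t aLo = (uint32_t)a, aHi = (uint32_t)(a >> 32);
    uint32_t bLo = (uint32_t)b, bHi = (uint32_t)(b >> 32);
    uint64_t lowProd = (uint64_t)aLo * bLo;                 /* umull r9, r10     */
    uint32_t cross   = aLo * bHi + aHi * bLo;               /* mul ip / mla r2   */
    uint32_t hi      = (uint32_t)(lowProd >> 32) + cross;   /* add r1, r2, r10   */
    return ((uint64_t)hi << 32) | (uint32_t)lowProd;        /* result in r0/r1   */
}

int main(void) {
    assert(mul64_low_from_halves(0x123456789ULL, 0x0ABCDEF01ULL)
           == 0x123456789ULL * 0x0ABCDEF01ULL);
    return 0;
}
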
@@ -603,485 +589,6 @@ dvmCompiler_TEMPLATE_USHR_LONG:
 
 /* ------------------------------ */
     .balign 4
-    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
-dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
-/* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
-/* File: armv5te-vfp/fbinop.S */
-    /*
-     * Generic 32-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     flds    s0,[r1]
-     flds    s1,[r2]
-     fadds   s2, s0, s1
-     fsts    s2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
-dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
-/* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
-/* File: armv5te-vfp/fbinop.S */
-    /*
-     * Generic 32-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     flds    s0,[r1]
-     flds    s1,[r2]
-     fsubs   s2, s0, s1
-     fsts    s2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
-dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
-/* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
-/* File: armv5te-vfp/fbinop.S */
-    /*
-     * Generic 32-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     flds    s0,[r1]
-     flds    s1,[r2]
-     fmuls   s2, s0, s1
-     fsts    s2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
-dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
-/* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
-/* File: armv5te-vfp/fbinop.S */
-    /*
-     * Generic 32-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     flds    s0,[r1]
-     flds    s1,[r2]
-     fdivs   s2, s0, s1
-     fsts    s2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
-dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
-/* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
-/* File: armv5te-vfp/fbinopWide.S */
-    /*
-     * Generic 64-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     fldd    d0,[r1]
-     fldd    d1,[r2]
-     faddd   d2, d0, d1
-     fstd    d2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
-dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
-/* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
-/* File: armv5te-vfp/fbinopWide.S */
-    /*
-     * Generic 64-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     fldd    d0,[r1]
-     fldd    d1,[r2]
-     fsubd   d2, d0, d1
-     fstd    d2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
-dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
-/* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
-/* File: armv5te-vfp/fbinopWide.S */
-    /*
-     * Generic 64-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     fldd    d0,[r1]
-     fldd    d1,[r2]
-     fmuld   d2, d0, d1
-     fstd    d2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
-dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
-/* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
-/* File: armv5te-vfp/fbinopWide.S */
-    /*
-     * Generic 64-bit floating point operation.  Provide an "instr" line that
-     * specifies an instruction that performs s2 = s0 op s1.
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = op1 address
-     *     r2 = op2 address
-     */
-     fldd    d0,[r1]
-     fldd    d1,[r2]
-     fdivd   d2, d0, d1
-     fstd    d2,[r0]
-     bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
-dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
-/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
-/* File: armv5te-vfp/funopNarrower.S */
-    /*
-     * Generic 64bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "s0 = op d0".
-     *
-     * For: double-to-int, double-to-float
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    fldd    d0, [r1]                    @ d0<- vB
-    fcvtsd  s0, d0                              @ s0<- op d0
-    fsts    s0, [r0]                    @ vA<- s0
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
-dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
-/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
-/* File: armv5te-vfp/funopNarrower.S */
-    /*
-     * Generic 64bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "s0 = op d0".
-     *
-     * For: double-to-int, double-to-float
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    fldd    d0, [r1]                    @ d0<- vB
-    ftosizd  s0, d0                              @ s0<- op d0
-    fsts    s0, [r0]                    @ vA<- s0
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
-dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
-/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
-/* File: armv5te-vfp/funopWider.S */
-    /*
-     * Generic 32bit-to-64bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "d0 = op s0".
-     *
-     * For: int-to-double, float-to-double
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    flds    s0, [r1]                    @ s0<- vB
-    fcvtds  d0, s0                              @ d0<- op s0
-    fstd    d0, [r0]                    @ vA<- d0
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
-dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
-/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
-/* File: armv5te-vfp/funop.S */
-    /*
-     * Generic 32bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "s1 = op s0".
-     *
-     * For: float-to-int, int-to-float
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    flds    s0, [r1]                    @ s0<- vB
-    ftosizs s1, s0                              @ s1<- op s0
-    fsts    s1, [r0]                    @ vA<- s1
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
-dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
-/* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
-/* File: armv5te-vfp/funopWider.S */
-    /*
-     * Generic 32bit-to-64bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "d0 = op s0".
-     *
-     * For: int-to-double, float-to-double
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    flds    s0, [r1]                    @ s0<- vB
-    fsitod  d0, s0                              @ d0<- op s0
-    fstd    d0, [r0]                    @ vA<- d0
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
-dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
-/* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
-/* File: armv5te-vfp/funop.S */
-    /*
-     * Generic 32bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "s1 = op s0".
-     *
-     * For: float-to-int, int-to-float
-     *
-     * On entry:
-     *     r0 = target dalvik register address
-     *     r1 = src dalvik register address
-     */
-    /* unop vA, vB */
-    flds    s0, [r1]                    @ s0<- vB
-    fsitos  s1, s0                              @ s1<- op s0
-    fsts    s1, [r0]                    @ vA<- s1
-    bx      lr
-
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
-dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
-/* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
-    /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register based on the results of the comparison.
-     *
-     * int compare(x, y) {
-     *     if (x == y) {
-     *         return 0;
-     *     } else if (x < y) {
-     *         return -1;
-     *     } else if (x > y) {
-     *         return 1;
-     *     } else {
-     *         return 1;
-     *     }
-     * }
-     *
-     * On entry:
-     *    r0 = &op1 [vBB]
-     *    r1 = &op2 [vCC]
-     */
-    /* op vAA, vBB, vCC */
-    fldd    d0, [r0]                    @ d0<- vBB
-    fldd    d1, [r1]                    @ d1<- vCC
-    fcmpd  d0, d1                       @ compare (vBB, vCC)
-    mov     r0, #1                      @ r0<- 1 (default)
-    fmstat                              @ export status flags
-    mvnmi   r0, #0                      @ (less than) r0<- -1
-    moveq   r0, #0                      @ (equal) r0<- 0
-    bx      lr
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
-dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
-/* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
-    /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register based on the results of the comparison.
-     *
-     * int compare(x, y) {
-     *     if (x == y) {
-     *         return 0;
-     *     } else if (x > y) {
-     *         return 1;
-     *     } else if (x < y) {
-     *         return -1;
-     *     } else {
-     *         return -1;
-     *     }
-     * }
-     * On entry:
-     *    r0 = &op1 [vBB]
-     *    r1 = &op2 [vCC]
-     */
-    /* op vAA, vBB, vCC */
-    fldd    d0, [r0]                    @ d0<- vBB
-    fldd    d1, [r1]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
-    mvn     r0, #0                      @ r0<- -1 (default)
-    fmstat                              @ export status flags
-    movgt   r0, #1                      @ (greater than) r0<- 1
-    moveq   r0, #0                      @ (equal) r0<- 0
-    bx      lr
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
-dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
-/* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
-    /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register based on the results of the comparison.
-     *
-     * int compare(x, y) {
-     *     if (x == y) {
-     *         return 0;
-     *     } else if (x < y) {
-     *         return -1;
-     *     } else if (x > y) {
-     *         return 1;
-     *     } else {
-     *         return 1;
-     *     }
-     * }
-     * On entry:
-     *    r0 = &op1 [vBB]
-     *    r1 = &op2 [vCC]
-     */
-    /* op vAA, vBB, vCC */
-    flds    s0, [r0]                    @ d0<- vBB
-    flds    s1, [r1]                    @ d1<- vCC
-    fcmps  s0, s1                      @ compare (vBB, vCC)
-    mov     r0, #1                      @ r0<- 1 (default)
-    fmstat                              @ export status flags
-    mvnmi   r0, #0                      @ (less than) r0<- -1
-    moveq   r0, #0                      @ (equal) r0<- 0
-    bx      lr
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
-dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
-/* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
-    /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register based on the results of the comparison.
-     *
-     * int compare(x, y) {
-     *     if (x == y) {
-     *         return 0;
-     *     } else if (x > y) {
-     *         return 1;
-     *     } else if (x < y) {
-     *         return -1;
-     *     } else {
-     *         return -1;
-     *     }
-     * }
-     * On entry:
-     *    r0 = &op1 [vBB]
-     *    r1 = &op2 [vCC]
-     */
-    /* op vAA, vBB, vCC */
-    flds    s0, [r0]                    @ d0<- vBB
-    flds    s1, [r1]                    @ d1<- vCC
-    fcmps  s0, s1                      @ compare (vBB, vCC)
-    mvn     r0, #0                      @ r0<- -1 (default)
-    fmstat                              @ export status flags
-    movgt   r0, #1                      @ (greater than) r0<- 1
-    moveq   r0, #0                      @ (equal) r0<- 0
-    bx      lr
-
-/* ------------------------------ */
-    .balign 4
-    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
-dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
-/* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
-    /*
-     * 64-bit floating point vfp sqrt operation.
-     * If the result is a NaN, bail out to library code to do
-     * the right thing.
-     *
-     * On entry:
-     *     r2 src addr of op1
-     * On exit:
-     *     r0,r1 = res
-     */
-    fldd    d0, [r2]
-    fsqrtd  d1, d0
-    fcmpd   d1, d1
-    fmstat
-    fmrrd   r0, r1, d1
-    bxeq    lr   @ Result OK - return
-    ldr     r2, .Lsqrt
-    fmrrd   r0, r1, d0   @ reload orig operand
-    bx      r2   @ tail call to sqrt library routine
-
-.Lsqrt:
-    .word   sqrt
-
-/* ------------------------------ */
-    .balign 4
     .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
 dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
 /* File: armv5te/TEMPLATE_THROW_EXCEPTION_COMMON.S */
@@ -1108,9 +615,9 @@ dvmCompiler_TEMPLATE_MEM_OP_DECODE:
      */
     vpush   {d0-d15}                    @ save out all fp registers
     push    {r0-r12,lr}                 @ save out all registers
+    ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
     mov     r0, lr                      @ arg0 <- link register
     mov     r1, sp                      @ arg1 <- stack pointer
-    ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
     blx     r2                          @ decode and handle the mem op
     pop     {r0-r12,lr}                 @ restore all registers
     vpop    {d0-d15}                    @ restore all fp registers
@@ -1278,22 +785,23 @@ dvmCompiler_TEMPLATE_STRING_INDEXOF:
      *    r2:   Starting offset in string data
      */
 
+    ldr    r3, [r0, #STRING_FIELDOFF_VALUE]
     ldr    r7, [r0, #STRING_FIELDOFF_OFFSET]
     ldr    r8, [r0, #STRING_FIELDOFF_COUNT]
-    ldr    r0, [r0, #STRING_FIELDOFF_VALUE]
+
 
     /*
      * At this point, we have:
-     *    r0: object pointer
      *    r1: char to match
      *    r2: starting offset
+     *    r3: object pointer (final result -> r0)
      *    r7: offset
      *    r8: string length
      */
 
      /* Build pointer to start of string data */
-     add   r0, #16
-     add   r0, r0, r7, lsl #1
+     add   r3, #16
+     add   r0, r3, r7, lsl #1
 
      /* Save a copy of starting data in r7 */
      mov   r7, r0
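
The register shuffle above loads the String's value array into r3 instead of
clobbering r0, then builds the scan pointer with "add r3, #16" /
"add r0, r3, r7, lsl #1" (16-byte array header, two bytes per char). Roughly,
in C (the constants come straight from those instructions; the helper below
is hypothetical):

#include <stdint.h>

/* charArrayObj is the String's backing char[] object (the value field above);
 * offset is the String's offset field, counted in 16-bit chars. */
static const uint16_t *string_scan_base(const uint8_t *charArrayObj,
                                        uint32_t offset)
{
    return (const uint16_t *)(charArrayObj + 16) + offset;  /* add #16; lsl #1 */
}
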
@@ -1390,7 +898,7 @@ dvmCompiler_TEMPLATE_INTERPRET:
      *        r1 - the Dalvik PC to begin interpretation.
      *    else
      *        [lr, #3] contains Dalvik PC to begin interpretation
-     *    rGLUE - pointer to interpState
+     *    rSELF - pointer to thread
      *    rFP - Dalvik frame pointer
      */
     cmp     lr, #0
@@ -1430,11 +938,7 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER:
     mov     r3, #0                       @ Record that we're not returning
     str     r3, [r0, #offThread_inJitCodeCache]
     blx     r2                           @ dvmLockObject(self, obj)
-    @ refresh Jit's on/off status
-    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
-    ldr     r0, [r0]
     ldr     r2, .LdvmJitToInterpNoChain
-    str     r0, [rGLUE, #offGlue_pJitProfTable]
     @ Bail to interpreter - no chain [note - r4 still contains rPC]
 #if defined(WITH_JIT_TUNING)
     mov     r0, #kHeavyweightMonitor
@@ -1461,12 +965,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
     mov     r3, #0                       @ Record that we're not returning
     str     r3, [r0, #offThread_inJitCodeCache]
     blx     r2             @ dvmLockObject(self, obj)
-    @ refresh Jit's on/off status & test for exception
-    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
-    ldr     r1, [rGLUE, #offGlue_self]
-    ldr     r0, [r0]
-    ldr     r1, [r1, #offThread_exception]
-    str     r0, [rGLUE, #offGlue_pJitProfTable]
+    @ test for exception
+    ldr     r1, [rSELF, #offThread_exception]
     cmp     r1, #0
     beq     1f
     ldr     r2, .LhandleException
@@ -1479,6 +979,395 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
 #endif
     ldr     pc, .LdvmJitToInterpNoChain
 
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
+dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
+/* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
+    /*
+     * Increment profile counter for this trace, and decrement
+     * sample counter.  If sample counter goes below zero, turn
+     * off profiling.
+     *
+     * On entry
+     * (lr-11) is address of pointer to counter.  Note: the counter
+     *    actually exists 10 bytes before the return target, but because
+     *    we are arriving from thumb mode, lr will have its low bit set.
+     */
+     ldr    r0, [lr,#-11]
+     ldr    r1, [rSELF, #offThread_pProfileCountdown]
+     ldr    r2, [r0]                    @ get counter
+     ldr    r3, [r1]                    @ get countdown timer
+     add    r2, #1
+     subs   r3, #1
+     blt    .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
+     str    r2, [r0]
+     str    r3, [r1]
+     bx     lr
+
+.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
+     mov    r4, lr                     @ preserve lr
+     ldr    r0, .LdvmJitTraceProfilingOff
+     blx    r0
+     bx     r4
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_RETURN_PROF
+dvmCompiler_TEMPLATE_RETURN_PROF:
+/* File: armv5te/TEMPLATE_RETURN_PROF.S */
+#define TEMPLATE_INLINE_PROFILING
+/* File: armv5te/TEMPLATE_RETURN.S */
+    /*
+     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
+     * If the stored value in returnAddr
+     * is non-zero, the caller is compiled by the JIT thus return to the
+     * address in the code cache following the invoke instruction. Otherwise
+     * return to the special dvmJitToInterpNoChain entry point.
+     */
+#if defined(TEMPLATE_INLINE_PROFILING)
+    stmfd   sp!, {r0-r2,lr}             @ preserve live registers
+    mov     r0, r6
+    @ r0=rSELF
+    ldr     ip, .LdvmFastMethodTraceExit
+    blx     ip
+    ldmfd   sp!, {r0-r2,lr}             @ restore live registers
+#endif
+    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
+    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
+    ldrb    r8, [rSELF, #offThread_breakFlags] @ r8<- breakFlags
+    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
+    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
+#if !defined(WITH_SELF_VERIFICATION)
+    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
+#else
+    mov     r9, #0                      @ disable chaining
+#endif
+                                        @ r2<- method we're returning to
+    cmp     r2, #0                      @ break frame?
+#if !defined(WITH_SELF_VERIFICATION)
+    beq     1f                          @ bail to interpreter
+#else
+    blxeq   lr                          @ punt to interpreter and compare state
+#endif
+    ldr     r1, .LdvmJitToInterpNoChainNoProfile @ defined in footer.S
+    mov     rFP, r10                    @ publish new FP
+    ldr     r10, [r2, #offMethod_clazz] @ r10<- method->clazz
+
+    str     r2, [rSELF, #offThread_method]@ self->method = newSave->method
+    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
+    str     rFP, [rSELF, #offThread_curFrame] @ curFrame = fp
+    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
+    str     r0, [rSELF, #offThread_methodClassDex]
+    cmp     r8, #0                      @ check the break flags
+    movne   r9, #0                      @ clear the chaining cell address
+    str     r9, [rSELF, #offThread_inJitCodeCache] @ in code cache or not
+    cmp     r9, #0                      @ chaining cell exists?
+    blxne   r9                          @ jump to the chaining cell
+#if defined(WITH_JIT_TUNING)
+    mov     r0, #kCallsiteInterpreted
+#endif
+    mov     pc, r1                      @ callsite is interpreted
+1:
+    mov     r0, #0
+    str     r0, [rSELF, #offThread_inJitCodeCache] @ reset inJitCodeCache
+    stmia   rSELF, {rPC, rFP}           @ SAVE_PC_FP_TO_SELF()
+    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
+    mov     r0, rSELF                   @ Expecting rSELF in r0
+    blx     r2                          @ exit the interpreter
+
+#undef TEMPLATE_INLINE_PROFILING
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF
+dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT_PROF.S */
+#define TEMPLATE_INLINE_PROFILING
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
+    /*
+     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
+     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
+     * runtime-resolved callee.
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
+    ldrb    r8, [rSELF, #offThread_breakFlags] @ r8<- breakFlags
+    add     r3, r1, #1  @ Thumb addr is odd
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlo    lr                          @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ breakFlags != 0
+    bxne    lr                          @ bail to the interpreter
+    tst     r10, #ACC_NATIVE
+#if !defined(WITH_SELF_VERIFICATION)
+    bne     .LinvokeNative
+#else
+    bxne    lr                          @ bail to the interpreter
+#endif
+
+    ldr     r10, .LdvmJitToInterpTraceSelectNoChain
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+
+    @ Update "thread" values for the new method
+    str     r0, [rSELF, #offThread_method]    @ self->method = methodToCall
+    str     r3, [rSELF, #offThread_methodClassDex] @ self->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = newFp
+#if defined(TEMPLATE_INLINE_PROFILING)
+    stmfd   sp!, {r0-r3}                    @ preserve r0-r3
+    mov     r1, r6
+    @ r0=methodToCall, r1=rSELF
+    ldr     ip, .LdvmFastMethodTraceEnter
+    blx     ip
+    ldmfd   sp!, {r0-r3}                    @ restore r0-r3
+#endif
+
+    @ Start executing the callee
+#if defined(WITH_JIT_TUNING)
+    mov     r0, #kInlineCacheMiss
+#endif
+    bx      r10                         @ dvmJitToInterpTraceSelectNoChain
+
+#undef TEMPLATE_INLINE_PROFILING
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF
+dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN_PROF.S */
+#define TEMPLATE_INLINE_PROFILING
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
+    /*
+     * For monomorphic callsite, setup the Dalvik frame and return to the
+     * Thumb code through the link register to transfer control to the callee
+     * method through a dedicated chaining cell.
+     */
+    @ r0 = methodToCall, r1 = returnCell, r2 = methodToCall->outsSize
+    @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
+    @ methodToCall is guaranteed to be non-native
+.LinvokeChainProf:
+    ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
+    ldrb    r8, [rSELF, #offThread_breakFlags]        @ r8<- breakFlags
+    add     r3, r1, #1  @ Thumb addr is odd
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    add     r12, lr, #2                 @ setup the punt-to-interp address
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlo    r12                         @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ breakFlags != 0
+    bxne    r12                         @ bail to the interpreter
+
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+
+    @ Update "thread" values for the new method
+    str     r0, [rSELF, #offThread_method]    @ self->method = methodToCall
+    str     r3, [rSELF, #offThread_methodClassDex] @ self->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = newFp
+#if defined(TEMPLATE_INLINE_PROFILING)
+    stmfd   sp!, {r0-r2,lr}             @ preserve clobbered live registers
+    mov     r1, r6
+    @ r0=methodToCall, r1=rSELF
+    ldr     ip, .LdvmFastMethodTraceEnter
+    blx     ip
+    ldmfd   sp!, {r0-r2,lr}             @ restore registers
+#endif
+
+    bx      lr                              @ return to the callee-chaining cell
+
+#undef TEMPLATE_INLINE_PROFILING
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF
+dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF.S */
+#define TEMPLATE_INLINE_PROFILING
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
+    /*
+     * For polymorphic callsite, check whether the cached class pointer matches
+     * the current one. If so setup the Dalvik frame and return to the
+     * Thumb code through the link register to transfer control to the callee
+     * method through a dedicated chaining cell.
+     *
+     * The predicted chaining cell is declared in ArmLIR.h with the
+     * following layout:
+     *
+     *  typedef struct PredictedChainingCell {
+     *      u4 branch;
+     *      const ClassObject *clazz;
+     *      const Method *method;
+     *      u4 counter;
+     *  } PredictedChainingCell;
+     *
+     * Upon returning to the callsite:
+     *    - lr  : to branch to the chaining cell
+     *    - lr+2: to punt to the interpreter
+     *    - lr+4: to fully resolve the callee and may rechain.
+     *            r3 <- class
+     *            r9 <- counter
+     */
+    @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
+    ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
+    ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
+    ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
+    ldr     r9, [rSELF, #offThread_icRechainCount] @ r1 <- shared rechainCount
+    cmp     r3, r8          @ predicted class == actual class?
+#if defined(WITH_JIT_TUNING)
+    ldr     r7, .LdvmICHitCount
+#if defined(WORKAROUND_CORTEX_A9_745320)
+    /* Don't use conditional loads if the HW defect exists */
+    bne     101f
+    ldr     r10, [r7, #0]
+101:
+#else
+    ldreq   r10, [r7, #0]
+#endif
+    add     r10, r10, #1
+    streq   r10, [r7, #0]
+#endif
+    ldreqh  r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldreqh  r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    beq     .LinvokeChainProf   @ predicted chain is valid
+    ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
+    cmp     r8, #0          @ initialized class or not
+    moveq   r1, #0
+    subne   r1, r9, #1      @ count--
+    strne   r1, [rSELF, #offThread_icRechainCount]  @ write back to thread
+    add     lr, lr, #4      @ return to fully-resolve landing pad
+    /*
+     * r1 <- count
+     * r2 <- &predictedChainCell
+     * r3 <- this->class
+     * r4 <- dPC
+     * r7 <- this->class->vtable
+     */
+    bx      lr
+
+#undef TEMPLATE_INLINE_PROFILING
+
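
The predicted-chaining test in the template above can be read as the
following C, using the PredictedChainingCell layout quoted in its comment
(pointer types are placeholders and the rechain bookkeeping is simplified):

#include <stddef.h>

typedef struct PredictedChainingCell {
    unsigned int branch;
    const void  *clazz;
    const void  *method;
    unsigned int counter;
} PredictedChainingCell;

/* Returns the cached callee on a hit; on a miss, decrements the shared
 * rechain count (unless the cell was never initialized) and returns NULL so
 * the caller falls through to full resolution. */
static const void *check_predicted_chain(const void *thisClazz,             /* r3 */
                                         const PredictedChainingCell *cell, /* r2 */
                                         int *icRechainCount)               /* r9 */
{
    if (thisClazz == cell->clazz)        /* cmp r3, r8 / beq .LinvokeChainProf */
        return cell->method;
    if (cell->clazz != NULL)             /* cmp r8, #0                         */
        *icRechainCount -= 1;            /* subne r1, r9, #1 / strne r1, ...   */
    return NULL;                         /* add lr, #4: fully-resolve path     */
}
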
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF
+dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE_PROF.S */
+#define TEMPLATE_INLINE_PROFILING
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    @ r7 = methodToCall->registersSize
+    ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
+    ldrb    r8, [rSELF, #offThread_breakFlags]        @ r8<- breakFlags
+    add     r3, r1, #1  @ Thumb addr is odd
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlo    lr                          @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ breakFlags != 0
+    ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
+#if !defined(WITH_SELF_VERIFICATION)
+    bxne    lr                          @ bail to the interpreter
+#else
+    bx      lr                          @ bail to interpreter unconditionally
+#endif
+
+    @ go ahead and transfer control to the native code
+    ldr     r9, [rSELF, #offThread_jniLocal_topCookie]@r9<-thread->localRef->...
+    mov     r2, #0
+    str     r1, [rSELF, #offThread_curFrame]   @ curFrame = newFp
+    str     r2, [rSELF, #offThread_inJitCodeCache] @ not in the jit code cache
+    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
+                                        @ newFp->localRefCookie=top
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
+
+    mov     r2, r0                        @ arg2<- methodToCall
+    mov     r0, r1                        @ arg0<- newFP
+    add     r1, rSELF, #offThread_retval  @ arg1<- &retval
+    mov     r3, rSELF                     @ arg3<- self
+#if defined(TEMPLATE_INLINE_PROFILING)
+    @ r2=methodToCall, r6=rSELF
+    stmfd   sp!, {r2,r6}                @ to be consumed after JNI return
+    stmfd   sp!, {r0-r3}                @ preserve r0-r3
+    mov     r0, r2
+    mov     r1, r6
+    @ r0=JNIMethod, r1=rSELF
+    ldr     ip, .LdvmFastMethodTraceEnter
+    blx     ip
+    ldmfd   sp!, {r0-r3}                @ restore r0-r3
+#endif
+
+    blx     r8                          @ off to the native code
+
+#if defined(TEMPLATE_INLINE_PROFILING)
+    ldmfd   sp!, {r0-r1}                @ restore r2 and r6
+    @ r0=JNIMethod, r1=rSELF
+    ldr     ip, .LdvmFastNativeMethodTraceExit
+    blx     ip
+#endif
+    @ native return; r10=newSaveArea
+    @ equivalent to dvmPopJniLocals
+    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
+    ldr     r1, [rSELF, #offThread_exception] @ check for exception
+    str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = fp
+    cmp     r1, #0                      @ null?
+    str     r0, [rSELF, #offThread_jniLocal_topCookie] @ new top <- old top
+    ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+
+    @ r0 = dalvikCallsitePC
+    bne     .LhandleException           @ no, handle exception
+
+    str     r2, [rSELF, #offThread_inJitCodeCache] @ set the mode properly
+    cmp     r2, #0                      @ return chaining cell still exists?
+    bxne    r2                          @ yes - go ahead
+
+    @ continue executing the next instruction through the interpreter
+    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
+    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
+#if defined(WITH_JIT_TUNING)
+    mov     r0, #kCallsiteInterpreted
+#endif
+    mov     pc, r1
+
+#undef TEMPLATE_INLINE_PROFILING
+
     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
 /* File: armv5te/footer.S */
 /*
@@ -1487,59 +1376,61 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
  * ===========================================================================
  */
 
-    .text
+    .section .data.rel.ro
     .align  2
 .LinvokeNative:
     @ Prep for the native call
     @ r1 = newFP, r0 = methodToCall
-    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
     mov     r2, #0
-    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
-    str     r2, [r3, #offThread_inJitCodeCache] @ not in jit code cache
-    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
+    ldr     r9, [rSELF, #offThread_jniLocal_topCookie]@r9<-thread->localRef->...
+    str     r2, [rSELF, #offThread_inJitCodeCache] @ not in jit code cache
+    str     r1, [rSELF, #offThread_curFrame]   @ curFrame = newFp
     str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
                                         @ newFp->localRefCookie=top
-    mov     r9, r3                      @ r9<- glue->self (preserve)
+    ldrh    lr, [rSELF, #offThread_subMode]
     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
 
     mov     r2, r0                      @ r2<- methodToCall
     mov     r0, r1                      @ r0<- newFP
-    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
-#if defined(WITH_INLINE_PROFILING)
-    @ r2: methodToCall, r6: rGLUE
+    add     r1, rSELF, #offThread_retval  @ r1<- &retval
+    mov     r3, rSELF                   @ arg3<- self
+    ands    lr, #kSubModeMethodTrace
+    beq     121f                        @ hop if not profiling
+    @ r2: methodToCall, r6: rSELF
     stmfd   sp!, {r2,r6}
     stmfd   sp!, {r0-r3}
     mov     r0, r2
     mov     r1, r6
-    LDR_PC_LR ".LdvmFastMethodTraceEnter"
+    ldr     ip, .LdvmFastMethodTraceEnter
+    blx     ip
     ldmfd   sp!, {r0-r3}
-#endif
 
-    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+    ldr     ip, [r2, #offMethod_nativeFunc]
+    blx     ip
 
-#if defined(WITH_INLINE_PROFILING)
     ldmfd   sp!, {r0-r1}
-    LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
-#endif
-    @ Refresh Jit's on/off status
-    ldr     r3, [rGLUE, #offGlue_ppJitProfTable]
-
-    @ native return; r9=self, r10=newSaveArea
+    ldr     ip, .LdvmFastNativeMethodTraceExit
+    blx     ip
+    b       212f
+121:
+    ldr     ip, [r2, #offMethod_nativeFunc]
+    blx     ip
+212:
+
+    @ native return; r10=newSaveArea
     @ equivalent to dvmPopJniLocals
     ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
     ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
-    ldr     r1, [r9, #offThread_exception] @ check for exception
-    ldr     r3, [r3]    @ r1 <- pointer to Jit profile table
-    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
+    ldr     r1, [rSELF, #offThread_exception] @ check for exception
+    str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = fp
     cmp     r1, #0                      @ null?
-    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
+    str     r0, [rSELF, #offThread_jniLocal_topCookie] @ new top <- old top
     ldr     r0, [r10, #offStackSaveArea_savedPc] @ reload rPC
-    str     r3, [rGLUE, #offGlue_pJitProfTable]  @ cache current JitProfTable
 
     @ r0 = dalvikCallsitePC
     bne     .LhandleException           @ no, handle exception
 
-    str     r2, [r9, #offThread_inJitCodeCache] @ set the new mode
+    str     r2, [rSELF, #offThread_inJitCodeCache] @ set the new mode
     cmp     r2, #0                      @ return chaining cell still exists?
     bxne    r2                          @ yes - go ahead
 
@@ -1549,7 +1440,7 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
 #if defined(WITH_JIT_TUNING)
     mov     r0, #kCallsiteInterpreted
 #endif
-    mov     pc, r1
+    bx      r1
 
 /*
  * On entry:
@@ -1561,13 +1452,12 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
 .LdeadFood:
     .word   0xdeadf00d
 #endif
-    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
     mov     r2, #0
-    str     r2, [r3, #offThread_inJitCodeCache] @ in interpreter land
+    str     r2, [rSELF, #offThread_inJitCodeCache] @ in interpreter land
     ldr     r1, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
     ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above
     mov     rPC, r0                 @ reload the faulting Dalvik address
-    mov     pc, r1                  @ branch to dvmMterpCommonExceptionThrown
+    bx      r1                  @ branch to dvmMterpCommonExceptionThrown
 
     .align  2
 .LdvmAsmInstructionStart:
@@ -1584,6 +1474,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
     .word   dvmMterpCommonExceptionThrown
 .LdvmLockObject:
     .word   dvmLockObject
+.LdvmJitTraceProfilingOff:
+    .word   dvmJitTraceProfilingOff
 #if defined(WITH_JIT_TUNING)
 .LdvmICHitCount:
     .word   gDvmICHitCount
@@ -1592,21 +1484,19 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
 .LdvmSelfVerificationMemOpDecode:
     .word   dvmSelfVerificationMemOpDecode
 #endif
-#if defined(WITH_INLINE_PROFILING)
 .LdvmFastMethodTraceEnter:
     .word   dvmFastMethodTraceEnter
 .LdvmFastNativeMethodTraceExit:
     .word   dvmFastNativeMethodTraceExit
-.LdvmFastJavaMethodTraceExit:
-    .word   dvmFastJavaMethodTraceExit
-#endif
+.LdvmFastMethodTraceExit:
+    .word   dvmFastMethodTraceExit
 .L__aeabi_cdcmple:
     .word   __aeabi_cdcmple
 .L__aeabi_cfcmple:
     .word   __aeabi_cfcmple
 
-    .global dmvCompilerTemplateEnd
-dmvCompilerTemplateEnd:
+    .global dvmCompilerTemplateEnd
+dvmCompilerTemplateEnd:
 
 #endif /* WITH_JIT */