OSDN Git Service

Implement SSA-based loop optimizations.
authorBen Cheng <bccheng@android.com>
Mon, 24 Aug 2009 23:32:22 +0000 (16:32 -0700)
committerBen Cheng <bccheng@android.com>
Tue, 1 Sep 2009 18:08:35 +0000 (11:08 -0700)
For traces of simple natural loops (ie no invokes/side exits) null and range
checks will be hoisted in to entry block.

For acyclic traces SSA representation will be formed but no optimizations are
 applied (for now).

SSA representation will be printed with the normal verbose output. For example:

D/dalvikvm( 1248): Dumping LIR insns
D/dalvikvm( 1248): installed code is at 0x428559d4
D/dalvikvm( 1248): total size is 324 bytes
D/dalvikvm( 1248): 0x428559d4 (0000): data    0x012c(300)
D/dalvikvm( 1248): -------- entry offset: 0x002b
D/dalvikvm( 1248): -------- MIR_OP_NULL_N_RANGE_UP_CHECK
D/dalvikvm( 1248): 0x428559d6 (0002): ldr     r0, [r5, #36]
D/dalvikvm( 1248): 0x428559d8 (0004): ldr     r1, [r5, #12]
D/dalvikvm( 1248): 0x428559da (0006): cbz     r0,0x42855a06
D/dalvikvm( 1248): 0x428559dc (0008): ldr     r0, [r0, #8]
D/dalvikvm( 1248): 0x428559de (000a): subs    r1, #1
D/dalvikvm( 1248): 0x428559e0 (000c): cmp     r1, r0
D/dalvikvm( 1248): 0x428559e2 (000e): bge     0x42855a06
D/dalvikvm( 1248): -------- MIR_OP_NULL_N_RANGE_UP_CHECK
D/dalvikvm( 1248): 0x428559e4 (0010): ldr     r0, [r5, #40]
D/dalvikvm( 1248): 0x428559e6 (0012): ldr     r1, [r5, #12]
D/dalvikvm( 1248): 0x428559e8 (0014): cbz     r0,0x42855a06
D/dalvikvm( 1248): 0x428559ea (0016): ldr     r0, [r0, #8]
D/dalvikvm( 1248): 0x428559ec (0018): subs    r1, #1
D/dalvikvm( 1248): 0x428559ee (001a): cmp     r1, r0
D/dalvikvm( 1248): 0x428559f0 (001c): bge     0x42855a06
D/dalvikvm( 1248): -------- MIR_OP_NULL_N_RANGE_UP_CHECK
D/dalvikvm( 1248): 0x428559f2 (001e): ldr     r0, [r5, #32]
D/dalvikvm( 1248): 0x428559f4 (0020): ldr     r1, [r5, #12]
D/dalvikvm( 1248): 0x428559f6 (0022): cbz     r0,0x42855a06
D/dalvikvm( 1248): 0x428559f8 (0024): ldr     r0, [r0, #8]
D/dalvikvm( 1248): 0x428559fa (0026): cmp     r1, r0
D/dalvikvm( 1248): 0x428559fc (0028): bge     0x42855a06
D/dalvikvm( 1248): -------- MIR_OP_LOWER_BOUND_CHECK
D/dalvikvm( 1248): 0x428559fe (002a): ldr     r0, [r5, #44]
D/dalvikvm( 1248): 0x42855a00 (002c): cmp     r0, #1
D/dalvikvm( 1248): 0x42855a02 (002e): blt     0x42855a06
D/dalvikvm( 1248): 0x42855a04 (0030): b       0x42855a08
D/dalvikvm( 1248): 0x42855a06 (0032): b       0x42855af0
D/dalvikvm( 1248): L0x002b:
D/dalvikvm( 1248): -------- MIR_OP_PHI
D/dalvikvm( 1248): -------- s20(v11_1) <- s11(v11_0) s46(v11_2)
D/dalvikvm( 1248): -------- dalvik offset: 0x002b @ aget-wide
D/dalvikvm( 1248): -------- s21(v12_1) s22(v13_1) <- s9(v9_0) s20(v11_1)
D/dalvikvm( 1248): 0x42855a08 (0034): ldr     r2, [r5, #36]
D/dalvikvm( 1248): 0x42855a0a (0036): ldr     r3, [r5, #44]
D/dalvikvm( 1248): 0x42855a0c (0038): adds    r2, r2, #16
D/dalvikvm( 1248): 0x42855a0e (003a): lsls    r3, r3, #3
D/dalvikvm( 1248): 0x42855a10 (003c): ldr     r0, [r2, r3]
D/dalvikvm( 1248): 0x42855a12 (003e): adds    r2, r2, #4
D/dalvikvm( 1248): 0x42855a14 (0040): ldr     r1, [r2, r3]
D/dalvikvm( 1248): -------- dalvik offset: 0x002d @ aget-wide
D/dalvikvm( 1248): -------- s23(v14_1) s24(v15_1) <- s10(v10_0) s20(v11_1)
D/dalvikvm( 1248): 0x42855a16 (0042): ldr     r3, [r5, #40]
D/dalvikvm( 1248): 0x42855a18 (0044): str     r0, [r5, #48]
D/dalvikvm( 1248): 0x42855a1a (0046): ldr     r0, [r5, #44]
D/dalvikvm( 1248): 0x42855a1c (0048): adds    r3, r3, #16
D/dalvikvm( 1248): 0x42855a1e (004a): lsls    r0, r0, #3
D/dalvikvm( 1248): 0x42855a20 (004c): str     r1, [r5, #52]
D/dalvikvm( 1248): 0x42855a22 (004e): ldr     r1, [r3, r0]
D/dalvikvm( 1248): 0x42855a24 (0050): adds    r3, r3, #4
D/dalvikvm( 1248): 0x42855a26 (0052): ldr     r2, [r3, r0]
D/dalvikvm( 1248): -------- dalvik offset: 0x002f @ add-double/2addr
D/dalvikvm( 1248): -------- s25(v12_2) s26(v13_2) <- s21(v12_1) s22(v13_1) s23(v14_1) s24(v15_1)
D/dalvikvm( 1248): 0x42855a28 (0054): str     r1, [r5, #56]
D/dalvikvm( 1248): 0x42855a2a (0056): str     r2, [r5, #60]
D/dalvikvm( 1248): 0x42855a2c (0058): vldr    d1, [r5, #48]
D/dalvikvm( 1248): 0x42855a30 (005c): vldr    d2, [r5, #56]
D/dalvikvm( 1248): 0x42855a34 (0060): vadd    d0, d1, d2
D/dalvikvm( 1248): -------- dalvik offset: 0x0030 @ const/4
D/dalvikvm( 1248): -------- s27(v14_2) <-
D/dalvikvm( 1248): 0x42855a38 (0064): movs    r2, #1
D/dalvikvm( 1248): -------- dalvik offset: 0x0031 @ sub-int
D/dalvikvm( 1248): -------- s28(v14_3) <- s20(v11_1) s27(v14_2)
D/dalvikvm( 1248): 0x42855a3a (0066): ldr     r3, [r5, #44]
D/dalvikvm( 1248): 0x42855a3c (0068): subs    r0, r3, r2
D/dalvikvm( 1248): -------- dalvik offset: 0x0033 @ aget-wide
D/dalvikvm( 1248): -------- s29(v14_4) s30(v15_2) <- s8(v8_0) s28(v14_3)
D/dalvikvm( 1248): 0x42855a3e (006a): ldr     r3, [r5, #32]
D/dalvikvm( 1248): 0x42855a40 (006c): adds    r3, r3, #16
D/dalvikvm( 1248): 0x42855a42 (006e): str     r0, [r5, #56]
D/dalvikvm( 1248): 0x42855a44 (0070): lsls    r0, r0, #3
D/dalvikvm( 1248): 0x42855a46 (0072): vstr    d0, [r5, #48]
D/dalvikvm( 1248): 0x42855a4a (0076): ldr     r1, [r3, r0]
D/dalvikvm( 1248): 0x42855a4c (0078): adds    r3, r3, #4
D/dalvikvm( 1248): 0x42855a4e (007a): ldr     r2, [r3, r0]
D/dalvikvm( 1248): -------- dalvik offset: 0x0035 @ add-double/2addr
D/dalvikvm( 1248): -------- s31(v12_3) s32(v13_3) <- s25(v12_2) s26(v13_2) s29(v14_4) s30(v15_2)
D/dalvikvm( 1248): 0x42855a50 (007c): str     r1, [r5, #56]
D/dalvikvm( 1248): 0x42855a52 (007e): str     r2, [r5, #60]
D/dalvikvm( 1248): 0x42855a54 (0080): vldr    d1, [r5, #48]
D/dalvikvm( 1248): 0x42855a58 (0084): vldr    d2, [r5, #56]
D/dalvikvm( 1248): 0x42855a5c (0088): vadd    d0, d1, d2
D/dalvikvm( 1248): -------- dalvik offset: 0x0036 @ add-int/lit8
D/dalvikvm( 1248): -------- s33(v14_5) <- s20(v11_1)
D/dalvikvm( 1248): 0x42855a60 (008c): ldr     r2, [r5, #44]
D/dalvikvm( 1248): 0x42855a62 (008e): adds    r2, r2, #1
D/dalvikvm( 1248): -------- dalvik offset: 0x0038 @ aget-wide
D/dalvikvm( 1248): -------- s34(v14_6) s35(v15_3) <- s8(v8_0) s33(v14_5)
D/dalvikvm( 1248): 0x42855a64 (0090): ldr     r1, [r5, #32]
D/dalvikvm( 1248): 0x42855a66 (0092): adds    r1, r1, #16
D/dalvikvm( 1248): 0x42855a68 (0094): str     r2, [r5, #56]
D/dalvikvm( 1248): 0x42855a6a (0096): lsls    r2, r2, #3
D/dalvikvm( 1248): 0x42855a6c (0098): vstr    d0, [r5, #48]
D/dalvikvm( 1248): 0x42855a70 (009c): ldr     r3, [r1, r2]
D/dalvikvm( 1248): 0x42855a72 (009e): adds    r1, r1, #4
D/dalvikvm( 1248): 0x42855a74 (00a0): ldr     r0, [r1, r2]
D/dalvikvm( 1248): -------- dalvik offset: 0x003a @ add-double/2addr
D/dalvikvm( 1248): -------- s36(v12_4) s37(v13_4) <- s31(v12_3) s32(v13_3) s34(v14_6) s35(v15_3)
D/dalvikvm( 1248): 0x42855a76 (00a2): str     r3, [r5, #56]
D/dalvikvm( 1248): 0x42855a78 (00a4): str     r0, [r5, #60]
D/dalvikvm( 1248): 0x42855a7a (00a6): vldr    d1, [r5, #48]
D/dalvikvm( 1248): 0x42855a7e (00aa): vldr    d2, [r5, #56]
D/dalvikvm( 1248): 0x42855a82 (00ae): vadd    d0, d1, d2
D/dalvikvm( 1248): 0x42855a86 (00b2): vstr    d0, [r5, #48]
D/dalvikvm( 1248): -------- dalvik offset: 0x003b @ mul-double/2addr
D/dalvikvm( 1248): -------- s38(v12_5) s39(v13_5) <- s36(v12_4) s37(v13_4) s4(v4_0) s5(v5_0)
D/dalvikvm( 1248): 0x42855a8a (00b6): vmov.f64 s2, s0
D/dalvikvm( 1248): 0x42855a8e (00ba): vldr    d2, [r5, #16]
D/dalvikvm( 1248): 0x42855a92 (00be): vmuld   d0, d1, d2
D/dalvikvm( 1248): -------- dalvik offset: 0x003c @ aget-wide
D/dalvikvm( 1248): -------- s40(v14_7) s41(v15_4) <- s8(v8_0) s20(v11_1)
D/dalvikvm( 1248): 0x42855a96 (00c2): ldr     r2, [r5, #32]
D/dalvikvm( 1248): 0x42855a98 (00c4): ldr     r3, [r5, #44]
D/dalvikvm( 1248): 0x42855a9a (00c6): adds    r2, r2, #16
D/dalvikvm( 1248): 0x42855a9c (00c8): lsls    r3, r3, #3
D/dalvikvm( 1248): 0x42855a9e (00ca): vstr    d0, [r5, #48]
D/dalvikvm( 1248): 0x42855aa2 (00ce): ldr     r0, [r2, r3]
D/dalvikvm( 1248): 0x42855aa4 (00d0): adds    r2, r2, #4
D/dalvikvm( 1248): 0x42855aa6 (00d2): ldr     r1, [r2, r3]
D/dalvikvm( 1248): 0x42855aa8 (00d4): str     r0, [r5, #56]
D/dalvikvm( 1248): 0x42855aaa (00d6): str     r1, [r5, #60]
D/dalvikvm( 1248): -------- dalvik offset: 0x003e @ mul-double
D/dalvikvm( 1248): -------- s42(v14_8) s43(v15_5) <- s40(v14_7) s41(v15_4) s16(v16_0) s17(v17_0)
D/dalvikvm( 1248): 0x42855aac (00d8): vldr    d1, [r5, #56]
D/dalvikvm( 1248): 0x42855ab0 (00dc): vldr    d2, [r5, #64]
D/dalvikvm( 1248): 0x42855ab4 (00e0): vmuld   d0, d1, d2
D/dalvikvm( 1248): 0x42855ab8 (00e4): vstr    d0, [r5, #56]
D/dalvikvm( 1248): -------- dalvik offset: 0x0040 @ add-double/2addr
D/dalvikvm( 1248): -------- s44(v12_6) s45(v13_6) <- s38(v12_5) s39(v13_5) s42(v14_8) s43(v15_5)
D/dalvikvm( 1248): 0x42855abc (00e8): vldr    d1, [r5, #48]
D/dalvikvm( 1248): 0x42855ac0 (00ec): vldr    d2, [r5, #56]
D/dalvikvm( 1248): 0x42855ac4 (00f0): vadd    d0, d1, d2
D/dalvikvm( 1248): 0x42855ac8 (00f4): vstr    d0, [r5, #48]
D/dalvikvm( 1248): -------- dalvik offset: 0x0041 @ aput-wide
D/dalvikvm( 1248): -------- s44(v12_6) s45(v13_6) s8(v8_0) s20(v11_1)
D/dalvikvm( 1248): 0x42855acc (00f8): ldr     r3, [r5, #32]
D/dalvikvm( 1248): 0x42855ace (00fa): ldr     r0, [r5, #44]
D/dalvikvm( 1248): 0x42855ad0 (00fc): adds    r3, r3, #16
D/dalvikvm( 1248): 0x42855ad2 (00fe): ldr     r1, [r5, #48]
D/dalvikvm( 1248): 0x42855ad4 (0100): ldr     r2, [r5, #52]
D/dalvikvm( 1248): 0x42855ad6 (0102): lsls    r0, r0, #3
D/dalvikvm( 1248): 0x42855ad8 (0104): str     r1, [r3, r0]
D/dalvikvm( 1248): 0x42855ada (0106): adds    r3, r3, #4
D/dalvikvm( 1248): 0x42855adc (0108): str     r2, [r3, r0]
D/dalvikvm( 1248): -------- dalvik offset: 0x0043 @ add-int/lit8
D/dalvikvm( 1248): -------- s46(v11_2) <- s20(v11_1)
D/dalvikvm( 1248): 0x42855ade (010a): ldr     r2, [r5, #44]
D/dalvikvm( 1248): 0x42855ae0 (010c): adds    r2, r2, #1
D/dalvikvm( 1248): 0x42855ae2 (010e): str     r2, [r5, #44]
D/dalvikvm( 1248): -------- dalvik offset: 0x0045 @ goto
D/dalvikvm( 1248): --------
D/dalvikvm( 1248): L0x0029:
D/dalvikvm( 1248): -------- dalvik offset: 0x0029 @ if-ge
D/dalvikvm( 1248): -------- s46(v11_2) s3(v3_0)
D/dalvikvm( 1248): 0x42855ae4 (0110): ldr     r0, [r5, #44]
D/dalvikvm( 1248): 0x42855ae6 (0112): ldr     r1, [r5, #12]
D/dalvikvm( 1248): 0x42855ae8 (0114): cmp     r0, r1
D/dalvikvm( 1248): 0x42855aea (0116): bge     0x42855aee
D/dalvikvm( 1248): 0x42855aec (0118): b       0x42855a08
D/dalvikvm( 1248): -------- exit offset: 0x0046
D/dalvikvm( 1248): 0x42855aee (011a): b       0x42855af8
D/dalvikvm( 1248): -------- reconstruct dalvik PC : 0x42a644d6 @ +0x002b
D/dalvikvm( 1248): 0x42855af0 (011c): ldr     r0, [pc, #32]
D/dalvikvm( 1248): Exception_Handling:
D/dalvikvm( 1248): 0x42855af2 (011e): ldr     r1, [r6, #84]
D/dalvikvm( 1248): 0x42855af4 (0120): blx     r1
D/dalvikvm( 1248): 0x42855af6 (0122): .align4
D/dalvikvm( 1248): -------- chaining cell (normal): 0x0046
D/dalvikvm( 1248): 0x42855af8 (0124): ldr     r0, [r6, #76]
D/dalvikvm( 1248): 0x42855afa (0126): blx     r0
D/dalvikvm( 1248): 0x42855afc (0128): data    0x450c(17676)
D/dalvikvm( 1248): 0x42855afe (012a): data    0x42a6(17062)
D/dalvikvm( 1248): 0x42855b14 (0140): .word (0x42a644d6)
D/dalvikvm( 1248): End Ljnt/scimark2/SOR;execute, 18 Dalvik instructions

19 files changed:
vm/Dvm.mk
vm/Misc.c
vm/Misc.h
vm/compiler/Compiler.h
vm/compiler/CompilerIR.h
vm/compiler/CompilerInternals.h
vm/compiler/CompilerUtility.h
vm/compiler/Dataflow.c [new file with mode: 0644]
vm/compiler/Dataflow.h [new file with mode: 0644]
vm/compiler/Frontend.c
vm/compiler/IntermediateRep.c
vm/compiler/Loop.c [new file with mode: 0644]
vm/compiler/Loop.h [new file with mode: 0644]
vm/compiler/Utility.c
vm/compiler/codegen/CompilerCodegen.h
vm/compiler/codegen/Optimizer.h
vm/compiler/codegen/arm/ArchUtility.c
vm/compiler/codegen/arm/ArmLIR.h
vm/compiler/codegen/arm/Codegen.c

index 71ebf37..672d3d7 100644 (file)
--- a/vm/Dvm.mk
+++ b/vm/Dvm.mk
@@ -206,6 +206,8 @@ ifeq ($(WITH_JIT),true)
        compiler/Frontend.c \
        compiler/Utility.c \
        compiler/IntermediateRep.c \
+       compiler/Dataflow.c \
+       compiler/Loop.c \
        interp/Jit.c
 endif
 
index f8f7256..61c13f0 100644 (file)
--- a/vm/Misc.c
+++ b/vm/Misc.c
@@ -345,6 +345,39 @@ int dvmCountSetBits(const BitVector* pBits)
 }
 
 /*
+ * Copy a whole vector to the other. Only do that when the both vectors have
+ * the same size and attribute.
+ */
+bool dvmCopyBitVector(BitVector *dest, const BitVector *src)
+{
+    if (dest->storageSize != src->storageSize ||
+        dest->expandable != src->expandable)
+        return false;
+    memcpy(dest->storage, src->storage, sizeof(u4) * dest->storageSize);
+    return true;
+}
+
+/*
+ * Intersect two bit vectores and merge the result on top of the pre-existing
+ * value in the dest vector.
+ */
+bool dvmIntersectBitVectors(BitVector *dest, const BitVector *src1,
+                            const BitVector *src2)
+{
+    if (dest->storageSize != src1->storageSize ||
+        dest->storageSize != src2->storageSize ||
+        dest->expandable != src1->expandable ||
+        dest->expandable != src2->expandable)
+        return false;
+
+    int i;
+    for (i = 0; i < dest->storageSize; i++) {
+        dest->storage[i] |= src1->storage[i] & src2->storage[i];
+    }
+    return true;
+}
+
+/*
  * Return a newly-allocated string in which all occurrences of '.' have
  * been changed to '/'.  If we find a '/' in the original string, NULL
  * is returned to avoid ambiguity.
index 1566161..6f76872 100644 (file)
--- a/vm/Misc.h
+++ b/vm/Misc.h
@@ -164,6 +164,16 @@ bool dvmIsBitSet(const BitVector* pBits, int num);
 /* count the number of bits that have been set */
 int dvmCountSetBits(const BitVector* pBits);
 
+/* copy one vector to the other compatible one */
+bool dvmCopyBitVector(BitVector *dest, const BitVector *src);
+
+/*
+ * Intersect two bit vectores and merge the result on top of the pre-existing
+ * value in the dest vector.
+ */
+bool dvmIntersectBitVectors(BitVector *dest, const BitVector *src1,
+                            const BitVector *src2);
+
 #define kBitVectorGrowth    4   /* increase by 4 u4s when limit hit */
 
 
index 2cf762f..47de33d 100644 (file)
@@ -137,4 +137,26 @@ void dvmCompilerDrainQueue(void);
 void dvmJitUnchainAll(void);
 void dvmCompilerSortAndPrintTraceProfiles(void);
 
+struct CompilationUnit;
+struct BasicBlock;
+struct SSARepresentation;
+struct GrowableList;
+
+void dvmInitializeSSAConversion(struct CompilationUnit *cUnit);
+int dvmConvertSSARegToDalvik(struct CompilationUnit *cUnit, int ssaReg);
+void dvmCompilerLoopOpt(struct CompilationUnit *cUnit);
+void dvmCompilerNonLoopAnalysis(struct CompilationUnit *cUnit);
+void dvmCompilerFindLiveIn(struct CompilationUnit *cUnit,
+                           struct BasicBlock *bb);
+void dvmCompilerDoSSAConversion(struct CompilationUnit *cUnit,
+                                struct BasicBlock *bb);
+void dvmCompilerDoConstantPropagation(struct CompilationUnit *cUnit,
+                                      struct BasicBlock *bb);
+void dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit,
+                                       struct BasicBlock *bb);
+char *dvmCompilerGetSSAString(struct CompilationUnit *cUnit,
+                              struct SSARepresentation *ssaRep);
+void dvmCompilerDataFlowAnalysisDispatcher(struct CompilationUnit *cUnit,
+                void (*func)(struct CompilationUnit *, struct BasicBlock *));
+
 #endif /* _DALVIK_VM_COMPILER */
index 389ac7c..948f3e7 100644 (file)
  * limitations under the License.
  */
 
-#include "codegen/Optimizer.h"
-
 #ifndef _DALVIK_VM_COMPILER_IR
 #define _DALVIK_VM_COMPILER_IR
 
+#include "codegen/Optimizer.h"
+
 typedef enum BBType {
     /* For coding convenience reasons chaining cell types should appear first */
     CHAINING_CELL_NORMAL = 0,
@@ -27,7 +27,9 @@ typedef enum BBType {
     CHAINING_CELL_INVOKE_PREDICTED,
     CHAINING_CELL_BACKWARD_BRANCH,
     CHAINING_CELL_LAST,
+    ENTRY_BLOCK,
     DALVIK_BYTECODE,
+    EXIT_BLOCK,
     PC_RECONSTRUCTION,
     EXCEPTION_HANDLING,
 } BBType;
@@ -46,14 +48,42 @@ typedef struct LIR {
     struct LIR *target;
 } LIR;
 
+enum ExtendedMIROpcode {
+    MIR_OP_FIRST = 256,
+    MIR_OP_PHI = MIR_OP_FIRST,
+    MIR_OP_NULL_N_RANGE_UP_CHECK,
+    MIR_OP_NULL_N_RANGE_DOWN_CHECK,
+    MIR_OP_LOWER_BOUND_CHECK,
+    MIR_OP_PUNT,
+    MIR_OP_LAST,
+};
+
+struct SSARepresentation;
+
+typedef enum {
+    kMIRIgnoreNullCheck = 0,
+    kMIRNullCheckOnly,
+    kMIRIgnoreRangeCheck,
+    kMIRRangeCheckOnly,
+} MIROptimizationFlagPositons;
+
+#define MIR_IGNORE_NULL_CHECK           (1 << kMIRIgnoreNullCheck)
+#define MIR_NULL_CHECK_ONLY             (1 << kMIRNullCheckOnly)
+#define MIR_IGNORE_RANGE_CHECK          (1 << kMIRIgnoreRangeCheck)
+#define MIR_RANGE_CHECK_ONLY            (1 << kMIRRangeCheckOnly)
+
 typedef struct MIR {
     DecodedInstruction dalvikInsn;
     unsigned int width;
     unsigned int offset;
     struct MIR *prev;
     struct MIR *next;
+    struct SSARepresentation *ssaRep;
+    int OptimizationFlags;
 } MIR;
 
+struct BasicBlockDataFlow;
+
 typedef struct BasicBlock {
     int id;
     int visited;
@@ -66,8 +96,11 @@ typedef struct BasicBlock {
     struct BasicBlock *fallThrough;
     struct BasicBlock *taken;
     struct BasicBlock *next;            // Serial link for book keeping purposes
+    struct BasicBlockDataFlow *dataFlowInfo;
 } BasicBlock;
 
+struct LoopAnalysis;
+
 typedef struct CompilationUnit {
     int numInsts;
     int numBlocks;
@@ -88,17 +121,33 @@ typedef struct CompilationUnit {
     bool allSingleStep;
     bool halveInstCount;
     bool executionCount;                // Add code to count trace executions
+    bool hasLoop;
     int numChainingCells[CHAINING_CELL_LAST];
     LIR *firstChainingLIR[CHAINING_CELL_LAST];
     RegisterScoreboard registerScoreboard;      // Track register dependency
     int optRound;                       // round number to tell an LIR's age
     JitInstructionSetType instructionSet;
+    /* Number of total regs used in the whole cUnit after SSA transformation */
+    int numSSARegs;
+    /* Map SSA reg i to the Dalvik[15..0]/Sub[31..16] pair. */
+    GrowableList *ssaToDalvikMap;
+
+    /* The following are new data structures to support SSA representations */
+    /* Map original Dalvik reg i to the SSA[15..0]/Sub[31..16] pair */
+    int *dalvikToSSAMap;                // length == method->registersSize
+    BitVector *isConstantV;             // length == numSSAReg
+    int *constantValues;                // length == numSSAReg
+
+    /* Data structure for loop analysis and optimizations */
+    struct LoopAnalysis *loopAnalysis;
 } CompilationUnit;
 
 BasicBlock *dvmCompilerNewBB(BBType blockType);
 
 void dvmCompilerAppendMIR(BasicBlock *bb, MIR *mir);
 
+void dvmCompilerPrependMIR(BasicBlock *bb, MIR *mir);
+
 void dvmCompilerAppendLIR(CompilationUnit *cUnit, LIR *lir);
 
 void dvmCompilerInsertLIRBefore(LIR *currentLIR, LIR *newLIR);
index 410213a..9a30b34 100644 (file)
@@ -19,7 +19,6 @@
 
 #include "Dalvik.h"
 #include "CompilerUtility.h"
-#include "CompilerIR.h"
 #include "codegen/CompilerCodegen.h"
 #include "interp/Jit.h"
 
index 7b4de11..c72a8da 100644 (file)
@@ -38,7 +38,13 @@ typedef struct GrowableList {
     void **elemList;
 } GrowableList;
 
+#define GET_ELEM_N(LIST, TYPE, N) (((TYPE*) LIST->elemList)[N])
+
 void dvmInitGrowableList(GrowableList *gList, size_t initLength);
 void dvmInsertGrowableList(GrowableList *gList, void *elem);
 
+BitVector* dvmCompilerAllocBitVector(int startBits, bool expandable);
+bool dvmCompilerSetBit(BitVector* pBits, int num);
+void dvmDebugBitVector(char *msg, const BitVector *bv, int length);
+
 #endif /* _DALVIK_COMPILER_UTILITY */
diff --git a/vm/compiler/Dataflow.c b/vm/compiler/Dataflow.c
new file mode 100644 (file)
index 0000000..9a2e5c0
--- /dev/null
@@ -0,0 +1,1394 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "Dataflow.h"
+#include "Loop.h"
+
+/*
+ * Main table containing data flow attributes for each bytecode. The first
+ * 256 entries are for Dalvik bytecode instructions, where extended opcode at
+ * the MIR level are appended afterwards.
+ *
+ * TODO - many optimization flags are incomplete - they will only limit the
+ * scope of optimizations but will not cause mis-optimizations.
+ */
+int dvmCompilerDataFlowAttributes[MIR_OP_LAST] = {
+    // 00 OP_NOP
+    DF_NOP,
+
+    // 01 OP_MOVE vA, vB
+    DF_DA | DF_UB | DF_IS_MOVE,
+
+    // 02 OP_MOVE_FROM16 vAA, vBBBB
+    DF_DA | DF_UB | DF_IS_MOVE,
+
+    // 03 OP_MOVE_16 vAAAA, vBBBB
+    DF_DA | DF_UB | DF_IS_MOVE,
+
+    // 04 OP_MOVE_WIDE vA, vB
+    DF_DA_WIDE | DF_UB_WIDE | DF_IS_MOVE,
+
+    // 05 OP_MOVE_WIDE_FROM16 vAA, vBBBB
+    DF_DA_WIDE | DF_UB_WIDE | DF_IS_MOVE,
+
+    // 06 OP_MOVE_WIDE_16 vAAAA, vBBBB
+    DF_DA_WIDE | DF_UB_WIDE | DF_IS_MOVE,
+
+    // 07 OP_MOVE_OBJECT vA, vB
+    DF_DA | DF_UB | DF_IS_MOVE,
+
+    // 08 OP_MOVE_OBJECT_FROM16 vAA, vBBBB
+    DF_DA | DF_UB | DF_IS_MOVE,
+
+    // 09 OP_MOVE_OBJECT_16 vAAAA, vBBBB
+    DF_DA | DF_UB | DF_IS_MOVE,
+
+    // 0A OP_MOVE_RESULT vAA
+    DF_DA,
+
+    // 0B OP_MOVE_RESULT_WIDE vAA
+    DF_DA_WIDE,
+
+    // 0C OP_MOVE_RESULT_OBJECT vAA
+    DF_DA,
+
+    // 0D OP_MOVE_EXCEPTION vAA
+    DF_DA,
+
+    // 0E OP_RETURN_VOID
+    DF_NOP,
+
+    // 0F OP_RETURN vAA
+    DF_UA,
+
+    // 10 OP_RETURN_WIDE vAA
+    DF_UA_WIDE,
+
+    // 11 OP_RETURN_OBJECT vAA
+    DF_UA,
+
+    // 12 OP_CONST_4 vA, #+B
+    DF_DA | DF_SETS_CONST,
+
+    // 13 OP_CONST_16 vAA, #+BBBB
+    DF_DA | DF_SETS_CONST,
+
+    // 14 OP_CONST vAA, #+BBBBBBBB
+    DF_DA | DF_SETS_CONST,
+
+    // 15 OP_CONST_HIGH16 VAA, #+BBBB0000
+    DF_DA | DF_SETS_CONST,
+
+    // 16 OP_CONST_WIDE_16 vAA, #+BBBB
+    DF_DA_WIDE | DF_SETS_CONST,
+
+    // 17 OP_CONST_WIDE_32 vAA, #+BBBBBBBB
+    DF_DA_WIDE | DF_SETS_CONST,
+
+    // 18 OP_CONST_WIDE vAA, #+BBBBBBBBBBBBBBBB
+    DF_DA_WIDE | DF_SETS_CONST,
+
+    // 19 OP_CONST_WIDE_HIGH16 vAA, #+BBBB000000000000
+    DF_DA_WIDE | DF_SETS_CONST,
+
+    // 1A OP_CONST_STRING vAA, string@BBBB
+    DF_DA,
+
+    // 1B OP_CONST_STRING_JUMBO vAA, string@BBBBBBBB
+    DF_DA,
+
+    // 1C OP_CONST_CLASS vAA, type@BBBB
+    DF_DA,
+
+    // 1D OP_MONITOR_ENTER vAA
+    DF_UA,
+
+    // 1E OP_MONITOR_EXIT vAA
+    DF_UA,
+
+    // 1F OP_CHECK_CAST vAA, type@BBBB
+    DF_UA,
+
+    // 20 OP_INSTANCE_OF vA, vB, type@CCCC
+    DF_DA | DF_UB,
+
+    // 21 OP_ARRAY_LENGTH vA, vB
+    DF_DA | DF_UB,
+
+    // 22 OP_NEW_INSTANCE vAA, type@BBBB
+    DF_DA,
+
+    // 23 OP_NEW_ARRAY vA, vB, type@CCCC
+    DF_DA | DF_UB,
+
+    // 24 OP_FILLED_NEW_ARRAY {vD, vE, vF, vG, vA}
+    DF_FORMAT_35C,
+
+    // 25 OP_FILLED_NEW_ARRAY_RANGE {vCCCC .. vNNNN}, type@BBBB
+    DF_FORMAT_3RC,
+
+    // 26 OP_FILL_ARRAY_DATA vAA, +BBBBBBBB
+    DF_UA,
+
+    // 27 OP_THROW vAA
+    DF_UA,
+
+    // 28 OP_GOTO
+    DF_NOP,
+
+    // 29 OP_GOTO_16
+    DF_NOP,
+
+    // 2A OP_GOTO_32
+    DF_NOP,
+
+    // 2B OP_PACKED_SWITCH vAA, +BBBBBBBB
+    DF_UA,
+
+    // 2C OP_SPARSE_SWITCH vAA, +BBBBBBBB
+    DF_UA,
+
+    // 2D OP_CMPL_FLOAT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 2E OP_CMPG_FLOAT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 2F OP_CMPL_DOUBLE vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // 30 OP_CMPG_DOUBLE vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // 31 OP_CMP_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // 32 OP_IF_EQ vA, vB, +CCCC
+    DF_UA | DF_UB,
+
+    // 33 OP_IF_NE vA, vB, +CCCC
+    DF_UA | DF_UB,
+
+    // 34 OP_IF_LT vA, vB, +CCCC
+    DF_UA | DF_UB,
+
+    // 35 OP_IF_GE vA, vB, +CCCC
+    DF_UA | DF_UB,
+
+    // 36 OP_IF_GT vA, vB, +CCCC
+    DF_UA | DF_UB,
+
+    // 37 OP_IF_LE vA, vB, +CCCC
+    DF_UA | DF_UB,
+
+
+    // 38 OP_IF_EQZ vAA, +BBBB
+    DF_UA,
+
+    // 39 OP_IF_NEZ vAA, +BBBB
+    DF_UA,
+
+    // 3A OP_IF_LTZ vAA, +BBBB
+    DF_UA,
+
+    // 3B OP_IF_GEZ vAA, +BBBB
+    DF_UA,
+
+    // 3C OP_IF_GTZ vAA, +BBBB
+    DF_UA,
+
+    // 3D OP_IF_LEZ vAA, +BBBB
+    DF_UA,
+
+    // 3E OP_UNUSED_3E
+    DF_NOP,
+
+    // 3F OP_UNUSED_3F
+    DF_NOP,
+
+    // 40 OP_UNUSED_40
+    DF_NOP,
+
+    // 41 OP_UNUSED_41
+    DF_NOP,
+
+    // 42 OP_UNUSED_42
+    DF_NOP,
+
+    // 43 OP_UNUSED_43
+    DF_NOP,
+
+    // 44 OP_AGET vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_0,
+
+    // 45 OP_AGET_WIDE vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_0,
+
+    // 46 OP_AGET_OBJECT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_0,
+
+    // 47 OP_AGET_BOOLEAN vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_0,
+
+    // 48 OP_AGET_BYTE vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_0,
+
+    // 49 OP_AGET_CHAR vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_0,
+
+    // 4A OP_AGET_SHORT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_0,
+
+    // 4B OP_APUT vAA, vBB, vCC
+    DF_UA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_1,
+
+    // 4C OP_APUT_WIDE vAA, vBB, vCC
+    DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_2,
+
+    // 4D OP_APUT_OBJECT vAA, vBB, vCC
+    DF_UA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_1,
+
+    // 4E OP_APUT_BOOLEAN vAA, vBB, vCC
+    DF_UA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_1,
+
+    // 4F OP_APUT_BYTE vAA, vBB, vCC
+    DF_UA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_1,
+
+    // 50 OP_APUT_CHAR vAA, vBB, vCC
+    DF_UA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_1,
+
+    // 51 OP_APUT_SHORT vAA, vBB, vCC
+    DF_UA | DF_UB | DF_UC | DF_NULL_N_RANGE_CHECK_1,
+
+    // 52 OP_IGET vA, vB, field@CCCC
+    DF_DA | DF_UB,
+
+    // 53 OP_IGET_WIDE vA, vB, field@CCCC
+    DF_DA_WIDE | DF_UB,
+
+    // 54 OP_IGET_OBJECT vA, vB, field@CCCC
+    DF_DA | DF_UB,
+
+    // 55 OP_IGET_BOOLEAN vA, vB, field@CCCC
+    DF_DA | DF_UB,
+
+    // 56 OP_IGET_BYTE vA, vB, field@CCCC
+    DF_DA | DF_UB,
+
+    // 57 OP_IGET_CHAR vA, vB, field@CCCC
+    DF_DA | DF_UB,
+
+    // 58 OP_IGET_SHORT vA, vB, field@CCCC
+    DF_DA | DF_UB,
+
+    // 59 OP_IPUT vA, vB, field@CCCC
+    DF_UA | DF_UB,
+
+    // 5A OP_IPUT_WIDE vA, vB, field@CCCC
+    DF_UA_WIDE | DF_UB,
+
+    // 5B OP_IPUT_OBJECT vA, vB, field@CCCC
+    DF_UA | DF_UB,
+
+    // 5C OP_IPUT_BOOLEAN vA, vB, field@CCCC
+    DF_UA | DF_UB,
+
+    // 5D OP_IPUT_BYTE vA, vB, field@CCCC
+    DF_UA | DF_UB,
+
+    // 5E OP_IPUT_CHAR vA, vB, field@CCCC
+    DF_UA | DF_UB,
+
+    // 5F OP_IPUT_SHORT vA, vB, field@CCCC
+    DF_UA | DF_UB,
+
+    // 60 OP_SGET vAA, field@BBBB
+    DF_DA,
+
+    // 61 OP_SGET_WIDE vAA, field@BBBB
+    DF_DA_WIDE,
+
+    // 62 OP_SGET_OBJECT vAA, field@BBBB
+    DF_DA,
+
+    // 63 OP_SGET_BOOLEAN vAA, field@BBBB
+    DF_DA,
+
+    // 64 OP_SGET_BYTE vAA, field@BBBB
+    DF_DA,
+
+    // 65 OP_SGET_CHAR vAA, field@BBBB
+    DF_DA,
+
+    // 66 OP_SGET_SHORT vAA, field@BBBB
+    DF_DA,
+
+    // 67 OP_SPUT vAA, field@BBBB
+    DF_UA,
+
+    // 68 OP_SPUT_WIDE vAA, field@BBBB
+    DF_UA_WIDE,
+
+    // 69 OP_SPUT_OBJECT vAA, field@BBBB
+    DF_UA,
+
+    // 6A OP_SPUT_BOOLEAN vAA, field@BBBB
+    DF_UA,
+
+    // 6B OP_SPUT_BYTE vAA, field@BBBB
+    DF_UA,
+
+    // 6C OP_SPUT_CHAR vAA, field@BBBB
+    DF_UA,
+
+    // 6D OP_SPUT_SHORT vAA, field@BBBB
+    DF_UA,
+
+    // 6E OP_INVOKE_VIRTUAL {vD, vE, vF, vG, vA}
+    DF_FORMAT_35C,
+
+    // 6F OP_INVOKE_SUPER {vD, vE, vF, vG, vA}
+    DF_FORMAT_35C,
+
+    // 70 OP_INVOKE_DIRECT {vD, vE, vF, vG, vA}
+    DF_FORMAT_35C,
+
+    // 71 OP_INVOKE_STATIC {vD, vE, vF, vG, vA}
+    DF_FORMAT_35C,
+
+    // 72 OP_INVOKE_INTERFACE {vD, vE, vF, vG, vA}
+    DF_FORMAT_35C,
+
+    // 73 OP_UNUSED_73
+    DF_NOP,
+
+    // 74 OP_INVOKE_VIRTUAL_RANGE {vCCCC .. vNNNN}
+    DF_FORMAT_3RC,
+
+    // 75 OP_INVOKE_SUPER_RANGE {vCCCC .. vNNNN}
+    DF_FORMAT_3RC,
+
+    // 76 OP_INVOKE_DIRECT_RANGE {vCCCC .. vNNNN}
+    DF_FORMAT_3RC,
+
+    // 77 OP_INVOKE_STATIC_RANGE {vCCCC .. vNNNN}
+    DF_FORMAT_3RC,
+
+    // 78 OP_INVOKE_INTERFACE_RANGE {vCCCC .. vNNNN}
+    DF_FORMAT_3RC,
+
+    // 79 OP_UNUSED_79
+    DF_NOP,
+
+    // 7A OP_UNUSED_7A
+    DF_NOP,
+
+    // 7B OP_NEG_INT vA, vB
+    DF_DA | DF_UB,
+
+    // 7C OP_NOT_INT vA, vB
+    DF_DA | DF_UB,
+
+    // 7D OP_NEG_LONG vA, vB
+    DF_DA_WIDE | DF_UB_WIDE,
+
+    // 7E OP_NOT_LONG vA, vB
+    DF_DA_WIDE | DF_UB_WIDE,
+
+    // 7F OP_NEG_FLOAT vA, vB
+    DF_DA | DF_UB,
+
+    // 80 OP_NEG_DOUBLE vA, vB
+    DF_DA_WIDE | DF_UB_WIDE,
+
+    // 81 OP_INT_TO_LONG vA, vB
+    DF_DA_WIDE | DF_UB,
+
+    // 82 OP_INT_TO_FLOAT vA, vB
+    DF_DA | DF_UB,
+
+    // 83 OP_INT_TO_DOUBLE vA, vB
+    DF_DA_WIDE | DF_UB,
+
+    // 84 OP_LONG_TO_INT vA, vB
+    DF_DA | DF_UB_WIDE,
+
+    // 85 OP_LONG_TO_FLOAT vA, vB
+    DF_DA | DF_UB_WIDE,
+
+    // 86 OP_LONG_TO_DOUBLE vA, vB
+    DF_DA_WIDE | DF_UB_WIDE,
+
+    // 87 OP_FLOAT_TO_INT vA, vB
+    DF_DA | DF_UB,
+
+    // 88 OP_FLOAT_TO_LONG vA, vB
+    DF_DA_WIDE | DF_UB,
+
+    // 89 OP_FLOAT_TO_DOUBLE vA, vB
+    DF_DA_WIDE | DF_UB,
+
+    // 8A OP_DOUBLE_TO_INT vA, vB
+    DF_DA | DF_UB_WIDE,
+
+    // 8B OP_DOUBLE_TO_LONG vA, vB
+    DF_DA_WIDE | DF_UB_WIDE,
+
+    // 8C OP_DOUBLE_TO_FLOAT vA, vB
+    DF_DA | DF_UB_WIDE,
+
+    // 8D OP_INT_TO_BYTE vA, vB
+    DF_DA | DF_UB,
+
+    // 8E OP_INT_TO_CHAR vA, vB
+    DF_DA | DF_UB,
+
+    // 8F OP_INT_TO_SHORT vA, vB
+    DF_DA | DF_UB,
+
+    // 90 OP_ADD_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC | DF_IS_LINEAR,
+
+    // 91 OP_SUB_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC | DF_IS_LINEAR,
+
+    // 92 OP_MUL_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 93 OP_DIV_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 94 OP_REM_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 95 OP_AND_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 96 OP_OR_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 97 OP_XOR_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 98 OP_SHL_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 99 OP_SHR_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 9A OP_USHR_INT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // 9B OP_ADD_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // 9C OP_SUB_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // 9D OP_MUL_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // 9E OP_DIV_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // 9F OP_REM_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // A0 OP_AND_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // A1 OP_OR_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // A2 OP_XOR_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // A3 OP_SHL_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // A4 OP_SHR_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // A5 OP_USHR_LONG vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // A6 OP_ADD_FLOAT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // A7 OP_SUB_FLOAT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // A8 OP_MUL_FLOAT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // A9 OP_DIV_FLOAT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // AA OP_REM_FLOAT vAA, vBB, vCC
+    DF_DA | DF_UB | DF_UC,
+
+    // AB OP_ADD_DOUBLE vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // AC OP_SUB_DOUBLE vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // AD OP_MUL_DOUBLE vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // AE OP_DIV_DOUBLE vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // AF OP_REM_DOUBLE vAA, vBB, vCC
+    DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE,
+
+    // B0 OP_ADD_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // B1 OP_SUB_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // B2 OP_MUL_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // B3 OP_DIV_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // B4 OP_REM_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // B5 OP_AND_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // B6 OP_OR_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // B7 OP_XOR_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // B8 OP_SHL_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // B9 OP_SHR_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // BA OP_USHR_INT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // BB OP_ADD_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // BC OP_SUB_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // BD OP_MUL_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // BE OP_DIV_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // BF OP_REM_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // C0 OP_AND_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // C1 OP_OR_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // C2 OP_XOR_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // C3 OP_SHL_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // C4 OP_SHR_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // C5 OP_USHR_LONG_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // C6 OP_ADD_FLOAT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // C7 OP_SUB_FLOAT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // C8 OP_MUL_FLOAT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // C9 OP_DIV_FLOAT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // CA OP_REM_FLOAT_2ADDR vA, vB
+    DF_DA | DF_UA | DF_UB,
+
+    // CB OP_ADD_DOUBLE_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // CC OP_SUB_DOUBLE_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // CD OP_MUL_DOUBLE_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // CE OP_DIV_DOUBLE_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // CF OP_REM_DOUBLE_2ADDR vA, vB
+    DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE,
+
+    // D0 OP_ADD_INT_LIT16 vA, vB, #+CCCC
+    DF_DA | DF_UB,
+
+    // D1 OP_RSUB_INT vA, vB, #+CCCC
+    DF_DA | DF_UB,
+
+    // D2 OP_MUL_INT_LIT16 vA, vB, #+CCCC
+    DF_DA | DF_UB,
+
+    // D3 OP_DIV_INT_LIT16 vA, vB, #+CCCC
+    DF_DA | DF_UB,
+
+    // D4 OP_REM_INT_LIT16 vA, vB, #+CCCC
+    DF_DA | DF_UB,
+
+    // D5 OP_AND_INT_LIT16 vA, vB, #+CCCC
+    DF_DA | DF_UB,
+
+    // D6 OP_OR_INT_LIT16 vA, vB, #+CCCC
+    DF_DA | DF_UB,
+
+    // D7 OP_XOR_INT_LIT16 vA, vB, #+CCCC
+    DF_DA | DF_UB,
+
+    // D8 OP_ADD_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB | DF_IS_LINEAR,
+
+    // D9 OP_RSUB_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // DA OP_MUL_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // DB OP_DIV_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // DC OP_REM_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // DD OP_AND_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // DE OP_OR_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // DF OP_XOR_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // E0 OP_SHL_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // E1 OP_SHR_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // E2 OP_USHR_INT_LIT8 vAA, vBB, #+CC
+    DF_DA | DF_UB,
+
+    // E3 OP_UNUSED_E3
+    DF_NOP,
+
+    // E4 OP_UNUSED_E4
+    DF_NOP,
+
+    // E5 OP_UNUSED_E5
+    DF_NOP,
+
+    // E6 OP_UNUSED_E6
+    DF_NOP,
+
+    // E7 OP_UNUSED_E7
+    DF_NOP,
+
+    // E8 OP_UNUSED_E8
+    DF_NOP,
+
+    // E9 OP_UNUSED_E9
+    DF_NOP,
+
+    // EA OP_UNUSED_EA
+    DF_NOP,
+
+    // EB OP_UNUSED_EB
+    DF_NOP,
+
+    // EC OP_UNUSED_EC
+    DF_NOP,
+
+    // ED OP_THROW_VERIFICATION_ERROR
+    DF_NOP,
+
+    // EE OP_EXECUTE_INLINE
+    DF_FORMAT_35C,
+
+    // EF OP_UNUSED_EF
+    DF_NOP,
+
+    // F0 OP_INVOKE_DIRECT_EMPTY
+    DF_NOP,
+
+    // F1 OP_UNUSED_F1
+    DF_NOP,
+
+    // F2 OP_IGET_QUICK
+    DF_DA | DF_UB,
+
+    // F3 OP_IGET_WIDE_QUICK
+    DF_DA_WIDE | DF_UB,
+
+    // F4 OP_IGET_OBJECT_QUICK
+    DF_DA | DF_UB,
+
+    // F5 OP_IPUT_QUICK
+    DF_UA | DF_UB,
+
+    // F6 OP_IPUT_WIDE_QUICK
+    DF_UA_WIDE | DF_UB,
+
+    // F7 OP_IPUT_OBJECT_QUICK
+    DF_UA | DF_UB,
+
+    // F8 OP_INVOKE_VIRTUAL_QUICK
+    DF_FORMAT_35C,
+
+    // F9 OP_INVOKE_VIRTUAL_QUICK_RANGE
+    DF_FORMAT_3RC,
+
+    // FA OP_INVOKE_SUPER_QUICK
+    DF_FORMAT_35C,
+
+    // FB OP_INVOKE_SUPER_QUICK_RANGE
+    DF_FORMAT_3RC,
+
+    // FC OP_UNUSED_FC
+    DF_NOP,
+
+    // FD OP_UNUSED_FD
+    DF_NOP,
+
+    // FE OP_UNUSED_FE
+    DF_NOP,
+
+    // FF OP_UNUSED_FF
+    DF_NOP,
+
+    // Beginning of extended MIR opcodes
+    // 100 OP_MIR_PHI
+    DF_PHI | DF_DA,
+
+    /*
+     * For extended MIR inserted at the MIR2LIR stage, it is okay to have
+     * undefined values here.
+     */
+};
+
+/* Return the Dalvik register/subscript pair of a given SSA register */
+int dvmConvertSSARegToDalvik(CompilationUnit *cUnit, int ssaReg)
+{
+      return GET_ELEM_N(cUnit->ssaToDalvikMap, int, ssaReg);
+}
+
+/*
+ * Utility function to convert encoded SSA register value into Dalvik register
+ * and subscript pair. Each SSA register can be used to index the
+ * ssaToDalvikMap list to get the subscript[31..16]/dalvik_reg[15..0] mapping.
+ */
+char *dvmCompilerGetSSAString(CompilationUnit *cUnit, SSARepresentation *ssaRep)
+{
+    char buffer[256];
+    char *ret;
+    int i;
+
+    buffer[0] = 0;
+    for (i = 0; i < ssaRep->numDefs; i++) {
+        int ssa2DalvikValue = dvmConvertSSARegToDalvik(cUnit, ssaRep->defs[i]);
+
+        sprintf(buffer + strlen(buffer), "s%d(v%d_%d) ",
+                ssaRep->defs[i], DECODE_REG(ssa2DalvikValue),
+                DECODE_SUB(ssa2DalvikValue));
+    }
+
+    if (ssaRep->numDefs) {
+        strcat(buffer, "<- ");
+    }
+
+    for (i = 0; i < ssaRep->numUses; i++) {
+        int ssa2DalvikValue = dvmConvertSSARegToDalvik(cUnit, ssaRep->uses[i]);
+        int len = strlen(buffer);
+
+        if (snprintf(buffer + len, 250 - len, "s%d(v%d_%d) ",
+                     ssaRep->uses[i], DECODE_REG(ssa2DalvikValue),
+                     DECODE_SUB(ssa2DalvikValue)) >= (250 - len)) {
+            strcat(buffer, "...");
+            break;
+        }
+    }
+
+    int length = strlen(buffer) + 1;
+    ret = dvmCompilerNew(length, false);
+    memcpy(ret, buffer, length);
+    return ret;
+}
+
+/* Any register that is used before being defined is considered live-in */
+static inline void handleLiveInUse(BitVector *useV, BitVector *defV,
+                                   BitVector *liveInV, int dalvikRegId)
+{
+    dvmCompilerSetBit(useV, dalvikRegId);
+    if (!dvmIsBitSet(defV, dalvikRegId)) {
+        dvmCompilerSetBit(liveInV, dalvikRegId);
+    }
+}
+
+/* Mark a reg as being defined */
+static inline void handleLiveInDef(BitVector *defV, int dalvikRegId)
+{
+    dvmCompilerSetBit(defV, dalvikRegId);
+}
+
+/*
+ * Find out live-in variables for natural loops. Variables that are live-in in
+ * the main loop body are considered to be defined in the entry block.
+ */
+void dvmCompilerFindLiveIn(CompilationUnit *cUnit, BasicBlock *bb)
+{
+    MIR *mir;
+    BitVector *useV, *defV, *liveInV;
+
+    if (bb->blockType != DALVIK_BYTECODE &&
+        bb->blockType != ENTRY_BLOCK) {
+        return;
+    }
+
+    useV = bb->dataFlowInfo->useV =
+        dvmCompilerAllocBitVector(cUnit->method->registersSize, false);
+    defV = bb->dataFlowInfo->defV =
+        dvmCompilerAllocBitVector(cUnit->method->registersSize, false);
+    liveInV = bb->dataFlowInfo->liveInV =
+        dvmCompilerAllocBitVector(cUnit->method->registersSize, false);
+
+    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+        int dfAttributes =
+            dvmCompilerDataFlowAttributes[mir->dalvikInsn.opCode];
+        DecodedInstruction *dInsn = &mir->dalvikInsn;
+
+        if (dfAttributes & DF_HAS_USES) {
+            if (dfAttributes & DF_UA) {
+                handleLiveInUse(useV, defV, liveInV, dInsn->vA);
+            } else if (dfAttributes & DF_UA_WIDE) {
+                handleLiveInUse(useV, defV, liveInV, dInsn->vA);
+                handleLiveInUse(useV, defV, liveInV, dInsn->vA+1);
+            }
+            if (dfAttributes & DF_UB) {
+                handleLiveInUse(useV, defV, liveInV, dInsn->vB);
+            } else if (dfAttributes & DF_UB_WIDE) {
+                handleLiveInUse(useV, defV, liveInV, dInsn->vB);
+                handleLiveInUse(useV, defV, liveInV, dInsn->vB+1);
+            }
+            if (dfAttributes & DF_UC) {
+                handleLiveInUse(useV, defV, liveInV, dInsn->vC);
+            } else if (dfAttributes & DF_UC_WIDE) {
+                handleLiveInUse(useV, defV, liveInV, dInsn->vC);
+                handleLiveInUse(useV, defV, liveInV, dInsn->vC+1);
+            }
+        }
+        if (dfAttributes & DF_HAS_DEFS) {
+            handleLiveInDef(defV, dInsn->vA);
+            if (dfAttributes & DF_DA_WIDE) {
+                handleLiveInDef(defV, dInsn->vA+1);
+            }
+        }
+    }
+}
+
+/* Find out the latest SSA register for a given Dalvik register */
+static void handleSSAUse(CompilationUnit *cUnit, int *uses, int dalvikReg,
+                         int regIndex)
+{
+    int encodedValue = cUnit->dalvikToSSAMap[dalvikReg];
+    int ssaReg = DECODE_REG(encodedValue);
+    uses[regIndex] = ssaReg;
+}
+
+/* Setup a new SSA register for a given Dalvik register */
+static void handleSSADef(CompilationUnit *cUnit, int *defs, int dalvikReg,
+                         int regIndex)
+{
+    int encodedValue = cUnit->dalvikToSSAMap[dalvikReg];
+    int ssaReg = cUnit->numSSARegs++;
+    /* Bump up the subscript */
+    int dalvikSub = DECODE_SUB(encodedValue) + 1;
+    int newD2SMapping = ENCODE_REG_SUB(ssaReg, dalvikSub);
+
+    cUnit->dalvikToSSAMap[dalvikReg] = newD2SMapping;
+
+    int newS2DMapping = ENCODE_REG_SUB(dalvikReg, dalvikSub);
+    dvmInsertGrowableList(cUnit->ssaToDalvikMap, (void *) newS2DMapping);
+
+    defs[regIndex] = ssaReg;
+}
+
+/* Loop up new SSA names for format_35c instructions */
+static void dataFlowSSAFormat35C(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    int numUses = dInsn->vA;
+    int i;
+
+    mir->ssaRep->numUses = numUses;
+    mir->ssaRep->uses = dvmCompilerNew(sizeof(int) * numUses, false);
+
+    for (i = 0; i < numUses; i++) {
+        handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->arg[i], i);
+    }
+}
+
+/* Loop up new SSA names for format_3rc instructions */
+static void dataFlowSSAFormat3RC(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    int numUses = dInsn->vA;
+    int i;
+
+    mir->ssaRep->numUses = numUses;
+    mir->ssaRep->uses = dvmCompilerNew(sizeof(int) * numUses, false);
+
+    for (i = 0; i < numUses; i++) {
+        handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC+i, i);
+    }
+}
+
+/* Entry function to convert a block into SSA representation */
+void dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb)
+{
+    MIR *mir;
+
+    if (bb->blockType != DALVIK_BYTECODE && bb->blockType != ENTRY_BLOCK) {
+        return;
+    }
+
+    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+        mir->ssaRep = dvmCompilerNew(sizeof(SSARepresentation), true);
+
+        int dfAttributes =
+            dvmCompilerDataFlowAttributes[mir->dalvikInsn.opCode];
+
+        int numUses = 0;
+
+        if (dfAttributes & DF_FORMAT_35C) {
+            dataFlowSSAFormat35C(cUnit, mir);
+            continue;
+        }
+
+        if (dfAttributes & DF_FORMAT_3RC) {
+            dataFlowSSAFormat3RC(cUnit, mir);
+            continue;
+        }
+
+        if (dfAttributes & DF_HAS_USES) {
+            if (dfAttributes & DF_UA) {
+                numUses++;
+            } else if (dfAttributes & DF_UA_WIDE) {
+                numUses += 2;
+            }
+            if (dfAttributes & DF_UB) {
+                numUses++;
+            } else if (dfAttributes & DF_UB_WIDE) {
+                numUses += 2;
+            }
+            if (dfAttributes & DF_UC) {
+                numUses++;
+            } else if (dfAttributes & DF_UC_WIDE) {
+                numUses += 2;
+            }
+        }
+
+        if (numUses) {
+            mir->ssaRep->numUses = numUses;
+            mir->ssaRep->uses = dvmCompilerNew(sizeof(int) * numUses, false);
+        }
+
+        int numDefs = 0;
+
+        if (dfAttributes & DF_HAS_DEFS) {
+            numDefs++;
+            if (dfAttributes & DF_DA_WIDE) {
+                numDefs++;
+            }
+        }
+
+        if (numDefs) {
+            mir->ssaRep->numDefs = numDefs;
+            mir->ssaRep->defs = dvmCompilerNew(sizeof(int) * numDefs, false);
+        }
+
+        DecodedInstruction *dInsn = &mir->dalvikInsn;
+
+        if (dfAttributes & DF_HAS_USES) {
+            numUses = 0;
+            if (dfAttributes & DF_UA) {
+                handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vA, numUses++);
+            } else if (dfAttributes & DF_UA_WIDE) {
+                handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vA, numUses++);
+                handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vA+1, numUses++);
+            }
+            if (dfAttributes & DF_UB) {
+                handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vB, numUses++);
+            } else if (dfAttributes & DF_UB_WIDE) {
+                handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vB, numUses++);
+                handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vB+1, numUses++);
+            }
+            if (dfAttributes & DF_UC) {
+                handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC, numUses++);
+            } else if (dfAttributes & DF_UC_WIDE) {
+                handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC, numUses++);
+                handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC+1, numUses++);
+            }
+        }
+        if (dfAttributes & DF_HAS_DEFS) {
+            handleSSADef(cUnit, mir->ssaRep->defs, dInsn->vA, 0);
+            if (dfAttributes & DF_DA_WIDE) {
+                handleSSADef(cUnit, mir->ssaRep->defs, dInsn->vA+1, 1);
+            }
+        }
+    }
+
+    bb->dataFlowInfo->dalvikToSSAMap =
+        dvmCompilerNew(sizeof(int) * cUnit->method->registersSize, false);
+
+    /* Take a snapshot of Dalvik->SSA mapping at the end of each block */
+    memcpy(bb->dataFlowInfo->dalvikToSSAMap, cUnit->dalvikToSSAMap,
+           sizeof(int) * cUnit->method->registersSize);
+}
+
+/* Setup a constant value for opcodes thare have the DF_SETS_CONST attribute */
+static void setConstant(CompilationUnit *cUnit, int ssaReg, int value)
+{
+    dvmSetBit(cUnit->isConstantV, ssaReg);
+    cUnit->constantValues[ssaReg] = value;
+}
+
+void dvmCompilerDoConstantPropagation(CompilationUnit *cUnit, BasicBlock *bb)
+{
+    MIR *mir;
+    BitVector *isConstantV = cUnit->isConstantV;
+
+    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+        int dfAttributes =
+            dvmCompilerDataFlowAttributes[mir->dalvikInsn.opCode];
+
+        int numUses = 0;
+
+        DecodedInstruction *dInsn = &mir->dalvikInsn;
+
+        if (!(dfAttributes & DF_HAS_DEFS)) continue;
+
+        /* Handle instructions that set up constants directly */
+        if (dfAttributes & DF_SETS_CONST) {
+            if (dfAttributes & DF_DA) {
+                switch (dInsn->opCode) {
+                    case OP_CONST_4:
+                    case OP_CONST_16:
+                    case OP_CONST:
+                        setConstant(cUnit, mir->ssaRep->defs[0], dInsn->vB);
+                        break;
+                    case OP_CONST_HIGH16:
+                        setConstant(cUnit, mir->ssaRep->defs[0],
+                                    dInsn->vB << 16);
+                        break;
+                    default:
+                        break;
+                }
+            } else if (dfAttributes & DF_DA_WIDE) {
+                switch (dInsn->opCode) {
+                    case OP_CONST_WIDE_16:
+                    case OP_CONST_WIDE_32:
+                        setConstant(cUnit, mir->ssaRep->defs[0], dInsn->vB);
+                        setConstant(cUnit, mir->ssaRep->defs[1], 0);
+                        break;
+                    case OP_CONST_WIDE:
+                        setConstant(cUnit, mir->ssaRep->defs[0],
+                                    (int) dInsn->vB_wide);
+                        setConstant(cUnit, mir->ssaRep->defs[1],
+                                    (int) (dInsn->vB_wide >> 32));
+                        break;
+                    case OP_CONST_WIDE_HIGH16:
+                        setConstant(cUnit, mir->ssaRep->defs[0], 0);
+                        setConstant(cUnit, mir->ssaRep->defs[1],
+                                    dInsn->vB << 16);
+                        break;
+                    default:
+                        break;
+                }
+            }
+        /* Handle instructions that set up constants directly */
+        } else if (dfAttributes & DF_IS_MOVE) {
+            int i;
+
+            for (i = 0; i < mir->ssaRep->numUses; i++) {
+                if (!dvmIsBitSet(isConstantV, mir->ssaRep->uses[i])) break;
+            }
+            /* Move a register holding a constant to another register */
+            if (i == mir->ssaRep->numUses) {
+                setConstant(cUnit, mir->ssaRep->defs[0],
+                            cUnit->constantValues[mir->ssaRep->uses[0]]);
+                if (dfAttributes & DF_DA_WIDE) {
+                    setConstant(cUnit, mir->ssaRep->defs[1],
+                                cUnit->constantValues[mir->ssaRep->uses[1]]);
+                }
+            }
+        }
+    }
+    /* TODO: implement code to handle arithmetic operations */
+}
+
+void dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit,
+                                       struct BasicBlock *bb)
+{
+    BitVector *isIndVarV = cUnit->loopAnalysis->isIndVarV;
+    BitVector *isConstantV = cUnit->isConstantV;
+    GrowableList *ivList = cUnit->loopAnalysis->ivList;
+    MIR *mir;
+
+    if (bb->blockType != DALVIK_BYTECODE &&
+        bb->blockType != ENTRY_BLOCK) {
+        return;
+    }
+
+    /* If the bb doesn't have a phi it cannot contain an induction variable */
+    if (bb->firstMIRInsn == NULL ||
+        bb->firstMIRInsn->dalvikInsn.opCode != MIR_OP_PHI) {
+        return;
+    }
+
+    /* Find basic induction variable first */
+    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+        int dfAttributes =
+            dvmCompilerDataFlowAttributes[mir->dalvikInsn.opCode];
+
+        if (!(dfAttributes & DF_IS_LINEAR)) continue;
+
+        /*
+         * For a basic induction variable:
+         *   1) use[0] should belong to the output of a phi node
+         *   2) def[0] should belong to the input of the same phi node
+         *   3) the value added/subtracted is a constant
+         */
+        MIR *phi;
+        for (phi = bb->firstMIRInsn; phi; phi = phi->next) {
+            if (phi->dalvikInsn.opCode != MIR_OP_PHI) break;
+
+            if (phi->ssaRep->defs[0] == mir->ssaRep->uses[0] &&
+                phi->ssaRep->uses[1] == mir->ssaRep->defs[0]) {
+                bool deltaIsConstant = false;
+                int deltaValue;
+
+                switch (mir->dalvikInsn.opCode) {
+                    case OP_ADD_INT:
+                        if (dvmIsBitSet(isConstantV,
+                                        mir->ssaRep->uses[1])) {
+                            deltaValue =
+                                cUnit->constantValues[mir->ssaRep->uses[1]];
+                            deltaIsConstant = true;
+                        }
+                        break;
+                    case OP_SUB_INT:
+                        if (dvmIsBitSet(isConstantV,
+                                        mir->ssaRep->uses[1])) {
+                            deltaValue =
+                                -cUnit->constantValues[mir->ssaRep->uses[1]];
+                            deltaIsConstant = true;
+                        }
+                        break;
+                    case OP_ADD_INT_LIT8:
+                        deltaValue = mir->dalvikInsn.vC;
+                        deltaIsConstant = true;
+                        break;
+                    default:
+                        break;
+                }
+                if (deltaIsConstant) {
+                    dvmSetBit(isIndVarV, mir->ssaRep->uses[0]);
+                    InductionVariableInfo *ivInfo =
+                        dvmCompilerNew(sizeof(InductionVariableInfo),
+                                       false);
+
+                    ivInfo->ssaReg = mir->ssaRep->uses[0];
+                    ivInfo->basicSSAReg = mir->ssaRep->uses[0];
+                    ivInfo->m = 1;         // always 1 to basic iv
+                    ivInfo->c = 0;         // N/A to basic iv
+                    ivInfo->inc = deltaValue;
+                    dvmInsertGrowableList(ivList, (void *) ivInfo);
+                    cUnit->loopAnalysis->numBasicIV++;
+                    break;
+                }
+            }
+        }
+    }
+
+    /* Find dependent induction variable now */
+    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+        int dfAttributes =
+            dvmCompilerDataFlowAttributes[mir->dalvikInsn.opCode];
+
+        if (!(dfAttributes & DF_IS_LINEAR)) continue;
+
+        /* Skip already identified induction variables */
+        if (dvmIsBitSet(isIndVarV, mir->ssaRep->defs[0])) continue;
+
+        /*
+         * For a dependent induction variable:
+         *  1) use[0] should be an induction variable (basic/dependent)
+         *  2) operand2 should be a constant
+         */
+        if (dvmIsBitSet(isIndVarV, mir->ssaRep->uses[0])) {
+            int srcDalvikReg = dvmConvertSSARegToDalvik(cUnit,
+                                                        mir->ssaRep->uses[0]);
+            int dstDalvikReg = dvmConvertSSARegToDalvik(cUnit,
+                                                        mir->ssaRep->defs[0]);
+
+            bool cIsConstant = false;
+            int c = 0;
+
+            switch (mir->dalvikInsn.opCode) {
+                case OP_ADD_INT:
+                    if (dvmIsBitSet(isConstantV,
+                                    mir->ssaRep->uses[1])) {
+                        c = cUnit->constantValues[mir->ssaRep->uses[1]];
+                        cIsConstant = true;
+                    }
+                    break;
+                case OP_SUB_INT:
+                    if (dvmIsBitSet(isConstantV,
+                                    mir->ssaRep->uses[1])) {
+                        c = -cUnit->constantValues[mir->ssaRep->uses[1]];
+                        cIsConstant = true;
+                    }
+                    break;
+                case OP_ADD_INT_LIT8:
+                    c = mir->dalvikInsn.vC;
+                    cIsConstant = true;
+                    break;
+                default:
+                    break;
+            }
+
+            /* Ignore the update to the basic induction variable itself */
+            if (DECODE_REG(srcDalvikReg) == DECODE_REG(dstDalvikReg))  {
+                cUnit->loopAnalysis->ssaBIV = mir->ssaRep->defs[0];
+                cIsConstant = false;
+            }
+
+            if (cIsConstant) {
+                unsigned int i;
+                dvmSetBit(isIndVarV, mir->ssaRep->defs[0]);
+                InductionVariableInfo *ivInfo =
+                    dvmCompilerNew(sizeof(InductionVariableInfo),
+                                   false);
+                InductionVariableInfo *ivInfoOld = NULL ;
+
+                for (i = 0; i < ivList->numUsed; i++) {
+                    ivInfoOld = ivList->elemList[i];
+                    if (ivInfoOld->ssaReg == mir->ssaRep->uses[0]) break;
+                }
+
+                /* Guaranteed to find an element */
+                assert(i < ivList->numUsed);
+
+                ivInfo->ssaReg = mir->ssaRep->defs[0];
+                ivInfo->basicSSAReg = ivInfoOld->basicSSAReg;
+                ivInfo->m = ivInfoOld->m;
+                ivInfo->c = c + ivInfoOld->c;
+                ivInfo->inc = ivInfoOld->inc;
+                dvmInsertGrowableList(ivList, (void *) ivInfo);
+            }
+        }
+    }
+}
+
+/* Setup the basic data structures for SSA conversion */
+void dvmInitializeSSAConversion(CompilationUnit *cUnit)
+{
+    int i;
+    int numDalvikReg = cUnit->method->registersSize;
+
+    cUnit->ssaToDalvikMap = dvmCompilerNew(sizeof(GrowableList), false);
+    dvmInitGrowableList(cUnit->ssaToDalvikMap, numDalvikReg);
+
+    /*
+     * Initial number of SSA registers is equal to the number of Dalvik
+     * registers.
+     */
+    cUnit->numSSARegs = numDalvikReg;
+
+    /*
+     * Initialize the SSA2Dalvik map list. For the first numDalvikReg elements,
+     * the subscript is 0 so we use the ENCODE_REG_SUB macro to encode the value
+     * into "(0 << 16) | i"
+     */
+    for (i = 0; i < numDalvikReg; i++) {
+        dvmInsertGrowableList(cUnit->ssaToDalvikMap,
+                              (void *) ENCODE_REG_SUB(i, 0));
+    }
+
+    /*
+     * Initialize the DalvikToSSAMap map. The low 16 bit is the SSA register id,
+     * while the high 16 bit is the current subscript. The original Dalvik
+     * register N is mapped to SSA register N with subscript 0.
+     */
+    cUnit->dalvikToSSAMap = dvmCompilerNew(sizeof(int) * numDalvikReg, false);
+    for (i = 0; i < numDalvikReg; i++) {
+        cUnit->dalvikToSSAMap[i] = i;
+    }
+
+    /*
+     * Allocate the BasicBlockDataFlow structure for the entry and code blocks
+     */
+    for (i = 0; i < cUnit->numBlocks; i++) {
+        BasicBlock *bb = cUnit->blockList[i];
+        if (bb->blockType == DALVIK_BYTECODE ||
+            bb->blockType == ENTRY_BLOCK) {
+            bb->dataFlowInfo = dvmCompilerNew(sizeof(BasicBlockDataFlow), true);
+        }
+    }
+}
+
+void dvmCompilerDataFlowAnalysisDispatcher(CompilationUnit *cUnit,
+                void (*func)(CompilationUnit *, BasicBlock *))
+{
+    int i;
+    for (i = 0; i < cUnit->numBlocks; i++) {
+        BasicBlock *bb = cUnit->blockList[i];
+        (*func)(cUnit, bb);
+    }
+}
+
+/* Main entry point to do SSA conversion for non-loop traces */
+void dvmCompilerNonLoopAnalysis(CompilationUnit *cUnit)
+{
+    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerDoSSAConversion);
+}
diff --git a/vm/compiler/Dataflow.h b/vm/compiler/Dataflow.h
new file mode 100644 (file)
index 0000000..e4ae93d
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_DATAFLOW
+#define _DALVIK_VM_DATAFLOW
+
+#include "Dalvik.h"
+#include "CompilerInternals.h"
+
+typedef enum DataFlowAttributePos {
+    kUA = 0,
+    kUB,
+    kUC,
+    kUAWide,
+    kUBWide,
+    kUCWide,
+    kDA,
+    kDAWide,
+    kIsMove,
+    kIsLinear,
+    kSetsConst,
+    kFormat35c,
+    kFormat3rc,
+    kPhi,
+    kNullNRangeCheck0,
+    kNullNRangeCheck1,
+    kNullNRangeCheck2,
+} DataFlowAttributes;
+
+#define DF_NOP                  0
+#define DF_UA                   (1 << kUA)
+#define DF_UB                   (1 << kUB)
+#define DF_UC                   (1 << kUC)
+#define DF_UA_WIDE              (1 << kUAWide)
+#define DF_UB_WIDE              (1 << kUBWide)
+#define DF_UC_WIDE              (1 << kUCWide)
+#define DF_DA                   (1 << kDA)
+#define DF_DA_WIDE              (1 << kDAWide)
+#define DF_IS_MOVE              (1 << kIsMove)
+#define DF_IS_LINEAR            (1 << kIsLinear)
+#define DF_SETS_CONST           (1 << kSetsConst)
+#define DF_FORMAT_35C           (1 << kFormat35c)
+#define DF_FORMAT_3RC           (1 << kFormat3rc)
+#define DF_PHI                  (1 << kPhi)
+#define DF_NULL_N_RANGE_CHECK_0 (1 << kNullNRangeCheck0)
+#define DF_NULL_N_RANGE_CHECK_1 (1 << kNullNRangeCheck1)
+#define DF_NULL_N_RANGE_CHECK_2 (1 << kNullNRangeCheck2)
+
+#define DF_HAS_USES             (DF_UA | DF_UB | DF_UC | DF_UA_WIDE | \
+                                 DF_UB_WIDE | DF_UC_WIDE)
+
+#define DF_HAS_DEFS             (DF_DA | DF_DA_WIDE)
+
+#define DF_HAS_NR_CHECKS        (DF_NULL_N_RANGE_CHECK_0 | \
+                                 DF_NULL_N_RANGE_CHECK_1 | \
+                                 DF_NULL_N_RANGE_CHECK_2)
+
+extern int dvmCompilerDataFlowAttributes[MIR_OP_LAST];
+
+typedef struct BasicBlockDataFlow {
+    BitVector *useV;
+    BitVector *defV;
+    BitVector *liveInV;
+    BitVector *phiV;
+    int *dalvikToSSAMap;
+} BasicBlockDataFlow;
+
+typedef struct SSARepresentation {
+    int numUses;
+    int *uses;
+    int numDefs;
+    int *defs;
+} SSARepresentation;
+
+typedef struct InductionVariableInfo {
+    int ssaReg;
+    int basicSSAReg;
+    int m;
+    int c;
+    int inc;
+} InductionVariableInfo;
+
+typedef struct ArrayAccessInfo {
+    int arrayReg;
+    int ivReg;
+    int maxC;                   // For DIV - will affect upper bound checking
+    int minC;                   // For DIV - will affect lower bound checking
+} ArrayAccessInfo;
+
+#define ENCODE_REG_SUB(r,s)             ((s<<16) | r)
+#define DECODE_REG(v)                   (v & 0xffff)
+#define DECODE_SUB(v)                   (((unsigned int) v) >> 16)
+
+#endif /* _DALVIK_VM_DATAFLOW */
index 6ed32ca..ec3a98d 100644 (file)
@@ -262,7 +262,7 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
     methodStats = analyzeMethodBody(desc->method);
 
     cUnit.registerScoreboard.nullCheckedRegs =
-        dvmAllocBitVector(desc->method->registersSize, false);
+        dvmCompilerAllocBitVector(desc->method->registersSize, false);
 
     /* Initialize the printMe flag */
     cUnit.printMe = gDvmJit.printMe;
@@ -333,11 +333,20 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
         }
     }
 
-    /* Allocate the first basic block */
-    lastBB = startBB = curBB = dvmCompilerNewBB(DALVIK_BYTECODE);
+    /* Allocate the entry block */
+    lastBB = startBB = curBB = dvmCompilerNewBB(ENTRY_BLOCK);
     curBB->startOffset = curOffset;
     curBB->id = numBlocks++;
 
+    curBB = dvmCompilerNewBB(DALVIK_BYTECODE);
+    curBB->startOffset = curOffset;
+    curBB->id = numBlocks++;
+
+    /* Make the first real dalvik block the fallthrough of the entry block */
+    startBB->fallThrough = curBB;
+    lastBB->next = curBB;
+    lastBB = curBB;
+
     if (cUnit.printMe) {
         LOGD("--------\nCompiler: Building trace for %s, offset 0x%x\n",
              desc->method->name, curOffset);
@@ -350,7 +359,7 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
     while (1) {
         MIR *insn;
         int width;
-        insn = dvmCompilerNew(sizeof(MIR),false);
+        insn = dvmCompilerNew(sizeof(MIR), true);
         insn->offset = curOffset;
         width = parseInsn(codePtr, &insn->dalvikInsn, cUnit.printMe);
 
@@ -394,9 +403,9 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
      */
     for (curBB = startBB; curBB; curBB = curBB->next) {
         MIR *lastInsn = curBB->lastMIRInsn;
-        /* Hit a pseudo block - exit the search now */
+        /* Skip empty blocks */
         if (lastInsn == NULL) {
-            break;
+            continue;
         }
         curOffset = lastInsn->offset;
         unsigned int targetOffset = curOffset;
@@ -437,6 +446,36 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
             (lastInsn->dalvikInsn.opCode == OP_INVOKE_DIRECT_EMPTY);
 
 
+        if (curBB->taken == NULL &&
+            curBB->fallThrough == NULL &&
+            flags == (kInstrCanBranch | kInstrCanContinue) &&
+            fallThroughOffset == startBB->startOffset) {
+            BasicBlock *newBB;
+
+            if (cUnit.printMe) {
+                LOGD("Natural loop detected!");
+            }
+            newBB = dvmCompilerNewBB(EXIT_BLOCK);
+            newBB->startOffset = targetOffset;
+            newBB->id = numBlocks++;
+            newBB->needFallThroughBranch = true;
+
+            lastBB->next = newBB;
+            lastBB = newBB;
+            curBB->taken = newBB;
+            curBB->fallThrough = startBB->next;
+
+            /* Create the chaining cell */
+            newBB = dvmCompilerNewBB(CHAINING_CELL_NORMAL);
+            newBB->startOffset = targetOffset;
+            newBB->id = numBlocks++;
+
+            lastBB->fallThrough = newBB;
+            lastBB->next = newBB;
+            lastBB = newBB;
+            cUnit.hasLoop = true;
+        }
+
         /* Target block not included in the trace */
         if (curBB->taken == NULL &&
             (isInvoke || (targetOffset != curOffset))) {
@@ -537,6 +576,16 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
     /* Make sure all blocks are added to the cUnit */
     assert(curBB == NULL);
 
+    /* Preparation for SSA conversion */
+    dvmInitializeSSAConversion(&cUnit);
+
+    if (cUnit.hasLoop) {
+        dvmCompilerLoopOpt(&cUnit);
+    }
+    else {
+        dvmCompilerNonLoopAnalysis(&cUnit);
+    }
+
     if (cUnit.printMe) {
         dvmCompilerDumpCompilationUnit(&cUnit);
     }
@@ -564,11 +613,8 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
     /* Reset the compiler resource pool */
     dvmCompilerArenaReset();
 
-    /* Free the bit vector tracking null-checked registers */
-    dvmFreeBitVector(cUnit.registerScoreboard.nullCheckedRegs);
-
-    if (!cUnit.halveInstCount) {
     /* Success */
+    if (!cUnit.halveInstCount) {
         methodStats->nativeSize += cUnit.totalSize;
         return info->codeAddress != NULL;
 
@@ -597,15 +643,16 @@ bool dvmCompileMethod(const Method *method, JitTranslationInfo *info)
     firstBlock->id = blockID++;
 
     /* Allocate the bit-vector to track the beginning of basic blocks */
-    BitVector *bbStartAddr = dvmAllocBitVector(dexCode->insnsSize+1, false);
-    dvmSetBit(bbStartAddr, 0);
+    BitVector *bbStartAddr = dvmCompilerAllocBitVector(dexCode->insnsSize+1,
+                                                       false);
+    dvmCompilerSetBit(bbStartAddr, 0);
 
     /*
      * Sequentially go through every instruction first and put them in a single
      * basic block. Identify block boundaries at the mean time.
      */
     while (codePtr < codeEnd) {
-        MIR *insn = dvmCompilerNew(sizeof(MIR), false);
+        MIR *insn = dvmCompilerNew(sizeof(MIR), true);
         insn->offset = curOffset;
         int width = parseInsn(codePtr, &insn->dalvikInsn, false);
         bool isInvoke = false;
@@ -629,9 +676,9 @@ bool dvmCompileMethod(const Method *method, JitTranslationInfo *info)
          */
         if (findBlockBoundary(method, insn, curOffset, &target, &isInvoke,
                               &callee)) {
-            dvmSetBit(bbStartAddr, curOffset + width);
+            dvmCompilerSetBit(bbStartAddr, curOffset + width);
             if (target != curOffset) {
-                dvmSetBit(bbStartAddr, target);
+                dvmCompilerSetBit(bbStartAddr, target);
             }
         }
 
@@ -712,8 +759,6 @@ bool dvmCompileMethod(const Method *method, JitTranslationInfo *info)
         dvmAbort();
     }
 
-    dvmFreeBitVector(bbStartAddr);
-
     /* Connect the basic blocks through the taken links */
     for (i = 0; i < numBlocks; i++) {
         BasicBlock *curBB = blockList[i];
index 91b7af7..a8bfda0 100644 (file)
@@ -29,7 +29,7 @@ BasicBlock *dvmCompilerNewBB(BBType blockType)
 void dvmCompilerAppendMIR(BasicBlock *bb, MIR *mir)
 {
     if (bb->firstMIRInsn == NULL) {
-        assert(bb->firstMIRInsn == NULL);
+        assert(bb->lastMIRInsn == NULL);
         bb->lastMIRInsn = bb->firstMIRInsn = mir;
         mir->prev = mir->next = NULL;
     } else {
@@ -40,6 +40,21 @@ void dvmCompilerAppendMIR(BasicBlock *bb, MIR *mir)
     }
 }
 
+/* Insert an MIR instruction to the head of a basic block */
+void dvmCompilerPrependMIR(BasicBlock *bb, MIR *mir)
+{
+    if (bb->firstMIRInsn == NULL) {
+        assert(bb->lastMIRInsn == NULL);
+        bb->lastMIRInsn = bb->firstMIRInsn = mir;
+        mir->prev = mir->next = NULL;
+    } else {
+        bb->firstMIRInsn->prev = mir;
+        mir->next = bb->firstMIRInsn;
+        mir->prev = NULL;
+        bb->firstMIRInsn = mir;
+    }
+}
+
 /*
  * Append an LIR instruction to the LIR list maintained by a compilation
  * unit
diff --git a/vm/compiler/Loop.c b/vm/compiler/Loop.c
new file mode 100644 (file)
index 0000000..ffd5883
--- /dev/null
@@ -0,0 +1,512 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "CompilerInternals.h"
+#include "Dataflow.h"
+#include "Loop.h"
+
+#define DEBUG_LOOP(X)
+
+/*
+ * Given the current simple natural loops, the phi node placement can be
+ * determined in the following fashion:
+ *                    entry (B0)
+ *              +---v   v
+ *              |  loop body (B1)
+ *              |       v
+ *              |  loop back (B2)
+ *              +---+   v
+ *                     exit (B3)
+ *
+ *  1) Add live-ins of B1 to B0 as defs
+ *  2) The intersect of defs(B0)/defs(B1) and defs(B2)/def(B0) are the variables
+ *     that need PHI nodes in B1.
+ */
+static void handlePhiPlacement(CompilationUnit *cUnit)
+{
+    BasicBlock *entry = cUnit->blockList[0];
+    BasicBlock *loopBody = cUnit->blockList[1];
+    BasicBlock *loopBranch = cUnit->blockList[2];
+    dvmCopyBitVector(entry->dataFlowInfo->defV,
+                     loopBody->dataFlowInfo->liveInV);
+
+    BitVector *phiV = dvmCompilerAllocBitVector(cUnit->method->registersSize,
+                                                false);
+    dvmIntersectBitVectors(phiV, entry->dataFlowInfo->defV,
+                           loopBody->dataFlowInfo->defV);
+    dvmIntersectBitVectors(phiV, entry->dataFlowInfo->defV,
+                           loopBranch->dataFlowInfo->defV);
+
+    /* Insert the PHI MIRs */
+    int i;
+    for (i = 0; i < cUnit->method->registersSize; i++) {
+        if (!dvmIsBitSet(phiV, i)) {
+            continue;
+        }
+        MIR *phi = dvmCompilerNew(sizeof(MIR), true);
+        phi->dalvikInsn.opCode = MIR_OP_PHI;
+        phi->dalvikInsn.vA = i;
+        dvmCompilerPrependMIR(loopBody, phi);
+    }
+}
+
+static void fillPhiNodeContents(CompilationUnit *cUnit)
+{
+    BasicBlock *entry = cUnit->blockList[0];
+    BasicBlock *loopBody = cUnit->blockList[1];
+    BasicBlock *loopBranch = cUnit->blockList[2];
+    MIR *mir;
+
+    for (mir = loopBody->firstMIRInsn; mir; mir = mir->next) {
+        if (mir->dalvikInsn.opCode != MIR_OP_PHI) break;
+        int dalvikReg = mir->dalvikInsn.vA;
+
+        mir->ssaRep->numUses = 2;
+        mir->ssaRep->uses = dvmCompilerNew(sizeof(int) * 2, false);
+        mir->ssaRep->uses[0] =
+            DECODE_REG(entry->dataFlowInfo->dalvikToSSAMap[dalvikReg]);
+        mir->ssaRep->uses[1] =
+            DECODE_REG(loopBranch->dataFlowInfo->dalvikToSSAMap[dalvikReg]);
+    }
+
+
+}
+
+static void dumpConstants(CompilationUnit *cUnit)
+{
+    int i;
+    for (i = 0; i < cUnit->numSSARegs; i++) {
+        if (dvmIsBitSet(cUnit->isConstantV, i)) {
+            int subNReg = dvmConvertSSARegToDalvik(cUnit, i);
+            LOGE("s%d(v%d_%d) has %d", i,
+                 DECODE_REG(subNReg), DECODE_SUB(subNReg),
+                 cUnit->constantValues[i]);
+        }
+    }
+}
+
+static void dumpIVList(CompilationUnit *cUnit)
+{
+    unsigned int i;
+    GrowableList *ivList = cUnit->loopAnalysis->ivList;
+    int *ssaToDalvikMap = (int *) cUnit->ssaToDalvikMap->elemList;
+
+    for (i = 0; i < ivList->numUsed; i++) {
+        InductionVariableInfo *ivInfo = ivList->elemList[i];
+        /* Basic IV */
+        if (ivInfo->ssaReg == ivInfo->basicSSAReg) {
+            LOGE("BIV %d: s%d(v%d) + %d", i,
+                 ivInfo->ssaReg,
+                 ssaToDalvikMap[ivInfo->ssaReg] & 0xffff,
+                 ivInfo->inc);
+        /* Dependent IV */
+        } else {
+            LOGE("DIV %d: s%d(v%d) = %d * s%d(v%d) + %d", i,
+                 ivInfo->ssaReg,
+                 ssaToDalvikMap[ivInfo->ssaReg] & 0xffff,
+                 ivInfo->m,
+                 ivInfo->basicSSAReg,
+                 ssaToDalvikMap[ivInfo->basicSSAReg] & 0xffff,
+                 ivInfo->c);
+        }
+    }
+}
+
+/*
+ * A loop is considered optimizable if:
+ * 1) It has one basic induction variable
+ * 2) The loop back branch compares the BIV with a constant
+ * 3) If it is a count-up loop, the condition is GE/GT, or LE/LT/LEZ/LTZ for
+ *    a count-down loop.
+ */
+static bool isLoopOptimizable(CompilationUnit *cUnit)
+{
+    unsigned int i;
+    BasicBlock *loopBranch = cUnit->blockList[2];
+    LoopAnalysis *loopAnalysis = cUnit->loopAnalysis;
+
+    if (loopAnalysis->numBasicIV != 1) return false;
+    for (i = 0; i < loopAnalysis->ivList->numUsed; i++) {
+        InductionVariableInfo *ivInfo;
+
+        ivInfo = GET_ELEM_N(loopAnalysis->ivList, InductionVariableInfo*, i);
+        /* Count up or down loop? */
+        if (ivInfo->ssaReg == ivInfo->basicSSAReg) {
+            loopAnalysis->isCountUpLoop = ivInfo->inc > 0;
+            break;
+        }
+    }
+
+    MIR *branch = loopBranch->lastMIRInsn;
+    OpCode opCode = branch->dalvikInsn.opCode;
+
+    /*
+     * If the instruction is not accessing the IV as the first operand, return
+     * false.
+     */
+    if (branch->ssaRep->numUses == 0 || branch->ssaRep->numDefs != 0) {
+        return false;
+    }
+
+    /*
+     * If the first operand of the comparison is not the basic induction
+     * variable, return false.
+     */
+    if (branch->ssaRep->uses[0] != loopAnalysis->ssaBIV) {
+        return false;
+    }
+
+    if (loopAnalysis->isCountUpLoop) {
+        /*
+         * If the condition op is not > or >=, this is not an optimization
+         * candidate.
+         */
+        if (opCode != OP_IF_GT && opCode != OP_IF_GE) {
+            return false;
+        }
+        /*
+         * If the comparison is not between the BIV and a loop invariant,
+         * return false.
+         */
+        int endReg = dvmConvertSSARegToDalvik(cUnit, branch->ssaRep->uses[1]);
+
+        if (DECODE_SUB(endReg) != 0) {
+            return false;
+        }
+        loopAnalysis->endConditionReg = DECODE_REG(endReg);
+    } else  {
+        /*
+         * If the condition op is not < or <=, this is not an optimization
+         * candidate.
+         */
+        if (opCode == OP_IF_LT || opCode == OP_IF_LE) {
+            /*
+             * If the comparison is not between the BIV and a loop invariant,
+             * return false.
+             */
+            int endReg = dvmConvertSSARegToDalvik(cUnit,
+                                                  branch->ssaRep->uses[1]);
+
+            if (DECODE_SUB(endReg) != 0) {
+                return false;
+            }
+            loopAnalysis->endConditionReg = DECODE_REG(endReg);
+        } else if (opCode != OP_IF_LTZ && opCode != OP_IF_LEZ) {
+            return false;
+        }
+    }
+    loopAnalysis->loopBranchOpcode = opCode;
+    return true;
+}
+
+/*
+ * Record the upper and lower bound information for range checks for each
+ * induction variable. If array A is accessed by index "i+5", the upper and
+ * lower bound will be len(A)-5 and -5, respectively.
+ */
+static void updateRangeCheckInfo(CompilationUnit *cUnit, int arrayReg,
+                                 int idxReg)
+{
+    InductionVariableInfo *ivInfo;
+    LoopAnalysis *loopAnalysis = cUnit->loopAnalysis;
+    unsigned int i, j;
+
+    for (i = 0; i < loopAnalysis->ivList->numUsed; i++) {
+        ivInfo = GET_ELEM_N(loopAnalysis->ivList, InductionVariableInfo*, i);
+        if (ivInfo->ssaReg == idxReg) {
+            ArrayAccessInfo *arrayAccessInfo = NULL;
+            for (j = 0; j < loopAnalysis->arrayAccessInfo->numUsed; j++) {
+                ArrayAccessInfo *existingArrayAccessInfo =
+                    GET_ELEM_N(loopAnalysis->arrayAccessInfo,
+                               ArrayAccessInfo*,
+                               j);
+                if (existingArrayAccessInfo->arrayReg == arrayReg) {
+                    if (ivInfo->c > existingArrayAccessInfo->maxC) {
+                        existingArrayAccessInfo->maxC = ivInfo->c;
+                    }
+                    if (ivInfo->c < existingArrayAccessInfo->minC) {
+                        existingArrayAccessInfo->minC = ivInfo->c;
+                    }
+                    arrayAccessInfo = existingArrayAccessInfo;
+                    break;
+                }
+            }
+            if (arrayAccessInfo == NULL) {
+                arrayAccessInfo =
+                    dvmCompilerNew(sizeof(ArrayAccessInfo), false);
+                arrayAccessInfo->ivReg = ivInfo->basicSSAReg;
+                arrayAccessInfo->arrayReg = arrayReg;
+                arrayAccessInfo->maxC = (ivInfo->c > 0) ? ivInfo->c : 0;
+                arrayAccessInfo->minC = (ivInfo->c < 0) ? ivInfo->c : 0;
+                dvmInsertGrowableList(loopAnalysis->arrayAccessInfo,
+                                      arrayAccessInfo);
+            }
+            break;
+        }
+    }
+}
+
+/* Returns true if the loop body cannot throw any exceptions */
+static bool doLoopBodyCodeMotion(CompilationUnit *cUnit)
+{
+    BasicBlock *entry = cUnit->blockList[0];
+    BasicBlock *loopBody = cUnit->blockList[1];
+    MIR *mir;
+    bool loopBodyCanThrow = false;
+    int numDalvikRegs = cUnit->method->registersSize;
+
+    for (mir = loopBody->firstMIRInsn; mir; mir = mir->next) {
+        DecodedInstruction *dInsn = &mir->dalvikInsn;
+        int dfAttributes =
+            dvmCompilerDataFlowAttributes[mir->dalvikInsn.opCode];
+
+        /* Skip extended MIR instructions */
+        if (dInsn->opCode > 255) continue;
+
+        int instrFlags = dexGetInstrFlags(gDvm.instrFlags, dInsn->opCode);
+
+        /* Instruction is clean */
+        if ((instrFlags & kInstrCanThrow) == 0) continue;
+
+        /*
+         * Currently we can only optimize away null and range checks. Punt on
+         * instructions that can throw due to other exceptions.
+         */
+        if (!(dfAttributes & DF_HAS_NR_CHECKS)) {
+            loopBodyCanThrow = true;
+            continue;
+        }
+
+        /*
+         * This comparison is redundant now, but we will have more than one
+         * group of flags to check soon.
+         */
+        if (dfAttributes & DF_HAS_NR_CHECKS) {
+            /*
+             * Check if the null check is applied on a loop invariant register?
+             * If the register's SSA id is less than the number of Dalvik
+             * registers, then it is loop invariant.
+             */
+            int refIdx;
+            switch (dfAttributes & DF_HAS_NR_CHECKS) {
+                case DF_NULL_N_RANGE_CHECK_0:
+                    refIdx = 0;
+                    break;
+                case DF_NULL_N_RANGE_CHECK_1:
+                    refIdx = 1;
+                    break;
+                case DF_NULL_N_RANGE_CHECK_2:
+                    refIdx = 2;
+                    break;
+                default:
+                    refIdx = 0;
+                    dvmAbort();
+            }
+
+            int useIdx = refIdx + 1;
+            int subNRegArray =
+                dvmConvertSSARegToDalvik(cUnit, mir->ssaRep->uses[refIdx]);
+            int arrayReg = DECODE_REG(subNRegArray);
+            int arraySub = DECODE_SUB(subNRegArray);
+
+            /*
+             * If the register is never updated in the loop (ie subscript == 0),
+             * it is an optimization candidate.
+             */
+            if (arraySub != 0) {
+                loopBodyCanThrow = true;
+                continue;
+            }
+
+            /*
+             * Then check if the range check can be hoisted out of the loop if
+             * it is basic or dependent induction variable.
+             */
+            if (dvmIsBitSet(cUnit->loopAnalysis->isIndVarV,
+                            mir->ssaRep->uses[useIdx])) {
+                mir->OptimizationFlags |=
+                    MIR_IGNORE_RANGE_CHECK |  MIR_IGNORE_NULL_CHECK;
+                updateRangeCheckInfo(cUnit, mir->ssaRep->uses[refIdx],
+                                     mir->ssaRep->uses[useIdx]);
+            }
+        }
+    }
+
+    return !loopBodyCanThrow;
+}
+
+static void dumpHoistedChecks(CompilationUnit *cUnit)
+{
+    ArrayAccessInfo *arrayAccessInfo;
+    LoopAnalysis *loopAnalysis = cUnit->loopAnalysis;
+    unsigned int i;
+
+    for (i = 0; i < loopAnalysis->arrayAccessInfo->numUsed; i++) {
+        ArrayAccessInfo *arrayAccessInfo =
+            GET_ELEM_N(loopAnalysis->arrayAccessInfo,
+                       ArrayAccessInfo*, i);
+        int arrayReg = DECODE_REG(
+            dvmConvertSSARegToDalvik(cUnit, arrayAccessInfo->arrayReg));
+        int idxReg = DECODE_REG(
+            dvmConvertSSARegToDalvik(cUnit, arrayAccessInfo->ivReg));
+        LOGE("Array access %d", i);
+        LOGE("  arrayReg %d", arrayReg);
+        LOGE("  idxReg %d", idxReg);
+        LOGE("  endReg %d", loopAnalysis->endConditionReg);
+        LOGE("  maxC %d", arrayAccessInfo->maxC);
+        LOGE("  minC %d", arrayAccessInfo->minC);
+        LOGE("  opcode %d", loopAnalysis->loopBranchOpcode);
+    }
+}
+
+static void genHoistedChecks(CompilationUnit *cUnit)
+{
+    unsigned int i;
+    BasicBlock *entry = cUnit->blockList[0];
+    LoopAnalysis *loopAnalysis = cUnit->loopAnalysis;
+    ArrayAccessInfo *arrayAccessInfo;
+    int globalMaxC = 0;
+    int globalMinC = 0;
+    /* Should be loop invariant */
+    int idxReg = 0;
+
+    for (i = 0; i < loopAnalysis->arrayAccessInfo->numUsed; i++) {
+        ArrayAccessInfo *arrayAccessInfo =
+            GET_ELEM_N(loopAnalysis->arrayAccessInfo,
+                       ArrayAccessInfo*, i);
+        int arrayReg = DECODE_REG(
+            dvmConvertSSARegToDalvik(cUnit, arrayAccessInfo->arrayReg));
+        idxReg = DECODE_REG(
+            dvmConvertSSARegToDalvik(cUnit, arrayAccessInfo->ivReg));
+
+        MIR *rangeCheckMIR = dvmCompilerNew(sizeof(MIR), true);
+        rangeCheckMIR->dalvikInsn.opCode = (loopAnalysis->isCountUpLoop) ?
+            MIR_OP_NULL_N_RANGE_UP_CHECK : MIR_OP_NULL_N_RANGE_DOWN_CHECK;
+        rangeCheckMIR->dalvikInsn.vA = arrayReg;
+        rangeCheckMIR->dalvikInsn.vB = idxReg;
+        rangeCheckMIR->dalvikInsn.vC = loopAnalysis->endConditionReg;
+        rangeCheckMIR->dalvikInsn.arg[0] = arrayAccessInfo->maxC;
+        rangeCheckMIR->dalvikInsn.arg[1] = arrayAccessInfo->minC;
+        rangeCheckMIR->dalvikInsn.arg[2] = loopAnalysis->loopBranchOpcode;
+        dvmCompilerAppendMIR(entry, rangeCheckMIR);
+        if (arrayAccessInfo->maxC > globalMaxC) {
+            globalMaxC = arrayAccessInfo->maxC;
+        }
+        if (arrayAccessInfo->minC < globalMinC) {
+            globalMinC = arrayAccessInfo->minC;
+        }
+    }
+
+    if (loopAnalysis->arrayAccessInfo->numUsed != 0) {
+        if (loopAnalysis->isCountUpLoop) {
+            MIR *boundCheckMIR = dvmCompilerNew(sizeof(MIR), true);
+            boundCheckMIR->dalvikInsn.opCode = MIR_OP_LOWER_BOUND_CHECK;
+            boundCheckMIR->dalvikInsn.vA = idxReg;
+            boundCheckMIR->dalvikInsn.vB = globalMinC;
+            dvmCompilerAppendMIR(entry, boundCheckMIR);
+        } else {
+            if (loopAnalysis->loopBranchOpcode == OP_IF_LT ||
+                loopAnalysis->loopBranchOpcode == OP_IF_LE) {
+                MIR *boundCheckMIR = dvmCompilerNew(sizeof(MIR), true);
+                boundCheckMIR->dalvikInsn.opCode = MIR_OP_LOWER_BOUND_CHECK;
+                boundCheckMIR->dalvikInsn.vA = loopAnalysis->endConditionReg;
+                boundCheckMIR->dalvikInsn.vB = globalMinC;
+                /*
+                 * If the end condition is ">", add 1 back to the constant field
+                 * to reflect the fact that the smallest index value is
+                 * "endValue + constant + 1".
+                 */
+                if (loopAnalysis->loopBranchOpcode == OP_IF_LT) {
+                    boundCheckMIR->dalvikInsn.vB++;
+                }
+                dvmCompilerAppendMIR(entry, boundCheckMIR);
+            } else if (loopAnalysis->loopBranchOpcode == OP_IF_LTZ) {
+                /* Array index will fall below 0 */
+                if (globalMinC < 0) {
+                    MIR *boundCheckMIR = dvmCompilerNew(sizeof(MIR), true);
+                    boundCheckMIR->dalvikInsn.opCode = MIR_OP_PUNT;
+                    dvmCompilerAppendMIR(entry, boundCheckMIR);
+                }
+            } else if (loopAnalysis->loopBranchOpcode == OP_IF_LEZ) {
+                /* Array index will fall below 0 */
+                if (globalMinC < -1) {
+                    MIR *boundCheckMIR = dvmCompilerNew(sizeof(MIR), true);
+                    boundCheckMIR->dalvikInsn.opCode = MIR_OP_PUNT;
+                    dvmCompilerAppendMIR(entry, boundCheckMIR);
+                }
+            } else {
+                dvmAbort();
+            }
+        }
+
+    }
+}
+
+/* Main entry point to do loop optimization */
+void dvmCompilerLoopOpt(CompilationUnit *cUnit)
+{
+    int numDalvikReg = cUnit->method->registersSize;
+    LoopAnalysis *loopAnalysis = dvmCompilerNew(sizeof(LoopAnalysis), true);
+
+    assert(cUnit->blockList[0]->blockType == ENTRY_BLOCK);
+    assert(cUnit->blockList[2]->blockType == DALVIK_BYTECODE);
+    assert(cUnit->blockList[3]->blockType == EXIT_BLOCK);
+
+    cUnit->loopAnalysis = loopAnalysis;
+    /*
+     * Find live-in variables to the loop body so that we can fake their
+     * definitions in the entry block.
+     */
+    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerFindLiveIn);
+
+    /* Insert phi nodes to the loop body */
+    handlePhiPlacement(cUnit);
+
+    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerDoSSAConversion);
+    fillPhiNodeContents(cUnit);
+
+    /* Constant propagation */
+    cUnit->isConstantV = dvmAllocBitVector(cUnit->numSSARegs, false);
+    cUnit->constantValues = dvmCompilerNew(sizeof(int) * cUnit->numSSARegs,
+                                           true);
+    dvmCompilerDataFlowAnalysisDispatcher(cUnit,
+                                          dvmCompilerDoConstantPropagation);
+    DEBUG_LOOP(dumpConstants(cUnit);)
+
+    /* Find induction variables - basic and dependent */
+    loopAnalysis->ivList = dvmCompilerNew(sizeof(GrowableList), true);
+    dvmInitGrowableList(loopAnalysis->ivList, 4);
+    loopAnalysis->isIndVarV = dvmAllocBitVector(cUnit->numSSARegs, false);
+    dvmCompilerDataFlowAnalysisDispatcher(cUnit,
+                                          dvmCompilerFindInductionVariables);
+    DEBUG_LOOP(dumpIVList(cUnit);)
+
+    /* If the loop turns out to be non-optimizable, return early */
+    if (!isLoopOptimizable(cUnit))
+        return;
+
+    loopAnalysis->arrayAccessInfo = dvmCompilerNew(sizeof(GrowableList), true);
+    dvmInitGrowableList(loopAnalysis->arrayAccessInfo, 4);
+    loopAnalysis->bodyIsClean = doLoopBodyCodeMotion(cUnit);
+    DEBUG_LOOP(dumpHoistedChecks(cUnit);)
+
+    /*
+     * Convert the array access information into extended MIR code in the loop
+     * header.
+     */
+    genHoistedChecks(cUnit);
+}
diff --git a/vm/compiler/Loop.h b/vm/compiler/Loop.h
new file mode 100644 (file)
index 0000000..0de2baf
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_LOOP
+#define _DALVIK_VM_LOOP
+
+#include "Dalvik.h"
+#include "CompilerInternals.h"
+
+typedef struct LoopAnalysis {
+    BitVector *isIndVarV;               // length == numSSAReg
+    GrowableList *ivList;               // induction variables
+    GrowableList *arrayAccessInfo;      // hoisted checks for array accesses
+    int numBasicIV;                     // number of basic induction variables
+    int ssaBIV;                         // basic IV in SSA name
+    bool isCountUpLoop;                 // count up or down loop
+    OpCode loopBranchOpcode;            // OP_IF_XXX for the loop back branch
+    int endConditionReg;                // vB in "vA op vB"
+    LIR *branchToBody;                  // branch over to the body from entry
+    LIR *branchToPCR;                   // branch over to the PCR cell
+    bool bodyIsClean;                   // loop body cannot throw any exceptions
+} LoopAnalysis;
+
+#endif /* _DALVIK_VM_LOOP */
index 715f750..1ad1e27 100644 (file)
@@ -210,3 +210,71 @@ void dvmCompilerDumpStats(void)
          totalMethodStats.dalvikSize,
          totalMethodStats.nativeSize);
 }
+
+/*
+ * Allocate a bit vector with enough space to hold at least the specified
+ * number of bits.
+ *
+ * NOTE: this is the sister implementation of dvmAllocBitVector. In this version
+ * memory is allocated from the compiler arena.
+ */
+BitVector* dvmCompilerAllocBitVector(int startBits, bool expandable)
+{
+    BitVector* bv;
+    int count;
+
+    assert(sizeof(bv->storage[0]) == 4);        /* assuming 32-bit units */
+    assert(startBits >= 0);
+
+    bv = (BitVector*) dvmCompilerNew(sizeof(BitVector), false);
+
+    count = (startBits + 31) >> 5;
+
+    bv->storageSize = count;
+    bv->expandable = expandable;
+    bv->storage = (u4*) dvmCompilerNew(count * sizeof(u4), true);
+    return bv;
+}
+
+/*
+ * Mark the specified bit as "set".
+ *
+ * Returns "false" if the bit is outside the range of the vector and we're
+ * not allowed to expand.
+ *
+ * NOTE: this is the sister implementation of dvmSetBit. In this version
+ * memory is allocated from the compiler arena.
+ */
+bool dvmCompilerSetBit(BitVector *pBits, int num)
+{
+    assert(num >= 0);
+    if (num >= pBits->storageSize * (int)sizeof(u4) * 8) {
+        if (!pBits->expandable)
+            return false;
+
+        int newSize = (num + 31) >> 5;
+        assert(newSize > pBits->storageSize);
+        u4 *newStorage = dvmCompilerNew(newSize * sizeof(u4), false);
+        memcpy(newStorage, pBits->storage, pBits->storageSize * sizeof(u4));
+        memset(&newStorage[pBits->storageSize], 0,
+               (newSize - pBits->storageSize) * sizeof(u4));
+        pBits->storage = newStorage;
+        pBits->storageSize = newSize;
+    }
+
+    pBits->storage[num >> 5] |= 1 << (num & 0x1f);
+    return true;
+}
+
+/* FIXME */
+void dvmDebugBitVector(char *msg, const BitVector *bv, int length)
+{
+    int i;
+
+    LOGE("%s", msg);
+    for (i = 0; i < length; i++) {
+        if (dvmIsBitSet(bv, i)) {
+            LOGE("Bit %d is set", i);
+        }
+    }
+}
index c9e6bd6..28e13a5 100644 (file)
  * limitations under the License.
  */
 
-#include "../CompilerIR.h"
-
 #ifndef _DALVIK_VM_COMPILERCODEGEN_H_
 #define _DALVIK_VM_COMPILERCODEGEN_H_
 
+#include "compiler/CompilerIR.h"
+
 /* Work unit is architecture dependent */
 bool dvmCompilerDoWork(CompilerWorkOrder *work);
 
index 1a891b1..432b368 100644 (file)
  * limitations under the License.
  */
 
-#include "Dalvik.h"
-#include "compiler/CompilerInternals.h"
-
 #ifndef _DALVIK_VM_COMPILER_OPTIMIZATION_H
 #define _DALVIK_VM_COMPILER_OPTIMIZATION_H
 
+#include "Dalvik.h"
+
 /* Forward declarations */
 struct CompilationUnit;
 struct LIR;
index 3d55abd..ab46b44 100644 (file)
@@ -204,8 +204,16 @@ static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr)
     u2 *cPtr = (u2*)baseAddr;
     /* Handle pseudo-ops individually, and all regular insns as a group */
     switch(lir->opCode) {
+        case ARM_PSEUDO_EXTENDED_MIR:
+            /* intentional fallthrough */
+        case ARM_PSEUDO_SSA_REP:
+            LOGD("-------- %s\n", (char *) dest);
+            break;
         case ARM_PSEUDO_TARGET_LABEL:
             break;
+        case ARM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH:
+            LOGD("-------- chaining cell (backward branch): 0x%04x\n", dest);
+            break;
         case ARM_PSEUDO_CHAINING_CELL_NORMAL:
             LOGD("-------- chaining cell (normal): 0x%04x\n", dest);
             break;
@@ -220,13 +228,16 @@ static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr)
                  ((Method *)dest)->name,
                  ((Method *)dest)->insns);
             break;
-        case ARM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH:
-            LOGD("-------- chaining cell (backward branch): 0x%04x\n", dest);
+        case ARM_PSEUDO_ENTRY_BLOCK:
+            LOGD("-------- entry offset: 0x%04x\n", dest);
             break;
         case ARM_PSEUDO_DALVIK_BYTECODE_BOUNDARY:
             LOGD("-------- dalvik offset: 0x%04x @ %s\n", dest,
                    getOpcodeName(lir->operands[1]));
             break;
+        case ARM_PSEUDO_EXIT_BLOCK:
+            LOGD("-------- exit offset: 0x%04x\n", dest);
+            break;
         case ARM_PSEUDO_ALIGN4:
             LOGD("%p (%04x): .align4\n", baseAddr + offset, offset);
             break;
index 001486d..87978d8 100644 (file)
@@ -220,6 +220,10 @@ typedef enum ArmConditionCode {
  * Assemble.c.
  */
 typedef enum ArmOpCode {
+    ARM_PSEUDO_EXTENDED_MIR = -16,
+    ARM_PSEUDO_SSA_REP = -15,
+    ARM_PSEUDO_ENTRY_BLOCK = -14,
+    ARM_PSEUDO_EXIT_BLOCK = -13,
     ARM_PSEUDO_TARGET_LABEL = -12,
     ARM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH = -11,
     ARM_PSEUDO_CHAINING_CELL_HOT = -10,
index d9a29e8..d142ca0 100644 (file)
@@ -24,6 +24,7 @@
  * applicable directory below this one.
  */
 
+#include "compiler/Loop.h"
 
 /* Array holding the entry offset of each template relative to the first one */
 static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK];
@@ -853,10 +854,22 @@ static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size,
     loadValue(cUnit, vIndex, reg3);
 
     /* null object? */
-    ArmLIR * pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset, NULL);
-    loadWordDisp(cUnit, reg2, lenOffset, reg0);  /* Get len */
-    opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone); /* reg2 -> array data */
-    genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
+    ArmLIR * pcrLabel = NULL;
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+        pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset, NULL);
+    }
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+        /* Get len */
+        loadWordDisp(cUnit, reg2, lenOffset, reg0);
+        /* reg2 -> array data */
+        opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone);
+        genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
+    } else {
+        /* reg2 -> array data */
+        opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone);
+    }
 #if !defined(WITH_SELF_VERIFICATION)
     if ((size == LONG) || (size == DOUBLE)) {
         //TUNING: redo.  Make specific wide routine, perhaps use ldmia/fp regs
@@ -933,11 +946,23 @@ static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
     loadValue(cUnit, vIndex, reg3);
 
     /* null object? */
-    ArmLIR * pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset,
-                                         NULL);
-    loadWordDisp(cUnit, reg2, lenOffset, reg0);  /* Get len */
-    opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone); /* reg2 -> array data */
-    genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
+    ArmLIR * pcrLabel = NULL;
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+        pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset, NULL);
+    }
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+        /* Get len */
+        loadWordDisp(cUnit, reg2, lenOffset, reg0);
+        /* reg2 -> array data */
+        opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone);
+        genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
+    } else {
+        /* reg2 -> array data */
+        opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone);
+    }
+
     /* at this point, reg2 points to array, reg3 is unscaled index */
 #if !defined(WITH_SELF_VERIFICATION)
     if ((size == LONG) || (size == DOUBLE)) {
@@ -3414,7 +3439,183 @@ static void handlePCReconstruction(CompilationUnit *cUnit,
     }
 }
 
-/* Entry function to invoke the backend of the JIT compiler */
+static char *extendedMIROpNames[MIR_OP_LAST - MIR_OP_FIRST] = {
+    "MIR_OP_PHI",
+    "MIR_OP_NULL_N_RANGE_UP_CHECK",
+    "MIR_OP_NULL_N_RANGE_DOWN_CHECK",
+    "MIR_OP_LOWER_BOUND_CHECK",
+    "MIR_OP_PUNT",
+};
+
+/*
+ * vA = arrayReg;
+ * vB = idxReg;
+ * vC = endConditionReg;
+ * arg[0] = maxC
+ * arg[1] = minC
+ * arg[2] = loopBranchConditionCode
+ */
+static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    const int lenOffset = offsetof(ArrayObject, length);
+    const int regArray = 0;
+    const int regIdxEnd = NEXT_REG(regArray);
+    const int regLength = regArray;
+    const int maxC = dInsn->arg[0];
+    const int minC = dInsn->arg[1];
+
+    /* regArray <- arrayRef */
+    loadValue(cUnit, mir->dalvikInsn.vA, regArray);
+    loadValue(cUnit, mir->dalvikInsn.vC, regIdxEnd);
+    genRegImmCheck(cUnit, ARM_COND_EQ, regArray, 0, 0,
+                   (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+
+    /* regLength <- len(arrayRef) */
+    loadWordDisp(cUnit, regArray, lenOffset, regLength);
+
+    int delta = maxC;
+    /*
+     * If the loop end condition is ">=" instead of ">", then the largest value
+     * of the index is "endCondition - 1".
+     */
+    if (dInsn->arg[2] == OP_IF_GE) {
+        delta--;
+    }
+
+    if (delta) {
+        opRegImm(cUnit, OP_ADD, regIdxEnd, delta, regIdxEnd);
+    }
+    /* Punt if "regIdxEnd < len(Array)" is false */
+    insertRegRegCheck(cUnit, ARM_COND_GE, regIdxEnd, regLength, 0,
+                      (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+}
+
+/*
+ * vA = arrayReg;
+ * vB = idxReg;
+ * vC = endConditionReg;
+ * arg[0] = maxC
+ * arg[1] = minC
+ * arg[2] = loopBranchConditionCode
+ */
+static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    const int lenOffset = offsetof(ArrayObject, length);
+    const int regArray = 0;
+    const int regIdxInit = NEXT_REG(regArray);
+    const int regLength = regArray;
+    const int maxC = dInsn->arg[0];
+    const int minC = dInsn->arg[1];
+
+    /* regArray <- arrayRef */
+    loadValue(cUnit, mir->dalvikInsn.vA, regArray);
+    loadValue(cUnit, mir->dalvikInsn.vB, regIdxInit);
+    genRegImmCheck(cUnit, ARM_COND_EQ, regArray, 0, 0,
+                   (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+
+    /* regLength <- len(arrayRef) */
+    loadWordDisp(cUnit, regArray, lenOffset, regLength);
+
+    if (maxC) {
+        opRegImm(cUnit, OP_ADD, regIdxInit, maxC, regIdxInit);
+    }
+
+    /* Punt if "regIdxInit < len(Array)" is false */
+    insertRegRegCheck(cUnit, ARM_COND_GE, regIdxInit, regLength, 0,
+                      (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+}
+
+/*
+ * vA = idxReg;
+ * vB = minC;
+ */
+static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    const int regIdx = 0;
+    const int minC = dInsn->vB;
+
+    /* regIdx <- initial index value */
+    loadValue(cUnit, mir->dalvikInsn.vA, regIdx);
+
+    /* Punt if "regIdxInit + minC >= 0" is false */
+    genRegImmCheck(cUnit, ARM_COND_LT, regIdx, -minC, 0,
+                   (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+}
+
+/* Extended MIR instructions like PHI */
+static void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
+{
+    int opOffset = mir->dalvikInsn.opCode - MIR_OP_FIRST;
+    char *msg = dvmCompilerNew(strlen(extendedMIROpNames[opOffset]) + 1,
+                               false);
+    strcpy(msg, extendedMIROpNames[opOffset]);
+    newLIR1(cUnit, ARM_PSEUDO_EXTENDED_MIR, (int) msg);
+
+    switch (mir->dalvikInsn.opCode) {
+        case MIR_OP_PHI: {
+            char *ssaString = dvmCompilerGetSSAString(cUnit, mir->ssaRep);
+            newLIR1(cUnit, ARM_PSEUDO_SSA_REP, (int) ssaString);
+            break;
+        }
+        case MIR_OP_NULL_N_RANGE_UP_CHECK: {
+            genHoistedChecksForCountUpLoop(cUnit, mir);
+            break;
+        }
+        case MIR_OP_NULL_N_RANGE_DOWN_CHECK: {
+            genHoistedChecksForCountDownLoop(cUnit, mir);
+            break;
+        }
+        case MIR_OP_LOWER_BOUND_CHECK: {
+            genHoistedLowerBoundCheck(cUnit, mir);
+            break;
+        }
+        case MIR_OP_PUNT: {
+            genUnconditionalBranch(cUnit,
+                                   (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+            break;
+        }
+        default:
+            break;
+    }
+}
+
+/*
+ * Create a PC-reconstruction cell for the starting offset of this trace.
+ * Since the PCR cell is placed near the end of the compiled code which is
+ * usually out of range for a conditional branch, we put two branches (one
+ * branch over to the loop body and one layover branch to the actual PCR) at the
+ * end of the entry block.
+ */
+static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
+                                ArmLIR *bodyLabel)
+{
+    /* Set up the place holder to reconstruct this Dalvik PC */
+    ArmLIR *pcrLabel = dvmCompilerNew(sizeof(ArmLIR), true);
+    pcrLabel->opCode = ARM_PSEUDO_PC_RECONSTRUCTION_CELL;
+    pcrLabel->operands[0] =
+        (int) (cUnit->method->insns + entry->startOffset);
+    pcrLabel->operands[1] = entry->startOffset;
+    /* Insert the place holder to the growable list */
+    dvmInsertGrowableList(&cUnit->pcReconstructionList, pcrLabel);
+
+    /*
+     * Next, create two branches - one branch over to the loop body and the
+     * other branch to the PCR cell to punt.
+     */
+    ArmLIR *branchToBody = dvmCompilerNew(sizeof(ArmLIR), true);
+    branchToBody->opCode = THUMB_B_UNCOND;
+    branchToBody->generic.target = (LIR *) bodyLabel;
+    cUnit->loopAnalysis->branchToBody = (LIR *) branchToBody;
+
+    ArmLIR *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
+    branchToPCR->opCode = THUMB_B_UNCOND;
+    branchToPCR->generic.target = (LIR *) pcrLabel;
+    cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
+}
+
 void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
 {
     /* Used to hold the labels of each block */
@@ -3481,7 +3682,18 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
         }
 
-        if (blockList[i]->blockType == DALVIK_BYTECODE) {
+        if (blockList[i]->blockType == ENTRY_BLOCK) {
+            labelList[i].opCode = ARM_PSEUDO_ENTRY_BLOCK;
+            if (blockList[i]->firstMIRInsn == NULL) {
+                continue;
+            } else {
+              setupLoopEntryBlock(cUnit, blockList[i],
+                                  &labelList[blockList[i]->fallThrough->id]);
+            }
+        } else if (blockList[i]->blockType == EXIT_BLOCK) {
+            labelList[i].opCode = ARM_PSEUDO_EXIT_BLOCK;
+            goto gen_fallthrough;
+        } else if (blockList[i]->blockType == DALVIK_BYTECODE) {
             labelList[i].opCode = ARM_PSEUDO_NORMAL_BLOCK_LABEL;
             /* Reset the register state */
             resetRegisterScoreboard(cUnit);
@@ -3554,16 +3766,27 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
         ArmLIR *headLIR = NULL;
 
         for (mir = blockList[i]->firstMIRInsn; mir; mir = mir->next) {
+            if (mir->dalvikInsn.opCode >= MIR_OP_FIRST) {
+                handleExtendedMIR(cUnit, mir);
+                continue;
+            }
+
             OpCode dalvikOpCode = mir->dalvikInsn.opCode;
             InstructionFormat dalvikFormat =
                 dexGetInstrFormat(gDvm.instrFormat, dalvikOpCode);
             ArmLIR *boundaryLIR =
                 newLIR2(cUnit, ARM_PSEUDO_DALVIK_BYTECODE_BOUNDARY,
-                        mir->offset,dalvikOpCode);
+                        mir->offset, dalvikOpCode);
+            if (mir->ssaRep) {
+                char *ssaString = dvmCompilerGetSSAString(cUnit, mir->ssaRep);
+                newLIR1(cUnit, ARM_PSEUDO_SSA_REP, (int) ssaString);
+            }
+
             /* Remember the first LIR for this block */
             if (headLIR == NULL) {
                 headLIR = boundaryLIR;
             }
+
             bool notHandled;
             /*
              * Debugging: screen the opcode first to see if it is in the
@@ -3675,9 +3898,24 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
                 break;
             }
         }
-        /* Eliminate redundant loads/stores and delay stores into later slots */
-        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
-                                           cUnit->lastLIRInsn);
+
+        if (blockList[i]->blockType == ENTRY_BLOCK) {
+            dvmCompilerAppendLIR(cUnit,
+                                 (LIR *) cUnit->loopAnalysis->branchToBody);
+            dvmCompilerAppendLIR(cUnit,
+                                 (LIR *) cUnit->loopAnalysis->branchToPCR);
+        }
+
+        if (headLIR) {
+            /*
+             * Eliminate redundant loads/stores and delay stores into later
+             * slots
+             */
+            dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
+                                               cUnit->lastLIRInsn);
+        }
+
+gen_fallthrough:
         /*
          * Check if the block is terminated due to trace length constraint -
          * insert an unconditional branch to the chaining cell.