From d7d426a1d746f70edeaeccf77886f3ad8298e28c Mon Sep 17 00:00:00 2001 From: Ben Cheng Date: Tue, 22 Sep 2009 11:23:36 -0700 Subject: [PATCH] Implemented a new scheduler and FP register allocator. Improved performance by 50% over existing JIT for some FP benchmarks. --- vm/compiler/Compiler.c | 5 - vm/compiler/CompilerUtility.h | 5 +- vm/compiler/codegen/Optimizer.h | 2 + vm/compiler/codegen/arm/ArchUtility.c | 72 +++++-- vm/compiler/codegen/arm/ArmLIR.h | 35 ++-- vm/compiler/codegen/arm/Assemble.c | 44 ++--- vm/compiler/codegen/arm/Codegen.c | 122 +++++++++++- vm/compiler/codegen/arm/Codegen.h | 6 +- vm/compiler/codegen/arm/LocalOptimizations.c | 228 +++++++++++++--------- vm/compiler/codegen/arm/Thumb2Util.c | 156 +++++++++++---- vm/compiler/codegen/arm/ThumbUtil.c | 41 ++-- vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c | 2 +- vm/compiler/codegen/arm/armv5te/ArchVariant.c | 2 +- vm/compiler/codegen/arm/armv7-a/ArchVariant.c | 87 ++++++--- 14 files changed, 575 insertions(+), 232 deletions(-) diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c index 47881cadd..39a988d94 100644 --- a/vm/compiler/Compiler.c +++ b/vm/compiler/Compiler.c @@ -227,11 +227,6 @@ bool dvmCompilerStartup(void) /* Track method-level compilation statistics */ gDvmJit.methodStatsTable = dvmHashTableCreate(32, NULL); - /* - * FIXME - temporarily disable optimizations for this intermediate checkin - */ - gDvmJit.disableOpt = 0x3; - dvmUnlockMutex(&gDvmJit.compilerLock); return true; diff --git a/vm/compiler/CompilerUtility.h b/vm/compiler/CompilerUtility.h index c72a8da34..409de2967 100644 --- a/vm/compiler/CompilerUtility.h +++ b/vm/compiler/CompilerUtility.h @@ -40,11 +40,14 @@ typedef struct GrowableList { #define GET_ELEM_N(LIST, TYPE, N) (((TYPE*) LIST->elemList)[N]) +struct LIR; + void dvmInitGrowableList(GrowableList *gList, size_t initLength); void dvmInsertGrowableList(GrowableList *gList, void *elem); - BitVector* dvmCompilerAllocBitVector(int startBits, bool expandable); 
bool dvmCompilerSetBit(BitVector* pBits, int num); void dvmDebugBitVector(char *msg, const BitVector *bv, int length); +void dvmDumpLIRInsn(struct LIR *lir, unsigned char *baseAddr); +void dvmDumpResourceMask(struct LIR *lir, u8 mask, const char *prefix); #endif /* _DALVIK_COMPILER_UTILITY */ diff --git a/vm/compiler/codegen/Optimizer.h b/vm/compiler/codegen/Optimizer.h index 0931df3d0..487e8648e 100644 --- a/vm/compiler/codegen/Optimizer.h +++ b/vm/compiler/codegen/Optimizer.h @@ -44,6 +44,8 @@ typedef struct RegisterScoreboard { int nativeReg; // And the mapped native register int nativeRegHi; // And the mapped native register bool isWide; // Whether a pair of registers are alive + int fp[32]; // Track the Dalvik register held in a SFP reg + int nextFP; // Next index for FP register allocation } RegisterScoreboard; void dvmCompilerApplyLocalOptimizations(struct CompilationUnit *cUnit, diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c index 551e1f753..d1f9870ff 100644 --- a/vm/compiler/codegen/arm/ArchUtility.c +++ b/vm/compiler/codegen/arm/ArchUtility.c @@ -110,9 +110,6 @@ static void buildInsnString(char *fmt, ArmLIR *lir, char* buf, case 'd': sprintf(tbuf,"%d", operand); break; - case 'D': - sprintf(tbuf,"%d", operand+8); - break; case 'E': sprintf(tbuf,"%d", operand*4); break; @@ -193,8 +190,49 @@ static void buildInsnString(char *fmt, ArmLIR *lir, char* buf, *buf = 0; } +void dvmDumpResourceMask(LIR *lir, u8 mask, const char *prefix) +{ + char buf[256]; + buf[0] = 0; + ArmLIR *armLIR = (ArmLIR *) lir; + + if (mask == ENCODE_ALL) { + strcpy(buf, "all"); + } else { + char num[8]; + int i; + + for (i = 0; i < kRegEnd; i++) { + if (mask & (1ULL << i)) { + sprintf(num, "%d ", i); + strcat(buf, num); + } + } + + if (mask & ENCODE_CCODE) { + strcat(buf, "cc "); + } + if (mask & ENCODE_FP_STATUS) { + strcat(buf, "fpcc "); + } + if (armLIR && (mask & ENCODE_DALVIK_REG)) { + sprintf(buf + strlen(buf), "dr%d%s", armLIR->aliasInfo 
& 0xffff, + (armLIR->aliasInfo & 0x80000000) ? "(+1)" : ""); + } + } + if (buf[0]) { + LOGD("%s: %s", prefix, buf); + } +} + +/* + * Debugging macros + */ +#define DUMP_RESOURCE_MASK(X) +#define DUMP_SSA_REP(X) + /* Pretty-print a LIR instruction */ -static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr) +void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) { ArmLIR *lir = (ArmLIR *) arg; char buf[256]; @@ -202,15 +240,17 @@ static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr) int offset = lir->generic.offset; int dest = lir->operands[0]; u2 *cPtr = (u2*)baseAddr; + const bool dumpNop = false; + /* Handle pseudo-ops individually, and all regular insns as a group */ switch(lir->opCode) { - case ARM_PSEUDO_IT_BOTTOM: - LOGD("-------- IT_Bottom"); + case ARM_PSEUDO_BARRIER: + LOGD("-------- BARRIER"); break; case ARM_PSEUDO_EXTENDED_MIR: /* intentional fallthrough */ case ARM_PSEUDO_SSA_REP: - LOGD("-------- %s\n", (char *) dest); + DUMP_SSA_REP(LOGD("-------- %s\n", (char *) dest)); break; case ARM_PSEUDO_TARGET_LABEL: break; @@ -258,17 +298,27 @@ static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr) LOGD("L%#06x:\n", dest); break; default: - if (lir->isNop) { + if (lir->isNop && !dumpNop) { break; } buildInsnString(EncodingMap[lir->opCode].name, lir, opName, baseAddr, 256); buildInsnString(EncodingMap[lir->opCode].fmt, lir, buf, baseAddr, 256); - LOGD("%p (%04x): %-8s%s\n", - baseAddr + offset, offset, opName, buf); + LOGD("%p (%04x): %-8s%s%s\n", + baseAddr + offset, offset, opName, buf, + lir->isNop ? 
"(nop)" : ""); break; } + + if (lir->useMask && (!lir->isNop || dumpNop)) { + DUMP_RESOURCE_MASK(dvmDumpResourceMask((LIR *) lir, + lir->useMask, "use")); + } + if (lir->defMask && (!lir->isNop || dumpNop)) { + DUMP_RESOURCE_MASK(dvmDumpResourceMask((LIR *) lir, + lir->defMask, "def")); + } } /* Dump instructions and constant pool contents */ @@ -281,7 +331,7 @@ void dvmCompilerCodegenDump(CompilationUnit *cUnit) LOGD("installed code is at %p\n", cUnit->baseAddr); LOGD("total size is %d bytes\n", cUnit->totalSize); for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) { - dumpLIRInsn(lirInsn, cUnit->baseAddr); + dvmDumpLIRInsn(lirInsn, cUnit->baseAddr); } for (lirInsn = cUnit->wordList; lirInsn; lirInsn = lirInsn->next) { armLIR = (ArmLIR *) lirInsn; diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h index 20fb6bf66..8fedda24c 100644 --- a/vm/compiler/codegen/arm/ArmLIR.h +++ b/vm/compiler/codegen/arm/ArmLIR.h @@ -66,6 +66,7 @@ /* Offset to distinguish DP FP regs */ #define FP_DOUBLE 64 /* Reg types */ +#define REGTYPE(x) (x & (FP_REG_OFFSET | FP_DOUBLE)) #define FPREG(x) ((x & FP_REG_OFFSET) == FP_REG_OFFSET) #define LOWREG(x) ((x & 0x7) == x) #define DOUBLEREG(x) ((x & FP_DOUBLE) == FP_DOUBLE) @@ -83,21 +84,23 @@ typedef enum ResourceEncodingPos { kRegLR = 14, kRegPC = 15, kFPReg0 = 16, - kITBlock = 48, - kCCode = 49, - kFPStatus = 50, + kRegEnd = 48, + kCCode = kRegEnd, + kFPStatus, + kDalvikReg, } ResourceEncodingPos; -#define ENCODE_GP_REG(N) (1ULL << N) #define ENCODE_REG_LIST(N) ((u8) N) #define ENCODE_REG_SP (1ULL << kRegSP) #define ENCODE_REG_LR (1ULL << kRegLR) #define ENCODE_REG_PC (1ULL << kRegPC) -#define ENCODE_SFP_REG(N) (1ULL << (N - FP_REG_OFFSET + kFPReg0)) -#define ENCODE_DFP_REG(N) (3ULL << (((N - FP_DOUBLE) << 1) + kFPReg0)) -#define ENCODE_IT_BLOCK (1ULL << kITBlock) #define ENCODE_CCODE (1ULL << kCCode) #define ENCODE_FP_STATUS (1ULL << kFPStatus) +#define ENCODE_DALVIK_REG (1ULL << kDalvikReg) 
+#define ENCODE_ALL (~0ULL) + +#define DECODE_ALIAS_INFO_REG(X) (X & 0xffff) +#define DECODE_ALIAS_INFO_WIDE(X) ((X & 0x80000000) ? 1 : 0) typedef enum OpSize { WORD, @@ -240,7 +243,7 @@ typedef enum ArmConditionCode { * Assemble.c. */ typedef enum ArmOpCode { - ARM_PSEUDO_IT_BOTTOM = -17, + ARM_PSEUDO_BARRIER = -17, ARM_PSEUDO_EXTENDED_MIR = -16, ARM_PSEUDO_SSA_REP = -15, ARM_PSEUDO_ENTRY_BLOCK = -14, @@ -532,6 +535,7 @@ typedef enum ArmOpFeatureFlags { kRegDef0, kRegDef1, kRegDefSP, + kRegDefLR, kRegDefList0, kRegDefList1, kRegUse0, @@ -555,13 +559,14 @@ typedef enum ArmOpFeatureFlags { #define REG_DEF0 (1 << kRegDef0) #define REG_DEF1 (1 << kRegDef1) #define REG_DEF_SP (1 << kRegDefSP) +#define REG_DEF_LR (1 << kRegDefLR) #define REG_DEF_LIST0 (1 << kRegDefList0) #define REG_DEF_LIST1 (1 << kRegDefList1) #define REG_USE0 (1 << kRegUse0) #define REG_USE1 (1 << kRegUse1) #define REG_USE2 (1 << kRegUse2) -#define REG_USE_PC (1 << kRegUsePC) #define REG_USE_SP (1 << kRegUseSP) +#define REG_USE_PC (1 << kRegUsePC) #define REG_USE_LIST0 (1 << kRegUseList0) #define REG_USE_LIST1 (1 << kRegUseList1) #define NO_OPERAND (1 << kNoOperand) @@ -574,11 +579,14 @@ typedef enum ArmOpFeatureFlags { #define USES_CCODES (1 << kUsesCCodes) /* Common combo register usage patterns */ -#define REG_DEF0_USE1 (REG_DEF0 | REG_USE1) -#define REG_DEF0_USE01 (REG_DEF0 | REG_USE0 | REG_USE1) -#define REG_DEF0_USE12 (REG_DEF0 | REG_USE1 | REG_USE2) #define REG_USE01 (REG_USE0 | REG_USE1) -#define REG_USE012 (REG_USE0 | REG_USE1 | REG_USE2) +#define REG_USE012 (REG_USE01 | REG_USE2) +#define REG_USE12 (REG_USE1 | REG_USE2) +#define REG_DEF0_USE0 (REG_DEF0 | REG_USE0) +#define REG_DEF0_USE1 (REG_DEF0 | REG_USE1) +#define REG_DEF0_USE01 (REG_DEF0 | REG_USE01) +#define REG_DEF0_USE12 (REG_DEF0 | REG_USE12) +#define REG_DEF01_USE2 (REG_DEF0 | REG_DEF1 | REG_USE2) /* Instruction assembly fieldLoc kind */ typedef enum ArmEncodingKind { @@ -635,6 +643,7 @@ typedef struct ArmLIR { bool isNop; // LIR 
is optimized away int age; // default is 0, set lazily by the optimizer int size; // 16-bit unit size (1 for thumb, 1 or 2 for thumb2) + int aliasInfo; // For Dalvik register access disambiguation u8 useMask; // Resource mask for use u8 defMask; // Resource mask for def } ArmLIR; diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c index 4d7dadc36..6147fd6e1 100644 --- a/vm/compiler/codegen/arm/Assemble.c +++ b/vm/compiler/codegen/arm/Assemble.c @@ -56,7 +56,6 @@ * [f]ormats: * h -> 4-digit hex * d -> decimal - * D -> decimal+8 (used to convert 3-bit regnum field to high reg) * E -> decimal*4 * F -> decimal*2 * c -> branch condition (beq, bne, etc.) @@ -89,7 +88,7 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { "adds", "r!0d, r!1d, #!2d", 1), ENCODING_MAP(THUMB_ADD_RI8, 0x3000, BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1, - IS_BINARY_OP | REG_DEF0 | REG_USE0 | SETS_CCODES, + IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, "adds", "r!0d, r!0d, #!1d", 1), ENCODING_MAP(THUMB_ADD_RRR, 0x1800, BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1, @@ -149,23 +148,23 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { "bkpt", "!0d", 1), ENCODING_MAP(THUMB_BLX_1, 0xf000, BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, - IS_BINARY_OP | IS_BRANCH, + IS_BINARY_OP | IS_BRANCH | REG_DEF_LR, "blx_1", "!0u", 1), ENCODING_MAP(THUMB_BLX_2, 0xe800, BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, - IS_BINARY_OP | IS_BRANCH, + IS_BINARY_OP | IS_BRANCH | REG_DEF_LR, "blx_2", "!0v", 1), ENCODING_MAP(THUMB_BL_1, 0xf000, BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, - IS_UNARY_OP | IS_BRANCH, + IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, "bl_1", "!0u", 1), ENCODING_MAP(THUMB_BL_2, 0xf800, BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, - IS_UNARY_OP | IS_BRANCH, + IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, "bl_2", "!0v", 1), ENCODING_MAP(THUMB_BLX_R, 0x4780, BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1, 
UNUSED, -1, -1, - IS_UNARY_OP | IS_BRANCH, + IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR, "blx", "r!0d", 1), ENCODING_MAP(THUMB_BX, 0x4700, BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, @@ -186,22 +185,22 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { ENCODING_MAP(THUMB_CMP_LH, 0x4540, BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "r!0d, r!1D", 1), + "cmp", "r!0d, r!1d", 1), ENCODING_MAP(THUMB_CMP_HL, 0x4580, BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "r!0D, r!1d", 1), + "cmp", "r!0d, r!1d", 1), ENCODING_MAP(THUMB_CMP_HH, 0x45c0, BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "r!0D, r!1D", 1), + "cmp", "r!0d, r!1d", 1), ENCODING_MAP(THUMB_EOR_RR, 0x4040, BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, "eors", "r!0d, r!1d", 1), ENCODING_MAP(THUMB_LDMIA, 0xc800, BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1, - IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_DEF_LIST1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1, "ldmia", "r!0d!!, ", 1), ENCODING_MAP(THUMB_LDR_RRI5, 0x6800, BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1, @@ -270,15 +269,15 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { ENCODING_MAP(THUMB_MOV_RR_H2H, 0x46c0, BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "r!0D, r!1D", 1), + "mov", "r!0d, r!1d", 1), ENCODING_MAP(THUMB_MOV_RR_H2L, 0x4640, BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "r!0d, r!1D", 1), + "mov", "r!0d, r!1d", 1), ENCODING_MAP(THUMB_MOV_RR_L2H, 0x4680, BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "r!0D, r!1d", 1), + "mov", "r!0d, r!1d", 1), ENCODING_MAP(THUMB_MUL, 0x4340, BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, 
-1, -1, UNUSED, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, @@ -349,7 +348,7 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { "subs", "r!0d, r!1d, #!2d]", 1), ENCODING_MAP(THUMB_SUB_RI8, 0x3800, BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1, - IS_BINARY_OP | REG_DEF0 | REG_USE0 | SETS_CCODES, + IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, "subs", "r!0d, #!1d", 1), ENCODING_MAP(THUMB_SUB_RRR, 0x1a00, BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1, @@ -471,13 +470,13 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0, UNUSED, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, "ldr", "r!0d,[r!1d, #-!2d]", 2), - ENCODING_MAP(THUMB2_CBNZ, 0xb900, + ENCODING_MAP(THUMB2_CBNZ, 0xb900, /* Note: does not affect flags */ BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, - IS_BINARY_OP, /* Note: does not affect flags */ + IS_BINARY_OP | REG_USE0 | IS_BRANCH, "cbnz", "r!0d,!1t", 1), - ENCODING_MAP(THUMB2_CBZ, 0xb100, + ENCODING_MAP(THUMB2_CBZ, 0xb100, /* Note: does not affect flags */ BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, - IS_BINARY_OP, /* Note: does not affect flags */ + IS_BINARY_OP | REG_USE0 | IS_BRANCH, "cbz", "r!0d,!1t", 1), ENCODING_MAP(THUMB2_ADD_RRI12, 0xf2000000, BITBLT, 11, 8, BITBLT, 19, 16, IMM12, -1, -1, UNUSED, -1, -1, @@ -497,11 +496,11 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { "vmov.f64 ", " !0S, !1S", 2), ENCODING_MAP(THUMB2_LDMIA, 0xe8900000, BITBLT, 19, 16, BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1, - IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_DEF_LIST1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1, "ldmia", "r!0d!!, ", 2), ENCODING_MAP(THUMB2_STMIA, 0xe8800000, BITBLT, 19, 16, BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1, - IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1, "stmia", "r!0d!!, ", 2), ENCODING_MAP(THUMB2_ADD_RRR, 0xeb100000, /* setflags encoding */ BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, 
-1, @@ -758,7 +757,7 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { "fmsr", "!0s, r!1d", 2), ENCODING_MAP(THUMB2_FMRRD, 0xec500b10, BITBLT, 15, 12, BITBLT, 19, 16, DFP, 5, 0, UNUSED, -1, -1, - IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE2, + IS_TERTIARY_OP | REG_DEF01_USE2, "fmrrd", "r!0d, r!1d, !2S", 2), ENCODING_MAP(THUMB2_FMDRR, 0xec400b10, DFP, 5, 0, BITBLT, 15, 12, BITBLT, 19, 16, UNUSED, -1, -1, @@ -766,7 +765,6 @@ "fmdrr", "!0S, r!1d, r!2d", 2), }; - /* * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is * not ready. Since r5 (rFP) is not updated often, it is less likely to diff --git a/vm/compiler/codegen/arm/Codegen.c b/vm/compiler/codegen/arm/Codegen.c index 30dc508e5..65792bea8 100644 --- a/vm/compiler/codegen/arm/Codegen.c +++ b/vm/compiler/codegen/arm/Codegen.c @@ -122,9 +122,10 @@ static void selfVerificationLoad(InterpState* interpState) if (heapSpacePtr == shadowSpace->heapSpaceTail) data = *((unsigned int*) addr); - //LOGD("*** HEAP LOAD: Addr: 0x%x Data: 0x%x", addr, data); - int reg = (heapArgSpace->regMap >> 4) & 0xF; + + //LOGD("*** HEAP LOAD: Reg:%d Addr: 0x%x Data: 0x%x", reg, addr, data); + selfVerificationLoadDecodeData(heapArgSpace, data, reg); } @@ -483,6 +484,48 @@ static void selfVerificationMemOpWrapper(CompilationUnit *cUnit, int regMap, #endif /* + * Mark load/store instructions that access Dalvik registers through rFP + + * offset. + */ +static void annotateDalvikRegAccess(ArmLIR *lir, int regId, bool isLoad) +{ + if (isLoad) { + lir->useMask |= ENCODE_DALVIK_REG; + } else { + lir->defMask |= ENCODE_DALVIK_REG; + } + + /* + * Store the Dalvik register id in aliasInfo. Mark the MSB if it is a 64-bit + * access. + */ + lir->aliasInfo = regId; + if (DOUBLEREG(lir->operands[0])) { + lir->aliasInfo |= 0x80000000; + } +} + +/* + * Decode the register id and mark the corresponding bit(s). 
+ */ +static inline void setupRegMask(u8 *mask, int reg) +{ + u8 seed; + int shift; + int regId = reg & 0x1f; + + /* + * Each double register is equal to a pair of single-precision FP registers + */ + seed = DOUBLEREG(reg) ? 3 : 1; + /* FP register starts at bit position 16 */ + shift = FPREG(reg) ? kFPReg0 : 0; + /* Expand the double register id into single offset */ + shift += regId; + *mask |= seed << shift; +} + +/* + * Set up the proper fields in the resource mask */ static void setupResourceMasks(ArmLIR *lir) @@ -500,20 +543,25 @@ /* Set up the mask for resources that are updated */ if (flags & IS_BRANCH) { lir->defMask |= ENCODE_REG_PC; + lir->useMask |= ENCODE_REG_PC; } if (flags & REG_DEF0) { - lir->defMask |= ENCODE_GP_REG(lir->operands[0]); + setupRegMask(&lir->defMask, lir->operands[0]); } if (flags & REG_DEF1) { - lir->defMask |= ENCODE_GP_REG(lir->operands[1]); + setupRegMask(&lir->defMask, lir->operands[1]); } if (flags & REG_DEF_SP) { lir->defMask |= ENCODE_REG_SP; } + if (flags & REG_DEF_LR) { + lir->defMask |= ENCODE_REG_LR; + } + if (flags & REG_DEF_LIST0) { lir->defMask |= ENCODE_REG_LIST(lir->operands[0]); } @@ -528,7 +576,7 @@ /* Conservatively treat the IT block */ if (flags & IS_IT) { - lir->defMask = -1; + lir->defMask = ENCODE_ALL; } /* Set up the mask for resources that are used */ @@ -541,7 +589,7 @@ for (i = 0; i < 3; i++) { if (flags & (1 << (kRegUse0 + i))) { - lir->useMask |= ENCODE_GP_REG(lir->operands[i]); + setupRegMask(&lir->useMask, lir->operands[i]); } } } @@ -698,6 +746,17 @@ static ArmLIR *scanLiteralPool(CompilationUnit *cUnit, int value, return NULL; } +/* + * Generate an ARM_PSEUDO_BARRIER marker to indicate the boundary of special + * blocks. 
+ */ +static void genBarrier(CompilationUnit *cUnit) +{ + ArmLIR *barrier = newLIR0(cUnit, ARM_PSEUDO_BARRIER); + /* Mark all resources as being clobbered */ + barrier->defMask = -1; +} + /* Perform the actual operation for OP_RETURN_* */ static void genReturnCommon(CompilationUnit *cUnit, MIR *mir) { @@ -1603,7 +1662,13 @@ static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir, opRegRegImm(cUnit, OP_ADD, r4PC, rFP, srcOffset, rNone); /* load [r0 .. min(numArgs,4)] */ regMask = (1 << ((numArgs < 4) ? numArgs : 4)) - 1; + /* + * Protect the loadMultiple instruction from being reordered with other + * Dalvik stack accesses. + */ + genBarrier(cUnit); loadMultiple(cUnit, r4PC, regMask); + genBarrier(cUnit); opRegRegImm(cUnit, OP_SUB, r7, rFP, sizeof(StackSaveArea) + (numArgs << 2), rNone); @@ -1627,9 +1692,16 @@ static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir, if (numArgs > 11) { loadConstant(cUnit, 5, ((numArgs - 4) >> 2) << 2); loopLabel = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL); + loopLabel->defMask = ENCODE_ALL; } storeMultiple(cUnit, r7, regMask); + /* + * Protect the loadMultiple instruction from being reordered with other + * Dalvik stack accesses. + */ + genBarrier(cUnit); loadMultiple(cUnit, r4PC, regMask); + genBarrier(cUnit); /* No need to generate the loop structure if numArgs <= 11 */ if (numArgs > 11) { opRegImm(cUnit, OP_SUB, rFP, 4, rNone); @@ -1643,7 +1715,13 @@ static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir, /* Generate the loop epilogue - don't use r0 */ if ((numArgs > 4) && (numArgs % 4)) { regMask = ((1 << (numArgs & 0x3)) - 1) << 1; + /* + * Protect the loadMultiple instruction from being reordered with other + * Dalvik stack accesses. 
+ */ + genBarrier(cUnit); loadMultiple(cUnit, r4PC, regMask); + genBarrier(cUnit); } if (numArgs >= 8) opImm(cUnit, OP_POP, (1 << r0 | 1 << rFP)); @@ -1960,7 +2038,13 @@ static ArmLIR *loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo, } else { assert(rDestLo < rDestHi); res = loadValueAddress(cUnit, vSrc, rDestLo); + /* + * Protect the loadMultiple instruction from being reordered with other + * Dalvik stack accesses. + */ + genBarrier(cUnit); loadMultiple(cUnit, rDestLo, (1<offset, NULL); /* check cast passed - branch target here */ ArmLIR *target = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL); + target->defMask = ENCODE_ALL; branch1->generic.target = (LIR *)target; branch2->generic.target = (LIR *)target; break; @@ -2738,6 +2829,7 @@ static bool handleFmt22c(CompilationUnit *cUnit, MIR *mir) opReg(cUnit, OP_BLX, r4PC); /* branch target here */ ArmLIR *target = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL); + target->defMask = ENCODE_ALL; storeValue(cUnit, r0, mir->dalvikInsn.vA, r1); branch1->generic.target = (LIR *)target; branch2->generic.target = (LIR *)target; @@ -3888,6 +3980,8 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) /* Remember the first LIR for this block */ if (headLIR == NULL) { headLIR = boundaryLIR; + /* Set the first boundaryLIR as a scheduling barrier */ + headLIR->defMask = ENCODE_ALL; } bool notHandled; @@ -4155,3 +4249,19 @@ void dvmCompilerArchDump(void) LOGD("dalvik.vm.jit.op = %s", buf); } } + +/* Common initialization routine for an architecture family */ +bool dvmCompilerArchInit() +{ + int i; + + for (i = 0; i < ARM_LAST; i++) { + if (EncodingMap[i].opCode != i) { + LOGE("Encoding order for %s is wrong: expecting %d, seeing %d", + EncodingMap[i].name, i, EncodingMap[i].opCode); + dvmAbort(); + } + } + + return compilerArchVariantInit(); +} diff --git a/vm/compiler/codegen/arm/Codegen.h b/vm/compiler/codegen/arm/Codegen.h index 50a94fdd7..b484cd23d 100644 --- a/vm/compiler/codegen/arm/Codegen.h +++ 
b/vm/compiler/codegen/arm/Codegen.h @@ -22,6 +22,7 @@ * variant code such as ThumbUtilty.c */ +static void annotateDalvikRegAccess(ArmLIR *lir, int regId, bool isLoad); static void setupResourceMasks(ArmLIR *lir); static ArmLIR *newLIR0(CompilationUnit *cUnit, ArmOpCode opCode); static ArmLIR *newLIR1(CompilationUnit *cUnit, ArmOpCode opCode, @@ -38,10 +39,10 @@ static ArmLIR *addWordData(CompilationUnit *cUnit, int value, bool inPlace); static inline ArmLIR *genCheckCommon(CompilationUnit *cUnit, int dOffset, ArmLIR *branch, ArmLIR *pcrLabel); +static void genBarrier(CompilationUnit *cUnit); /* Routines which must be supplied by the variant-specific code */ static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode); -bool dvmCompilerArchInit(void); static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir); static bool genInlineCos(CompilationUnit *cUnit, MIR *mir); static bool genInlineSin(CompilationUnit *cUnit, MIR *mir); @@ -52,7 +53,6 @@ static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1, int vSrc2); static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1, int vSrc2); - - +static bool compilerArchVariantInit(); #endif /* _DALVIK_VM_COMPILER_CODEGEN_ARM_CODEGEN_H */ diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c index fbc0e26f1..71fc01410 100644 --- a/vm/compiler/codegen/arm/LocalOptimizations.c +++ b/vm/compiler/codegen/arm/LocalOptimizations.c @@ -18,49 +18,40 @@ #include "vm/compiler/CompilerInternals.h" #include "ArmLIR.h" +#define DEBUG_OPT(X) + ArmLIR* dvmCompilerGenCopy(CompilationUnit *cUnit, int rDest, int rSrc); /* Is this a Dalvik register access? 
*/ static inline bool isDalvikLoad(ArmLIR *lir) { - return ((lir->operands[1] == rFP) && - ((lir->opCode == THUMB_LDR_RRI5) || - (lir->opCode == THUMB2_LDR_RRI12) || - (lir->opCode == THUMB2_VLDRS) || - (lir->opCode == THUMB2_VLDRD))); + return (lir->useMask != ~0ULL) && (lir->useMask & ENCODE_DALVIK_REG); } static inline bool isDalvikStore(ArmLIR *lir) { - return ((lir->operands[1] == rFP) && - ((lir->opCode == THUMB_STR_RRI5) || - (lir->opCode == THUMB2_STR_RRI12) || - (lir->opCode == THUMB2_VSTRS) || - (lir->opCode == THUMB2_VSTRD))); + return (lir->defMask != ~0ULL) && (lir->defMask & ENCODE_DALVIK_REG); } -/* Double regs overlap float regs. Return true if collision */ -static bool regClobber(int reg1, int reg2) +static inline bool isDalvikRegisterPartiallyClobbered(ArmLIR *lir1, + ArmLIR *lir2) { - int reg1a, reg1b; - int reg2a, reg2b; - if (!FPREG(reg1) || !FPREG(reg2)) - return (reg1 == reg2); - if (DOUBLEREG(reg1)) { - reg1a = reg1 & FP_REG_MASK; - reg1b = reg1a + 1; - } else { - reg1a = reg1b = reg1 & FP_REG_MASK; - } - if (DOUBLEREG(reg2)) { - reg2a = reg2 & FP_REG_MASK; - reg2b = reg2a + 1; - } else { - reg2a = reg2b = reg2 & FP_REG_MASK; - } - return (reg1a == reg2a) || (reg1a == reg2b) || - (reg1b == reg2a) || (reg1b == reg2b); + int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo); + int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo); + int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo); + int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo); + + return (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo); +} + +static void dumpDependentInsnPair(ArmLIR *thisLIR, ArmLIR *checkLIR, + const char *optimization) +{ + LOGD("************ %s ************", optimization); + dvmDumpLIRInsn((LIR *) thisLIR, 0); + dvmDumpLIRInsn((LIR *) checkLIR, 0); } + /* * Perform a pass of top-down walk to * 1) Eliminate redundant loads and stores @@ -81,15 +72,18 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, continue; } if (isDalvikStore(thisLIR)) 
{ - int dRegId = thisLIR->operands[2]; + int dRegId = DECODE_ALIAS_INFO_REG(thisLIR->aliasInfo); + int dRegIdHi = dRegId + DECODE_ALIAS_INFO_WIDE(thisLIR->aliasInfo); int nativeRegId = thisLIR->operands[0]; ArmLIR *checkLIR; int sinkDistance = 0; /* * Add r15 (pc) to the mask to prevent this instruction - * from sinking past branch instructions. + * from sinking past branch instructions. Unset the Dalvik register + * bit when checking with native resource constraints. */ - u8 stopMask = ENCODE_GP_REG(rpc) | thisLIR->useMask; + u8 stopMask = (ENCODE_REG_PC | thisLIR->useMask) & + ~ENCODE_DALVIK_REG; for (checkLIR = NEXT_LIR(thisLIR); checkLIR != tailLIR; @@ -97,10 +91,8 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, /* Check if a Dalvik register load is redundant */ if (isDalvikLoad(checkLIR) && - checkLIR->operands[2] == dRegId ) { - if (FPREG(nativeRegId) != FPREG(checkLIR->operands[0])) { - break; // TODO: handle gen<=>float copies - } + (checkLIR->aliasInfo == thisLIR->aliasInfo) && + (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) { /* Insert a move to replace the load */ if (checkLIR->operands[0] != nativeRegId) { ArmLIR *moveLIR; @@ -117,39 +109,34 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, checkLIR->isNop = true; continue; - /* Found a true output dependency - nuke the previous store */ + /* + * Found a true output dependency - nuke the previous store. + * The register type doesn't matter here. 
+ */ } else if (isDalvikStore(checkLIR) && - checkLIR->operands[2] == dRegId) { + (checkLIR->aliasInfo == thisLIR->aliasInfo)) { thisLIR->isNop = true; break; /* Find out the latest slot that the store can be sunk into */ } else { - bool stopHere = false; - /* Last instruction reached */ - stopHere |= NEXT_LIR(checkLIR) == tailLIR; - - /* - * Conservatively assume there is a memory dependency - * for st/ld multiples and reg+reg address mode - */ - stopHere |= checkLIR->opCode == THUMB_STMIA || - checkLIR->opCode == THUMB_LDMIA || - checkLIR->opCode == THUMB_STR_RRR || - checkLIR->opCode == THUMB_LDR_RRR || - checkLIR->opCode == THUMB2_STR_RRR || - checkLIR->opCode == THUMB2_LDR_RRR || - checkLIR->opCode == THUMB2_STMIA || - checkLIR->opCode == THUMB2_LDMIA || - checkLIR->opCode == THUMB2_VLDRD || - checkLIR->opCode == THUMB2_VSTRD; - + bool stopHere = (NEXT_LIR(checkLIR) == tailLIR); /* Store data is clobbered */ - stopHere |= (stopMask & checkLIR->defMask) != 0; + stopHere |= ((stopMask & checkLIR->defMask) != 0); + + /* Store data partially clobbers the Dalvik register */ + if (stopHere == false && + ((checkLIR->useMask | checkLIR->defMask) & + ENCODE_DALVIK_REG)) { + stopHere = isDalvikRegisterPartiallyClobbered(thisLIR, + checkLIR); + } /* Found a new place to put the store - move it here */ if (stopHere == true) { + DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR, + "SINK STORE")); /* The store can be sunk for at least one cycle */ if (sinkDistance != 0) { ArmLIR *newStoreLIR = @@ -195,25 +182,73 @@ static void applyLoadHoisting(CompilationUnit *cUnit, continue; } if (isDalvikLoad(thisLIR)) { - int dRegId = thisLIR->operands[2]; + int dRegId = DECODE_ALIAS_INFO_REG(thisLIR->aliasInfo); + int dRegIdHi = dRegId + DECODE_ALIAS_INFO_WIDE(thisLIR->aliasInfo); int nativeRegId = thisLIR->operands[0]; ArmLIR *checkLIR; int hoistDistance = 0; - u8 stopUseMask = ENCODE_GP_REG(rpc) | thisLIR->useMask; - u8 stopDefMask = thisLIR->defMask; + u8 stopUseMask = 
(ENCODE_REG_PC | thisLIR->useMask) & + ~ENCODE_DALVIK_REG; + u8 stopDefMask = thisLIR->defMask & ~ENCODE_DALVIK_REG; + /* First check if the load can be completely eliminated */ for (checkLIR = PREV_LIR(thisLIR); checkLIR != headLIR; checkLIR = PREV_LIR(checkLIR)) { if (checkLIR->isNop) continue; + /* + * Check if the Dalvik register is previously accessed + * with exactly the same type. + */ + if ((isDalvikLoad(checkLIR) || isDalvikStore(checkLIR)) && + (checkLIR->aliasInfo == thisLIR->aliasInfo) && + (checkLIR->operands[0] == nativeRegId)) { + /* + * If it is previously accessed but with a different type, + * the search will terminate later at the point checking + * for partially overlapping stores. + */ + thisLIR->isNop = true; + break; + } + + /* + * No earlier use/def can reach this load if: + * 1) Head instruction is reached + * 2) load target register is clobbered + * 3) A branch is seen (stopUseMask has the PC bit set). + */ + if ((checkLIR == headLIR) || + (stopUseMask | stopDefMask) & checkLIR->defMask) { + break; + } + + /* Store data partially clobbers the Dalvik register */ + if (isDalvikStore(checkLIR) && + isDalvikRegisterPartiallyClobbered(thisLIR, checkLIR)) { + break; + } + } + + /* The load has been eliminated */ + if (thisLIR->isNop) continue; + + /* + * The load cannot be eliminated. See if it can be hoisted to an + * earlier spot. 
+ */ + for (checkLIR = PREV_LIR(thisLIR); + /* empty by intention */; + checkLIR = PREV_LIR(checkLIR)) { + + if (checkLIR->isNop) continue; + /* Check if the current load is redundant */ if ((isDalvikLoad(checkLIR) || isDalvikStore(checkLIR)) && - checkLIR->operands[2] == dRegId ) { - if (FPREG(nativeRegId) != FPREG(checkLIR->operands[0])) { - break; // TODO: handle gen<=>float copies - } + (checkLIR->aliasInfo == thisLIR->aliasInfo) && + (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) { /* Insert a move to replace the load */ if (checkLIR->operands[0] != nativeRegId) { ArmLIR *moveLIR; @@ -226,41 +261,54 @@ static void applyLoadHoisting(CompilationUnit *cUnit, dvmCompilerInsertLIRAfter((LIR *) checkLIR, (LIR *) moveLIR); } - cUnit->printMe = true; thisLIR->isNop = true; break; /* Find out if the load can be yanked past the checkLIR */ } else { - bool stopHere = false; - /* Last instruction reached */ - stopHere |= PREV_LIR(checkLIR) == headLIR; - - /* - * Conservatively assume there is a memory dependency - * for st/ld multiples and reg+reg address mode - */ - stopHere |= checkLIR->opCode == THUMB_STMIA || - checkLIR->opCode == THUMB_LDMIA || - checkLIR->opCode == THUMB_STR_RRR || - checkLIR->opCode == THUMB_LDR_RRR || - checkLIR->opCode == THUMB2_STR_RRR || - checkLIR->opCode == THUMB2_LDR_RRR || - checkLIR->opCode == THUMB2_STMIA || - checkLIR->opCode == THUMB2_LDMIA || - checkLIR->opCode == THUMB2_VLDRD || - checkLIR->opCode == THUMB2_VSTRD; + bool stopHere = (checkLIR == headLIR); /* Base address is clobbered by checkLIR */ - stopHere |= (stopUseMask & checkLIR->defMask) != 0; + stopHere |= ((stopUseMask & checkLIR->defMask) != 0); /* Load target clobbers use/def in checkLIR */ - stopHere |= (stopDefMask & - (checkLIR->useMask | checkLIR->defMask)) != 0; + stopHere |= ((stopDefMask & + (checkLIR->useMask | checkLIR->defMask)) != 0); + + /* Store data partially clobbers the Dalvik register */ + if (stopHere == false && + (checkLIR->defMask & 
ENCODE_DALVIK_REG)) { + stopHere = isDalvikRegisterPartiallyClobbered(thisLIR, + checkLIR); + } + + /* + * Stop at an earlier Dalvik load if the offset of checkLIR + * is not less than thisLIR + * + * Experiments show that doing + * + * ldr r1, [r5, #16] + * ldr r0, [r5, #20] + * + * is much faster than + * + * ldr r0, [r5, #20] + * ldr r1, [r5, #16] + */ + if (isDalvikLoad(checkLIR)) { + int dRegId2 = + DECODE_ALIAS_INFO_REG(checkLIR->aliasInfo); + if (dRegId2 <= dRegId) { + stopHere = true; + } + } /* Found a new place to put the load - move it here */ if (stopHere == true) { + DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR, + "HOIST LOAD")); /* The store can be hoisted for at least one cycle */ if (hoistDistance != 0) { ArmLIR *newLoadLIR = @@ -274,13 +322,13 @@ static void applyLoadHoisting(CompilationUnit *cUnit, dvmCompilerInsertLIRAfter((LIR *) checkLIR, (LIR *) newLoadLIR); thisLIR->isNop = true; - cUnit->printMe = true; } break; } /* - * Saw a real instruction that the store can be sunk after + * Saw a real instruction that hosting the load is + * beneficial */ if (!isPseudoOpCode(checkLIR->opCode)) { hoistDistance++; diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c index b9d1a239e..b40656de3 100644 --- a/vm/compiler/codegen/arm/Thumb2Util.c +++ b/vm/compiler/codegen/arm/Thumb2Util.c @@ -107,11 +107,16 @@ static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir); static inline void resetRegisterScoreboard(CompilationUnit *cUnit) { RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + int i; dvmClearAllBits(registerScoreboard->nullCheckedRegs); registerScoreboard->liveDalvikReg = vNone; registerScoreboard->nativeReg = vNone; registerScoreboard->nativeRegHi = vNone; + for (i = 0; i < 32; i++) { + registerScoreboard->fp[i] = vNone; + } + registerScoreboard->nextFP = 0; } /* Kill the corresponding bit in the null-checked register list */ @@ -168,17 +173,6 @@ static inline int 
selectFirstRegister(CompilationUnit *cUnit, int vSrc, } else { return (registerScoreboard->nativeReg + 1) & 3; } - -} - -/* - * Generate a ARM_PSEUDO_IT_BOTTOM marker to indicate the end of an IT block - */ -static void genITBottom(CompilationUnit *cUnit) -{ - ArmLIR *itBottom = newLIR0(cUnit, ARM_PSEUDO_IT_BOTTOM); - /* Mark all resources as being clobbered */ - itBottom->defMask = -1; } /* @@ -457,42 +451,116 @@ static ArmLIR *fpVarAccess(CompilationUnit *cUnit, int vSrcDest, { ArmLIR *res; if (vSrcDest > 255) { - res = opRegRegImm(cUnit, OP_ADD, r7, rFP, vSrcDest * 4, rNone); - newLIR3(cUnit, opCode, rSrcDest, r7, 0); + opRegRegImm(cUnit, OP_ADD, r7, rFP, vSrcDest * 4, rNone); + res = newLIR3(cUnit, opCode, rSrcDest, r7, 0); } else { res = newLIR3(cUnit, opCode, rSrcDest, rFP, vSrcDest); } return res; } + +static int nextFPReg(CompilationUnit *cUnit, int dalvikReg, bool isDouble) +{ + RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + int reg; + + if (isDouble) { + reg = ((registerScoreboard->nextFP + 1) & ~1) % 32; + registerScoreboard->nextFP = reg + 2; + registerScoreboard->nextFP %= 32; + registerScoreboard->fp[reg] = dalvikReg; + return dr0 + reg; + } + else { + reg = registerScoreboard->nextFP++; + registerScoreboard->nextFP %= 32; + registerScoreboard->fp[reg] = dalvikReg; + return fr0 + reg; + } +} + +/* + * Select a SFP register for the dalvikReg + */ +static int selectSFPReg(CompilationUnit *cUnit, int dalvikReg) +{ + RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + int i; + + if (dalvikReg == vNone) { + return nextFPReg(cUnit, dalvikReg, false);; + } + + for (i = 0; i < 32; i++) { + if (registerScoreboard->fp[i] == dalvikReg) { + return fr0 + i; + } + } + return nextFPReg(cUnit, dalvikReg, false);; +} + +/* + * Select a DFP register for the dalvikReg + */ +static int selectDFPReg(CompilationUnit *cUnit, int dalvikReg) +{ + RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + int i; + + if 
(dalvikReg == vNone) { + return nextFPReg(cUnit, dalvikReg, true);; + } + + for (i = 0; i < 32; i += 2) { + if (registerScoreboard->fp[i] == dalvikReg) { + return dr0 + i; + } + } + return nextFPReg(cUnit, dalvikReg, true); +} + static ArmLIR *loadFloat(CompilationUnit *cUnit, int vSrc, int rDest) { assert(SINGLEREG(rDest)); - return fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRS); + ArmLIR *lir = fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRS); + annotateDalvikRegAccess(lir, vSrc, true /* isLoad */); + return lir; } /* Store a float to a Dalvik register */ -static ArmLIR *storeFloat(CompilationUnit *cUnit, int rSrc, int vDest, - int rScratch) +static ArmLIR *storeFloat(CompilationUnit *cUnit, int rSrc, int vDest) { + RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + assert(SINGLEREG(rSrc)); - return fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRS); + registerScoreboard->fp[rSrc % 32] = vDest; + + ArmLIR *lir = fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRS); + annotateDalvikRegAccess(lir, vDest, false /* isLoad */); + return lir; } /* Load a double from a Dalvik register */ static ArmLIR *loadDouble(CompilationUnit *cUnit, int vSrc, int rDest) { assert(DOUBLEREG(rDest)); - return fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRD); + ArmLIR *lir = fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRD); + annotateDalvikRegAccess(lir, vSrc, true /* isLoad */); + return lir; } /* Store a double to a Dalvik register */ -static ArmLIR *storeDouble(CompilationUnit *cUnit, int rSrc, int vDest, - int rScratch) +static ArmLIR *storeDouble(CompilationUnit *cUnit, int rSrc, int vDest) { + RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + assert(DOUBLEREG(rSrc)); - return fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRD); -} + registerScoreboard->fp[rSrc % 32] = vDest; + ArmLIR *lir = fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRD); + annotateDalvikRegAccess(lir, vDest, false /* isLoad */); + return lir; +} /* * Load value from base + displacement. 
Optionally perform null check @@ -507,28 +575,30 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase, bool nullCheck, int vReg) { ArmLIR *first = NULL; - ArmLIR *res; + ArmLIR *res, *load; ArmOpCode opCode = THUMB_BKPT; bool shortForm = false; bool thumb2Form = (displacement < 4092 && displacement >= 0); int shortMax = 128; bool allLowRegs = (LOWREG(rBase) && LOWREG(rDest)); + int encodedDisp = displacement; + switch (size) { case WORD: if (LOWREG(rDest) && (rBase == rpc) && (displacement <= 1020) && (displacement >= 0)) { shortForm = true; - displacement >>= 2; + encodedDisp >>= 2; opCode = THUMB_LDR_PC_REL; } else if (LOWREG(rDest) && (rBase == r13) && (displacement <= 1020) && (displacement >= 0)) { shortForm = true; - displacement >>= 2; + encodedDisp >>= 2; opCode = THUMB_LDR_SP_REL; } else if (allLowRegs && displacement < 128 && displacement >= 0) { assert((displacement & 0x3) == 0); shortForm = true; - displacement >>= 2; + encodedDisp >>= 2; opCode = THUMB_LDR_RRI5; } else if (thumb2Form) { shortForm = true; @@ -539,7 +609,7 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase, if (allLowRegs && displacement < 64 && displacement >= 0) { assert((displacement & 0x1) == 0); shortForm = true; - displacement >>= 1; + encodedDisp >>= 1; opCode = THUMB_LDRH_RRI5; } else if (displacement < 4092 && displacement >= 0) { shortForm = true; @@ -573,11 +643,15 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase, if (nullCheck) first = genNullCheck(cUnit, vReg, rBase, mir->offset, NULL); if (shortForm) { - res = newLIR3(cUnit, opCode, rDest, rBase, displacement); + load = res = newLIR3(cUnit, opCode, rDest, rBase, encodedDisp); } else { assert(rBase != rDest); - res = loadConstant(cUnit, rDest, displacement); - loadBaseIndexed(cUnit, rBase, rDest, rDest, 0, size); + res = loadConstant(cUnit, rDest, encodedDisp); + load = loadBaseIndexed(cUnit, rBase, rDest, rDest, 0, size); + } + + if (rBase == rFP) { + 
annotateDalvikRegAccess(load, displacement >> 2, true /* isLoad */); } return (first) ? first : res; } @@ -586,12 +660,14 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, int displacement, int rSrc, OpSize size, int rScratch) { - ArmLIR *res; + ArmLIR *res, *store; ArmOpCode opCode = THUMB_BKPT; bool shortForm = false; bool thumb2Form = (displacement < 4092 && displacement >= 0); int shortMax = 128; bool allLowRegs = (LOWREG(rBase) && LOWREG(rSrc)); + int encodedDisp = displacement; + if (rScratch != -1) allLowRegs &= LOWREG(rScratch); switch (size) { @@ -599,7 +675,7 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, if (allLowRegs && displacement < 128 && displacement >= 0) { assert((displacement & 0x3) == 0); shortForm = true; - displacement >>= 2; + encodedDisp >>= 2; opCode = THUMB_STR_RRI5; } else if (thumb2Form) { shortForm = true; @@ -611,7 +687,7 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, if (displacement < 64 && displacement >= 0) { assert((displacement & 0x1) == 0); shortForm = true; - displacement >>= 1; + encodedDisp >>= 1; opCode = THUMB_STRH_RRI5; } else if (thumb2Form) { shortForm = true; @@ -632,11 +708,15 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, assert(0); } if (shortForm) { - res = newLIR3(cUnit, opCode, rSrc, rBase, displacement); + store = res = newLIR3(cUnit, opCode, rSrc, rBase, encodedDisp); } else { assert(rScratch != -1); - res = loadConstant(cUnit, rScratch, displacement); - storeBaseIndexed(cUnit, rBase, rScratch, rSrc, 0, size); + res = loadConstant(cUnit, rScratch, encodedDisp); + store = storeBaseIndexed(cUnit, rBase, rScratch, rSrc, 0, size); + } + + if (rBase == rFP) { + annotateDalvikRegAccess(store, displacement >> 2, false /* isLoad */); } return res; } @@ -1139,7 +1219,7 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir, branch1->generic.target = (LIR *) genIT(cUnit, ARM_COND_HI, "E"); newLIR2(cUnit, THUMB2_MOV_IMM_SHIFT, r7, 
modifiedImmediate(-1)); newLIR2(cUnit, THUMB_MOV_IMM, r7, 1); - genITBottom(cUnit); + genBarrier(cUnit); branch2->generic.target = (LIR *) opRegReg(cUnit, OP_NEG, r7, r7); branch1->generic.target = (LIR *) storeValue(cUnit, r7, vDest, r4PC); @@ -1279,7 +1359,7 @@ static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin) //TODO: need assertion mechanism to validate IT region size genIT(cUnit, (isMin) ? ARM_COND_GT : ARM_COND_LT, ""); opRegReg(cUnit, OP_MOV, reg0, reg1); - genITBottom(cUnit); + genBarrier(cUnit); if (vDest >= 0) storeValue(cUnit, reg0, vDest, reg1); else diff --git a/vm/compiler/codegen/arm/ThumbUtil.c b/vm/compiler/codegen/arm/ThumbUtil.c index 1794638fe..49e04b414 100644 --- a/vm/compiler/codegen/arm/ThumbUtil.c +++ b/vm/compiler/codegen/arm/ThumbUtil.c @@ -322,26 +322,28 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase, bool nullCheck, int vReg) { ArmLIR *first = NULL; - ArmLIR *res; + ArmLIR *res, *load; ArmOpCode opCode = THUMB_BKPT; bool shortForm = false; int shortMax = 128; + int encodedDisp = displacement; + switch (size) { case WORD: if (LOWREG(rDest) && (rBase == rpc) && (displacement <= 1020) && (displacement >= 0)) { shortForm = true; - displacement >>= 2; + encodedDisp >>= 2; opCode = THUMB_LDR_PC_REL; } else if (LOWREG(rDest) && (rBase == r13) && (displacement <= 1020) && (displacement >= 0)) { shortForm = true; - displacement >>= 2; + encodedDisp >>= 2; opCode = THUMB_LDR_SP_REL; } else if (displacement < 128 && displacement >= 0) { assert((displacement & 0x3) == 0); shortForm = true; - displacement >>= 2; + encodedDisp >>= 2; opCode = THUMB_LDR_RRI5; } else { opCode = THUMB_LDR_RRR; @@ -351,7 +353,7 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase, if (displacement < 64 && displacement >= 0) { assert((displacement & 0x1) == 0); shortForm = true; - displacement >>= 1; + encodedDisp >>= 1; opCode = THUMB_LDRH_RRI5; } else { opCode = THUMB_LDRH_RRR; @@ -377,12 
+379,17 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase, if (nullCheck) first = genNullCheck(cUnit, vReg, rBase, mir->offset, NULL); if (shortForm) { - res = newLIR3(cUnit, opCode, rDest, rBase, displacement); + load = res = newLIR3(cUnit, opCode, rDest, rBase, encodedDisp); } else { assert(rBase != rDest); - res = loadConstant(cUnit, rDest, displacement); - newLIR3(cUnit, opCode, rDest, rBase, rDest); + res = loadConstant(cUnit, rDest, encodedDisp); + load = newLIR3(cUnit, opCode, rDest, rBase, rDest); } + + if (rBase == rFP) { + annotateDalvikRegAccess(load, displacement >> 2, true /* isLoad */); + } + return (first) ? first : res; } @@ -390,16 +397,18 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, int displacement, int rSrc, OpSize size, int rScratch) { - ArmLIR *res; + ArmLIR *res, *store; ArmOpCode opCode = THUMB_BKPT; bool shortForm = false; int shortMax = 128; + int encodedDisp = displacement; + switch (size) { case WORD: if (displacement < 128 && displacement >= 0) { assert((displacement & 0x3) == 0); shortForm = true; - displacement >>= 2; + encodedDisp >>= 2; opCode = THUMB_STR_RRI5; } else { opCode = THUMB_STR_RRR; @@ -410,7 +419,7 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, if (displacement < 64 && displacement >= 0) { assert((displacement & 0x1) == 0); shortForm = true; - displacement >>= 1; + encodedDisp >>= 1; opCode = THUMB_STRH_RRI5; } else { opCode = THUMB_STRH_RRR; @@ -429,11 +438,15 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, assert(0); } if (shortForm) { - res = newLIR3(cUnit, opCode, rSrc, rBase, displacement); + store = res = newLIR3(cUnit, opCode, rSrc, rBase, encodedDisp); } else { assert(rScratch != -1); - res = loadConstant(cUnit, rScratch, displacement); - newLIR3(cUnit, opCode, rSrc, rBase, rScratch); + res = loadConstant(cUnit, rScratch, encodedDisp); + store = newLIR3(cUnit, opCode, rSrc, rBase, rScratch); + } + + if (rBase == rFP) { + 
annotateDalvikRegAccess(store, displacement >> 2, false /* isLoad */); } return res; } diff --git a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c index 41a79de0f..d07d96e4c 100644 --- a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c +++ b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c @@ -74,7 +74,7 @@ static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode) } /* Architecture-specific initializations and checks go here */ -bool dvmCompilerArchInit(void) +static bool compilerArchVariantInit(void) { /* First, declare dvmCompiler_TEMPLATE_XXX for each template */ #define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X(); diff --git a/vm/compiler/codegen/arm/armv5te/ArchVariant.c b/vm/compiler/codegen/arm/armv5te/ArchVariant.c index 4bd354b45..b4a38480a 100644 --- a/vm/compiler/codegen/arm/armv5te/ArchVariant.c +++ b/vm/compiler/codegen/arm/armv5te/ArchVariant.c @@ -74,7 +74,7 @@ static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode) } /* Architecture-specific initializations and checks go here */ -bool dvmCompilerArchInit(void) +static bool compilerArchVariantInit(void) { /* First, declare dvmCompiler_TEMPLATE_XXX for each template */ #define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X(); diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c index 65e0ec0e8..fba1e3259 100644 --- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c +++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c @@ -77,7 +77,7 @@ static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode) } /* Architecture-specific initializations and checks go here */ -bool dvmCompilerArchInit(void) +static bool compilerArchVariantInit(void) { /* First, declare dvmCompiler_TEMPLATE_XXX for each template */ #define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X(); @@ -132,11 +132,13 @@ static bool 
genInlineSqrt(CompilationUnit *cUnit, MIR *mir) newLIR3(cUnit, THUMB2_FMRRD, r0, r1, dr1); newLIR1(cUnit, THUMB_BLX_R, r2); newLIR3(cUnit, THUMB2_FMDRR, dr0, r0, r1); + ArmLIR *label = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL); + label->defMask = ENCODE_ALL; + branch->generic.target = (LIR *)label; if (vDest >= 0) - target = storeDouble(cUnit, dr0, vDest, rNone); + storeDouble(cUnit, dr0, vDest); else - target = newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2); - branch->generic.target = (LIR *)target; + newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2); resetRegisterScoreboard(cUnit); return true; } @@ -175,10 +177,21 @@ static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest, default: return true; } - loadFloat(cUnit, vSrc1, fr2); - loadFloat(cUnit, vSrc2, fr4); - newLIR3(cUnit, op, fr0, fr2, fr4); - storeFloat(cUnit, fr0, vDest, 0); + int reg0, reg1, reg2; + reg1 = selectSFPReg(cUnit, vSrc1); + reg2 = selectSFPReg(cUnit, vSrc2); + /* + * The register mapping is overly optimistic and lazily updated so we + * need to detect false sharing here. 
+ */ + if (reg1 == reg2 && vSrc1 != vSrc2) { + reg2 = nextFPReg(cUnit, vSrc2, false /* isDouble */); + } + loadFloat(cUnit, vSrc1, reg1); + loadFloat(cUnit, vSrc2, reg2); + reg0 = selectSFPReg(cUnit, vDest); + newLIR3(cUnit, op, reg0, reg1, reg2); + storeFloat(cUnit, reg0, vDest); return false; } @@ -212,10 +225,19 @@ static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest, default: return true; } - loadDouble(cUnit, vSrc1, dr1); - loadDouble(cUnit, vSrc2, dr2); - newLIR3(cUnit, op, dr0, dr1, dr2); - storeDouble(cUnit, dr0, vDest, rNone); + + int reg0, reg1, reg2; + reg1 = selectDFPReg(cUnit, vSrc1); + reg2 = selectDFPReg(cUnit, vSrc2); + if (reg1 == reg2 && vSrc1 != vSrc2) { + reg2 = nextFPReg(cUnit, vSrc2, true /* isDouble */); + } + loadDouble(cUnit, vSrc1, reg1); + loadDouble(cUnit, vSrc2, reg2); + /* Rename the new vDest to a new register */ + reg0 = selectDFPReg(cUnit, vNone); + newLIR3(cUnit, op, reg0, reg1, reg2); + storeDouble(cUnit, reg0, vDest); return false; } @@ -270,18 +292,20 @@ static bool genConversion(CompilationUnit *cUnit, MIR *mir) return true; } if (longSrc) { - srcReg = dr1; + srcReg = selectDFPReg(cUnit, vSrc2); loadDouble(cUnit, vSrc2, srcReg); } else { - srcReg = fr2; + srcReg = selectSFPReg(cUnit, vSrc2); loadFloat(cUnit, vSrc2, srcReg); } if (longDest) { - newLIR2(cUnit, op, dr0, srcReg); - storeDouble(cUnit, dr0, vSrc1Dest, rNone); + int destReg = selectDFPReg(cUnit, vNone); + newLIR2(cUnit, op, destReg, srcReg); + storeDouble(cUnit, destReg, vSrc1Dest); } else { - newLIR2(cUnit, op, fr0, srcReg); - storeFloat(cUnit, fr0, vSrc1Dest, 0); + int destReg = selectSFPReg(cUnit, vNone); + newLIR2(cUnit, op, destReg, srcReg); + storeFloat(cUnit, destReg, vSrc1Dest); } return false; } @@ -292,6 +316,7 @@ static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1, bool isDouble; int defaultResult; bool ltNaNBias; + int fpReg1, fpReg2; switch(mir->dalvikInsn.opCode) { case OP_CMPL_FLOAT: @@ -314,17 +339,27 @@ 
static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1, return true; } if (isDouble) { - loadDouble(cUnit, vSrc1, dr0); - loadDouble(cUnit, vSrc2, dr1); + fpReg1 = selectDFPReg(cUnit, vSrc1); + fpReg2 = selectDFPReg(cUnit, vSrc2); + if (fpReg1 == fpReg2 && vSrc1 != vSrc2) { + fpReg2 = nextFPReg(cUnit, vSrc2, true /* isDouble */); + } + loadDouble(cUnit, vSrc1, fpReg1); + loadDouble(cUnit, vSrc2, fpReg2); // Hard-coded use of r7 as temp. Revisit - loadConstant(cUnit,r7, defaultResult); - newLIR2(cUnit, THUMB2_VCMPD, dr0, dr1); + loadConstant(cUnit, r7, defaultResult); + newLIR2(cUnit, THUMB2_VCMPD, fpReg1, fpReg2); } else { - loadFloat(cUnit, vSrc1, fr0); - loadFloat(cUnit, vSrc2, fr2); + fpReg1 = selectSFPReg(cUnit, vSrc1); + fpReg2 = selectSFPReg(cUnit, vSrc2); + if (fpReg1 == fpReg2 && vSrc1 != vSrc2) { + fpReg2 = nextFPReg(cUnit, vSrc2, false /* isDouble */); + } + loadFloat(cUnit, vSrc1, fpReg1); + loadFloat(cUnit, vSrc2, fpReg2); // Hard-coded use of r7 as temp. Revisit - loadConstant(cUnit,r7, defaultResult); - newLIR2(cUnit, THUMB2_VCMPS, fr0, fr2); + loadConstant(cUnit, r7, defaultResult); + newLIR2(cUnit, THUMB2_VCMPS, fpReg1, fpReg2); } newLIR0(cUnit, THUMB2_FMSTAT); genIT(cUnit, (defaultResult == -1) ? ARM_COND_GT : ARM_COND_MI, ""); -- 2.11.0