From d72564ca7aa66c6d95b6ca34299258b65ecfd1cb Mon Sep 17 00:00:00 2001 From: Ben Cheng Date: Tue, 8 Feb 2011 17:09:25 -0800 Subject: [PATCH] Misc goodies in the JIT in preparation for more aggressive code motion. - Set up resource masks correctly for Thumb push/pop when LR/PC are involved. - Preserve LR around simulated heap references under self-verification mode. - Compact a few simple flags in ArmLIR into bit fields. - Minor performance tuning in TEMPLATE_MEM_OP_DECODE Change-Id: Id73edac837c5bb37dfd21f372d6fa21c238cf42a --- vm/compiler/Compiler.h | 1 + vm/compiler/CompilerIR.h | 1 + vm/compiler/codegen/RallocUtil.c | 2 +- vm/compiler/codegen/arm/ArchUtility.c | 8 +-- vm/compiler/codegen/arm/ArmLIR.h | 20 +++--- vm/compiler/codegen/arm/Assemble.c | 8 +-- vm/compiler/codegen/arm/CodegenCommon.c | 32 ++++++++- vm/compiler/codegen/arm/CodegenDriver.c | 50 +++++++++++++- vm/compiler/codegen/arm/GlobalOptimizations.c | 2 +- vm/compiler/codegen/arm/LocalOptimizations.c | 40 ++++++------ vm/compiler/codegen/arm/Thumb/Factory.c | 76 ++++++++++++++++------ vm/compiler/codegen/arm/Thumb2/Factory.c | 57 +++++++++------- .../template/armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S | 2 +- .../template/armv5te/TEMPLATE_MEM_OP_DECODE.S | 2 +- .../template/out/CompilerTemplateAsm-armv5te-vfp.S | 2 +- .../template/out/CompilerTemplateAsm-armv5te.S | 2 +- .../out/CompilerTemplateAsm-armv7-a-neon.S | 2 +- .../template/out/CompilerTemplateAsm-armv7-a.S | 2 +- 18 files changed, 218 insertions(+), 91 deletions(-) diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h index 2f93ebb47..7533e2630 100644 --- a/vm/compiler/Compiler.h +++ b/vm/compiler/Compiler.h @@ -310,4 +310,5 @@ JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc, const struct JitEntry *desc); void *dvmCompilerGetInterpretTemplate(); JitInstructionSetType dvmCompilerGetInterpretTemplateSet(); +u8 dvmGetRegResourceMask(int reg); #endif /* _DALVIK_VM_COMPILER */ diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h index dd1d441bc..89cd1414d 100644 --- a/vm/compiler/CompilerIR.h +++ b/vm/compiler/CompilerIR.h @@ -211,6 +211,7 @@ typedef struct CompilationUnit { bool hasLoop; // Contains a loop bool hasInvoke; // Contains an invoke instruction bool heapMemOp; // Mark mem ops for self verification + bool usesLinkRegister; // For self-verification only int profileCodeSize; // Size of the profile prefix in bytes int numChainingCells[kChainingCellGap]; LIR *firstChainingLIR[kChainingCellGap]; diff --git a/vm/compiler/codegen/RallocUtil.c b/vm/compiler/codegen/RallocUtil.c index 51162430a..27d1f0597 100644 --- a/vm/compiler/codegen/RallocUtil.c +++ b/vm/compiler/codegen/RallocUtil.c @@ -448,7 +448,7 @@ static void nullifyRange(CompilationUnit *cUnit, LIR *start, LIR *finish, LIR *p; assert(sReg1 == sReg2); for (p = start; ;p = p->next) { - ((ArmLIR *)p)->isNop = true; + ((ArmLIR *)p)->flags.isNop = true; if (p == finish) break; } diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c index 5af4f3b5c..c6bcac2bc 100644 --- a/vm/compiler/codegen/arm/ArchUtility.c +++ b/vm/compiler/codegen/arm/ArchUtility.c @@ -348,7 +348,7 @@ void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) LOGD("L%p:\n", lir); break; default: - if (lir->isNop && !dumpNop) { + if (lir->flags.isNop && !dumpNop) { break; } buildInsnString(EncodingMap[lir->opcode].name, lir, opName, @@ -357,15 +357,15 @@ void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) 256); LOGD("%p (%04x): %-8s%s%s\n", baseAddr + offset, offset, opName, buf, - lir->isNop ? "(nop)" : ""); + lir->flags.isNop ? "(nop)" : ""); break; } - if (lir->useMask && (!lir->isNop || dumpNop)) { + if (lir->useMask && (!lir->flags.isNop || dumpNop)) { DUMP_RESOURCE_MASK(dvmDumpResourceMask((LIR *) lir, lir->useMask, "use")); } - if (lir->defMask && (!lir->isNop || dumpNop)) { + if (lir->defMask && (!lir->flags.isNop || dumpNop)) { DUMP_RESOURCE_MASK(dvmDumpResourceMask((LIR *) lir, lir->defMask, "def")); } diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h index 0ee821d3f..d3e145ef4 100644 --- a/vm/compiler/codegen/arm/ArmLIR.h +++ b/vm/compiler/codegen/arm/ArmLIR.h @@ -757,15 +757,17 @@ extern ArmEncodingMap EncodingMap[kArmLast]; typedef struct ArmLIR { LIR generic; ArmOpcode opcode; - int operands[4]; // [0..3] = [dest, src1, src2, extra] - bool isNop; // LIR is optimized away - bool branchInsertSV;// mark for insertion of branch before this instruction, - // used to identify mem ops for self verification mode - int age; // default is 0, set lazily by the optimizer - int size; // 16-bit unit size (1 for thumb, 1 or 2 for thumb2) - int aliasInfo; // For Dalvik register access & litpool disambiguation - u8 useMask; // Resource mask for use - u8 defMask; // Resource mask for def + int operands[4]; // [0..3] = [dest, src1, src2, extra] + struct { + bool isNop:1; // LIR is optimized away + bool insertWrapper:1; // insert branch to emulate memory accesses + unsigned int age:4; // default is 0, set lazily by the optimizer + unsigned int size:3; // bytes (2 for thumb, 2/4 for thumb2) + unsigned int unused:23; + } flags; + int aliasInfo; // For Dalvik register & litpool disambiguation + u8 useMask; // Resource mask for use + u8 defMask; // Resource mask for def } ArmLIR; /* Init values when a predicted chain is initially assembled */ diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c index 8f62b1e14..f5fecbc47 100644 --- a/vm/compiler/codegen/arm/Assemble.c +++ b/vm/compiler/codegen/arm/Assemble.c @@ -943,7 +943,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit, continue; } - if (lir->isNop) { + if (lir->flags.isNop) { continue; } @@ -1234,9 +1234,9 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) armLIR; armLIR = NEXT_LIR(armLIR)) { armLIR->generic.offset = offset; - if (armLIR->opcode >= 0 && !armLIR->isNop) { - armLIR->size = EncodingMap[armLIR->opcode].size * 2; - offset += armLIR->size; + if (armLIR->opcode >= 0 && !armLIR->flags.isNop) { + armLIR->flags.size = EncodingMap[armLIR->opcode].size * 2; + offset += armLIR->flags.size; } else if (armLIR->opcode == kArmPseudoPseudoAlign4) { if (offset & 0x2) { offset += 2; diff --git a/vm/compiler/codegen/arm/CodegenCommon.c b/vm/compiler/codegen/arm/CodegenCommon.c index c29efa640..f4ca95c4a 100644 --- a/vm/compiler/codegen/arm/CodegenCommon.c +++ b/vm/compiler/codegen/arm/CodegenCommon.c @@ -84,9 +84,9 @@ static void annotateDalvikRegAccess(ArmLIR *lir, int regId, bool isLoad) } /* - * Decode the register id and mark the corresponding bit(s). + * Decode the register id. */ -static inline void setupRegMask(u8 *mask, int reg) +static inline u8 getRegMaskCommon(int reg) { u8 seed; int shift; @@ -100,7 +100,21 @@ static inline void setupRegMask(u8 *mask, int reg) shift = FPREG(reg) ? kFPReg0 : 0; /* Expand the double register id into single offset */ shift += regId; - *mask |= seed << shift; + return (seed << shift); +} + +/* External version of getRegMaskCommon */ +u8 dvmGetRegResourceMask(int reg) +{ + return getRegMaskCommon(reg); +} + +/* + * Mark the corresponding bit(s). + */ +static inline void setupRegMask(u8 *mask, int reg) +{ + *mask |= getRegMaskCommon(reg); } /* @@ -196,6 +210,18 @@ static void setupResourceMasks(ArmLIR *lir) if (flags & USES_CCODES) { lir->useMask |= ENCODE_CCODE; } + + /* Fixup for kThumbPush/lr and kThumbPop/pc */ + if (opcode == kThumbPush || opcode == kThumbPop) { + u8 r8Mask = getRegMaskCommon(r8); + if ((opcode == kThumbPush) && (lir->useMask & r8Mask)) { + lir->useMask &= ~r8Mask; + lir->useMask |= ENCODE_REG_LR; + } else if ((opcode == kThumbPop) && (lir->defMask & r8Mask)) { + lir->defMask &= ~r8Mask; + lir->defMask |= ENCODE_REG_PC; + } + } } /* diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c index 1b7c6ed02..74f648bcb 100644 --- a/vm/compiler/codegen/arm/CodegenDriver.c +++ b/vm/compiler/codegen/arm/CodegenDriver.c @@ -167,6 +167,9 @@ static bool genArithOpDoublePortable(CompilationUnit *cUnit, MIR *mir, dvmCompilerClobberCallRegs(cUnit); rlResult = dvmCompilerGetReturnWide(cUnit); storeValueWide(cUnit, rlDest, rlResult); +#if defined(WITH_SELF_VERIFICATION) + cUnit->usesLinkRegister = true; +#endif return false; } @@ -213,6 +216,31 @@ static void selfVerificationBranchInsert(LIR *currentLIR, ArmOpcode opcode, dvmCompilerInsertLIRBefore(currentLIR, (LIR *) insn); } +/* + * Example where r14 (LR) is preserved around a heap access under + * self-verification mode in Thumb2: + * + * D/dalvikvm( 1538): 0x59414c5e (0026): ldr r14, [rpc, #220] <-hoisted + * D/dalvikvm( 1538): 0x59414c62 (002a): mla r4, r0, r8, r4 + * D/dalvikvm( 1538): 0x59414c66 (002e): adds r3, r4, r3 + * D/dalvikvm( 1538): 0x59414c6a (0032): push ---+ + * D/dalvikvm( 1538): 0x59414c6c (0034): blx_1 0x5940f494 | + * D/dalvikvm( 1538): 0x59414c6e (0036): blx_2 see above <-MEM_OP_DECODE + * D/dalvikvm( 1538): 0x59414c70 (0038): ldr r10, [r9, #0] | + * D/dalvikvm( 1538): 0x59414c74 (003c): pop ---+ + * D/dalvikvm( 1538): 0x59414c78 (0040): mov r11, r10 + * D/dalvikvm( 1538): 0x59414c7a (0042): asr r12, r11, #31 + * D/dalvikvm( 1538): 0x59414c7e (0046): movs r0, r2 + * D/dalvikvm( 1538): 0x59414c80 (0048): movs r1, r3 + * D/dalvikvm( 1538): 0x59414c82 (004a): str r2, [r5, #16] + * D/dalvikvm( 1538): 0x59414c84 (004c): mov r2, r11 + * D/dalvikvm( 1538): 0x59414c86 (004e): str r3, [r5, #20] + * D/dalvikvm( 1538): 0x59414c88 (0050): mov r3, r12 + * D/dalvikvm( 1538): 0x59414c8a (0052): str r11, [r5, #24] + * D/dalvikvm( 1538): 0x59414c8e (0056): str r12, [r5, #28] + * D/dalvikvm( 1538): 0x59414c92 (005a): blx r14 <-use of LR + * + */ static void selfVerificationBranchInsertPass(CompilationUnit *cUnit) { ArmLIR *thisLIR; @@ -221,7 +249,19 @@ static void selfVerificationBranchInsertPass(CompilationUnit *cUnit) for (thisLIR = (ArmLIR *) cUnit->firstLIRInsn; thisLIR != (ArmLIR *) cUnit->lastLIRInsn; thisLIR = NEXT_LIR(thisLIR)) { - if (thisLIR->branchInsertSV) { + if (!thisLIR->flags.isNop && thisLIR->flags.insertWrapper) { + /* + * Push r5(FP) and r14(LR) onto stack. We need to make sure that + * SP is 8-byte aligned, and we use r5 as a temp to restore LR + * for Thumb-only target since LR cannot be directly accessed in + * Thumb mode. Another reason to choose r5 here is it is the Dalvik + * frame pointer and cannot be the target of the emulated heap + * load. + */ + if (cUnit->usesLinkRegister) { + genSelfVerificationPreBranch(cUnit, thisLIR); + } + /* Branch to mem op decode template */ selfVerificationBranchInsert((LIR *) thisLIR, kThumbBlx1, (int) gDvmJit.codeCache + templateEntryOffsets[opcode], @@ -229,6 +269,11 @@ static void selfVerificationBranchInsertPass(CompilationUnit *cUnit) selfVerificationBranchInsert((LIR *) thisLIR, kThumbBlx2, (int) gDvmJit.codeCache + templateEntryOffsets[opcode], (int) gDvmJit.codeCache + templateEntryOffsets[opcode]); + + /* Restore LR */ + if (cUnit->usesLinkRegister) { + genSelfVerificationPostBranch(cUnit, thisLIR); + } } } } @@ -708,6 +753,9 @@ static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, else rlResult = dvmCompilerGetReturnWideAlt(cUnit); storeValueWide(cUnit, rlDest, rlResult); +#if defined(WITH_SELF_VERIFICATION) + cUnit->usesLinkRegister = true; +#endif } return false; } diff --git a/vm/compiler/codegen/arm/GlobalOptimizations.c b/vm/compiler/codegen/arm/GlobalOptimizations.c index c1e69c3e3..872bddfc7 100644 --- a/vm/compiler/codegen/arm/GlobalOptimizations.c +++ b/vm/compiler/codegen/arm/GlobalOptimizations.c @@ -41,7 +41,7 @@ static void applyRedundantBranchElimination(CompilationUnit *cUnit) * Is the branch target the next instruction? */ if (nextLIR == (ArmLIR *) thisLIR->generic.target) { - thisLIR->isNop = true; + thisLIR->flags.isNop = true; break; } diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c index d91734fcf..ae98a568e 100644 --- a/vm/compiler/codegen/arm/LocalOptimizations.c +++ b/vm/compiler/codegen/arm/LocalOptimizations.c @@ -77,7 +77,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, thisLIR != tailLIR; thisLIR = NEXT_LIR(thisLIR)) { /* Skip newly added instructions */ - if (thisLIR->age >= cUnit->optRound) { + if (thisLIR->flags.age >= cUnit->optRound) { continue; } if (isDalvikStore(thisLIR)) { @@ -114,7 +114,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, dvmCompilerInsertLIRAfter((LIR *) checkLIR, (LIR *) moveLIR); } - checkLIR->isNop = true; + checkLIR->flags.isNop = true; continue; /* @@ -123,7 +123,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, */ } else if (isDalvikStore(checkLIR) && (checkLIR->aliasInfo == thisLIR->aliasInfo)) { - thisLIR->isNop = true; + thisLIR->flags.isNop = true; break; /* Find out the latest slot that the store can be sunk into */ } else { @@ -149,7 +149,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, ArmLIR *newStoreLIR = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true); *newStoreLIR = *thisLIR; - newStoreLIR->age = cUnit->optRound; + newStoreLIR->flags.age = cUnit->optRound; /* * Stop point found - insert *before* the checkLIR * since the instruction list is scanned in the @@ -157,7 +157,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, */ dvmCompilerInsertLIRBefore((LIR *) checkLIR, (LIR *) newStoreLIR); - thisLIR->isNop = true; + thisLIR->flags.isNop = true; } break; } @@ -191,8 +191,8 @@ static void applyLoadHoisting(CompilationUnit *cUnit, thisLIR != tailLIR; thisLIR = NEXT_LIR(thisLIR)) { /* Skip newly added instructions */ - if (thisLIR->age >= cUnit->optRound || - thisLIR->isNop == true) { + if (thisLIR->flags.age >= cUnit->optRound || + thisLIR->flags.isNop == true) { continue; } @@ -221,7 +221,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, checkLIR != headLIR; checkLIR = PREV_LIR(checkLIR)) { - if (checkLIR->isNop) continue; + if (checkLIR->flags.isNop) continue; /* * Check if the Dalvik register is previously accessed @@ -235,7 +235,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, * the search will terminate later at the point checking * for partially overlapping stores. */ - thisLIR->isNop = true; + thisLIR->flags.isNop = true; break; } @@ -274,7 +274,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, } /* The load has been eliminated */ - if (thisLIR->isNop) continue; + if (thisLIR->flags.isNop) continue; /* * The load cannot be eliminated. See if it can be hoisted to an @@ -284,7 +284,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, /* empty by intention */; checkLIR = PREV_LIR(checkLIR)) { - if (checkLIR->isNop) continue; + if (checkLIR->flags.isNop) continue; /* * Check if the "thisLIR" load is redundant @@ -308,7 +308,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, dvmCompilerInsertLIRAfter((LIR *) checkLIR, (LIR *) moveLIR); } - thisLIR->isNop = true; + thisLIR->flags.isNop = true; break; /* Find out if the load can be yanked past the checkLIR */ @@ -371,7 +371,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, ArmLIR *newLoadLIR = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true); *newLoadLIR = *thisLIR; - newLoadLIR->age = cUnit->optRound; + newLoadLIR->flags.age = cUnit->optRound; /* * Stop point found - insert *after* the checkLIR * since the instruction list is scanned in the @@ -379,7 +379,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, */ dvmCompilerInsertLIRAfter((LIR *) checkLIR, (LIR *) newLoadLIR); - thisLIR->isNop = true; + thisLIR->flags.isNop = true; } break; } @@ -407,13 +407,13 @@ static void applyLoadHoisting(CompilationUnit *cUnit, checkLIR != headLIR; checkLIR = PREV_LIR(checkLIR)) { - if (checkLIR->isNop) continue; + if (checkLIR->flags.isNop) continue; /* Reloading same literal into same tgt reg? Eliminate if so */ if (isLiteralLoad(checkLIR) && (checkLIR->aliasInfo == litVal) && (checkLIR->operands[0] == nativeRegId)) { - thisLIR->isNop = true; + thisLIR->flags.isNop = true; break; } @@ -430,7 +430,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, } /* The load has been eliminated */ - if (thisLIR->isNop) continue; + if (thisLIR->flags.isNop) continue; /* * The load cannot be eliminated. See if it can be hoisted to an @@ -440,7 +440,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, /* empty by intention */; checkLIR = PREV_LIR(checkLIR)) { - if (checkLIR->isNop) continue; + if (checkLIR->flags.isNop) continue; /* * TUNING: once a full scheduler exists, check here @@ -475,14 +475,14 @@ static void applyLoadHoisting(CompilationUnit *cUnit, ArmLIR *newLoadLIR = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true); *newLoadLIR = *thisLIR; - newLoadLIR->age = cUnit->optRound; + newLoadLIR->flags.age = cUnit->optRound; /* * Insertion is guaranteed to succeed since checkLIR * is never the first LIR on the list */ dvmCompilerInsertLIRAfter((LIR *) checkLIR, (LIR *) newLoadLIR); - thisLIR->isNop = true; + thisLIR->flags.isNop = true; } break; } diff --git a/vm/compiler/codegen/arm/Thumb/Factory.c b/vm/compiler/codegen/arm/Thumb/Factory.c index 53dc2ce55..c0a8c32a9 100644 --- a/vm/compiler/codegen/arm/Thumb/Factory.c +++ b/vm/compiler/codegen/arm/Thumb/Factory.c @@ -78,16 +78,7 @@ static ArmLIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest, loadPcRel->generic.target = (LIR *) dataTarget; loadPcRel->operands[0] = tDest; setupResourceMasks(loadPcRel); - /* - * Special case for literal loads with a link register target. - * Self-cosim mode will insert calls prior to heap references - * after optimization, and those will destroy r14. The easy - * workaround is to treat literal loads into r14 as heap references - * to prevent them from being hoisted. Use of r14 in this manner - * is currently rare. Revist if that changes. - */ - if (rDest != rlr) - setMemRefType(loadPcRel, true, kLiteral); + setMemRefType(loadPcRel, true, kLiteral); loadPcRel->aliasInfo = dataTarget->operands[0]; res = loadPcRel; dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); @@ -480,7 +471,7 @@ static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase, res = newLIR3(cUnit, opcode, rDest, rBase, rNewIndex); #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - res->branchInsertSV = true; + res->flags.insertWrapper = true; #endif if (scale) dvmCompilerFreeTemp(cUnit, rNewIndex); @@ -518,7 +509,7 @@ static ArmLIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase, res = newLIR3(cUnit, opcode, rSrc, rBase, rNewIndex); #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - res->branchInsertSV = true; + res->flags.insertWrapper = true; #endif if (scale) dvmCompilerFreeTemp(cUnit, rNewIndex); @@ -532,7 +523,7 @@ static ArmLIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask) res = newLIR2(cUnit, kThumbLdmia, rBase, rMask); #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - res->branchInsertSV = true; + res->flags.insertWrapper = true; #endif genBarrier(cUnit); return res; @@ -545,7 +536,7 @@ static ArmLIR *storeMultiple(CompilationUnit *cUnit, int rBase, int rMask) res = newLIR2(cUnit, kThumbStmia, rBase, rMask); #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - res->branchInsertSV = true; + res->flags.insertWrapper = true; #endif genBarrier(cUnit); return res; @@ -666,9 +657,9 @@ static ArmLIR *loadBaseDispBody(CompilationUnit *cUnit, MIR *mir, int rBase, } #if defined(WITH_SELF_VERIFICATION) if (load != NULL && cUnit->heapMemOp) - load->branchInsertSV = true; + load->flags.insertWrapper = true; if (load2 != NULL && cUnit->heapMemOp) - load2->branchInsertSV = true; + load2->flags.insertWrapper = true; #endif return res; } @@ -776,9 +767,9 @@ static ArmLIR *storeBaseDispBody(CompilationUnit *cUnit, int rBase, } #if defined(WITH_SELF_VERIFICATION) if (store != NULL && cUnit->heapMemOp) - store->branchInsertSV = true; + store->flags.insertWrapper = true; if (store2 != NULL && cUnit->heapMemOp) - store2->branchInsertSV = true; + store2->flags.insertWrapper = true; #endif return res; } @@ -834,7 +825,7 @@ static ArmLIR* genRegCopyNoInsert(CompilationUnit *cUnit, int rDest, int rSrc) res->opcode = opcode; setupResourceMasks(res); if (rDest == rSrc) { - res->isNop = true; + res->flags.isNop = true; } return res; } @@ -874,3 +865,50 @@ static ArmLIR *genCmpImmBranch(CompilationUnit *cUnit, ArmLIR *branch = newLIR2(cUnit, kThumbBCond, 0, cond); return branch; } + +#if defined(WITH_SELF_VERIFICATION) +static void genSelfVerificationPreBranch(CompilationUnit *cUnit, + ArmLIR *origLIR) { + /* + * We need two separate pushes, since we want r5 to be pushed first. + * Store multiple will push LR first. + */ + ArmLIR *pushFP = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); + pushFP->opcode = kThumbPush; + pushFP->operands[0] = 1 << rFP; + setupResourceMasks(pushFP); + dvmCompilerInsertLIRBefore((LIR *) origLIR, (LIR *) pushFP); + + ArmLIR *pushLR = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); + pushLR->opcode = kThumbPush; + /* Thumb push can handle LR, but is encoded differently at bit 8 */ + pushLR->operands[0] = 1 << 8; + setupResourceMasks(pushLR); + dvmCompilerInsertLIRBefore((LIR *) origLIR, (LIR *) pushLR); +} + +static void genSelfVerificationPostBranch(CompilationUnit *cUnit, + ArmLIR *origLIR) { + /* + * Since Thumb cannot pop memory content into LR, we have to pop LR + * to a temp first (r5 in this case). Then we move r5 to LR, then pop the + * original r5 from stack. + */ + /* Pop memory content(LR) into r5 first */ + ArmLIR *popForLR = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); + popForLR->opcode = kThumbPop; + popForLR->operands[0] = 1 << rFP; + setupResourceMasks(popForLR); + dvmCompilerInsertLIRAfter((LIR *) origLIR, (LIR *) popForLR); + + ArmLIR *copy = genRegCopyNoInsert(cUnit, rlr, rFP); + dvmCompilerInsertLIRAfter((LIR *) popForLR, (LIR *) copy); + + /* Now restore the original r5 */ + ArmLIR *popFP = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); + popFP->opcode = kThumbPop; + popFP->operands[0] = 1 << rFP; + setupResourceMasks(popFP); + dvmCompilerInsertLIRAfter((LIR *) copy, (LIR *) popFP); +} +#endif diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.c b/vm/compiler/codegen/arm/Thumb2/Factory.c index fb30292e7..f68ef943e 100644 --- a/vm/compiler/codegen/arm/Thumb2/Factory.c +++ b/vm/compiler/codegen/arm/Thumb2/Factory.c @@ -66,9 +66,7 @@ static ArmLIR *loadFPConstantValue(CompilationUnit *cUnit, int rDest, loadPcRel->operands[0] = rDest; loadPcRel->operands[1] = rpc; setupResourceMasks(loadPcRel); - // Self-cosim workaround. - if (rDest != rlr) - setMemRefType(loadPcRel, true, kLiteral); + setMemRefType(loadPcRel, true, kLiteral); loadPcRel->aliasInfo = dataTarget->operands[0]; dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); return loadPcRel; @@ -175,16 +173,7 @@ static ArmLIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest, loadPcRel->generic.target = (LIR *) dataTarget; loadPcRel->operands[0] = rDest; setupResourceMasks(loadPcRel); - /* - * Special case for literal loads with a link register target. - * Self-cosim mode will insert calls prior to heap references - * after optimization, and those will destroy r14. The easy - * workaround is to treat literal loads into r14 as heap references - * to prevent them from being hoisted. Use of r14 in this manner - * is currently rare. Revisit if that changes. - */ - if (rDest != rlr) - setMemRefType(loadPcRel, true, kLiteral); + setMemRefType(loadPcRel, true, kLiteral); loadPcRel->aliasInfo = dataTarget->operands[0]; res = loadPcRel; dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); @@ -722,7 +711,7 @@ static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase, load = newLIR3(cUnit, opcode, rDest, regPtr, 0); #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - load->branchInsertSV = true; + load->flags.insertWrapper = true; #endif return load; case kWord: @@ -750,7 +739,7 @@ static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase, #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - load->branchInsertSV = true; + load->flags.insertWrapper = true; #endif return load; } @@ -786,7 +775,7 @@ static ArmLIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase, store = newLIR3(cUnit, opcode, rSrc, regPtr, 0); #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - store->branchInsertSV = true; + store->flags.insertWrapper = true; #endif return store; case kWord: @@ -810,7 +799,7 @@ static ArmLIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase, #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - store->branchInsertSV = true; + store->flags.insertWrapper = true; #endif return store; } @@ -932,7 +921,7 @@ static ArmLIR *loadBaseDispBody(CompilationUnit *cUnit, MIR *mir, int rBase, } #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - load->branchInsertSV = true; + load->flags.insertWrapper = true; #endif return res; } @@ -1045,7 +1034,7 @@ static ArmLIR *storeBaseDispBody(CompilationUnit *cUnit, int rBase, } #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - store->branchInsertSV = true; + store->flags.insertWrapper = true; #endif return res; } @@ -1073,7 +1062,7 @@ static ArmLIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask) } #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - res->branchInsertSV = true; + res->flags.insertWrapper = true; #endif genBarrier(cUnit); return res; @@ -1090,7 +1079,7 @@ static ArmLIR *storeMultiple(CompilationUnit *cUnit, int rBase, int rMask) } #if defined(WITH_SELF_VERIFICATION) if (cUnit->heapMemOp) - res->branchInsertSV = true; + res->flags.insertWrapper = true; #endif genBarrier(cUnit); return res; @@ -1143,7 +1132,7 @@ static ArmLIR *fpRegCopy(CompilationUnit *cUnit, int rDest, int rSrc) res->operands[0] = rDest; res->operands[1] = rSrc; if (rDest == rSrc) { - res->isNop = true; + res->flags.isNop = true; } else { assert(DOUBLEREG(rDest) == DOUBLEREG(rSrc)); if (DOUBLEREG(rDest)) { @@ -1184,7 +1173,7 @@ static ArmLIR* genRegCopyNoInsert(CompilationUnit *cUnit, int rDest, int rSrc) res->opcode = opcode; setupResourceMasks(res); if (rDest == rSrc) { - res->isNop = true; + res->flags.isNop = true; } return res; } @@ -1224,3 +1213,25 @@ static void genRegCopyWide(CompilationUnit *cUnit, int destLo, int destHi, } } } + +#if defined(WITH_SELF_VERIFICATION) +static void genSelfVerificationPreBranch(CompilationUnit *cUnit, + ArmLIR *origLIR) { + ArmLIR *push = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); + push->opcode = kThumbPush; + /* Thumb push can handle LR (encoded at bit 8) */ + push->operands[0] = (1 << rFP | 1 << 8); + setupResourceMasks(push); + dvmCompilerInsertLIRBefore((LIR *) origLIR, (LIR *) push); +} + +static void genSelfVerificationPostBranch(CompilationUnit *cUnit, + ArmLIR *origLIR) { + ArmLIR *pop = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); + /* Thumb pop cannot store into LR - use Thumb2 here */ + pop->opcode = kThumb2Pop; + pop->operands[0] = (1 << rFP | 1 << rlr); + setupResourceMasks(pop); + dvmCompilerInsertLIRAfter((LIR *) origLIR, (LIR *) pop); +} +#endif diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S index 21e23a9ae..8bee85344 100644 --- a/vm/compiler/template/armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S +++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S @@ -9,9 +9,9 @@ */ vpush {d0-d15} @ save out all fp registers push {r0-r12,lr} @ save out all registers + ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S mov r0, lr @ arg0 <- link register mov r1, sp @ arg1 <- stack pointer - ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S blx r2 @ decode and handle the mem op pop {r0-r12,lr} @ restore all registers vpop {d0-d15} @ restore all fp registers diff --git a/vm/compiler/template/armv5te/TEMPLATE_MEM_OP_DECODE.S b/vm/compiler/template/armv5te/TEMPLATE_MEM_OP_DECODE.S index ecd4eaabc..03926b69c 100644 --- a/vm/compiler/template/armv5te/TEMPLATE_MEM_OP_DECODE.S +++ b/vm/compiler/template/armv5te/TEMPLATE_MEM_OP_DECODE.S @@ -8,9 +8,9 @@ * skip the memory op so it never gets executed. */ push {r0-r12,lr} @ save out all registers + ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S mov r0, lr @ arg0 <- link register mov r1, sp @ arg1 <- stack pointer - ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S blx r2 @ decode and handle the mem op pop {r0-r12,lr} @ restore all registers bx lr @ return to compiled code diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S index cd530960d..7615b95fe 100644 --- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S +++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S @@ -1102,9 +1102,9 @@ dvmCompiler_TEMPLATE_MEM_OP_DECODE: */ vpush {d0-d15} @ save out all fp registers push {r0-r12,lr} @ save out all registers + ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S mov r0, lr @ arg0 <- link register mov r1, sp @ arg1 <- stack pointer - ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S blx r2 @ decode and handle the mem op pop {r0-r12,lr} @ restore all registers vpop {d0-d15} @ restore all fp registers diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S index 57d0aff9a..2d69a3d85 100644 --- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S +++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S @@ -834,9 +834,9 @@ dvmCompiler_TEMPLATE_MEM_OP_DECODE: * skip the memory op so it never gets executed. */ push {r0-r12,lr} @ save out all registers + ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S mov r0, lr @ arg0 <- link register mov r1, sp @ arg1 <- stack pointer - ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S blx r2 @ decode and handle the mem op pop {r0-r12,lr} @ restore all registers bx lr @ return to compiled code diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S index 10541d39a..5f8e8087a 100644 --- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S +++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S @@ -1102,9 +1102,9 @@ dvmCompiler_TEMPLATE_MEM_OP_DECODE: */ vpush {d0-d15} @ save out all fp registers push {r0-r12,lr} @ save out all registers + ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S mov r0, lr @ arg0 <- link register mov r1, sp @ arg1 <- stack pointer - ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S blx r2 @ decode and handle the mem op pop {r0-r12,lr} @ restore all registers vpop {d0-d15} @ restore all fp registers diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S index d5847444b..0b24631c3 100644 --- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S +++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S @@ -1102,9 +1102,9 @@ dvmCompiler_TEMPLATE_MEM_OP_DECODE: */ vpush {d0-d15} @ save out all fp registers push {r0-r12,lr} @ save out all registers + ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S mov r0, lr @ arg0 <- link register mov r1, sp @ arg1 <- stack pointer - ldr r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S blx r2 @ decode and handle the mem op pop {r0-r12,lr} @ restore all registers vpop {d0-d15} @ restore all fp registers -- 2.11.0