/* Flag to dump all compiled code */
bool printMe;
+
+ /* Flag to count trace execution */
+ bool profile;
};
extern struct DvmJitGlobals gDvmJit;
dvmFprintf(stderr, " -Xjitmethod:signture[,signature]* "
"(eg Ljava/lang/String\\;replace)\n");
dvmFprintf(stderr, " -Xjitverbose\n");
+ dvmFprintf(stderr, " -Xjitprofile\n");
#endif
dvmFprintf(stderr, "\n");
dvmFprintf(stderr, "Configured with:"
gDvmJit.includeSelectedMethod = true;
} else if (strncmp(argv[i], "-Xjitverbose", 12) == 0) {
gDvmJit.printMe = true;
+ } else if (strncmp(argv[i], "-Xjitprofile", 12) == 0) {
+ gDvmJit.profile = true;
#endif
} else if (strncmp(argv[i], "-Xdeadlockpredict:", 18) == 0) {
LIR *firstLIRInsn;
LIR *lastLIRInsn;
LIR *wordList;
+ LIR *chainCellOffsetLIR;
GrowableList pcReconstructionList;
int headerSize; // bytes before the first code ptr
int dataOffset; // starting offset of literal pool
bool printMe;
bool allSingleStep;
bool halveInstCount;
+ bool executionCount; // Add code to count trace executions
int numChainingCells[CHAINING_CELL_LAST];
LIR *firstChainingLIR[CHAINING_CELL_LAST];
RegisterScoreboard registerScoreboard; // Track register dependency
/* Initialize the printMe flag */
cUnit.printMe = gDvmJit.printMe;
+ /* Initialize the profile flag */
+ cUnit.executionCount = gDvmJit.profile;
+
/* Identify traces that we don't want to compile */
if (gDvmJit.methodTable) {
int len = strlen(desc->method->clazz->descriptor) +
rFP = 5,
rGLUE = 6,
r7 = 7,
+ r8 = 8,
+ r9 = 9,
+ r10 = 10,
+ r11 = 11,
+ r12 = 12,
+ r13 = 13,
+ rlr = 14,
+ rpc = 15
} NativeRegisterPool;
+/* Mask to convert high reg to low for Thumb */
+#define THUMB_REG_MASK 0x7
+
/* Thumb condition encodings */
typedef enum Armv5teConditionCode {
ARM_COND_EQ = 0x0, /* 0000 */
* before sending them off to the assembler. If out-of-range branch distance is
* seen rearrange the instructions a bit to correct it.
*/
-#define CHAIN_CELL_OFFSET_SIZE 2
void dvmCompilerAssembleLIR(CompilationUnit *cUnit)
{
LIR *lir;
/* Add space for chain cell counts & trace description */
u4 chainCellOffset = offset;
- Armv5teLIR *chainCellOffsetLIR = (Armv5teLIR *) (cUnit->firstLIRInsn);
+ Armv5teLIR *chainCellOffsetLIR = cUnit->chainCellOffsetLIR;
+ assert(chainCellOffsetLIR);
assert(chainCellOffset < 0x10000);
assert(chainCellOffsetLIR->opCode == ARMV5TE_16BIT_DATA &&
chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG);
return;
}
+
cUnit->baseAddr = (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
- cUnit->headerSize = CHAIN_CELL_OFFSET_SIZE;
gDvmJit.codeCacheByteUsed += offset;
/* Install the code block */
BasicBlock **blockList = cUnit->blockList;
- /*
- * Reserve space at the beginning of each translation with fillers
- * + Chain cell count (2 bytes)
- */
- newLIR1(cUnit, ARMV5TE_16BIT_DATA, CHAIN_CELL_OFFSET_TAG);
+ if (cUnit->executionCount) {
+ /*
+ * Reserve 6 bytes at the beginning of the trace
+ * +----------------------------+
+ * | execution count (4 bytes) |
+ * +----------------------------+
+ * | chain cell offset (2 bytes)|
+ * +----------------------------+
+ * ...and then code to increment the execution
+ * count:
+ * mov r0, pc @ move adr of "mov r0,pc" + 4 to r0
+ * sub r0, #10 @ back up to addr of executionCount
+ * ldr r1, [r0]
+ * add r1, #1
+ * str r1, [r0]
+ */
+ newLIR1(cUnit, ARMV5TE_16BIT_DATA, 0);
+ newLIR1(cUnit, ARMV5TE_16BIT_DATA, 0);
+ cUnit->chainCellOffsetLIR = newLIR1(cUnit, ARMV5TE_16BIT_DATA, CHAIN_CELL_OFFSET_TAG);
+ cUnit->headerSize = 6;
+ newLIR2(cUnit, ARMV5TE_MOV_RR_HL, r0, rpc & THUMB_REG_MASK);
+ newLIR2(cUnit, ARMV5TE_SUB_RI8, r0, 10);
+ newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, r0, 0);
+ newLIR2(cUnit, ARMV5TE_ADD_RI8, r1, 1);
+ newLIR3(cUnit, ARMV5TE_STR_RRI5, r1, r0, 0);
+ } else {
+ /* Just reserve 2 bytes for the chain cell offset */
+ cUnit->chainCellOffsetLIR = newLIR1(cUnit, ARMV5TE_16BIT_DATA, CHAIN_CELL_OFFSET_TAG);
+ cUnit->headerSize = 2;
+ }
/* Handle the content in each basic block */
for (i = 0; i < cUnit->numBlocks; i++) {
#include <sys/time.h>
#include <signal.h>
#include "compiler/Compiler.h"
+#include "compiler/CompilerUtility.h"
+#include "compiler/CompilerIR.h"
#include <errno.h>
/*
}
#endif
+/*
+ * Dumps profile info for a single trace.
+ *
+ * Emits one "TRACEPROFILE" log line for the JIT table entry p, giving the
+ * trace base address, its execution count, the class descriptor and name
+ * of the method it belongs to, and the start offset / instruction count
+ * of the first trace fragment.
+ *
+ * An entry whose codeAddress is NULL has no translation to inspect and is
+ * reported with a zero address and a "NULL" placeholder.
+ *
+ * NOTE(review): the layout decoded here is the 6-byte profiling header
+ * (4-byte execution count followed by the 2-byte chain cell offset), so
+ * this presumably must only be called for traces compiled with profiling
+ * enabled -- confirm against the callers.
+ */
+void dvmCompilerDumpTraceProfile(struct JitEntry *p)
+{
+    ChainCellCounts* pCellCounts;
+    char* traceBase;
+    u4* pExecutionCount;
+    u2* pCellOffset;
+    JitTraceDescription *desc;
+    const Method* method;
+
+    /*
+     * Check for a missing translation before doing any address math:
+     * subtracting from a NULL pointer is undefined behaviour and would
+     * only produce a meaningless wrapped-around address in the log.
+     */
+    if (p->codeAddress == NULL) {
+        LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", 0);
+        return;
+    }
+
+    /*
+     * The codeAddress field has the low bit set to mark thumb
+     * mode.  We need to strip that off before reconstructing the
+     * trace data: 1 for the thumb bit plus the 6 header bytes that
+     * precede the first instruction gives the 7 subtracted below.
+     * See the diagram in Assemble.c for more info on the trace
+     * layout in memory.
+     */
+    traceBase = (char*)p->codeAddress - 7;
+
+    /* Execution count occupies the first 4 bytes of the trace */
+    pExecutionCount = (u4*) (traceBase);
+    /* ...followed by the 2-byte offset to the chain cell counts */
+    pCellOffset = (u2*) (traceBase + 4);
+    pCellCounts = (ChainCellCounts*) (traceBase + *pCellOffset);
+    /* The trace description is read from just past the chain cell counts */
+    desc = (JitTraceDescription*) ((char*)pCellCounts + sizeof(*pCellCounts));
+    method = desc->method;
+    LOGD("TRACEPROFILE 0x%08x % 10d %s%s [0x%x,%d]", (int)traceBase,
+         *pExecutionCount, method->clazz->descriptor, method->name,
+         desc->trace[0].frag.startOffset,
+         desc->trace[0].frag.numInsts);
+}
+
+
/* Dumps debugging & tuning stats to the log */
void dvmJitStats()
{
LOGD("JIT: Invoke: %d noOpt, %d chainable, %d return",
gDvmJit.invokeNoOpt, gDvmJit.invokeChain, gDvmJit.returnOp);
#endif
+ if (gDvmJit.profile) {
+ for (i=0; i < (int) gDvmJit.jitTableSize; i++) {
+ if (gDvmJit.pJitEntryTable[i].dPC != 0) {
+ dvmCompilerDumpTraceProfile( &gDvmJit.pJitEntryTable[i] );
+ }
+ }
+ }
}
}
* requested
*/
-#define PROFILE_STALENESS_THRESHOLD 250000LL
+#define PROFILE_STALENESS_THRESHOLD 100000LL
bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState)
{
bool res = false; /* Assume success */
#include "InterpDefs.h"
-#define JIT_PROF_SIZE 512
+#define JIT_PROF_SIZE 4096
#define JIT_MAX_TRACE_LEN 100
common_updateProfile:
eor r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
- lsl r3,r3,#23 @ shift out excess 511
- ldrb r1,[r0,r3,lsr #23] @ get counter
+ lsl r3,r3,#20 @ shift out excess 4095
+ ldrb r1,[r0,r3,lsr #20] @ get counter
GET_INST_OPCODE(ip)
subs r1,r1,#1 @ decrement counter
- strb r1,[r0,r3,lsr #23] @ and store it
+ strb r1,[r0,r3,lsr #20] @ and store it
GOTO_OPCODE_IFNE(ip) @ if not threshold, fallthrough otherwise */
/*
* jump to it now).
*/
mov r1,#255
- strb r1,[r0,r3,lsr #23] @ reset counter
+ strb r1,[r0,r3,lsr #20] @ reset counter
EXPORT_PC()
mov r0,rPC
bl dvmJitGetCodeAddr @ r0<- dvmJitGetCodeAddr(rPC)
*/
#include "../common/asm-constants.h"
+/*
+ * Power of 2 width in bits of the hash table size.
+ * for ex: 9 -> 512, 10-> 1024, etc.
+ *
+ * JIT_PROF_TAB_LSHIFT is the shift count that keeps only the low
+ * JIT_PROF_TAB_WIDTH bits of a 32-bit hash (32 - 12 = 20), and
+ * JIT_PROF_TAB_THRESH_RESET is the value a profile counter is reset to.
+ */
+#define JIT_PROF_TAB_WIDTH 12
+#define JIT_PROF_TAB_LSHIFT (32 - JIT_PROF_TAB_WIDTH)
+#define JIT_PROF_TAB_THRESH_RESET 255
/* File: armv5te/platform.S */
/*
common_updateProfile:
eor r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
- lsl r3,r3,#23 @ shift out excess 511
- ldrb r1,[r0,r3,lsr #23] @ get counter
+ lsl r3,r3,#20 @ shift out excess 4095
+ ldrb r1,[r0,r3,lsr #20] @ get counter
GET_INST_OPCODE(ip)
subs r1,r1,#1 @ decrement counter
- strb r1,[r0,r3,lsr #23] @ and store it
+ strb r1,[r0,r3,lsr #20] @ and store it
GOTO_OPCODE_IFNE(ip) @ if not threshold, fallthrough otherwise */
/*
* jump to it now).
*/
mov r1,#255
- strb r1,[r0,r3,lsr #23] @ reset counter
+ strb r1,[r0,r3,lsr #20] @ reset counter
EXPORT_PC()
mov r0,rPC
bl dvmJitGetCodeAddr @ r0<- dvmJitGetCodeAddr(rPC)
*/
#include "../common/asm-constants.h"
+/*
+ * Power of 2 width in bits of the hash table size.
+ * for ex: 9 -> 512, 10-> 1024, etc.
+ *
+ * JIT_PROF_TAB_LSHIFT is the shift count that keeps only the low
+ * JIT_PROF_TAB_WIDTH bits of a 32-bit hash (32 - 12 = 20), and
+ * JIT_PROF_TAB_THRESH_RESET is the value a profile counter is reset to.
+ */
+#define JIT_PROF_TAB_WIDTH 12
+#define JIT_PROF_TAB_LSHIFT (32 - JIT_PROF_TAB_WIDTH)
+#define JIT_PROF_TAB_THRESH_RESET 255
/* File: armv5te/platform.S */
/*
common_updateProfile:
eor r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
- lsl r3,r3,#23 @ shift out excess 511
- ldrb r1,[r0,r3,lsr #23] @ get counter
+ lsl r3,r3,#20 @ shift out excess 4095
+ ldrb r1,[r0,r3,lsr #20] @ get counter
GET_INST_OPCODE(ip)
subs r1,r1,#1 @ decrement counter
- strb r1,[r0,r3,lsr #23] @ and store it
+ strb r1,[r0,r3,lsr #20] @ and store it
GOTO_OPCODE_IFNE(ip) @ if not threshold, fallthrough otherwise */
/*
* jump to it now).
*/
mov r1,#255
- strb r1,[r0,r3,lsr #23] @ reset counter
+ strb r1,[r0,r3,lsr #20] @ reset counter
EXPORT_PC()
mov r0,rPC
bl dvmJitGetCodeAddr @ r0<- dvmJitGetCodeAddr(rPC)
*/
#include "../common/asm-constants.h"
+/*
+ * Power of 2 width in bits of the hash table size.
+ * for ex: 9 -> 512, 10-> 1024, etc.
+ *
+ * JIT_PROF_TAB_LSHIFT is the shift count that keeps only the low
+ * JIT_PROF_TAB_WIDTH bits of a 32-bit hash (32 - 12 = 20), and
+ * JIT_PROF_TAB_THRESH_RESET is the value a profile counter is reset to.
+ */
+#define JIT_PROF_TAB_WIDTH 12
+#define JIT_PROF_TAB_LSHIFT (32 - JIT_PROF_TAB_WIDTH)
+#define JIT_PROF_TAB_THRESH_RESET 255
/* File: armv5te/platform.S */
/*
common_updateProfile:
eor r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
- lsl r3,r3,#23 @ shift out excess 511
- ldrb r1,[r0,r3,lsr #23] @ get counter
+ lsl r3,r3,#20 @ shift out excess 4095
+ ldrb r1,[r0,r3,lsr #20] @ get counter
GET_INST_OPCODE(ip)
subs r1,r1,#1 @ decrement counter
- strb r1,[r0,r3,lsr #23] @ and store it
+ strb r1,[r0,r3,lsr #20] @ and store it
GOTO_OPCODE_IFNE(ip) @ if not threshold, fallthrough otherwise */
/*
* jump to it now).
*/
mov r1,#255
- strb r1,[r0,r3,lsr #23] @ reset counter
+ strb r1,[r0,r3,lsr #20] @ reset counter
EXPORT_PC()
mov r0,rPC
bl dvmJitGetCodeAddr @ r0<- dvmJitGetCodeAddr(rPC)