2 * Copyright (C) 2008 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Android's method call profiling goodies.
22 #ifdef WITH_PROFILER // -- include rest of file
33 #ifdef HAVE_ANDROID_OS
34 # define UPDATE_MAGIC_PAGE 1
35 # define MAGIC_PAGE_BASE_ADDR 0x08000000
37 # define PAGESIZE 4096
52 * u8 start date/time in usec
56 * u4 method ID | method action
57 * u4 time delta since start, in usec
59 * 32 bits of microseconds is 70 minutes.
61 * All values are stored in little-endian order.
63 #define TRACE_REC_SIZE 9
64 #define TRACE_MAGIC 0x574f4c53
65 #define TRACE_HEADER_LEN 32
69 * Get the wall-clock date/time, in usec.
71 static inline u8 getTimeInUsec()
75 gettimeofday(&tv, NULL);
76 return tv.tv_sec * 1000000LL + tv.tv_usec;
80 * Get the current time, in microseconds.
82 * This can mean one of two things. In "global clock" mode, we get the
83 * same time across all threads. If we use CLOCK_THREAD_CPUTIME_ID, we
84 * get a per-thread CPU usage timer. The latter is better, but a bit
85 * more complicated to implement.
87 static inline u8 getClock()
89 #if defined(HAVE_POSIX_CLOCKS)
92 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tm);
93 //assert(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000);
94 if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) {
95 LOGE("bad nsec: %ld\n", tm.tv_nsec);
99 return tm.tv_sec * 1000000LL + tm.tv_nsec / 1000;
103 gettimeofday(&tv, NULL);
104 return tv.tv_sec * 1000000LL + tv.tv_usec;
109 * Write little-endian data.
111 static inline void storeShortLE(u1* buf, u2 val)
114 *buf++ = (u1) (val >> 8);
116 static inline void storeIntLE(u1* buf, u4 val)
119 *buf++ = (u1) (val >> 8);
120 *buf++ = (u1) (val >> 16);
121 *buf++ = (u1) (val >> 24);
123 static inline void storeLongLE(u1* buf, u8 val)
126 *buf++ = (u1) (val >> 8);
127 *buf++ = (u1) (val >> 16);
128 *buf++ = (u1) (val >> 24);
129 *buf++ = (u1) (val >> 32);
130 *buf++ = (u1) (val >> 40);
131 *buf++ = (u1) (val >> 48);
132 *buf++ = (u1) (val >> 56);
138 bool dvmProfilingStartup(void)
141 * Initialize "dmtrace" method profiling.
143 memset(&gDvm.methodTrace, 0, sizeof(gDvm.methodTrace));
144 dvmInitMutex(&gDvm.methodTrace.startStopLock);
145 pthread_cond_init(&gDvm.methodTrace.threadExitCond, NULL);
148 dvmFindClassNoInit("Ldalvik/system/VMDebug;", NULL);
149 assert(clazz != NULL);
150 gDvm.methodTrace.gcMethod =
151 dvmFindDirectMethodByDescriptor(clazz, "startGC", "()V");
152 gDvm.methodTrace.classPrepMethod =
153 dvmFindDirectMethodByDescriptor(clazz, "startClassPrep", "()V");
154 if (gDvm.methodTrace.gcMethod == NULL ||
155 gDvm.methodTrace.classPrepMethod == NULL)
157 LOGE("Unable to find startGC or startClassPrep\n");
161 assert(!dvmCheckException(dvmThreadSelf()));
164 * Allocate storage for instruction counters.
166 gDvm.executedInstrCounts = (int*) malloc(kNumDalvikInstructions * sizeof(int));
167 if (gDvm.executedInstrCounts == NULL)
169 memset(gDvm.executedInstrCounts, 0, kNumDalvikInstructions * sizeof(int));
171 #ifdef UPDATE_MAGIC_PAGE
173 * If we're running on the emulator, there's a magic page into which
174 * we can put interpreted method information. This allows interpreted
175 * methods to show up in the emulator's code traces.
177 * We could key this off of the "ro.kernel.qemu" property, but there's
178 * no real harm in doing this on a real device.
180 gDvm.emulatorTracePage = mmap((void*) MAGIC_PAGE_BASE_ADDR,
181 PAGESIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED|MAP_ANON, -1, 0);
182 if (gDvm.emulatorTracePage == MAP_FAILED) {
183 LOGE("Unable to mmap magic page (0x%08x)\n", MAGIC_PAGE_BASE_ADDR);
186 *(u4*) gDvm.emulatorTracePage = 0;
188 assert(gDvm.emulatorTracePage == NULL);
195 * Free up profiling resources.
197 void dvmProfilingShutdown(void)
199 #ifdef UPDATE_MAGIC_PAGE
200 if (gDvm.emulatorTracePage != NULL)
201 munmap(gDvm.emulatorTracePage, PAGESIZE);
203 free(gDvm.executedInstrCounts);
207 * Update the "active profilers" count.
209 * "count" should be +1 or -1.
211 static void updateActiveProfilers(int count)
213 int oldValue, newValue;
216 oldValue = gDvm.activeProfilers;
217 newValue = oldValue + count;
219 LOGE("Can't have %d active profilers\n", newValue);
222 } while (!ATOMIC_CMP_SWAP(&gDvm.activeProfilers, oldValue, newValue));
224 LOGD("+++ active profiler count now %d\n", newValue);
229 * Reset the "cpuClockBase" field in all threads.
231 static void resetCpuClockBase(void)
235 dvmLockThreadList(NULL);
236 for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
237 thread->cpuClockBaseSet = false;
238 thread->cpuClockBase = 0;
240 dvmUnlockThreadList();
244 * Dump the thread list to the specified file.
246 static void dumpThreadList(FILE* fp)
250 dvmLockThreadList(NULL);
251 for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
252 char* name = dvmGetThreadName(thread);
254 fprintf(fp, "%d\t%s\n", thread->threadId, name);
257 dvmUnlockThreadList();
261 * This is a dvmHashForeach callback.
263 static int dumpMarkedMethods(void* vclazz, void* vfp)
265 DexStringCache stringCache;
266 ClassObject* clazz = (ClassObject*) vclazz;
267 FILE* fp = (FILE*) vfp;
272 dexStringCacheInit(&stringCache);
274 for (i = 0; i < clazz->virtualMethodCount; i++) {
275 meth = &clazz->virtualMethods[i];
276 if (meth->inProfile) {
277 name = dvmDescriptorToName(meth->clazz->descriptor);
278 fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
280 dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
281 dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
282 meth->inProfile = false;
287 for (i = 0; i < clazz->directMethodCount; i++) {
288 meth = &clazz->directMethods[i];
289 if (meth->inProfile) {
290 name = dvmDescriptorToName(meth->clazz->descriptor);
291 fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
293 dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
294 dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
295 meth->inProfile = false;
300 dexStringCacheRelease(&stringCache);
306 * Dump the list of "marked" methods to the specified file.
308 static void dumpMethodList(FILE* fp)
310 dvmHashTableLock(gDvm.loadedClasses);
311 dvmHashForeach(gDvm.loadedClasses, dumpMarkedMethods, (void*) fp);
312 dvmHashTableUnlock(gDvm.loadedClasses);
316 * Start method tracing. This opens the file and allocates the buffer.
317 * If any of these fail, we throw an exception and return.
319 * Method tracing is global to the VM.
321 void dvmMethodTraceStart(const char* traceFileName, int bufferSize, int flags)
323 MethodTraceState* state = &gDvm.methodTrace;
325 assert(bufferSize > 0);
327 if (state->traceEnabled != 0) {
328 LOGI("TRACE start requested, but already in progress; stopping\n");
329 dvmMethodTraceStop();
331 updateActiveProfilers(1);
332 LOGI("TRACE STARTED: '%s' %dKB\n",
333 traceFileName, bufferSize / 1024);
334 dvmLockMutex(&state->startStopLock);
337 * Allocate storage and open files.
339 * We don't need to initialize the buffer, but doing so might remove
340 * some fault overhead if the pages aren't mapped until touched.
342 state->buf = (u1*) malloc(bufferSize);
343 if (state->buf == NULL) {
344 dvmThrowException("Ljava/lang/InternalError;", "buffer alloc failed");
347 state->traceFile = fopen(traceFileName, "w");
348 if (state->traceFile == NULL) {
349 LOGE("Unable to open trace file '%s': %s\n",
350 traceFileName, strerror(errno));
351 dvmThrowException("Ljava/lang/RuntimeException;", "file open failed");
354 memset(state->buf, 0xee, bufferSize);
356 state->bufferSize = bufferSize;
357 state->overflow = false;
360 * Enable alloc counts if we've been requested to do so.
362 state->flags = flags;
363 if ((flags & TRACE_ALLOC_COUNTS) != 0)
364 dvmStartAllocCounting();
366 /* reset our notion of the start time for all CPU threads */
369 state->startWhen = getTimeInUsec();
374 memset(state->buf, 0, TRACE_HEADER_LEN);
375 storeIntLE(state->buf + 0, TRACE_MAGIC);
376 storeShortLE(state->buf + 4, TRACE_VERSION);
377 storeShortLE(state->buf + 6, TRACE_HEADER_LEN);
378 storeLongLE(state->buf + 8, state->startWhen);
379 state->curOffset = TRACE_HEADER_LEN;
384 * Set the "enabled" flag. Once we do this, threads will wait to be
385 * signaled before exiting, so we have to make sure we wake them up.
387 state->traceEnabled = true;
388 dvmUnlockMutex(&state->startStopLock);
392 updateActiveProfilers(-1);
393 if (state->traceFile != NULL) {
394 fclose(state->traceFile);
395 state->traceFile = NULL;
397 if (state->buf != NULL) {
401 dvmUnlockMutex(&state->startStopLock);
405 * Run through the data buffer and pull out the methods that were visited.
406 * Set a mark so that we know which ones to output.
408 static void markTouchedMethods(void)
410 u1* ptr = gDvm.methodTrace.buf + TRACE_HEADER_LEN;
411 u1* end = gDvm.methodTrace.buf + gDvm.methodTrace.curOffset;
412 unsigned int methodVal;
416 methodVal = *(ptr+1) | (*(ptr+2) << 8) | (*(ptr+3) << 16)
418 method = (Method*) METHOD_ID(methodVal);
420 method->inProfile = true;
421 ptr += TRACE_REC_SIZE;
426 * Compute the amount of overhead in a clock call, in nsec.
428 * This value is going to vary depending on what else is going on in the
429 * system. When examined across several runs a pattern should emerge.
431 static u4 getClockOverhead(void)
433 u8 calStart, calElapsed;
436 calStart = getClock();
437 for (i = 1000 * 4; i > 0; i--) {
448 calElapsed = getClock() - calStart;
449 return (int) (calElapsed / (8*4));
453 * Returns "true" if method tracing is currently active.
455 bool dvmIsMethodTraceActive(void)
457 const MethodTraceState* state = &gDvm.methodTrace;
458 return state->traceEnabled;
462 * Stop method tracing. We write the buffer to disk and generate a key
463 * file so we can interpret it.
465 void dvmMethodTraceStop(void)
467 MethodTraceState* state = &gDvm.methodTrace;
471 * We need this to prevent somebody from starting a new trace while
472 * we're in the process of stopping the old.
474 dvmLockMutex(&state->startStopLock);
476 if (!state->traceEnabled) {
477 /* somebody already stopped it, or it was never started */
478 LOGD("TRACE stop requested, but not running\n");
479 dvmUnlockMutex(&state->startStopLock);
482 updateActiveProfilers(-1);
485 /* compute elapsed time */
486 elapsed = getTimeInUsec() - state->startWhen;
489 * Globally disable it, and allow other threads to notice. We want
490 * to stall here for at least as long as dvmMethodTraceAdd needs
491 * to finish. There's no real risk though -- it will take a while to
492 * write the data to disk, and we don't clear the buffer pointer until
493 * after that completes.
495 state->traceEnabled = false;
499 if ((state->flags & TRACE_ALLOC_COUNTS) != 0)
500 dvmStopAllocCounting();
502 LOGI("TRACE STOPPED%s: writing %d records\n",
503 state->overflow ? " (NOTE: overflowed buffer)" : "",
504 (state->curOffset - TRACE_HEADER_LEN) / TRACE_REC_SIZE);
505 if (gDvm.debuggerActive) {
506 LOGW("WARNING: a debugger is active; method-tracing results "
511 * Do a quick calibration test to see how expensive our clock call is.
513 u4 clockNsec = getClockOverhead();
515 markTouchedMethods();
517 fprintf(state->traceFile, "%cversion\n", TOKEN_CHAR);
518 fprintf(state->traceFile, "%d\n", TRACE_VERSION);
519 fprintf(state->traceFile, "data-file-overflow=%s\n",
520 state->overflow ? "true" : "false");
521 #if defined(HAVE_POSIX_CLOCKS)
522 fprintf(state->traceFile, "clock=thread-cpu\n");
524 fprintf(state->traceFile, "clock=global\n");
526 fprintf(state->traceFile, "elapsed-time-usec=%llu\n", elapsed);
527 fprintf(state->traceFile, "num-method-calls=%d\n",
528 (state->curOffset - TRACE_HEADER_LEN) / TRACE_REC_SIZE);
529 fprintf(state->traceFile, "clock-call-overhead-nsec=%d\n", clockNsec);
530 fprintf(state->traceFile, "vm=dalvik\n");
531 if ((state->flags & TRACE_ALLOC_COUNTS) != 0) {
532 fprintf(state->traceFile, "alloc-count=%d\n",
533 gDvm.allocProf.allocCount);
534 fprintf(state->traceFile, "alloc-size=%d\n",
535 gDvm.allocProf.allocSize);
536 fprintf(state->traceFile, "gc-count=%d\n",
537 gDvm.allocProf.gcCount);
539 fprintf(state->traceFile, "%cthreads\n", TOKEN_CHAR);
540 dumpThreadList(state->traceFile);
541 fprintf(state->traceFile, "%cmethods\n", TOKEN_CHAR);
542 dumpMethodList(state->traceFile);
543 fprintf(state->traceFile, "%cend\n", TOKEN_CHAR);
545 if (fwrite(state->buf, state->curOffset, 1, state->traceFile) != 1) {
546 LOGE("trace fwrite(%d) failed, errno=%d\n", state->curOffset, errno);
547 dvmThrowException("Ljava/lang/RuntimeException;", "data write failed");
554 fclose(state->traceFile);
555 state->traceFile = NULL;
557 int cc = pthread_cond_broadcast(&state->threadExitCond);
559 dvmUnlockMutex(&state->startStopLock);
564 * We just did something with a method. Emit a record.
566 * Multiple threads may be banging on this all at once. We use atomic ops
567 * rather than mutexes for speed.
569 void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
571 MethodTraceState* state = &gDvm.methodTrace;
572 u4 clockDiff, methodVal;
573 int oldOffset, newOffset;
577 * We can only access the per-thread CPU clock from within the
578 * thread, so we have to initialize the base time on the first use.
579 * (Looks like pthread_getcpuclockid(thread, &id) will do what we
580 * want, but it doesn't appear to be defined on the device.)
582 if (!self->cpuClockBaseSet) {
583 self->cpuClockBase = getClock();
584 self->cpuClockBaseSet = true;
585 //LOGI("thread base id=%d 0x%llx\n",
586 // self->threadId, self->cpuClockBase);
590 * Advance "curOffset" atomically.
593 oldOffset = state->curOffset;
594 newOffset = oldOffset + TRACE_REC_SIZE;
595 if (newOffset > state->bufferSize) {
596 state->overflow = true;
599 } while (!ATOMIC_CMP_SWAP(&state->curOffset, oldOffset, newOffset));
601 //assert(METHOD_ACTION((u4) method) == 0);
604 clockDiff = (u4) (now - self->cpuClockBase);
606 methodVal = METHOD_COMBINE((u4) method, action);
609 * Write data into "oldOffset".
611 ptr = state->buf + oldOffset;
612 *ptr++ = self->threadId;
613 *ptr++ = (u1) methodVal;
614 *ptr++ = (u1) (methodVal >> 8);
615 *ptr++ = (u1) (methodVal >> 16);
616 *ptr++ = (u1) (methodVal >> 24);
617 *ptr++ = (u1) clockDiff;
618 *ptr++ = (u1) (clockDiff >> 8);
619 *ptr++ = (u1) (clockDiff >> 16);
620 *ptr++ = (u1) (clockDiff >> 24);
624 * We just did something with a method. Emit a record by setting a value
625 * in a magic memory location.
627 void dvmEmitEmulatorTrace(const Method* method, int action)
629 #ifdef UPDATE_MAGIC_PAGE
631 * We want to store the address of the Dalvik bytecodes. Native and
632 * abstract methods don't have any, so we don't do this for those.
633 * (Abstract methods are never called, but in Dalvik they can be
634 * because we do a "late trap" to generate the abstract method
635 * exception. However, we trap to a native method, so we don't need
636 * an explicit check for abstract here.)
638 if (dvmIsNativeMethod(method))
640 assert(method->insns != NULL);
642 u4* pMagic = ((u4*) MAGIC_PAGE_BASE_ADDR) +1;
645 * The dexlist output shows the &DexCode.insns offset value, which
646 * is offset from the start of the base DEX header. Method.insns
647 * is the absolute address, effectively offset from the start of
648 * the optimized DEX header. We either need to return the
649 * optimized DEX base file address offset by the right amount, or
650 * take the "real" address and subtract off the size of the
651 * optimized DEX header.
653 * Would be nice to factor this out at dexlist time, but we can't count
654 * on having access to the correct optimized DEX file.
658 DexFile* pDexFile = method->clazz->pDvmDex->pDexFile;
659 addr = (u4)pDexFile->pOptHeader; /* file starts at "opt" header */
660 addr += dvmGetMethodCode(method)->insnsOff;
662 const DexOptHeader* pOptHdr = method->clazz->pDvmDex->pDexFile->pOptHeader;
663 addr = (u4) method->insns - pOptHdr->dexOffset;
665 assert(METHOD_TRACE_ENTER == 0);
666 assert(METHOD_TRACE_EXIT == 1);
667 assert(METHOD_TRACE_UNROLL == 2);
668 *(pMagic+action) = addr;
669 LOGVV("Set %p = 0x%08x (%s.%s)\n",
670 pMagic+action, addr, method->clazz->descriptor, method->name);
675 * The GC calls this when it's about to start. We add a marker to the
676 * trace output so the tool can exclude the GC cost from the results.
678 void dvmMethodTraceGCBegin(void)
680 TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTrace.gcMethod);
682 void dvmMethodTraceGCEnd(void)
684 TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTrace.gcMethod);
688 * The class loader calls this when it's loading or initializing a class.
690 void dvmMethodTraceClassPrepBegin(void)
692 TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTrace.classPrepMethod);
694 void dvmMethodTraceClassPrepEnd(void)
696 TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTrace.classPrepMethod);
701 * Enable emulator trace info.
703 void dvmEmulatorTraceStart(void)
705 updateActiveProfilers(1);
707 /* in theory we should make this an atomic inc; in practice not important */
708 gDvm.emulatorTraceEnableCount++;
709 if (gDvm.emulatorTraceEnableCount == 1)
710 LOGD("--- emulator method traces enabled\n");
714 * Disable emulator trace info.
716 void dvmEmulatorTraceStop(void)
718 if (gDvm.emulatorTraceEnableCount == 0) {
719 LOGE("ERROR: emulator tracing not enabled\n");
722 updateActiveProfilers(-1);
723 /* in theory we should make this an atomic inc; in practice not important */
724 gDvm.emulatorTraceEnableCount--;
725 if (gDvm.emulatorTraceEnableCount == 0)
726 LOGD("--- emulator method traces disabled\n");
731 * Start instruction counting.
733 void dvmStartInstructionCounting()
735 updateActiveProfilers(1);
736 /* in theory we should make this an atomic inc; in practice not important */
737 gDvm.instructionCountEnableCount++;
741 * Start instruction counting.
743 void dvmStopInstructionCounting()
745 if (gDvm.instructionCountEnableCount == 0) {
746 LOGE("ERROR: instruction counting not enabled\n");
749 updateActiveProfilers(-1);
750 gDvm.instructionCountEnableCount--;
755 * Start alloc counting. Note this doesn't affect the "active profilers"
756 * count, since the interpreter loop is not involved.
758 void dvmStartAllocCounting(void)
760 gDvm.allocProf.enabled = true;
764 * Stop alloc counting.
766 void dvmStopAllocCounting(void)
768 gDvm.allocProf.enabled = false;
771 #endif /*WITH_PROFILER*/