2 * Copyright (C) 2008 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Android's method call profiling goodies.
22 #ifdef WITH_PROFILER // -- include rest of file
34 #ifdef HAVE_ANDROID_OS
35 # define UPDATE_MAGIC_PAGE 1
37 # define PAGESIZE 4096
52 * u8 start date/time in usec
56 * u4 method ID | method action
57 * u4 time delta since start, in usec
59 * 32 bits of microseconds is 70 minutes.
61 * All values are stored in little-endian order.
63 #define TRACE_REC_SIZE 9
64 #define TRACE_MAGIC 0x574f4c53
65 #define TRACE_HEADER_LEN 32
67 #define FILL_PATTERN 0xeeeeeeee
71 * Get the wall-clock date/time, in usec.
73 static inline u8 getTimeInUsec()
77 gettimeofday(&tv, NULL);
78 return tv.tv_sec * 1000000LL + tv.tv_usec;
82 * Get the current time, in microseconds.
84 * This can mean one of two things. In "global clock" mode, we get the
85 * same time across all threads. If we use CLOCK_THREAD_CPUTIME_ID, we
86 * get a per-thread CPU usage timer. The latter is better, but a bit
87 * more complicated to implement.
89 static inline u8 getClock()
91 #if defined(HAVE_POSIX_CLOCKS)
94 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tm);
95 //assert(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000);
96 if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) {
97 LOGE("bad nsec: %ld\n", tm.tv_nsec);
101 return tm.tv_sec * 1000000LL + tm.tv_nsec / 1000;
105 gettimeofday(&tv, NULL);
106 return tv.tv_sec * 1000000LL + tv.tv_usec;
111 * Write little-endian data.
113 static inline void storeShortLE(u1* buf, u2 val)
116 *buf++ = (u1) (val >> 8);
118 static inline void storeIntLE(u1* buf, u4 val)
121 *buf++ = (u1) (val >> 8);
122 *buf++ = (u1) (val >> 16);
123 *buf++ = (u1) (val >> 24);
125 static inline void storeLongLE(u1* buf, u8 val)
128 *buf++ = (u1) (val >> 8);
129 *buf++ = (u1) (val >> 16);
130 *buf++ = (u1) (val >> 24);
131 *buf++ = (u1) (val >> 32);
132 *buf++ = (u1) (val >> 40);
133 *buf++ = (u1) (val >> 48);
134 *buf++ = (u1) (val >> 56);
/*
 * One-time startup for the profiling code.
 *
 * NOTE(review): this looks like a lossy excerpt -- the declaration of
 * "clazz", error-path returns, and the #else/#endif lines implied by the
 * visible statements are not shown.  Confirm against the full file.
 */
bool dvmProfilingStartup(void)
    /*
     * Initialize "dmtrace" method profiling.
     */
    memset(&gDvm.methodTrace, 0, sizeof(gDvm.methodTrace));
    dvmInitMutex(&gDvm.methodTrace.startStopLock);
    pthread_cond_init(&gDvm.methodTrace.threadExitCond, NULL);

    /* Cache the VMDebug marker methods used to tag GC and class-prep
     * activity in trace output (see dvmMethodTraceGCBegin et al). */
    dvmFindClassNoInit("Ldalvik/system/VMDebug;", NULL);
    assert(clazz != NULL);
    gDvm.methodTrace.gcMethod =
        dvmFindDirectMethodByDescriptor(clazz, "startGC", "()V");
    gDvm.methodTrace.classPrepMethod =
        dvmFindDirectMethodByDescriptor(clazz, "startClassPrep", "()V");
    if (gDvm.methodTrace.gcMethod == NULL ||
        gDvm.methodTrace.classPrepMethod == NULL)
        LOGE("Unable to find startGC or startClassPrep\n");
    assert(!dvmCheckException(dvmThreadSelf()));

    /*
     * Allocate storage for instruction counters.
     */
    gDvm.executedInstrCounts = (int*) malloc(kNumDalvikInstructions * sizeof(int));
    if (gDvm.executedInstrCounts == NULL)
    memset(gDvm.executedInstrCounts, 0, kNumDalvikInstructions * sizeof(int));

#ifdef UPDATE_MAGIC_PAGE
    /*
     * If we're running on the emulator, there's a magic page into which
     * we can put interpreted method information.  This allows interpreted
     * methods to show up in the emulator's code traces.
     *
     * We could key this off of the "ro.kernel.qemu" property, but there's
     * no real harm in doing this on a real device.
     */
    int fd = open("/dev/qemu_trace", O_RDWR);
    /* presumably guarded by "if (fd < 0)" -- TODO confirm */
    LOGV("Unable to open /dev/qemu_trace\n");
    gDvm.emulatorTracePage = mmap(0, PAGESIZE, PROT_READ|PROT_WRITE,
    if (gDvm.emulatorTracePage == MAP_FAILED) {
        LOGE("Unable to mmap /dev/qemu_trace\n");
        gDvm.emulatorTracePage = NULL;
    /* clear the first magic word of the freshly-mapped page */
    *(u4*) gDvm.emulatorTracePage = 0;
    /* non-UPDATE_MAGIC_PAGE builds must not have a trace page */
    assert(gDvm.emulatorTracePage == NULL);
204 * Free up profiling resources.
206 void dvmProfilingShutdown(void)
208 #ifdef UPDATE_MAGIC_PAGE
209 if (gDvm.emulatorTracePage != NULL)
210 munmap(gDvm.emulatorTracePage, PAGESIZE);
212 free(gDvm.executedInstrCounts);
216 * Update the "active profilers" count.
218 * "count" should be +1 or -1.
220 static void updateActiveProfilers(int count)
222 int oldValue, newValue;
225 oldValue = gDvm.activeProfilers;
226 newValue = oldValue + count;
228 LOGE("Can't have %d active profilers\n", newValue);
231 } while (!ATOMIC_CMP_SWAP(&gDvm.activeProfilers, oldValue, newValue));
233 LOGD("+++ active profiler count now %d\n", newValue);
238 * Reset the "cpuClockBase" field in all threads.
240 static void resetCpuClockBase(void)
244 dvmLockThreadList(NULL);
245 for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
246 thread->cpuClockBaseSet = false;
247 thread->cpuClockBase = 0;
249 dvmUnlockThreadList();
253 * Dump the thread list to the specified file.
255 static void dumpThreadList(FILE* fp)
259 dvmLockThreadList(NULL);
260 for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
261 char* name = dvmGetThreadName(thread);
263 fprintf(fp, "%d\t%s\n", thread->threadId, name);
266 dvmUnlockThreadList();
270 * This is a dvmHashForeach callback.
272 static int dumpMarkedMethods(void* vclazz, void* vfp)
274 DexStringCache stringCache;
275 ClassObject* clazz = (ClassObject*) vclazz;
276 FILE* fp = (FILE*) vfp;
281 dexStringCacheInit(&stringCache);
283 for (i = 0; i < clazz->virtualMethodCount; i++) {
284 meth = &clazz->virtualMethods[i];
285 if (meth->inProfile) {
286 name = dvmDescriptorToName(meth->clazz->descriptor);
287 fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
289 dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
290 dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
291 meth->inProfile = false;
296 for (i = 0; i < clazz->directMethodCount; i++) {
297 meth = &clazz->directMethods[i];
298 if (meth->inProfile) {
299 name = dvmDescriptorToName(meth->clazz->descriptor);
300 fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
302 dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
303 dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
304 meth->inProfile = false;
309 dexStringCacheRelease(&stringCache);
315 * Dump the list of "marked" methods to the specified file.
317 static void dumpMethodList(FILE* fp)
319 dvmHashTableLock(gDvm.loadedClasses);
320 dvmHashForeach(gDvm.loadedClasses, dumpMarkedMethods, (void*) fp);
321 dvmHashTableUnlock(gDvm.loadedClasses);
325 * Start method tracing. This opens the file (if an already open fd has not
326 * been supplied) and allocates the buffer.
327 * If any of these fail, we throw an exception and return.
329 * Method tracing is global to the VM.
/*
 * NOTE(review): lossy excerpt -- the parameter list is truncated (the
 * body uses a "flags" argument not visible in the signature), and the
 * braces / goto-style failure jumps / return statements around the
 * error paths are missing.  The trailing section below is clearly the
 * failure-cleanup path.  Confirm against the full file.
 */
void dvmMethodTraceStart(const char* traceFileName, int traceFd, int bufferSize,
    MethodTraceState* state = &gDvm.methodTrace;

    assert(bufferSize > 0);

    /* if a trace is already running, shut it down before restarting */
    if (state->traceEnabled != 0) {
        LOGI("TRACE start requested, but already in progress; stopping\n");
        dvmMethodTraceStop();
    updateActiveProfilers(1);
    LOGI("TRACE STARTED: '%s' %dKB\n",
        traceFileName, bufferSize / 1024);
    dvmLockMutex(&state->startStopLock);

    /*
     * Allocate storage and open files.
     *
     * We don't need to initialize the buffer, but doing so might remove
     * some fault overhead if the pages aren't mapped until touched.
     */
    state->buf = (u1*) malloc(bufferSize);
    if (state->buf == NULL) {
        dvmThrowException("Ljava/lang/InternalError;", "buffer alloc failed");
    /* open by name or adopt the caller-supplied descriptor -- these two
     * assignments are presumably opposite arms of a missing branch */
    state->traceFile = fopen(traceFileName, "w");
    state->traceFile = fdopen(traceFd, "w");
    if (state->traceFile == NULL) {
        LOGE("Unable to open trace file '%s': %s\n",
            traceFileName, strerror(errno));
        dvmThrowException("Ljava/lang/RuntimeException;", "file open failed");
    /* fill with a recognizable pattern so unwritten records can be
     * detected later (see the scan in dvmMethodTraceStop) */
    memset(state->buf, (char)FILL_PATTERN, bufferSize);

    state->bufferSize = bufferSize;
    state->overflow = false;

    /*
     * Enable alloc counts if we've been requested to do so.
     */
    state->flags = flags;
    if ((flags & TRACE_ALLOC_COUNTS) != 0)
        dvmStartAllocCounting();

    /* reset our notion of the start time for all CPU threads */
    state->startWhen = getTimeInUsec();

    /* write the fixed-size header: magic, version, length, start time */
    memset(state->buf, 0, TRACE_HEADER_LEN);
    storeIntLE(state->buf + 0, TRACE_MAGIC);
    storeShortLE(state->buf + 4, TRACE_VERSION);
    storeShortLE(state->buf + 6, TRACE_HEADER_LEN);
    storeLongLE(state->buf + 8, state->startWhen);
    state->curOffset = TRACE_HEADER_LEN;

    /*
     * Set the "enabled" flag.  Once we do this, threads will wait to be
     * signaled before exiting, so we have to make sure we wake them up.
     */
    state->traceEnabled = true;
    dvmUnlockMutex(&state->startStopLock);

    /* ---- failure cleanup (label not visible in this excerpt) ---- */
    updateActiveProfilers(-1);
    if (state->traceFile != NULL) {
        fclose(state->traceFile);
        state->traceFile = NULL;
    if (state->buf != NULL) {
    dvmUnlockMutex(&state->startStopLock);
420 * Run through the data buffer and pull out the methods that were visited.
421 * Set a mark so that we know which ones to output.
423 static void markTouchedMethods(int endOffset)
425 u1* ptr = gDvm.methodTrace.buf + TRACE_HEADER_LEN;
426 u1* end = gDvm.methodTrace.buf + endOffset;
427 unsigned int methodVal;
431 methodVal = *(ptr+1) | (*(ptr+2) << 8) | (*(ptr+3) << 16)
433 method = (Method*) METHOD_ID(methodVal);
435 method->inProfile = true;
436 ptr += TRACE_REC_SIZE;
441 * Compute the amount of overhead in a clock call, in nsec.
443 * This value is going to vary depending on what else is going on in the
444 * system. When examined across several runs a pattern should emerge.
446 static u4 getClockOverhead(void)
448 u8 calStart, calElapsed;
451 calStart = getClock();
452 for (i = 1000 * 4; i > 0; i--) {
463 calElapsed = getClock() - calStart;
464 return (int) (calElapsed / (8*4));
468 * Returns "true" if method tracing is currently active.
470 bool dvmIsMethodTraceActive(void)
472 const MethodTraceState* state = &gDvm.methodTrace;
473 return state->traceEnabled;
477 * Stop method tracing. We write the buffer to disk and generate a key
478 * file so we can interpret it.
/*
 * NOTE(review): lossy excerpt -- "elapsed" is used without a visible
 * declaration, and braces, early returns, and the #else/#endif around
 * the clock-mode output are missing.  Comments describe the visible
 * statements only.
 */
void dvmMethodTraceStop(void)
    MethodTraceState* state = &gDvm.methodTrace;

    /*
     * We need this to prevent somebody from starting a new trace while
     * we're in the process of stopping the old.
     */
    dvmLockMutex(&state->startStopLock);

    if (!state->traceEnabled) {
        /* somebody already stopped it, or it was never started */
        LOGD("TRACE stop requested, but not running\n");
        dvmUnlockMutex(&state->startStopLock);
    updateActiveProfilers(-1);

    /* compute elapsed time */
    elapsed = getTimeInUsec() - state->startWhen;

    /*
     * Globally disable it, and allow other threads to notice.  We want
     * to stall here for at least as long as dvmMethodTraceAdd needs
     * to finish.  There's no real risk though -- it will take a while to
     * write the data to disk, and we don't clear the buffer pointer until
     * after that completes.
     */
    state->traceEnabled = false;

    /* undo the alloc-count enable done at start, if any */
    if ((state->flags & TRACE_ALLOC_COUNTS) != 0)
        dvmStopAllocCounting();

    /*
     * It's possible under some circumstances for a thread to have advanced
     * the data pointer but not written the method value.  It's possible
     * (though less likely) for the data pointer to be advanced, or partial
     * data written, while we're doing work here.
     *
     * To avoid seeing partially-written data, we grab state->curOffset here,
     * and use our local copy from here on.  We then scan through what's
     * already written.  If we see the fill pattern in what should be the
     * method pointer, we cut things off early.  (If we don't, we'll fail
     * when we dereference the pointer.)
     *
     * There's a theoretical possibility of interrupting another thread
     * after it has partially written the method pointer, in which case
     * we'll likely crash when we dereference it.  The possibility of
     * this actually happening should be at or near zero.  Fixing it
     * completely could be done by writing the thread number last and
     * using a sentinel value to indicate a partially-written record,
     * but that requires memory barriers.
     */
    int finalCurOffset = state->curOffset;
    if (finalCurOffset > TRACE_HEADER_LEN) {
        u4 fillVal = METHOD_ID(FILL_PATTERN);
        u1* scanPtr = state->buf + TRACE_HEADER_LEN;

        while (scanPtr < state->buf + finalCurOffset) {
            /* reassemble the little-endian method word (record bytes 1-4) */
            u4 methodVal = scanPtr[1] | (scanPtr[2] << 8) | (scanPtr[3] << 16)
                        | (scanPtr[4] << 24);
            if (METHOD_ID(methodVal) == fillVal) {
                u1* scanBase = state->buf + TRACE_HEADER_LEN;
                LOGW("Found unfilled record at %d (of %d)\n",
                    (scanPtr - scanBase) / TRACE_REC_SIZE,
                    (finalCurOffset - TRACE_HEADER_LEN) / TRACE_REC_SIZE);
                /* truncate the output at the first unfilled record */
                finalCurOffset = scanPtr - state->buf;
            scanPtr += TRACE_REC_SIZE;

    LOGI("TRACE STOPPED%s: writing %d records\n",
        state->overflow ? " (NOTE: overflowed buffer)" : "",
        (finalCurOffset - TRACE_HEADER_LEN) / TRACE_REC_SIZE);
    if (gDvm.debuggerActive) {
        LOGW("WARNING: a debugger is active; method-tracing results "
    /*
     * Do a quick calibration test to see how expensive our clock call is.
     */
    u4 clockNsec = getClockOverhead();

    markTouchedMethods(finalCurOffset);

    /* emit the text key section that describes the binary records */
    fprintf(state->traceFile, "%cversion\n", TOKEN_CHAR);
    fprintf(state->traceFile, "%d\n", TRACE_VERSION);
    fprintf(state->traceFile, "data-file-overflow=%s\n",
        state->overflow ? "true" : "false");
#if defined(HAVE_POSIX_CLOCKS)
    fprintf(state->traceFile, "clock=thread-cpu\n");
    /* the next line is presumably the #else arm -- #else/#endif missing */
    fprintf(state->traceFile, "clock=global\n");
    fprintf(state->traceFile, "elapsed-time-usec=%llu\n", elapsed);
    fprintf(state->traceFile, "num-method-calls=%d\n",
        (finalCurOffset - TRACE_HEADER_LEN) / TRACE_REC_SIZE);
    fprintf(state->traceFile, "clock-call-overhead-nsec=%d\n", clockNsec);
    fprintf(state->traceFile, "vm=dalvik\n");
    if ((state->flags & TRACE_ALLOC_COUNTS) != 0) {
        fprintf(state->traceFile, "alloc-count=%d\n",
            gDvm.allocProf.allocCount);
        fprintf(state->traceFile, "alloc-size=%d\n",
            gDvm.allocProf.allocSize);
        fprintf(state->traceFile, "gc-count=%d\n",
            gDvm.allocProf.gcCount);
    fprintf(state->traceFile, "%cthreads\n", TOKEN_CHAR);
    dumpThreadList(state->traceFile);
    fprintf(state->traceFile, "%cmethods\n", TOKEN_CHAR);
    dumpMethodList(state->traceFile);
    fprintf(state->traceFile, "%cend\n", TOKEN_CHAR);

    /* append the binary trace records after the text key */
    if (fwrite(state->buf, finalCurOffset, 1, state->traceFile) != 1) {
        LOGE("trace fwrite(%d) failed, errno=%d\n", finalCurOffset, errno);
        dvmThrowException("Ljava/lang/RuntimeException;", "data write failed");
    fclose(state->traceFile);
    state->traceFile = NULL;

    /* wake any threads blocked waiting for the trace to finish */
    int cc = pthread_cond_broadcast(&state->threadExitCond);
    dvmUnlockMutex(&state->startStopLock);
622 * We just did something with a method. Emit a record.
624 * Multiple threads may be banging on this all at once. We use atomic ops
625 * rather than mutexes for speed.
/*
 * NOTE(review): lossy excerpt -- "ptr" and "now" are used without
 * visible declarations, and the "do {" opening the CAS retry loop plus
 * several closing braces/returns are missing.
 */
void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
    MethodTraceState* state = &gDvm.methodTrace;
    u4 clockDiff, methodVal;
    int oldOffset, newOffset;

    /*
     * We can only access the per-thread CPU clock from within the
     * thread, so we have to initialize the base time on the first use.
     * (Looks like pthread_getcpuclockid(thread, &id) will do what we
     * want, but it doesn't appear to be defined on the device.)
     */
    if (!self->cpuClockBaseSet) {
        self->cpuClockBase = getClock();
        self->cpuClockBaseSet = true;
        //LOGI("thread base id=%d 0x%llx\n",
        //    self->threadId, self->cpuClockBase);

    /*
     * Advance "curOffset" atomically.
     */
        oldOffset = state->curOffset;
        newOffset = oldOffset + TRACE_REC_SIZE;
        /* record doesn't fit: flag overflow and (presumably) bail out */
        if (newOffset > state->bufferSize) {
            state->overflow = true;
    } while (!ATOMIC_CMP_SWAP(&state->curOffset, oldOffset, newOffset));

    //assert(METHOD_ACTION((u4) method) == 0);

    /* per-thread CPU time delta since this thread's clock base */
    clockDiff = (u4) (now - self->cpuClockBase);

    /* pack the method pointer and entry/exit action into one word */
    methodVal = METHOD_COMBINE((u4) method, action);

    /*
     * Write data into "oldOffset".
     */
    ptr = state->buf + oldOffset;
    *ptr++ = self->threadId;
    *ptr++ = (u1) methodVal;
    *ptr++ = (u1) (methodVal >> 8);
    *ptr++ = (u1) (methodVal >> 16);
    *ptr++ = (u1) (methodVal >> 24);
    *ptr++ = (u1) clockDiff;
    *ptr++ = (u1) (clockDiff >> 8);
    *ptr++ = (u1) (clockDiff >> 16);
    *ptr++ = (u1) (clockDiff >> 24);
682 * We just did something with a method. Emit a record by setting a value
683 * in a magic memory location.
/*
 * We just did something with a method.  Emit a record by setting a value
 * in a magic memory location.
 *
 * NOTE(review): lossy excerpt -- "addr" is used without a visible
 * declaration, and the braces/returns and #endif around the
 * native-vs-bytecode branch are missing.
 */
void dvmEmitEmulatorTrace(const Method* method, int action)
#ifdef UPDATE_MAGIC_PAGE
    /*
     * We store the address of the Dalvik bytecodes to the memory-mapped
     * trace page for normal Java methods.  We also trace calls to native
     * functions by storing the address of the native function to the
     * trace page.
     * Abstract methods don't have any bytecodes, so we don't trace them.
     * (Abstract methods are never called, but in Dalvik they can be
     * because we do a "late trap" to a native method to generate the
     * abstract method exception.)
     */
    if (dvmIsAbstractMethod(method))

    u4* pMagic = (u4*) gDvm.emulatorTracePage;

    if (dvmIsNativeMethod(method)) {
        /*
         * The "action" parameter is one of the method enter/exit codes.
         *
         * To help the trace tools reconstruct the runtime stack containing
         * a mix of Java plus native methods, we add 4 to the action if this
         * is a native method.
         */
        /*
         * Get the address of the native function.
         * This isn't the right address -- how do I get it?
         * Fortunately, the trace tools can get by without the address, but
         * it would be nice to fix this.
         */
        addr = (u4) method->nativeFunc;
        /*
         * The dexlist output shows the &DexCode.insns offset value, which
         * is offset from the start of the base DEX header.  Method.insns
         * is the absolute address, effectively offset from the start of
         * the optimized DEX header.  We either need to return the
         * optimized DEX base file address offset by the right amount, or
         * take the "real" address and subtract off the size of the
         * optimized DEX header.
         *
         * Would be nice to factor this out at dexlist time, but we can't count
         * on having access to the correct optimized DEX file.
         */
        assert(method->insns != NULL);
        const DexOptHeader* pOptHdr = method->clazz->pDvmDex->pDexFile->pOptHeader;
        addr = (u4) method->insns - pOptHdr->dexOffset;

    /* store into the per-action slot of the magic page */
    *(pMagic+action) = addr;
    LOGVV("Set %p = 0x%08x (%s.%s)\n",
        pMagic+action, addr, method->clazz->descriptor, method->name);
748 * The GC calls this when it's about to start. We add a marker to the
749 * trace output so the tool can exclude the GC cost from the results.
751 void dvmMethodTraceGCBegin(void)
753 TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTrace.gcMethod);
755 void dvmMethodTraceGCEnd(void)
757 TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTrace.gcMethod);
761 * The class loader calls this when it's loading or initializing a class.
763 void dvmMethodTraceClassPrepBegin(void)
765 TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTrace.classPrepMethod);
767 void dvmMethodTraceClassPrepEnd(void)
769 TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTrace.classPrepMethod);
774 * Enable emulator trace info.
776 void dvmEmulatorTraceStart(void)
778 /* If we could not map the emulator trace page, then do not enable tracing */
779 if (gDvm.emulatorTracePage == NULL)
782 updateActiveProfilers(1);
784 /* in theory we should make this an atomic inc; in practice not important */
785 gDvm.emulatorTraceEnableCount++;
786 if (gDvm.emulatorTraceEnableCount == 1)
787 LOGD("--- emulator method traces enabled\n");
791 * Disable emulator trace info.
793 void dvmEmulatorTraceStop(void)
795 if (gDvm.emulatorTraceEnableCount == 0) {
796 LOGE("ERROR: emulator tracing not enabled\n");
799 updateActiveProfilers(-1);
800 /* in theory we should make this an atomic inc; in practice not important */
801 gDvm.emulatorTraceEnableCount--;
802 if (gDvm.emulatorTraceEnableCount == 0)
803 LOGD("--- emulator method traces disabled\n");
808 * Start instruction counting.
810 void dvmStartInstructionCounting()
812 updateActiveProfilers(1);
813 /* in theory we should make this an atomic inc; in practice not important */
814 gDvm.instructionCountEnableCount++;
 * Stop instruction counting.
820 void dvmStopInstructionCounting()
822 if (gDvm.instructionCountEnableCount == 0) {
823 LOGE("ERROR: instruction counting not enabled\n");
826 updateActiveProfilers(-1);
827 gDvm.instructionCountEnableCount--;
832 * Start alloc counting. Note this doesn't affect the "active profilers"
833 * count, since the interpreter loop is not involved.
835 void dvmStartAllocCounting(void)
837 gDvm.allocProf.enabled = true;
841 * Stop alloc counting.
843 void dvmStopAllocCounting(void)
845 gDvm.allocProf.enabled = false;
848 #endif /*WITH_PROFILER*/