
Add support for sample profiling in traceview.
author     Jeff Hao <jeffhao@google.com>
           Wed, 21 Aug 2013 23:54:45 +0000 (16:54 -0700)
committer  Brian Carlstrom <bdc@google.com>
           Thu, 29 Aug 2013 05:43:58 +0000 (22:43 -0700)
Instead of method instrumentation, allow traceview to periodically
sample stack traces of threads. Disabled by default until we add
gui support for this new mode.

(cherry picked from commit 676f8a527fb62abd39663d55c7d9208f5ca03093)

Change-Id: Ia5d0d57012305a5830d042bcb903a429432b035b
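For reference, the core of the new sampling mode is the trace-diffing step in getSample() below: each periodic stack sample is compared with the previous one, and only the frames that changed are written out as method enter/exit events. The following standalone C++ sketch (not part of the commit; the method-name strings and the diffSamples/emit helpers are illustrative stand-ins for the VM's Method* pointers and dvmMethodTraceAdd() calls) shows that diffing scheme in isolation:

    // Sketch of the sample-diffing idea used by getSample() in vm/Profile.cpp.
    // Traces are ordered top-of-stack first, matching getStackTrace().
    #include <cstdio>
    #include <string>
    #include <vector>

    using StackTrace = std::vector<std::string>;

    static void emit(const char* action, const std::string& method) {
        std::printf("%s %s\n", action, method.c_str());
    }

    // Diff oldTrace -> newTrace: walk both bottom-up past the common suffix
    // (the shared outer frames), then emit exits for frames unique to the old
    // trace (top-down) and entries for frames unique to the new trace (bottom-up).
    static void diffSamples(const StackTrace& oldTrace, const StackTrace& newTrace) {
        int diffOld = (int) oldTrace.size() - 1;
        int diffNew = (int) newTrace.size() - 1;
        while (diffOld >= 0 && diffNew >= 0 &&
               oldTrace[diffOld] == newTrace[diffNew]) {
            diffOld--;
            diffNew--;
        }
        for (int i = 0; i <= diffOld; ++i)
            emit("EXIT ", oldTrace[i]);
        for (int i = diffNew; i >= 0; --i)
            emit("ENTER", newTrace[i]);
    }

    int main() {
        // Sample 1: main() -> run() -> parse()
        StackTrace first  = { "parse", "run", "main" };
        // Sample 2: main() -> run() -> render() -> draw()
        StackTrace second = { "draw", "render", "run", "main" };

        // With no previous sample, every frame produces an entry event.
        diffSamples({}, first);
        // Later samples only produce events for frames that changed.
        diffSamples(first, second);
        return 0;
    }

Because the diff is expressed as ordinary enter/exit records, the sampled output stays compatible with the existing traceview file format produced by full method instrumentation.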

vm/Profile.cpp
vm/Profile.h
vm/Thread.cpp
vm/Thread.h
vm/interp/Interp.cpp
vm/interp/InterpState.h

diff --git a/vm/Profile.cpp b/vm/Profile.cpp
index dfb50b3..8492cd5 100644
@@ -118,11 +118,12 @@ static inline u8 getWallTimeInUsec()
  * We use this clock when we can because it enables us to track the time that
  * a thread spends running and not blocked.
  */
-static inline u8 getThreadCpuTimeInUsec()
+static inline u8 getThreadCpuTimeInUsec(Thread* thread)
 {
+    clockid_t cid;
     struct timespec tm;
-
-    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tm);
+    pthread_getcpuclockid(thread->handle, &cid);
+    clock_gettime(cid, &tm);
     if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) {
         ALOGE("bad nsec: %ld", tm.tv_nsec);
         dvmAbort();
@@ -137,7 +138,7 @@ static inline u8 getThreadCpuTimeInUsec()
 static inline u8 getStopwatchClock()
 {
 #if defined(HAVE_POSIX_CLOCKS)
-    return getThreadCpuTimeInUsec();
+    return getThreadCpuTimeInUsec(dvmThreadSelf());
 #else
     return getWallTimeInUsec();
 #endif
@@ -171,6 +172,131 @@ static inline void storeLongLE(u1* buf, u8 val)
 }
 
 /*
+ * Gets a thread's stack trace as an array of method pointers of length pCount.
+ * The returned array must be freed by the caller.
+ */
+static const Method** getStackTrace(Thread* thread, size_t* pCount)
+{
+    void* fp = thread->interpSave.curFrame;
+    assert(thread == dvmThreadSelf() || dvmIsSuspended(thread));
+
+    /* Compute the stack depth. */
+    size_t stackDepth = 0;
+    while (fp != NULL) {
+        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
+
+        if (!dvmIsBreakFrame((u4*) fp))
+            stackDepth++;
+
+        assert(fp != saveArea->prevFrame);
+        fp = saveArea->prevFrame;
+    }
+    *pCount = stackDepth;
+
+    /*
+     * Allocate memory for stack trace. This must be freed later, either by
+     * freeThreadStackTraceSamples when tracing stops or by freeThread.
+     */
+    const Method** stackTrace = (const Method**) malloc(sizeof(Method*) *
+                                                        stackDepth);
+    if (stackTrace == NULL)
+        return NULL;
+
+    /* Walk the stack a second time, filling in the stack trace. */
+    const Method** ptr = stackTrace;
+    fp = thread->interpSave.curFrame;
+    while (fp != NULL) {
+        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
+        const Method* method = saveArea->method;
+
+        if (!dvmIsBreakFrame((u4*) fp)) {
+            *ptr++ = method;
+            stackDepth--;
+        }
+        assert(fp != saveArea->prevFrame);
+        fp = saveArea->prevFrame;
+    }
+    assert(stackDepth == 0);
+
+    return stackTrace;
+}
+
+/*
+ * Get a sample of the stack trace for a thread.
+ */
+static void getSample(Thread* thread)
+{
+    /* Get old and new stack trace for thread. */
+    size_t newLength = 0;
+    const Method** newStackTrace = getStackTrace(thread, &newLength);
+    size_t oldLength = thread->stackTraceSampleLength;
+    const Method** oldStackTrace = thread->stackTraceSample;
+
+    /* Read time clocks to use for all events in this trace. */
+    u4 cpuClockDiff = 0;
+    u4 wallClockDiff = 0;
+    dvmMethodTraceReadClocks(thread, &cpuClockDiff, &wallClockDiff);
+    if (oldStackTrace == NULL) {
+        /*
+         * If there's no previous stack trace sample, log an entry event for
+         * every method in the trace.
+         */
+        for (int i = newLength - 1; i >= 0; --i) {
+            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
+                              cpuClockDiff, wallClockDiff);
+        }
+    } else {
+        /*
+         * If there's a previous stack trace, diff the traces and emit entry
+         * and exit events accordingly.
+         */
+        int diffIndexOld = oldLength - 1;
+        int diffIndexNew = newLength - 1;
+        /* Iterate bottom-up until there's a difference between traces. */
+        while (diffIndexOld >= 0 && diffIndexNew >= 0 &&
+               oldStackTrace[diffIndexOld] == newStackTrace[diffIndexNew]) {
+            diffIndexOld--;
+            diffIndexNew--;
+        }
+        /* Iterate top-down over old trace until diff, emitting exit events. */
+        for (int i = 0; i <= diffIndexOld; ++i) {
+            dvmMethodTraceAdd(thread, oldStackTrace[i], METHOD_TRACE_EXIT,
+                              cpuClockDiff, wallClockDiff);
+        }
+        /* Iterate bottom-up over new trace from diff, emitting entry events. */
+        for (int i = diffIndexNew; i >= 0; --i) {
+            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
+                              cpuClockDiff, wallClockDiff);
+        }
+    }
+
+    /* Free the old stack trace and update the thread's stack trace sample. */
+    free(oldStackTrace);
+    thread->stackTraceSample = newStackTrace;
+    thread->stackTraceSampleLength = newLength;
+}
+
+/*
+ * Entry point for sampling thread.
+ */
+static void* runSamplingThread(void* arg)
+{
+    while (gDvm.methodTrace.traceEnabled) {
+        dvmSuspendAllThreads(SUSPEND_FOR_SAMPLING);
+
+        dvmLockThreadList(dvmThreadSelf());
+        for (Thread *thread = gDvm.threadList; thread != NULL; thread = thread->next) {
+            getSample(thread);
+        }
+        dvmUnlockThreadList();
+
+        dvmResumeAllThreads(SUSPEND_FOR_SAMPLING);
+
+        usleep(gDvm.methodTrace.samplingIntervalUs);
+    }
+    return NULL;
+}
+
+/*
  * Boot-time init.
  */
 bool dvmProfilingStartup()
@@ -282,6 +408,21 @@ static void resetCpuClockBase()
 }
 
 /*
+ * Free and reset the "stackTraceSample" field in all threads.
+ */
+static void freeThreadStackTraceSamples()
+{
+    Thread* thread;
+
+    dvmLockThreadList(NULL);
+    for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
+        free(thread->stackTraceSample);
+        thread->stackTraceSample = NULL;
+    }
+    dvmUnlockThreadList();
+}
+
+/*
  * Dump the thread list to the specified file.
  */
 static void dumpThreadList(FILE* fp) {
@@ -428,6 +569,10 @@ void dvmMethodTraceStart(const char* traceFileName, int traceFd, int bufferSize,
         state->recordSize = TRACE_REC_SIZE_SINGLE_CLOCK;
     }
 
+    /* TODO: Control sampling through gui. */
+    state->samplingEnabled = false;
+    state->samplingIntervalUs = 1000;
+
     /*
      * Output the header.
      */
@@ -452,7 +597,17 @@ void dvmMethodTraceStart(const char* traceFileName, int traceFd, int bufferSize,
      * following to take a Thread* argument, and set the appropriate
      * interpBreak flags only on the target thread.
      */
-    updateActiveProfilers(kSubModeMethodTrace, true);
+    if (state->samplingEnabled) {
+        updateActiveProfilers(kSubModeSampleTrace, true);
+        /* Start the sampling thread. */
+        if (!dvmCreateInternalThread(&state->samplingThreadHandle,
+                "Sampling Thread", &runSamplingThread, NULL)) {
+            dvmThrowInternalError("failed to create sampling thread");
+            goto fail;
+        }
+    } else {
+        updateActiveProfilers(kSubModeMethodTrace, true);
+    }
 
     dvmUnlockMutex(&state->startStopLock);
     return;
@@ -500,7 +655,7 @@ static inline void measureClockOverhead()
 {
 #if defined(HAVE_POSIX_CLOCKS)
     if (useThreadCpuClock()) {
-        getThreadCpuTimeInUsec();
+        getThreadCpuTimeInUsec(dvmThreadSelf());
     }
 #endif
     if (useWallClock()) {
@@ -565,7 +720,11 @@ void dvmMethodTraceStop()
         dvmUnlockMutex(&state->startStopLock);
         return;
     } else {
-        updateActiveProfilers(kSubModeMethodTrace, false);
+        if (state->samplingEnabled) {
+            updateActiveProfilers(kSubModeSampleTrace, false);
+        } else {
+            updateActiveProfilers(kSubModeMethodTrace, false);
+        }
     }
 
     /* compute elapsed time */
@@ -719,9 +878,42 @@ void dvmMethodTraceStop()
     fclose(state->traceFile);
     state->traceFile = NULL;
 
+    /* free and clear sampling traces held by all threads */
+    if (state->samplingEnabled) {
+        freeThreadStackTraceSamples();
+    }
+
     /* wake any threads that were waiting for profiling to complete */
     dvmBroadcastCond(&state->threadExitCond);
     dvmUnlockMutex(&state->startStopLock);
+
+    /* make sure the sampling thread has stopped */
+    if (state->samplingEnabled &&
+        pthread_join(state->samplingThreadHandle, NULL) != 0) {
+        ALOGW("Sampling thread join failed");
+    }
+}
+
+/*
+ * Read clocks and generate time diffs for method trace events.
+ */
+void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff,
+                              u4* wallClockDiff)
+{
+#if defined(HAVE_POSIX_CLOCKS)
+    if (useThreadCpuClock()) {
+        if (!self->cpuClockBaseSet) {
+            /* Initialize per-thread CPU clock base time on first use. */
+            self->cpuClockBase = getThreadCpuTimeInUsec(self);
+            self->cpuClockBaseSet = true;
+        } else {
+            *cpuClockDiff = getThreadCpuTimeInUsec(self) - self->cpuClockBase;
+        }
+    }
+#endif
+    if (useWallClock()) {
+        *wallClockDiff = getWallTimeInUsec() - gDvm.methodTrace.startWhen;
+    }
 }
 
 /*
@@ -730,7 +922,8 @@ void dvmMethodTraceStop()
  * Multiple threads may be banging on this all at once.  We use atomic ops
  * rather than mutexes for speed.
  */
-void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
+void dvmMethodTraceAdd(Thread* self, const Method* method, int action,
+                       u4 cpuClockDiff, u4 wallClockDiff)
 {
     MethodTraceState* state = &gDvm.methodTrace;
     u4 methodVal;
@@ -739,21 +932,6 @@ void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
 
     assert(method != NULL);
 
-#if defined(HAVE_POSIX_CLOCKS)
-    /*
-     * We can only access the per-thread CPU clock from within the
-     * thread, so we have to initialize the base time on the first use.
-     * (Looks like pthread_getcpuclockid(thread, &id) will do what we
-     * want, but it doesn't appear to be defined on the device.)
-     */
-    if (!self->cpuClockBaseSet) {
-        self->cpuClockBase = getThreadCpuTimeInUsec();
-        self->cpuClockBaseSet = true;
-        //ALOGI("thread base id=%d 0x%llx",
-        //    self->threadId, self->cpuClockBase);
-    }
-#endif
-
     /*
      * Advance "curOffset" atomically.
      */
@@ -784,7 +962,6 @@ void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
 
 #if defined(HAVE_POSIX_CLOCKS)
     if (useThreadCpuClock()) {
-        u4 cpuClockDiff = (u4) (getThreadCpuTimeInUsec() - self->cpuClockBase);
         *ptr++ = (u1) cpuClockDiff;
         *ptr++ = (u1) (cpuClockDiff >> 8);
         *ptr++ = (u1) (cpuClockDiff >> 16);
@@ -793,7 +970,6 @@ void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
 #endif
 
     if (useWallClock()) {
-        u4 wallClockDiff = (u4) (getWallTimeInUsec() - state->startWhen);
         *ptr++ = (u1) wallClockDiff;
         *ptr++ = (u1) (wallClockDiff >> 8);
         *ptr++ = (u1) (wallClockDiff >> 16);
@@ -809,7 +985,11 @@ void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
 void dvmFastMethodTraceEnter(const Method* method, Thread* self)
 {
     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
-        dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER);
+        u4 cpuClockDiff = 0;
+        u4 wallClockDiff = 0;
+        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
+        dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER, cpuClockDiff,
+                          wallClockDiff);
     }
 }
 
@@ -821,8 +1001,11 @@ void dvmFastMethodTraceEnter(const Method* method, Thread* self)
 void dvmFastMethodTraceExit(Thread* self)
 {
     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
+        u4 cpuClockDiff = 0;
+        u4 wallClockDiff = 0;
+        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
         dvmMethodTraceAdd(self, self->interpSave.method,
-                          METHOD_TRACE_EXIT);
+                          METHOD_TRACE_EXIT, cpuClockDiff, wallClockDiff);
     }
 }
 
@@ -834,7 +1017,11 @@ void dvmFastMethodTraceExit(Thread* self)
 void dvmFastNativeMethodTraceExit(const Method* method, Thread* self)
 {
     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
-        dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT);
+        u4 cpuClockDiff = 0;
+        u4 wallClockDiff = 0;
+        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
+        dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT, cpuClockDiff,
+                          wallClockDiff);
     }
 }
 
diff --git a/vm/Profile.h b/vm/Profile.h
index 6c9e1c7..0ec8705 100644
@@ -52,6 +52,10 @@ struct MethodTraceState {
 
     int     traceVersion;
     size_t  recordSize;
+
+    bool    samplingEnabled;
+    int     samplingIntervalUs;
+    pthread_t       samplingThreadHandle;
 };
 
 /*
@@ -112,27 +116,45 @@ enum {
  */
 #define TRACE_METHOD_ENTER(_self, _method)                                  \
     do {                                                                    \
-        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace)           \
-            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_ENTER);          \
+        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace) {         \
+            u4 cpuClockDiff = 0;                                            \
+            u4 wallClockDiff = 0;                                           \
+            dvmMethodTraceReadClocks(_self, &cpuClockDiff, &wallClockDiff); \
+            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_ENTER,           \
+                              cpuClockDiff, wallClockDiff);                 \
+        }                                                                   \
         if (_self->interpBreak.ctl.subMode & kSubModeEmulatorTrace)         \
             dvmEmitEmulatorTrace(_method, METHOD_TRACE_ENTER);              \
     } while(0);
 #define TRACE_METHOD_EXIT(_self, _method)                                   \
     do {                                                                    \
-        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace)           \
-            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_EXIT);           \
+        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace) {         \
+            u4 cpuClockDiff = 0;                                            \
+            u4 wallClockDiff = 0;                                           \
+            dvmMethodTraceReadClocks(_self, &cpuClockDiff, &wallClockDiff); \
+            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_EXIT,            \
+                              cpuClockDiff, wallClockDiff);                 \
+        }                                                                   \
         if (_self->interpBreak.ctl.subMode & kSubModeEmulatorTrace)         \
             dvmEmitEmulatorTrace(_method, METHOD_TRACE_EXIT);               \
     } while(0);
 #define TRACE_METHOD_UNROLL(_self, _method)                                 \
     do {                                                                    \
-        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace)           \
-            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_UNROLL);         \
+        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace) {         \
+            u4 cpuClockDiff = 0;                                            \
+            u4 wallClockDiff = 0;                                           \
+            dvmMethodTraceReadClocks(_self, &cpuClockDiff, &wallClockDiff); \
+            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_UNROLL,          \
+                              cpuClockDiff, wallClockDiff);                 \
+        }                                                                   \
         if (_self->interpBreak.ctl.subMode & kSubModeEmulatorTrace)         \
             dvmEmitEmulatorTrace(_method, METHOD_TRACE_UNROLL);             \
     } while(0);
 
-void dvmMethodTraceAdd(struct Thread* self, const Method* method, int action);
+void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff,
+                              u4* wallClockDiff);
+void dvmMethodTraceAdd(struct Thread* self, const Method* method, int action,
+                       u4 cpuClockDiff, u4 wallClockDiff);
 void dvmEmitEmulatorTrace(const Method* method, int action);
 
 void dvmMethodTraceGCBegin(void);
diff --git a/vm/Thread.cpp b/vm/Thread.cpp
index 205167e..1ebfca7 100644
@@ -997,6 +997,7 @@ static void freeThread(Thread* thread)
 #if defined(WITH_SELF_VERIFICATION)
     dvmSelfVerificationShadowSpaceFree(thread);
 #endif
+    free(thread->stackTraceSample);
     free(thread);
 }
 
diff --git a/vm/Thread.h b/vm/Thread.h
index 8deef6e..19bd49c 100644
@@ -287,6 +287,10 @@ struct Thread {
     bool        cpuClockBaseSet;
     u8          cpuClockBase;
 
+    /* previous stack trace sample and length (used by sampling profiler) */
+    const Method** stackTraceSample;
+    size_t stackTraceSampleLength;
+
     /* memory allocation profiling state */
     AllocProfState allocProf;
 
@@ -358,6 +362,7 @@ enum SuspendCause {
     SUSPEND_FOR_DEX_OPT,
     SUSPEND_FOR_VERIFY,
     SUSPEND_FOR_HPROF,
+    SUSPEND_FOR_SAMPLING,
 #if defined(WITH_JIT)
     SUSPEND_FOR_TBL_RESIZE,  // jit-table resize
     SUSPEND_FOR_IC_PATCH,    // polymorphic callsite inline-cache patch
diff --git a/vm/interp/Interp.cpp b/vm/interp/Interp.cpp
index fa77523..2b13bd8 100644
@@ -1661,7 +1661,11 @@ void dvmInitializeInterpBreak(Thread* thread)
         dvmEnableSubMode(thread, kSubModeInstCounting);
     }
     if (dvmIsMethodTraceActive()) {
-        dvmEnableSubMode(thread, kSubModeMethodTrace);
+        if (gDvm.methodTrace.samplingEnabled) {
+            dvmEnableSubMode(thread, kSubModeSampleTrace);
+        } else {
+            dvmEnableSubMode(thread, kSubModeMethodTrace);
+        }
     }
     if (gDvm.emulatorTraceEnableCount > 0) {
         dvmEnableSubMode(thread, kSubModeEmulatorTrace);
diff --git a/vm/interp/InterpState.h b/vm/interp/InterpState.h
index eaac337..8d2c224 100644
@@ -63,6 +63,7 @@ enum ExecutionSubModes {
     kSubModeCallbackPending   = 0x0020,
     kSubModeCountedStep       = 0x0040,
     kSubModeCheckAlways       = 0x0080,
+    kSubModeSampleTrace       = 0x0100,
     kSubModeJitTraceBuild     = 0x4000,
     kSubModeJitSV             = 0x8000,
     kSubModeDebugProfile   = (kSubModeMethodTrace |