2 * Copyright (c) 2015, The Android Open Source Project
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google, Inc. nor the names of its contributors
15 * may be used to endorse or promote products derived from this
16 * software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <binder/IBinder.h>
33 #include <binder/IServiceManager.h>
34 #include <binder/Parcel.h>
37 #include <cutils/properties.h>
44 #include <sys/resource.h>
47 #include <sys/types.h>
50 #include <utils/Log.h>
51 #include <utils/String8.h>
52 #include <utils/Trace.h>
55 using namespace android;
57 #define CHECK_PERIOD 1 // in sec
58 #define TRACING_CHECK_PERIOD 500000 // in micro sec
59 #define MIN_BUFFER_SIZE 4
60 #define MIN_BUFFER_SIZE_STR "4"
61 #define MAX_BUFFER_SIZE 128
62 #define MAX_BUFFER_SIZE_STR "128"
63 #define CPU_STAT_ENTRIES 7 // number of cpu stat entries
69 #define LOG_TAG "anrdaemon"
71 typedef struct cpu_stat {
72 unsigned long utime, ntime, stime, itime;
73 unsigned long iowtime, irqtime, sirqtime, steal;
77 /* Make the logging on/off threshold equal to 95% cpu usage. */
78 static int idle_threshold = 5;
80 static bool quit = false;
81 static bool suspend= false;
82 static bool err = false;
83 static char err_msg[100];
84 static bool tracing = false;
86 static const char *buf_size_kb = "16";
87 static uint64_t tag = 0;
88 static const char* apps = "";
90 static cpu_stat_t new_cpu;
91 static cpu_stat_t old_cpu;
92 static Vector<String16> targets;
94 /* Log certain kernel activity when enabled */
95 static bool log_sched = false;
96 static bool log_stack = false;
97 static bool log_irq = false;
98 static bool log_sync = false;
99 static bool log_workq = false;
101 /* Paths for debugfs controls*/
102 static const char* dfs_trace_output_path =
104 static const char* dfs_irq_path =
105 "/d/tracing/events/irq/enable";
106 static const char* dfs_sync_path =
107 "/d/tracing/events/sync/enable";
108 static const char* dfs_workq_path =
109 "/d/tracing/events/workqueue/enable";
110 static const char* dfs_stack_path =
111 "/d/tracing/options/stacktrace";
112 static const char* dfs_sched_switch_path =
113 "/d/tracing/events/sched/sched_switch/enable";
114 static const char* dfs_sched_wakeup_path =
115 "/d/tracing/events/sched/sched_wakeup/enable";
116 static const char* dfs_control_path =
117 "/d/tracing/tracing_on";
118 static const char* dfs_buffer_size_path =
119 "/d/tracing/buffer_size_kb";
120 static const char* dfs_tags_property = "debug.atrace.tags.enableflags";
121 static const char* dfs_apps_property = "debug.atrace.app_cmdlines";
124 * Read accumulated cpu data from /proc/stat
126 static void get_cpu_stat(cpu_stat_t *cpu) {
128 const char *params = "cpu %lu %lu %lu %lu %lu %lu %lu %*d %*d %*d\n";
130 if ((fp = fopen("/proc/stat", "r")) == NULL) {
132 sprintf(err_msg, "can't read from /proc/stat with errno %d", errno);
134 if (fscanf(fp, params, &cpu->utime, &cpu->ntime,
135 &cpu->stime, &cpu->itime, &cpu->iowtime, &cpu->irqtime,
136 &cpu->sirqtime) != CPU_STAT_ENTRIES) {
138 * If failed in getting status, new_cpu won't be updated and
139 * is_heavy_loaded() will return false.
141 ALOGE("Error in getting cpu status. Skipping this check.");
145 cpu->total = cpu->utime + cpu->ntime + cpu->stime + cpu->itime
146 + cpu->iowtime + cpu->irqtime + cpu->sirqtime;
153 * Calculate cpu usage in the past interval.
154 * If tracing is on, increase the idle threshold by 1% so that we do not
155 * turn on and off tracing frequently whe the cpu load is right close to
158 static bool is_heavy_load(void) {
159 unsigned long diff_idle, diff_total;
160 int threshold = idle_threshold + (tracing?1:0);
161 get_cpu_stat(&new_cpu);
162 diff_idle = new_cpu.itime - old_cpu.itime;
163 diff_total = new_cpu.total - old_cpu.total;
165 return (diff_idle * 100 < diff_total * threshold);
169 * Force the userland processes to refresh their property for logging.
171 static void dfs_poke_binder(Vector<String16> services) {
172 sp<IServiceManager> sm = defaultServiceManager();
173 services = sm->listServices();
174 for (size_t i = 0; i < services.size(); i++) {
175 sp<IBinder> obj = sm->checkService(services[i]);
178 obj->transact(IBinder::SYSPROPS_TRANSACTION, data, NULL, 0);
184 * Enable/disable a debugfs property by writing 0/1 to its path.
186 static int dfs_enable(bool enable, const char* path) {
187 int fd = open(path, O_WRONLY);
190 sprintf(err_msg, "Can't open %s. Error: %d", path, errno);
193 const char* control = (enable?"1":"0");
194 ssize_t len = strlen(control);
195 int max_try = 10; // Fail if write was interrupted for 10 times
196 while (write(fd, control, len) != len) {
197 if (errno == EINTR && max_try-- > 0)
201 sprintf(err_msg, "Error %d in writing to %s.", errno, path);
208 * Set the userland tracing properties.
210 static void dfs_set_property(uint64_t mtag, const char* mapp, bool enable) {
212 snprintf(buf, 64, "%#" PRIx64, mtag);
213 if (property_set(dfs_tags_property, buf) < 0) {
215 sprintf(err_msg, "Failed to set debug tags system properties.");
219 && property_set(dfs_apps_property, mapp) < 0) {
221 sprintf(err_msg, "Failed to set debug applications.");
225 dfs_enable(enable, dfs_sched_switch_path);
226 dfs_enable(enable, dfs_sched_wakeup_path);
229 dfs_enable(enable, dfs_stack_path);
232 dfs_enable(enable, dfs_irq_path);
235 dfs_enable(enable, dfs_sync_path);
238 dfs_enable(enable, dfs_workq_path);
243 * Start logging when cpu usage is high. Meanwhile, moniter the cpu usage and
244 * stop logging when it drops down.
246 static void start_tracing(void) {
247 ALOGD("High cpu usage, start logging.");
249 dfs_set_property(tag, apps, true);
250 dfs_poke_binder(targets);
252 if (dfs_enable(true, dfs_control_path) != 0) {
253 ALOGE("Failed to start tracing.");
258 /* Stop logging when cpu usage drops or the daemon is suspended.*/
260 usleep(TRACING_CHECK_PERIOD);
261 } while (!suspend && is_heavy_load());
263 if (dfs_enable(false, dfs_control_path) != 0) {
264 ALOGE("Failed to stop tracing.");
267 dfs_set_property(0, "", false);
268 dfs_poke_binder(targets);
270 ALOGD("Usage back to low, stop logging.");
275 * Set the tracing log buffer size.
276 * Note the actual buffer size will be buf_size_kb * number of cores.
277 * E.g. for dory, the total buffer size is buf_size_kb * 4.
279 static int set_tracing_buffer_size(void) {
280 int fd = open(dfs_buffer_size_path, O_WRONLY);
283 sprintf(err_msg, "Can't open atrace buffer size file under /d/tracing.");
286 ssize_t len = strlen(buf_size_kb);
287 if (write(fd, buf_size_kb, len) != len) {
289 sprintf(err_msg, "Error in writing to atrace buffer size file.");
297 * Main loop to moniter the cpu usage and decided whether to start logging.
299 static void start(void) {
300 if ((set_tracing_buffer_size()) != 0)
302 get_cpu_stat(&old_cpu);
305 while (!quit && !err) {
306 if (!suspend && is_heavy_load()) {
308 * Increase process priority to make sure we can stop logging when
309 * necessary and do not overwrite the buffer
311 setpriority(PRIO_PROCESS, 0, -20);
313 setpriority(PRIO_PROCESS, 0, 0);
321 * Dump the log in a compressed format for systrace to visualize.
323 static void dump_trace()
325 int remain_attempts = 5;
328 ALOGI("Waiting logging to stop.");
329 usleep(TRACING_CHECK_PERIOD);
331 if (remain_attempts == 0) {
332 ALOGE("Can't stop logging after 5 attempts. Dump aborted.");
338 * Create a dump file "dump_of_anrdaemon.<current_time>" under /data/anr/
340 time_t now = time(0);
344 const char* header = " done\nTRACE:\n";
345 ssize_t header_len = strlen(header);
346 tstruct = *localtime(&now);
347 strftime(time_buf, sizeof(time_buf), "%Y-%m-%d.%X", &tstruct);
348 sprintf(path_buf, "/data/anr/dump_of_anrdaemon.%s", time_buf);
349 int output_fd = creat(path_buf, S_IRWXU);
350 if (output_fd == -1) {
351 ALOGE("Failed to create %s. Dump aborted.", path_buf);
355 if (write(output_fd, header, strlen(header)) != header_len) {
356 ALOGE("Failed to write the header.");
361 int trace_fd = open(dfs_trace_output_path, O_RDWR);
362 if (trace_fd == -1) {
363 ALOGE("Failed to open %s. Dump aborted.", dfs_trace_output_path);
372 memset(&zs, 0, sizeof(zs));
373 result = deflateInit(&zs, Z_DEFAULT_COMPRESSION);
374 if (result != Z_OK) {
375 ALOGE("error initializing zlib: %d\n", result);
381 const size_t bufSize = 64*1024;
382 in = (uint8_t*)malloc(bufSize);
383 out = (uint8_t*)malloc(bufSize);
387 zs.avail_out = bufSize;
390 if (zs.avail_in == 0) {
391 result = read(trace_fd, in, bufSize);
393 ALOGE("error reading trace: %s", strerror(errno));
394 result = Z_STREAM_END;
396 } else if (result == 0) {
400 zs.avail_in = result;
404 if (zs.avail_out == 0) {
405 result = write(output_fd, out, bufSize);
406 if ((size_t)result < bufSize) {
407 ALOGE("error writing deflated trace: %s", strerror(errno));
408 result = Z_STREAM_END;
409 zs.avail_out = bufSize;
413 zs.avail_out = bufSize;
416 } while ((result = deflate(&zs, flush)) == Z_OK);
418 if (result != Z_STREAM_END) {
419 ALOGE("error deflating trace: %s\n", zs.msg);
422 if (zs.avail_out < bufSize) {
423 size_t bytes = bufSize - zs.avail_out;
424 result = write(output_fd, out, bytes);
425 if ((size_t)result < bytes) {
426 ALOGE("error writing deflated trace: %s", strerror(errno));
430 result = deflateEnd(&zs);
431 if (result != Z_OK) {
432 ALOGE("error cleaning up zlib: %d\n", result);
442 ALOGI("Finished dump. Output file stored at: %s", path_buf);
445 static void handle_signal(int signo)
464 * Set the signal handler:
465 * SIGQUIT: Reset debugfs and tracing property and terminate the daemon.
466 * SIGSTOP: Stop logging and suspend the daemon.
467 * SIGCONT: Resume the daemon as normal.
468 * SIGUSR1: Dump the logging to a compressed format for systrace to visualize.
470 static void register_sighandler(void)
475 sigemptyset(&block_mask);
476 sigaddset (&block_mask, SIGQUIT);
477 sigaddset (&block_mask, SIGSTOP);
478 sigaddset (&block_mask, SIGCONT);
479 sigaddset (&block_mask, SIGUSR1);
482 sa.sa_mask = block_mask;
483 sa.sa_handler = handle_signal;
484 sigaction(SIGQUIT, &sa, NULL);
485 sigaction(SIGSTOP, &sa, NULL);
486 sigaction(SIGCONT, &sa, NULL);
487 sigaction(SIGUSR1, &sa, NULL);
490 static void show_help(void) {
492 fprintf(stderr, "usage: ANRdaemon [options] [categoris...]\n");
493 fprintf(stdout, "Options includes:\n"
494 " -a appname enable app-level tracing for a comma "
495 "separated list of cmdlines\n"
496 " -t N cpu threshold for logging to start "
497 "(min = 50, max = 100, default = 95)\n"
498 " -s N use a trace buffer size of N KB "
501 fprintf(stdout, "Categoris includes:\n"
502 " am - activity manager\n"
503 " sm - sync manager\n"
505 " app - application\n"
506 " dalvik - dalvik VM\n"
507 " irq - kernel irq events\n"
508 " sched - kernel scheduler activity\n"
509 " stack - kernel stack\n"
510 " sync - kernel sync activity\n"
511 " workq - kernel work queues\n");
512 fprintf(stdout, "Control includes:\n"
513 " SIGQUIT: terminate the process\n"
514 " SIGSTOP: suspend all function of the daemon\n"
515 " SIGCONT: resume the normal function\n"
516 " SIGUSR1: dump the current logging in a compressed form\n");
520 static int get_options(int argc, char *argv[]) {
523 while ((opt = getopt(argc, argv, "a:s:t:h")) >= 0) {
529 if (atoi(optarg) > MAX_BUFFER_SIZE)
530 buf_size_kb = MAX_BUFFER_SIZE_STR;
531 else if (atoi(optarg) < MIN_BUFFER_SIZE)
532 buf_size_kb = MIN_BUFFER_SIZE_STR;
534 buf_size_kb = optarg;
537 threshold = atoi(optarg);
538 if (threshold > 100 || threshold < 50) {
539 fprintf(stderr, "logging threshold should be 50-100\n");
542 idle_threshold = 100 - threshold;
548 fprintf(stderr, "Error in getting options.\n"
549 "run \"%s -h\" for usage.\n", argv[0]);
554 for (int i = optind; i < argc; i++) {
555 if (strcmp(argv[i], "am") == 0) {
556 tag |= ATRACE_TAG_ACTIVITY_MANAGER;
557 } else if (strcmp(argv[i], "input") == 0) {
558 tag |= ATRACE_TAG_INPUT;
559 } else if (strcmp(argv[i], "sm") == 0) {
560 tag |= ATRACE_TAG_SYNC_MANAGER;
561 } else if (strcmp(argv[i], "app") == 0) {
562 tag |= ATRACE_TAG_APP;
563 } else if (strcmp(argv[i], "dalvik") == 0) {
564 tag |= ATRACE_TAG_DALVIK;
565 } else if (strcmp(argv[i], "sched") == 0) {
567 } else if (strcmp(argv[i], "stack") == 0) {
569 } else if (strcmp(argv[i], "workq") == 0) {
571 } else if (strcmp(argv[i], "irq") == 0) {
573 } else if (strcmp(argv[i], "sync") == 0) {
576 fprintf(stderr, "invalid category: %s\n"
577 "run \"%s -h\" for usage.\n", argv[i], argv[0]);
582 bool kernel_log = log_sched || log_stack || log_workq || log_irq || log_sync;
583 bool app_log = (tag == 0);
586 * There are ~80 services. Too expensive to poke all of them. Just include
587 * service that may help high CPU ANR analysis.
590 targets.push_back(String16("activity"));
591 targets.push_back(String16("alarm"));
592 targets.push_back(String16("appops"));
593 targets.push_back(String16("cpuinfo"));
594 targets.push_back(String16("meminfo"));
595 targets.push_back(String16("procstats"));
596 targets.push_back(String16("input"));
597 targets.push_back(String16("lancherapps"));
598 targets.push_back(String16("bluetooth_manager"));
599 targets.push_back(String16("SurfaceFlinger"));
600 targets.push_back(String16("ClockworkProxyNativeService"));
602 if (!kernel_log && !app_log) {
603 tag |= ATRACE_TAG_ACTIVITY_MANAGER;
604 targets.push_back(String16("activity"));
610 int main(int argc, char *argv[])
612 if(get_options(argc, argv) != 0)
615 if (daemon(0, 0) != 0)
618 register_sighandler();
620 /* Clear any the trace log file by overwrite it with a new file */
621 int fd = creat(dfs_trace_output_path, 0);
623 ALOGE("Faield to open and cleaup previous log");
628 ALOGI("ANRdaemon starting");
632 ALOGE("ANRdaemon stopped due to Error: %s", err_msg);
634 ALOGI("ANRdaemon terminated.");