OSDN Git Service

perf sched: Add option to merge like comms to lat output
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / tools / perf / builtin-sched.c
index 79273ec..3396261 100644 (file)
@@ -95,6 +95,7 @@ struct work_atoms {
        u64                     total_lat;
        u64                     nb_atoms;
        u64                     total_runtime;
+       int                     num_merged;
 };
 
 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
@@ -168,9 +169,10 @@ struct perf_sched {
        u64              all_runtime;
        u64              all_count;
        u64              cpu_last_switched[MAX_CPUS];
-       struct rb_root   atom_root, sorted_atom_root;
+       struct rb_root   atom_root, sorted_atom_root, merged_atom_root;
        struct list_head sort_list, cmp_pid;
        bool force;
+       bool skip_merge;
 };
 
 static u64 get_nsecs(void)
@@ -1182,7 +1184,10 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
        sched->all_runtime += work_list->total_runtime;
        sched->all_count   += work_list->nb_atoms;
 
-       ret = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
+       if (work_list->num_merged > 1)
+               ret = printf("  %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
+       else
+               ret = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
 
        for (i = 0; i < 24 - ret; i++)
                printf(" ");
@@ -1302,17 +1307,22 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
 static void perf_sched__sort_lat(struct perf_sched *sched)
 {
        struct rb_node *node;
-
+       struct rb_root *root = &sched->atom_root; /* pass 1 drains atom_root; pass 2 (via "goto again") drains merged_atom_root */
+again:
        for (;;) {
                struct work_atoms *data;
-               node = rb_first(&sched->atom_root);
+               node = rb_first(root); /* take nodes from whichever tree this pass is draining */
                if (!node)
                        break;
 
-               rb_erase(node, &sched->atom_root);
+               rb_erase(node, root);
                data = rb_entry(node, struct work_atoms, node);
                __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
        }
+       if (root == &sched->atom_root) { /* only true after the first pass, so the loop runs exactly twice */
+               root = &sched->merged_atom_root;
+               goto again;
+       }
 }
 
 static int process_sched_wakeup_event(struct perf_tool *tool,
@@ -1572,6 +1582,59 @@ static void print_bad_events(struct perf_sched *sched)
        }
 }
 
+static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) /* Fold data into root, a tree keyed by the thread's comm string. */
+{
+       struct rb_node **new = &(root->rb_node), *parent = NULL;
+       struct work_atoms *this;
+       const char *comm = thread__comm_str(data->thread), *this_comm;
+
+       while (*new) { /* descend until a node with the same comm or an empty slot is reached */
+               int cmp;
+
+               this = container_of(*new, struct work_atoms, node);
+               parent = *new;
+
+               this_comm = thread__comm_str(this->thread);
+               cmp = strcmp(comm, this_comm);
+               if (cmp > 0) { /* note the direction: a greater comm descends to the left */
+                       new = &((*new)->rb_left);
+               } else if (cmp < 0) {
+                       new = &((*new)->rb_right);
+               } else { /* same comm: accumulate data's counters into the existing node */
+                       this->num_merged++;
+                       this->total_runtime += data->total_runtime;
+                       this->nb_atoms += data->nb_atoms;
+                       this->total_lat += data->total_lat;
+                       list_splice(&data->work_list, &this->work_list); /* data's work_list is spliced into this->work_list */
+                       if (this->max_lat < data->max_lat) { /* keep the larger max_lat together with its max_lat_at */
+                               this->max_lat = data->max_lat;
+                               this->max_lat_at = data->max_lat_at;
+                       }
+                       zfree(&data); /* data is handed to zfree() once merged; the caller must not use it after this call */
+                       return;
+               }
+       }
+
+       data->num_merged++; /* no node with this comm yet: data itself is linked into the tree below */
+       rb_link_node(&data->node, parent, new);
+       rb_insert_color(&data->node, root);
+}
+
+static void perf_sched__merge_lat(struct perf_sched *sched) /* Move every work_atoms from atom_root into merged_atom_root, combining entries that share a comm. */
+{
+       struct work_atoms *data;
+       struct rb_node *node;
+
+       if (sched->skip_merge) /* bound to the -p/--pids option: leave atom_root untouched */
+               return;
+
+       while ((node = rb_first(&sched->atom_root))) { /* drain atom_root one node at a time */
+               rb_erase(node, &sched->atom_root);
+               data = rb_entry(node, struct work_atoms, node);
+               __merge_work_atoms(&sched->merged_atom_root, data); /* do not touch data afterwards: the callee zfree()s it on a comm match */
+       }
+}
+
 static int perf_sched__lat(struct perf_sched *sched)
 {
        struct rb_node *next;
@@ -1581,6 +1644,7 @@ static int perf_sched__lat(struct perf_sched *sched)
        if (perf_sched__read_events(sched))
                return -1;
 
+       perf_sched__merge_lat(sched);
        perf_sched__sort_lat(sched);
 
        printf("\n -----------------------------------------------------------------------------------------------------------------\n");
@@ -1732,6 +1796,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
                .profile_cpu          = -1,
                .next_shortname1      = 'A',
                .next_shortname2      = '0',
+               .skip_merge           = 0,
        };
        const struct option latency_options[] = {
        OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
@@ -1742,6 +1807,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
                    "CPU to profile on"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
+       OPT_BOOLEAN('p', "pids", &sched.skip_merge,
+                   "latency stats per pid instead of per comm"),
        OPT_END()
        };
        const struct option replay_options[] = {