uint64_t time_running = 0;
};
+struct ThreadInfo {
+ pid_t tid;
+ pid_t pid;
+ std::string name;
+};
+
struct CounterSummary {
std::string type_name;
std::string modifier;
uint32_t group_id;
+ const ThreadInfo* thread;
uint64_t count;
double scale;
std::string readable_count;
std::string comment;
bool auto_generated;
- CounterSummary(const std::string& type_name, const std::string& modifier,
- uint32_t group_id, uint64_t count, double scale,
- bool auto_generated, bool csv)
+ CounterSummary(const std::string& type_name, const std::string& modifier, uint32_t group_id,
+ const ThreadInfo* thread, uint64_t count, double scale, bool auto_generated,
+ bool csv)
: type_name(type_name),
modifier(modifier),
group_id(group_id),
+ thread(thread),
count(count),
scale(scale),
auto_generated(auto_generated) {
explicit CounterSummaries(bool csv) : csv_(csv) {}
std::vector<CounterSummary>& Summaries() { return summaries_; }
- const CounterSummary* FindSummary(const std::string& type_name,
- const std::string& modifier) {
+ const CounterSummary* FindSummary(const std::string& type_name, const std::string& modifier,
+ const ThreadInfo* thread) {
for (const auto& s : summaries_) {
- if (s.type_name == type_name && s.modifier == modifier) {
+ if (s.type_name == type_name && s.modifier == modifier && s.thread == thread) {
return &s;
}
}
for (size_t i = 0; i < summaries_.size(); ++i) {
const CounterSummary& s = summaries_[i];
if (s.modifier == "u") {
- const CounterSummary* other = FindSummary(s.type_name, "k");
+ const CounterSummary* other = FindSummary(s.type_name, "k", s.thread);
if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
- if (FindSummary(s.type_name, "") == nullptr) {
- Summaries().emplace_back(s.type_name, "", s.group_id, s.count + other->count, s.scale,
- true, csv_);
+ if (FindSummary(s.type_name, "", s.thread) == nullptr) {
+ Summaries().emplace_back(s.type_name, "", s.group_id, s.thread,
+ s.count + other->count, s.scale, true, csv_);
}
}
}
}
void Show(FILE* fp) {
- size_t count_column_width = 0;
- size_t name_column_width = 0;
- size_t comment_column_width = 0;
+ if (csv_) {
+ ShowCSV(fp);
+ } else {
+ ShowText(fp);
+ }
+ }
+
+ void ShowCSV(FILE* fp) {
for (auto& s : summaries_) {
- count_column_width =
- std::max(count_column_width, s.readable_count.size());
- name_column_width = std::max(name_column_width, s.Name().size());
- comment_column_width = std::max(comment_column_width, s.comment.size());
+ if (s.thread != nullptr) {
+ fprintf(fp, "%s,%d,%d,", s.thread->name.c_str(), s.thread->pid, s.thread->tid);
+ }
+ fprintf(fp, "%s,%s,%s,(%.0lf%%)%s\n", s.readable_count.c_str(), s.Name().c_str(),
+ s.comment.c_str(), 1.0 / s.scale * 100, (s.auto_generated ? " (generated)," : ","));
+ }
+ }
+
+ const std::vector<std::string> thread_titles_ = {
+ "thread_name", "pid", "tid", "count", "event_name"
+ };
+
+ void ShowText(FILE* fp) {
+ std::vector<size_t> width(6, 0);
+ bool show_thread = !summaries_.empty() && summaries_[0].thread != nullptr;
+
+ if (show_thread) {
+ for (size_t i = 0; i < thread_titles_.size(); i++) {
+ width[i] = std::max(width[i], thread_titles_[i].size());
+ }
}
for (auto& s : summaries_) {
- if (csv_) {
- fprintf(fp, "%s,%s,%s,(%.0lf%%)%s\n", s.readable_count.c_str(),
- s.Name().c_str(), s.comment.c_str(), 1.0 / s.scale * 100,
- (s.auto_generated ? " (generated)," : ","));
- } else {
- fprintf(fp, " %*s %-*s # %-*s (%.0lf%%)%s\n",
- static_cast<int>(count_column_width), s.readable_count.c_str(),
- static_cast<int>(name_column_width), s.Name().c_str(),
- static_cast<int>(comment_column_width), s.comment.c_str(),
- 1.0 / s.scale * 100, (s.auto_generated ? " (generated)" : ""));
+ if (show_thread) {
+ width[0] = std::max(width[0], s.thread->name.size());
+ width[1] = std::max(width[1], std::to_string(s.thread->pid).size());
+ width[2] = std::max(width[2], std::to_string(s.thread->tid).size());
+ }
+ width[3] = std::max(width[3], s.readable_count.size());
+ width[4] = std::max(width[4], s.Name().size());
+ width[5] = std::max(width[5], s.comment.size());
+ }
+
+ if (show_thread) {
+ fprintf(fp, "# ");
+ for (size_t i = 0; i < thread_titles_.size(); i++) {
+ fprintf(fp, "%-*s", static_cast<int>(width[i]), thread_titles_[i].c_str());
+ if (i + 1 < thread_titles_.size()) {
+ fprintf(fp, " ");
+ }
}
+ fprintf(fp, "\n");
}
+ for (auto& s : summaries_) {
+ if (show_thread) {
+ fprintf(fp, " %-*s", static_cast<int>(width[0]), s.thread->name.c_str());
+ fprintf(fp, " %-*d", static_cast<int>(width[1]), s.thread->pid);
+ fprintf(fp, " %-*d", static_cast<int>(width[2]), s.thread->tid);
+ }
+ fprintf(fp, " %*s %-*s # %-*s (%.0lf%%)%s\n",
+ static_cast<int>(width[3]), s.readable_count.c_str(),
+ static_cast<int>(width[4]), s.Name().c_str(),
+ static_cast<int>(width[5]), s.comment.c_str(),
+ 1.0 / s.scale * 100, (s.auto_generated ? " (generated)" : ""));
+ }
}
private:
return android::base::StringPrintf("%lf%cGHz", hz / 1e9, sap_mid);
}
if (s.type_name == "instructions" && s.count != 0) {
- const CounterSummary* other = FindSummary("cpu-cycles", s.modifier);
+ const CounterSummary* other = FindSummary("cpu-cycles", s.modifier, s.thread);
if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
double cpi = static_cast<double>(other->count) / s.count;
return android::base::StringPrintf("%lf%ccycles per instruction", cpi,
rate_desc = "miss rate";
}
if (!event_name.empty()) {
- const CounterSummary* other = FindSummary(event_name, s.modifier);
+ const CounterSummary* other = FindSummary(event_name, s.modifier, s.thread);
if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && other->count != 0) {
double miss_rate = static_cast<double>(s.count) / other->count;
return android::base::StringPrintf("%f%%%c%s", miss_rate * 100, sep, rate_desc.c_str());
" same time.\n"
"--no-inherit Don't stat created child threads/processes.\n"
"-o output_filename Write report to output_filename instead of standard output.\n"
+"--per-thread Print counters for each thread.\n"
"-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n"
"-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n"
#if defined(__ANDROID__)
std::vector<std::string>* non_option_args);
bool AddDefaultMeasuredEventTypes();
void SetEventSelectionFlags();
+ void MonitorEachThread();
bool ShowCounters(const std::vector<CountersInfo>& counters,
double duration_in_sec, FILE* fp);
bool in_app_context_;
android::base::unique_fd stop_signal_fd_;
bool use_devfreq_counters_ = false;
+
+ // used to report event count for each thread
+ bool report_per_thread_ = false;
+ std::unordered_map<pid_t, ThreadInfo> thread_info_;
};
bool StatCommand::Run(const std::vector<std::string>& args) {
if (!CheckPerfEventLimit()) {
return false;
}
+ AllowMoreOpenedFiles();
// 1. Parse options, and use default measured event types if not given.
std::vector<std::string> workload_args;
}
bool need_to_check_targets = false;
if (system_wide_collection_) {
- event_selection_set_.AddMonitoredThreads({-1});
+ if (report_per_thread_) {
+ event_selection_set_.AddMonitoredProcesses(GetAllProcesses());
+ } else {
+ event_selection_set_.AddMonitoredThreads({-1});
+ }
} else if (!event_selection_set_.HasMonitoredTarget()) {
if (workload != nullptr) {
event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
need_to_check_targets = true;
}
+ if (report_per_thread_) {
+ MonitorEachThread();
+ }
+
// 3. Open perf_event_files and output file if defined.
- if (!system_wide_collection_ && cpus_.empty()) {
- cpus_.push_back(-1); // Monitor on all cpus.
+ if (cpus_.empty() && (report_per_thread_ || !system_wide_collection_)) {
+ cpus_.push_back(-1); // Get event count for each thread on all cpus.
}
if (!event_selection_set_.OpenEventFiles(cpus_)) {
return false;
return false;
}
out_fd_.reset(fd);
+ } else if (args[i] == "--per-thread") {
+ report_per_thread_ = true;
} else if (args[i] == "-p") {
if (!NextArgumentOrError(args, &i)) {
return false;
event_selection_set_.SetInherit(child_inherit_);
}
+void StatCommand::MonitorEachThread() {
+ std::vector<pid_t> threads;
+ for (auto pid : event_selection_set_.GetMonitoredProcesses()) {
+ for (auto tid : GetThreadsInProcess(pid)) {
+ ThreadInfo info;
+ if (GetThreadName(tid, &info.name)) {
+ info.tid = tid;
+ info.pid = pid;
+ thread_info_[tid] = std::move(info);
+ threads.push_back(tid);
+ }
+ }
+ }
+ for (auto tid : event_selection_set_.GetMonitoredThreads()) {
+ ThreadInfo info;
+ if (ReadThreadNameAndPid(tid, &info.name, &info.pid)) {
+ info.tid = tid;
+ thread_info_[tid] = std::move(info);
+ threads.push_back(tid);
+ }
+ }
+ event_selection_set_.ClearMonitoredTargets();
+ event_selection_set_.AddMonitoredThreads(threads);
+}
+
bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
double duration_in_sec, FILE* fp) {
if (csv_) {
bool counters_always_available = true;
CounterSummaries summaries(csv_);
- for (size_t i = 0; i < counters.size(); ++i) {
- const CountersInfo& counters_info = counters[i];
- CounterSum sum;
- for (auto& counter_info : counters_info.counters) {
- sum.value += counter_info.counter.value;
- sum.time_enabled += counter_info.counter.time_enabled;
- sum.time_running += counter_info.counter.time_running;
- }
+ size_t last_sum_id = 0;
+
+ auto add_summary = [&](const CountersInfo& info, pid_t tid, CounterSum& sum) {
if (interval_only_values_) {
- if (last_sum_values_.size() < counters.size()) {
- last_sum_values_.resize(counters.size());
+ if (last_sum_values_.size() < last_sum_id + 1) {
+ last_sum_values_.resize(last_sum_id + 1);
}
CounterSum tmp = sum;
- sum.value -= last_sum_values_[i].value;
- sum.time_enabled -= last_sum_values_[i].time_enabled;
- sum.time_running -= last_sum_values_[i].time_running;
- last_sum_values_[i] = tmp;
+ sum.value -= last_sum_values_[last_sum_id].value;
+ sum.time_enabled -= last_sum_values_[last_sum_id].time_enabled;
+ sum.time_running -= last_sum_values_[last_sum_id].time_running;
+ last_sum_values_[last_sum_id++] = tmp;
}
double scale = 1.0;
if (sum.time_running < sum.time_enabled && sum.time_running != 0) {
scale = static_cast<double>(sum.time_enabled) / sum.time_running;
}
- summaries.Summaries().emplace_back(counters_info.event_name, counters_info.event_modifier,
- counters_info.group_id, sum.value, scale, false, csv_);
+ if (system_wide_collection_ && report_per_thread_ && sum.time_running == 0) {
+ // No need to report threads not running in system wide per thread report.
+ return;
+ }
+ ThreadInfo* thread = nullptr;
+ if (report_per_thread_) {
+ auto it = thread_info_.find(tid);
+ CHECK(it != thread_info_.end());
+ thread = &it->second;
+ }
+ summaries.Summaries().emplace_back(info.event_name, info.event_modifier, info.group_id,
+ thread, sum.value, scale, false, csv_);
counters_always_available &= summaries.Summaries().back().IsMonitoredAllTheTime();
+ };
+
+ for (const auto& counters_info : counters) {
+ CounterSum sum;
+ size_t prev_sum_count = summaries.Summaries().size();
+ for (size_t i = 0; i < counters_info.counters.size(); i++) {
+ auto& counter = counters_info.counters[i];
+ sum.value += counter.counter.value;
+ sum.time_enabled += counter.counter.time_enabled;
+ sum.time_running += counter.counter.time_running;
+ if (i + 1 == counters_info.counters.size() ||
+ (report_per_thread_ && counter.tid != counters_info.counters[i + 1].tid)) {
+ add_summary(counters_info, counter.tid, sum);
+ sum.value = 0;
+ sum.time_enabled = 0;
+ sum.time_running = 0;
+ }
+ }
+ if (report_per_thread_) {
+ // Show threads with bigger event counts first.
+ std::sort(
+ summaries.Summaries().begin() + prev_sum_count, summaries.Summaries().end(),
+ [](const CounterSummary& s1, const CounterSummary& s2) { return s1.count > s2.count; });
+ }
}
summaries.AutoGenerateSummaries();
summaries.GenerateComments(duration_in_sec);
## Table of Contents
-- [How simpleperf works](#how-simpleperf-works)
-- [Commands](#commands)
-- [The list command](#the-list-command)
-- [The stat command](#the-stat-command)
+- [Executable commands reference](#executable-commands-reference)
+ - [Table of Contents](#table-of-contents)
+ - [How simpleperf works](#how-simpleperf-works)
+ - [Commands](#commands)
+ - [The list command](#the-list-command)
+ - [The stat command](#the-stat-command)
- [Select events to stat](#select-events-to-stat)
- [Select target to stat](#select-target-to-stat)
- [Decide how long to stat](#decide-how-long-to-stat)
- [Decide the print interval](#decide-the-print-interval)
- [Display counters in systrace](#display-counters-in-systrace)
-- [The record command](#the-record-command)
+ - [Show event count per thread](#show-event-count-per-thread)
+ - [The record command](#the-record-command)
- [Select events to record](#select-events-to-record)
- [Select target to record](#select-target-to-record)
- [Set the frequency to record](#set-the-frequency-to-record)
- [Decide how long to record](#decide-how-long-to-record)
- [Set the path to store profiling data](#set-the-path-to-store-profiling-data)
- - [Record call graphs](#record-call-graphs)
+ - [Record call graphs](#record-call-graphs)
- [Record both on CPU time and off CPU time](#record-both-on-cpu-time-and-off-cpu-time)
-- [The report command](#the-report-command)
+ - [The report command](#the-report-command)
- [Set the path to read profiling data](#set-the-path-to-read-profiling-data)
- [Set the path to find binaries](#set-the-path-to-find-binaries)
- [Filter samples](#filter-samples)
- [Group samples into sample entries](#group-samples-into-sample-entries)
- - [Report call graphs](#report-call-graphs)
+ - [Report call graphs](#report-call-graphs)
## How simpleperf works
# Open the collected new.html in browser and perf counters will be shown up.
```
+### Show event count per thread
+
+By default, stat cmd outputs an event count sum for all monitored targets. But when `--per-thread`
+option is used, stat cmd outputs an event count for each thread in monitored targets. It can be
+used to find busy threads in a process or system wide. With `--per-thread` option, stat cmd opens
+a perf_event_file for each exisiting thread. If a monitored thread creates new threads, event
+count for new threads will be added to the monitored thread by default, otherwise omitted if
+`--no-inherit` option is also used.
+
+```sh
+# Print event counts for each thread in process 11904. Event counts for threads created after
+# stat cmd will be added to threads creating them.
+$ simpleperf stat --per-thread -p 11904 --duration 1
+
+# Print event counts for all threads running in the system every 1s. Threads not running will not
+# be reported.
+$ su 0 simpleperf stat --per-thread -a --interval 1000 --interval-only-values
+
+# Print event counts for all threads running in the system every 1s. Event counts for threads
+# created after stat cmd will be omitted.
+$ su 0 simpleperf stat --per-thread -a --interval 1000 --interval-only-values --no-inherit
+```
+
## The record command
The record command is used to dump samples of the profiled processes. Each sample can contain
return true;
}
-static std::map<pid_t, std::set<pid_t>> PrepareThreads(const std::set<pid_t>& processes,
- const std::set<pid_t>& threads) {
- std::map<pid_t, std::set<pid_t>> result;
+static std::set<pid_t> PrepareThreads(const std::set<pid_t>& processes,
+ const std::set<pid_t>& threads) {
+ std::set<pid_t> result = threads;
for (auto& pid : processes) {
std::vector<pid_t> tids = GetThreadsInProcess(pid);
- std::set<pid_t>& threads_in_process = result[pid];
- threads_in_process.insert(tids.begin(), tids.end());
- }
- for (auto& tid : threads) {
- // tid = -1 means monitoring all threads.
- if (tid == -1) {
- result[-1].insert(-1);
- } else {
- pid_t pid;
- if (GetProcessForThread(tid, &pid)) {
- result[pid].insert(tid);
- }
- }
+ result.insert(tids.begin(), tids.end());
}
return result;
}
-bool EventSelectionSet::OpenEventFiles(const std::vector<int>& on_cpus) {
- std::vector<int> cpus = on_cpus;
- if (!cpus.empty()) {
- // cpus = {-1} means open an event file for all cpus.
- if (!(cpus.size() == 1 && cpus[0] == -1) && !CheckIfCpusOnline(cpus)) {
+bool EventSelectionSet::OpenEventFiles(const std::vector<int>& cpus) {
+ std::vector<int> monitored_cpus;
+ if (cpus.empty()) {
+ monitored_cpus = GetOnlineCpus();
+ } else if (cpus.size() == 1 && cpus[0] == -1) {
+ monitored_cpus = {-1};
+ } else {
+ if (!CheckIfCpusOnline(cpus)) {
return false;
}
- } else {
- cpus = GetOnlineCpus();
+ monitored_cpus = cpus;
}
- std::map<pid_t, std::set<pid_t>> process_map = PrepareThreads(processes_, threads_);
+ std::set<pid_t> threads = PrepareThreads(processes_, threads_);
for (auto& group : groups_) {
- for (const auto& pair : process_map) {
- size_t success_count = 0;
- std::string failed_event_type;
- for (const auto& tid : pair.second) {
+ size_t success_count = 0;
+ std::string failed_event_type;
+ for (const auto tid : threads) {
+ const std::vector<int>* pcpus = &monitored_cpus;
+ if (!group[0].allowed_cpus.empty()) {
// override cpu list if event's PMU has a cpumask as those PMUs are
// agnostic to cpu and it's meaningless to specify cpus for them.
- auto& evsel = group[0];
- if (!evsel.allowed_cpus.empty()) cpus = evsel.allowed_cpus;
- for (const auto& cpu : cpus) {
- if (OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) {
- success_count++;
- }
- }
+ pcpus = &group[0].allowed_cpus;
}
- // We can't guarantee to open perf event file successfully for each thread on each cpu.
- // Because threads may exit between PrepareThreads() and OpenEventFilesOnGroup(), and
- // cpus may be offlined between GetOnlineCpus() and OpenEventFilesOnGroup().
- // So we only check that we can at least monitor one thread for each process.
- if (success_count == 0) {
- PLOG(ERROR) << "failed to open perf event file for event_type " << failed_event_type
- << " for "
- << (pair.first == -1 ? "all threads"
- : "threads in process " + std::to_string(pair.first));
- return false;
+ for (const auto& cpu : *pcpus) {
+ if (OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) {
+ success_count++;
+ }
}
}
+ // We can't guarantee to open perf event file successfully for each thread on each cpu.
+ // Because threads may exit between PrepareThreads() and OpenEventFilesOnGroup(), and
+ // cpus may be offlined between GetOnlineCpus() and OpenEventFilesOnGroup().
+ // So we only check that we can at least monitor one thread for each event group.
+ if (success_count == 0) {
+ PLOG(ERROR) << "failed to open perf event file for event_type " << failed_event_type;
+ return false;
+ }
}
return ApplyFilters();
}