2 * Copyright (C) 2015 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
21 #include <sys/prctl.h>
29 #include <android-base/file.h>
30 #include <android-base/logging.h>
31 #include <android-base/parsedouble.h>
32 #include <android-base/strings.h>
35 #include "environment.h"
36 #include "event_attr.h"
38 #include "event_selection_set.h"
39 #include "event_type.h"
40 #include "IOEventLoop.h"
// Event type names that are measured when the user selects no events.
// StatCommand::AddDefaultMeasuredEventTypes() walks this list and adds only
// the entries that are found by name and reported as supported.
46 static std::vector<std::string> default_measured_event_types{
47 "cpu-cycles", "stalled-cycles-frontend", "stalled-cycles-backend",
48 "instructions", "branch-instructions", "branch-misses",
49 "task-clock", "context-switches", "page-faults",
// Summary of one counted event (event type plus optional modifier) as used by
// the stat report. The constructor stores the type name and the
// auto-generated flag, and caches a printable form of the count in
// readable_count.
// NOTE(review): several member declarations and initializers fall on lines
// not visible in this excerpt (modifier, group_id, count, scale and comment
// are referenced elsewhere in the file).
52 struct CounterSummary {
53 std::string type_name;
// Printable form of the count, filled in by the constructor.
58 std::string readable_count;
62 CounterSummary(const std::string& type_name, const std::string& modifier,
63 uint32_t group_id, uint64_t count, double scale,
64 bool auto_generated, bool csv)
65 : type_name(type_name),
70 auto_generated(auto_generated) {
71 readable_count = ReadableCountValue(csv);
// Callers use this check before combining or dividing the counts of two
// summaries.
74 bool IsMonitoredAtTheSameTime(const CounterSummary& other) const {
75 // Two summaries are monitored at the same time if they are in the same
76 // group or are monitored all the time.
77 if (group_id == other.group_id) {
80 return IsMonitoredAllTheTime() && other.IsMonitoredAllTheTime();
// Display name: type_name followed by ":" and the modifier when a modifier is
// present.
// NOTE(review): the return for the empty-modifier branch is not visible here.
83 std::string Name() const {
84 if (modifier.empty()) {
87 return type_name + ":" + modifier;
// Builds the printable count. Clock events are printed in milliseconds with
// an "(ms)" suffix; other counts are printed as integers, and the visible
// loop inserts ',' separators while walking the digits from the right.
// NOTE(review): how `csv` affects the result is on lines not visible here.
91 std::string ReadableCountValue(bool csv) {
92 if (type_name == "cpu-clock" || type_name == "task-clock") {
93 // Convert nanoseconds to milliseconds.
94 double value = count / 1e6;
95 return android::base::StringPrintf("%lf(ms)", value);
97 // Convert big numbers to human friendly mode. For example,
98 // 1000000 will be converted to 1,000,000.
99 std::string s = android::base::StringPrintf("%" PRIu64, count);
// i walks from the last character towards the front (index 0 is never a
// separator position); j counts digits seen so far.
103 for (size_t i = s.size() - 1, j = 1; i > 0; --i, ++j) {
105 s.insert(s.begin() + i, ',');
114 bool IsMonitoredAllTheTime() const {
115 // If an event runs all the time it is enabled (by not sharing hardware
116 // counters with other events), the scale of its summary is usually within
117 // [1, 1 + 1e-5]. By setting SCALE_ERROR_LIMIT to 1e-5, we can identify
118 // events monitored all the time in most cases while keeping the report
119 // error rate <= 1e-5.
120 constexpr double SCALE_ERROR_LIMIT = 1e-5;
121 return (fabs(scale - 1.0) < SCALE_ERROR_LIMIT);
// Collection of CounterSummary objects for one report. It can look summaries
// up by (type name, modifier), synthesize combined user+kernel summaries,
// attach a derived comment to each summary, and print the table to a FILE*.
125 class CounterSummaries {
// `csv` is stored in csv_.
// NOTE(review): the declaration of csv_ is not visible in this excerpt.
127 explicit CounterSummaries(bool csv) : csv_(csv) {}
// Appends a copy of `summary`. Because summaries_ is a std::vector, this may
// invalidate pointers/references previously obtained from FindSummary().
128 void AddSummary(const CounterSummary& summary) {
129 summaries_.push_back(summary);
// Linear search for the summary whose type_name and modifier both match.
// Callers compare the result against nullptr to detect "not found"; the
// return statements themselves are on lines not visible here.
132 const CounterSummary* FindSummary(const std::string& type_name,
133 const std::string& modifier) {
134 for (const auto& s : summaries_) {
135 if (s.type_name == type_name && s.modifier == modifier) {
142 // If we have two summaries monitoring the same event type at the same time,
143 // that one is for user space only, and the other is for kernel space only;
144 // then we can automatically generate a summary combining the two results.
145 // For example, a summary of branch-misses:u and a summary for branch-misses:k
146 // can generate a summary of branch-misses.
147 void AutoGenerateSummaries() {
// Iterates by index: AddSummary() below appends to summaries_ while the loop
// is running, which may invalidate `s` and `other`, so neither may be used
// after that call within an iteration.
148 for (size_t i = 0; i < summaries_.size(); ++i) {
149 const CounterSummary& s = summaries_[i];
150 if (s.modifier == "u") {
151 const CounterSummary* other = FindSummary(s.type_name, "k");
152 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
// Only generate the combined summary when no unmodified summary exists yet.
153 if (FindSummary(s.type_name, "") == nullptr) {
// The generated summary has an empty modifier, reuses the ":u" summary's
// group_id and scale, sums the two counts, and is flagged auto_generated.
154 AddSummary(CounterSummary(s.type_name, "", s.group_id,
155 s.count + other->count, s.scale, true,
// Fills in the comment of every summary for a run lasting duration_in_sec.
163 void GenerateComments(double duration_in_sec) {
164 for (auto& s : summaries_) {
165 s.comment = GetCommentForSummary(s, duration_in_sec);
// Prints one line per summary to fp. A first pass computes the widest count,
// name and comment so that the aligned format can pad its columns.
// NOTE(review): the condition choosing between the comma-separated and the
// aligned format is not visible in this excerpt.
169 void Show(FILE* fp) {
170 size_t count_column_width = 0;
171 size_t name_column_width = 0;
172 size_t comment_column_width = 0;
173 for (auto& s : summaries_) {
175 std::max(count_column_width, s.readable_count.size());
176 name_column_width = std::max(name_column_width, s.Name().size());
177 comment_column_width = std::max(comment_column_width, s.comment.size());
180 for (auto& s : summaries_) {
// Comma-separated form. The percentage printed is 1.0 / scale * 100.
182 fprintf(fp, "%s,%s,%s,(%.0lf%%)%s\n", s.readable_count.c_str(),
183 s.Name().c_str(), s.comment.c_str(), 1.0 / s.scale * 100,
184 (s.auto_generated ? " (generated)," : ","));
// Aligned form: right-aligned count, left-aligned name and comment.
186 fprintf(fp, " %*s %-*s # %-*s (%.0lf%%)%s\n",
187 static_cast<int>(count_column_width), s.readable_count.c_str(),
188 static_cast<int>(name_column_width), s.Name().c_str(),
189 static_cast<int>(comment_column_width), s.comment.c_str(),
190 1.0 / s.scale * 100, (s.auto_generated ? " (generated)" : ""));
// Builds the comment text for one summary. Depending on the event type the
// comment is a cpu usage figure, a frequency, a ratio against a related
// summary, or a plain per-second rate.
// NOTE(review): sap_mid is defined on a line not visible here; it is printed
// with %c as the single character between a value and its unit text.
196 std::string GetCommentForSummary(const CounterSummary& s,
197 double duration_in_sec) {
// task-clock: the count is divided by 1e9 to get seconds, then by
// duration / scale to get the number of cpus used.
204 if (s.type_name == "task-clock") {
205 double run_sec = s.count / 1e9;
206 double used_cpus = run_sec / (duration_in_sec / s.scale);
207 return android::base::StringPrintf("%lf%ccpus used", used_cpus, sap_mid);
209 if (s.type_name == "cpu-clock") {
// cpu-cycles: cycles per second, reported in GHz.
212 if (s.type_name == "cpu-cycles") {
213 double hz = s.count / (duration_in_sec / s.scale);
214 return android::base::StringPrintf("%lf%cGHz", hz / 1e9, sap_mid);
// instructions: cycles per instruction, using the cpu-cycles summary with the
// same modifier when both were monitored at the same time. The count != 0
// guard protects the division below.
216 if (s.type_name == "instructions" && s.count != 0) {
217 const CounterSummary* other = FindSummary("cpu-cycles", s.modifier);
218 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
219 double cpi = static_cast<double>(other->count) / s.count;
220 return android::base::StringPrintf("%lf%ccycles per instruction", cpi,
// "*-misses": miss rate against the matching reference event. Two names are
// special-cased; otherwise the "-misses" suffix is replaced with "s".
224 if (android::base::EndsWith(s.type_name, "-misses")) {
225 std::string other_name;
226 if (s.type_name == "cache-misses") {
227 other_name = "cache-references";
228 } else if (s.type_name == "branch-misses") {
229 other_name = "branch-instructions";
232 s.type_name.substr(0, s.type_name.size() - strlen("-misses")) + "s";
234 const CounterSummary* other = FindSummary(other_name, s.modifier);
235 if (other != nullptr && other->IsMonitoredAtTheSameTime(s) &&
237 double miss_rate = static_cast<double>(s.count) / other->count;
238 return android::base::StringPrintf("%lf%%%cmiss rate", miss_rate * 100,
// "*-refill": miss rate against the event named by stripping "-refill".
242 if (android::base::EndsWith(s.type_name, "-refill")) {
243 std::string other_name = s.type_name.substr(0, s.type_name.size() - strlen("-refill"));
244 const CounterSummary* other = FindSummary(other_name, s.modifier);
245 if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && other->count != 0) {
246 double miss_rate = static_cast<double>(s.count) / other->count;
247 return android::base::StringPrintf("%f%%%cmiss rate", miss_rate * 100, sap_mid);
// Fallback: events per second, printed with a G/M/K unit or no unit.
// NOTE(review): the thresholds selecting each unit are not visible here.
250 double rate = s.count / (duration_in_sec / s.scale);
252 return android::base::StringPrintf("%.3lf%cG/sec", rate / 1e9, sap_mid);
255 return android::base::StringPrintf("%.3lf%cM/sec", rate / 1e6, sap_mid);
258 return android::base::StringPrintf("%.3lf%cK/sec", rate / 1e3, sap_mid);
260 return android::base::StringPrintf("%.3lf%c/sec", rate, sap_mid);
264 std::vector<CounterSummary> summaries_;
// Implementation of the `simpleperf stat` command. The constructor passes the
// command name, a short description and the help text to Command, sets
// option defaults, asks the kernel to send SIGHUP to this process when its
// parent exits, and seeds app_package_name_ from GetDefaultAppPackageName().
// NOTE(review): some member declarations and initializers fall on lines not
// visible in this excerpt.
268 class StatCommand : public Command {
271 : Command("stat", "gather performance counter information",
273 "Usage: simpleperf stat [options] [command [command-args]]\n"
274 " Gather performance counter information of running [command].\n"
275 " And -a/-p/-t option can be used to change target of counter information.\n"
276 "-a Collect system-wide information.\n"
277 #if defined(__ANDROID__)
278 "--app package_name Profile the process of an Android application.\n"
279 " On non-rooted devices, the app must be debuggable,\n"
280 " because we use run-as to switch to the app's context.\n"
282 "--cpu cpu_item1,cpu_item2,...\n"
283 " Collect information only on the selected cpus. cpu_item can\n"
284 " be a cpu number like 1, or a cpu range like 0-3.\n"
285 "--csv Write report in comma separate form.\n"
286 "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n"
287 " [command]. Here time_in_sec may be any positive\n"
288 " floating point number.\n"
289 "--interval time_in_ms Print stat for every time_in_ms milliseconds.\n"
290 " Here time_in_ms may be any positive floating point\n"
292 "-e event1[:modifier1],event2[:modifier2],...\n"
293 " Select the event list to count. Use `simpleperf list` to find\n"
294 " all possible event names. Modifiers can be added to define\n"
295 " how the event should be monitored. Possible modifiers are:\n"
296 " u - monitor user space events only\n"
297 " k - monitor kernel space events only\n"
298 "--group event1[:modifier],event2[:modifier2],...\n"
299 " Similar to -e option. But events specified in the same --group\n"
300 " option are monitored as a group, and scheduled in and out at the\n"
302 "--no-inherit Don't stat created child threads/processes.\n"
303 "-o output_filename Write report to output_filename instead of standard output.\n"
304 "-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n"
305 "-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n"
306 "--verbose Show result in verbose mode.\n"
308 // Below options are only used internally and shouldn't be visible to the public.
309 "--in-app We are already running in the app's context.\n"
310 "--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n"
314 verbose_mode_(false),
315 system_wide_collection_(false),
316 child_inherit_(true),
319 event_selection_set_(true),
321 in_app_context_(false) {
322 // Die if parent exits.
323 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
324 app_package_name_ = GetDefaultAppPackageName();
// Entry point of the command; `args` are the command-line arguments.
327 bool Run(const std::vector<std::string>& args);
// Parses the leading option arguments of `args` into the members below and
// stores the remaining arguments in *non_option_args.
330 bool ParseOptions(const std::vector<std::string>& args,
331 std::vector<std::string>* non_option_args);
332 bool AddDefaultMeasuredEventTypes();
333 void SetEventSelectionFlags();
334 bool ShowCounters(const std::vector<CountersInfo>& counters,
335 double duration_in_sec, FILE* fp);
// Set by -a.
338 bool system_wide_collection_;
// Set by --duration; Run() treats 0 as "no duration limit".
340 double duration_in_sec_;
// Set by --interval; Run() treats 0 as "no periodic printing".
341 double interval_in_ms_;
// Set by --cpu; Run() uses a single -1 entry to mean all cpus.
342 std::vector<int> cpus_;
343 EventSelectionSet event_selection_set_;
// Set by -o; when empty, no output file is opened by Run().
344 std::string output_filename_;
// Set by --app, or by GetDefaultAppPackageName() in the constructor.
346 std::string app_package_name_;
// Set by --in-app.
347 bool in_app_context_;
// Runs the stat command: parses options, selects events and targets, opens
// the perf event files, waits in the IO event loop until a signal or the
// duration timer ends it, then reads and prints the counters.
// NOTE(review): the bodies of most error branches (presumably `return false`)
// are on lines not visible in this excerpt.
350 bool StatCommand::Run(const std::vector<std::string>& args) {
351 if (!CheckPerfEventLimit()) {
355 // 1. Parse options, and use default measured event types if not given.
356 std::vector<std::string> workload_args;
357 if (!ParseOptions(args, &workload_args)) {
// An app package was requested and we are not yet inside its context: hand
// the whole command over to RunInAppContext() and return its result.
360 if (!app_package_name_.empty() && !in_app_context_) {
362 return RunInAppContext(app_package_name_, "stat", args, workload_args.size(),
363 output_filename_, !event_selection_set_.GetTracepointEvents().empty());
366 if (event_selection_set_.empty()) {
367 if (!AddDefaultMeasuredEventTypes()) {
371 SetEventSelectionFlags();
373 // 2. Create workload.
374 std::unique_ptr<Workload> workload;
375 if (!workload_args.empty()) {
376 workload = Workload::CreateWorkload(workload_args);
377 if (workload == nullptr) {
// Choose what to monitor: thread id -1 for system-wide collection; otherwise,
// when no -p/-t target was given, the workload process or the app process.
381 bool need_to_check_targets = false;
382 if (system_wide_collection_) {
383 event_selection_set_.AddMonitoredThreads({-1});
384 } else if (!event_selection_set_.HasMonitoredTarget()) {
385 if (workload != nullptr) {
386 event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
387 event_selection_set_.SetEnableOnExec(true);
388 } else if (!app_package_name_.empty()) {
389 int pid = WaitForAppProcess(app_package_name_);
390 event_selection_set_.AddMonitoredProcesses({pid});
393 << "No threads to monitor. Try `simpleperf help stat` for help\n";
397 need_to_check_targets = true;
400 // 3. Open perf_event_files and output file if defined.
401 if (!system_wide_collection_ && cpus_.empty()) {
402 cpus_.push_back(-1); // Monitor on all cpus.
404 if (!event_selection_set_.OpenEventFiles(cpus_)) {
// fp_holder owns the output file, if one is opened, and closes it with fclose
// when it goes out of scope.
// NOTE(review): the declaration and default value of `fp` are not visible.
407 std::unique_ptr<FILE, decltype(&fclose)> fp_holder(nullptr, fclose);
409 if (!output_filename_.empty()) {
410 fp_holder.reset(fopen(output_filename_.c_str(), "w"));
411 if (fp_holder == nullptr) {
412 PLOG(ERROR) << "failed to open " << output_filename_;
415 fp = fp_holder.get();
418 // 4. Add signal/periodic Events.
419 std::chrono::time_point<std::chrono::steady_clock> start_time;
420 std::vector<CountersInfo> counters;
// Cpu hotplug handling is requested only when specific cpus are monitored.
421 if (system_wide_collection_ || (!cpus_.empty() && cpus_[0] != -1)) {
422 if (!event_selection_set_.HandleCpuHotplugEvents(cpus_)) {
426 if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
// Any of these signals ends the event loop.
429 IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
430 if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
431 [&]() { return loop->ExitLoop(); })) {
// --duration: a timer that ends the loop.
434 if (duration_in_sec_ != 0) {
435 if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
436 [&]() { return loop->ExitLoop(); })) {
// Reads the current counters and prints them together with the time elapsed
// on the steady clock. Captures by reference, so it must not outlive Run().
440 auto print_counters = [&]() {
441 auto end_time = std::chrono::steady_clock::now();
442 if (!event_selection_set_.ReadCounters(&counters)) {
445 double duration_in_sec =
446 std::chrono::duration_cast<std::chrono::duration<double>>(end_time -
449 if (!ShowCounters(counters, duration_in_sec, fp)) {
// --interval: the value is in milliseconds and is converted to seconds here.
455 if (interval_in_ms_ != 0) {
456 if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0),
462 // 5. Count events while workload running.
463 start_time = std::chrono::steady_clock::now();
464 if (workload != nullptr && !workload->Start()) {
467 if (!loop->RunLoop()) {
471 // 6. Read and print counters.
473 return print_counters();
// Parses the leading option arguments of `args` into the command's members.
// Parsing stops at the first argument that is empty or does not start with
// '-'; that argument and everything after it are copied to *non_option_args,
// which is cleared first.
// NOTE(review): the bodies of the error branches (presumably `return false`)
// and the final return are on lines not visible in this excerpt.
476 bool StatCommand::ParseOptions(const std::vector<std::string>& args,
477 std::vector<std::string>* non_option_args) {
// NOTE(review): tid_set is not referenced in any line visible in this
// excerpt; confirm whether it is still needed.
478 std::set<pid_t> tid_set;
480 for (i = 0; i < args.size() && args[i].size() > 0 && args[i][0] == '-'; ++i) {
481 if (args[i] == "-a") {
482 system_wide_collection_ = true;
483 } else if (args[i] == "--app") {
// Options that take a value advance i with NextArgumentOrError() and then
// read the value from args[i].
484 if (!NextArgumentOrError(args, &i)) {
487 app_package_name_ = args[i];
488 } else if (args[i] == "--cpu") {
489 if (!NextArgumentOrError(args, &i)) {
492 cpus_ = GetCpusFromString(args[i]);
493 } else if (args[i] == "--csv") {
495 } else if (args[i] == "--duration") {
496 if (!NextArgumentOrError(args, &i)) {
499 if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_,
501 LOG(ERROR) << "Invalid duration: " << args[i].c_str();
504 } else if (args[i] == "--interval") {
505 if (!NextArgumentOrError(args, &i)) {
508 if (!android::base::ParseDouble(args[i].c_str(), &interval_in_ms_,
510 LOG(ERROR) << "Invalid interval: " << args[i].c_str();
513 } else if (args[i] == "-e") {
514 if (!NextArgumentOrError(args, &i)) {
// -e: the comma-separated event types are added one by one.
517 std::vector<std::string> event_types = android::base::Split(args[i], ",");
518 for (auto& event_type : event_types) {
519 if (!event_selection_set_.AddEventType(event_type)) {
523 } else if (args[i] == "--group") {
524 if (!NextArgumentOrError(args, &i)) {
// --group: the comma-separated event types are added together as one group.
527 std::vector<std::string> event_types = android::base::Split(args[i], ",");
528 if (!event_selection_set_.AddEventGroup(event_types)) {
531 } else if (args[i] == "--in-app") {
532 in_app_context_ = true;
533 } else if (args[i] == "--no-inherit") {
534 child_inherit_ = false;
535 } else if (args[i] == "-o") {
536 if (!NextArgumentOrError(args, &i)) {
539 output_filename_ = args[i];
540 } else if (args[i] == "-p") {
541 if (!NextArgumentOrError(args, &i)) {
// -p and -t both parse their id list with GetValidThreadsFromThreadString();
// they differ only in whether the ids are added as processes or as threads.
544 std::set<pid_t> pids;
545 if (!GetValidThreadsFromThreadString(args[i], &pids)) {
548 event_selection_set_.AddMonitoredProcesses(pids);
549 } else if (args[i] == "-t") {
550 if (!NextArgumentOrError(args, &i)) {
553 std::set<pid_t> tids;
554 if (!GetValidThreadsFromThreadString(args[i], &tids)) {
557 event_selection_set_.AddMonitoredThreads(tids);
558 } else if (args[i] == "--tracepoint-events") {
559 if (!NextArgumentOrError(args, &i)) {
562 if (!SetTracepointEventsFilePath(args[i])) {
565 } else if (args[i] == "--verbose") {
566 verbose_mode_ = true;
568 ReportUnknownOption(args, i);
// Cross-option checks: -a cannot be combined with -p/-t targets, and -a
// requires root.
573 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
574 LOG(ERROR) << "Stat system wide and existing processes/threads can't be "
575 "used at the same time.";
578 if (system_wide_collection_ && !IsRoot()) {
579 LOG(ERROR) << "System wide profiling needs root privilege.";
583 non_option_args->clear();
584 for (; i < args.size(); ++i) {
585 non_option_args->push_back(args[i]);
// Adds every entry of default_measured_event_types that is both known by name
// and reported as supported to event_selection_set_. An error is logged when
// the set is still empty afterwards.
// NOTE(review): the return statements are on lines not visible in this
// excerpt.
590 bool StatCommand::AddDefaultMeasuredEventTypes() {
591 for (auto& name : default_measured_event_types) {
592 // It is not an error when some event types in the default list are not
593 // supported by the kernel.
594 const EventType* type = FindEventTypeByName(name);
595 if (type != nullptr &&
596 IsEventAttrSupported(CreateDefaultPerfEventAttr(*type))) {
597 if (!event_selection_set_.AddEventType(name)) {
602 if (event_selection_set_.empty()) {
603 LOG(ERROR) << "Failed to add any supported default measured types";
// Passes child_inherit_ (true by default, cleared by --no-inherit) to the
// event selection set.
609 void StatCommand::SetEventSelectionFlags() {
610 event_selection_set_.SetInherit(child_inherit_);
// Writes the statistics report to fp: a header, optional per-counter detail
// lines, one summary line per event, and the total test time. Every piece
// has a comma-separated variant and a human-readable variant.
// NOTE(review): the conditions selecting the variants and the detail output
// (presumably csv_ and verbose_mode_), and the return statement, are on
// lines not visible in this excerpt.
613 bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
614 double duration_in_sec, FILE* fp) {
616 fprintf(fp, "Performance counter statistics,\n");
618 fprintf(fp, "Performance counter statistics:\n\n");
// Detail output: one line per individual counter (event, tid, cpu and the
// raw value / time_enabled / time_running / id fields).
622 for (auto& counters_info : counters) {
623 for (auto& counter_info : counters_info.counters) {
625 fprintf(fp, "%s,tid,%d,cpu,%d,count,%" PRIu64 ",time_enabled,%" PRIu64
626 ",time running,%" PRIu64 ",id,%" PRIu64 ",\n",
627 counters_info.event_name.c_str(), counter_info.tid,
628 counter_info.cpu, counter_info.counter.value,
629 counter_info.counter.time_enabled,
630 counter_info.counter.time_running, counter_info.counter.id);
633 "%s(tid %d, cpu %d): count %" PRIu64 ", time_enabled %" PRIu64
634 ", time running %" PRIu64 ", id %" PRIu64 "\n",
635 counters_info.event_name.c_str(), counter_info.tid,
636 counter_info.cpu, counter_info.counter.value,
637 counter_info.counter.time_enabled,
638 counter_info.counter.time_running, counter_info.counter.id);
// Summary: for each event, sum value, time_enabled and time_running over all
// of its counters.
644 CounterSummaries summaries(csv_);
645 for (auto& counters_info : counters) {
646 uint64_t value_sum = 0;
647 uint64_t time_enabled_sum = 0;
648 uint64_t time_running_sum = 0;
649 for (auto& counter_info : counters_info.counters) {
650 value_sum += counter_info.counter.value;
651 time_enabled_sum += counter_info.counter.time_enabled;
652 time_running_sum += counter_info.counter.time_running;
// scale becomes enabled/running when the event ran for only part of its
// enabled time; the running != 0 check avoids dividing by zero.
// NOTE(review): the declaration and initial value of `scale` are not visible.
655 if (time_running_sum < time_enabled_sum && time_running_sum != 0) {
656 scale = static_cast<double>(time_enabled_sum) / time_running_sum;
658 summaries.AddSummary(
659 CounterSummary(counters_info.event_name, counters_info.event_modifier,
660 counters_info.group_id, value_sum, scale, false, csv_));
// Comments are generated after the auto-generated summaries are added, so the
// generated entries receive comments too.
662 summaries.AutoGenerateSummaries();
663 summaries.GenerateComments(duration_in_sec);
667 fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
669 fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
// Registers a factory for the "stat" command: the lambda returns a newly
// allocated StatCommand wrapped in a std::unique_ptr<Command>.
675 void RegisterStatCommand() {
676 RegisterCommand("stat",
677 [] { return std::unique_ptr<Command>(new StatCommand); });