2 * Copyright (C) 2015 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <sys/prctl.h>
20 #include <sys/utsname.h>
24 #include <unordered_map>
27 #include <android-base/logging.h>
28 #include <android-base/file.h>
29 #include <android-base/parsedouble.h>
30 #include <android-base/parseint.h>
31 #include <android-base/strings.h>
32 #include <android-base/test_utils.h>
35 #include "dwarf_unwind.h"
36 #include "environment.h"
37 #include "event_selection_set.h"
38 #include "event_type.h"
39 #include "IOEventLoop.h"
40 #include "perf_clock.h"
44 #include "record_file.h"
45 #include "thread_tree.h"
// Event type added by Run() when no event has been selected by the options.
50 static std::string default_measured_event_type = "cpu-cycles";
// Maps the filter names accepted by the -j option to PERF_SAMPLE_BRANCH_* flags.
52 static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
53 {"u", PERF_SAMPLE_BRANCH_USER},
54 {"k", PERF_SAMPLE_BRANCH_KERNEL},
55 {"any", PERF_SAMPLE_BRANCH_ANY},
56 {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
57 {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
58 {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
61 // The max size of records dumped by kernel is 65535, and dump stack size
62 // should be a multiple of 8, so MAX_DUMP_STACK_SIZE is 65528.
63 constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;
65 // The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
66 // Here 1024 is a desired value for pages in mapped buffer. If mapped
67 // successfully, the buffer size = 1024 * 4K (page size) = 4M.
68 constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;
// Command object behind `simpleperf record`. It holds the parsed options and
// the state used while recording samples into a record file ("perf.data"
// unless -o is given). The string literals below form the help text.
70 class RecordCommand : public Command {
74 "record", "record sampling info in perf.data",
76 "Usage: simpleperf record [options] [command [command-args]]\n"
77 " Gather sampling information of running [command]. And -a/-p/-t option\n"
78 " can be used to change target of sampling information.\n"
79 " The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
80 "-a System-wide collection.\n"
81 #if defined(__ANDROID__)
82 "--app package_name Profile the process of an Android application.\n"
83 " On non-rooted devices, the app must be debuggable,\n"
84 " because we use run-as to switch to the app's context.\n"
86 "-b Enable take branch stack sampling. Same as '-j any'\n"
87 "-c count Set event sample period. It means recording one sample when\n"
88 " [count] events happen. Can't be used with -f/-F option.\n"
89 " For tracepoint events, the default option is -c 1.\n"
90 "--call-graph fp | dwarf[,<dump_stack_size>]\n"
91 " Enable call graph recording. Use frame pointer or dwarf debug\n"
92 " frame as the method to parse call graph in stack.\n"
93 " Default is dwarf,65528.\n"
94 "--cpu cpu_item1,cpu_item2,...\n"
95 " Collect samples only on the selected cpus. cpu_item can be cpu\n"
96 " number like 1, or cpu range like 0-3.\n"
97 "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n"
98 " [command]. Here time_in_sec may be any positive\n"
99 " floating point number.\n"
100 "-e event1[:modifier1],event2[:modifier2],...\n"
101 " Select the event list to sample. Use `simpleperf list` to find\n"
102 " all possible event names. Modifiers can be added to define how\n"
103 " the event should be monitored.\n"
104 " Possible modifiers are:\n"
105 " u - monitor user space events only\n"
106 " k - monitor kernel space events only\n"
107 "-f freq Set event sample frequency. It means recording at most [freq]\n"
108 " samples every second. For non-tracepoint events, the default\n"
109 " option is -f 4000.\n"
110 "-F freq Same as '-f freq'.\n"
111 "-g Same as '--call-graph dwarf'.\n"
112 "--group event1[:modifier],event2[:modifier2],...\n"
113 " Similar to -e option. But events specified in the same --group\n"
114 " option are monitored as a group, and scheduled in and out at the\n"
116 "-j branch_filter1,branch_filter2,...\n"
117 " Enable taken branch stack sampling. Each sample captures a series\n"
118 " of consecutive taken branches.\n"
119 " The following filters are defined:\n"
120 " any: any type of branch\n"
121 " any_call: any function call or system call\n"
122 " any_ret: any function return or system call return\n"
123 " ind_call: any indirect branch\n"
124 " u: only when the branch target is at the user level\n"
125 " k: only when the branch target is in the kernel\n"
126 " This option requires at least one branch type among any, any_call,\n"
127 " any_ret, ind_call.\n"
128 "-m mmap_pages Set the size of the buffer used to receiving sample data from\n"
129 " the kernel. It should be a power of 2. If not set, the max\n"
130 " possible value <= 1024 will be used.\n"
131 "--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n"
132 " kernel symbols will be dumped when needed.\n"
133 "--no-dump-symbols Don't dump symbols in perf.data. By default symbols are\n"
134 " dumped in perf.data, to support reporting in another\n"
136 "--no-inherit Don't record created child threads/processes.\n"
137 "--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
138 " will be unwound by default. Use this option to disable the\n"
139 " unwinding of the user's stack.\n"
140 "-o record_file_name Set record file name, default is perf.data.\n"
141 "-p pid1,pid2,... Record events on existing processes. Mutually exclusive\n"
143 "--post-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
144 " will be unwound while recording by default. But it may lose\n"
145 " records as stacking unwinding can be time consuming. Use this\n"
146 " option to unwind the user's stack after recording.\n"
147 "--start_profiling_fd fd_no After starting profiling, write \"STARTED\" to\n"
148 " <fd_no>, then close <fd_no>.\n"
149 "--symfs <dir> Look for files with symbols relative to this directory.\n"
150 " This option is used to provide files with symbol table and\n"
151 " debug information, which are used for unwinding and dumping symbols.\n"
152 "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
154 // Below options are only used internally and shouldn't be visible to the public.
155 "--in-app We are already running in the app's context.\n"
// Member initializers: option defaults that ParseOptions() may override.
159 use_sample_freq_(false),
161 use_sample_period_(false),
163 system_wide_collection_(false),
165 fp_callchain_sampling_(false),
166 dwarf_callchain_sampling_(false),
167 dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
168 unwind_dwarf_callchain_(true),
170 child_inherit_(true),
172 can_dump_kernel_symbols_(true),
174 event_selection_set_(false),
175 mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
176 record_filename_("perf.data"),
177 start_sampling_time_in_ns_(0),
178 sample_record_count_(0),
179 lost_record_count_(0),
180 start_profiling_fd_(-1),
181 in_app_context_(false) {
182 // Stop profiling if parent exits.
183 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
// --app can still replace this default package name in ParseOptions().
184 app_package_name_ = GetDefaultAppPackageName();
187 bool Run(const std::vector<std::string>& args);
190 bool ParseOptions(const std::vector<std::string>& args,
191 std::vector<std::string>* non_option_args);
192 bool SetEventSelectionFlags();
193 bool CreateAndInitRecordFile();
194 std::unique_ptr<RecordFileWriter> CreateRecordFile(
195 const std::string& filename);
196 bool DumpKernelSymbol();
197 bool DumpTracingData();
198 bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
199 bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id);
200 bool ProcessRecord(Record* record);
201 void UpdateRecordForEmbeddedElfPath(Record* record);
202 bool UnwindRecord(Record* record);
203 bool PostUnwind(const std::vector<std::string>& args);
204 bool DumpAdditionalFeatures(const std::vector<std::string>& args);
205 bool DumpBuildIdFeature();
206 bool DumpFileFeature();
207 void CollectHitFileInfo(const SampleRecord& r);
// Set by -f/-F and -c respectively; ParseOptions() rejects using both.
209 bool use_sample_freq_;
210 uint64_t sample_freq_; // Sample 'sample_freq_' times per second.
211 bool use_sample_period_;
212 uint64_t sample_period_; // Sample once when 'sample_period_' events occur.
214 bool system_wide_collection_;
// PERF_SAMPLE_BRANCH_* flags selected by -b / -j.
215 uint64_t branch_sampling_;
216 bool fp_callchain_sampling_;
217 bool dwarf_callchain_sampling_;
218 uint32_t dump_stack_size_in_dwarf_sampling_;
219 bool unwind_dwarf_callchain_;
222 double duration_in_sec_;
223 bool can_dump_kernel_symbols_;
225 std::vector<int> cpus_;
226 EventSelectionSet event_selection_set_;
// Passed as the two page-count arguments to MmapEventFiles(); -m sets both
// members to the same value.
228 std::pair<size_t, size_t> mmap_page_range_;
230 ThreadTree thread_tree_;
231 std::string record_filename_;
232 std::unique_ptr<RecordFileWriter> record_file_writer_;
234 uint64_t start_sampling_time_in_ns_; // nanoseconds from machine starting
// Counters updated in ProcessRecord() and reported at the end of Run().
236 uint64_t sample_record_count_;
237 uint64_t lost_record_count_;
// -1 means --start_profiling_fd was not given.
238 int start_profiling_fd_;
239 std::string app_package_name_;
240 bool in_app_context_;
// Entry point of the record command. Works through the numbered steps below:
// prepare the environment, parse options, create the workload and choose the
// monitored targets, open and mmap the perf event files, create the record
// file, register loop events, run the IO event loop, dump additional features,
// post-unwind, and finally log how many samples were recorded and lost.
// `args` are the arguments following "record" on the command line.
243 bool RecordCommand::Run(const std::vector<std::string>& args) {
244 // 0. Do some environment preparation.
245 if (!CheckPerfEventLimit()) {
248 if (!InitPerfClock()) {
253 // 1. Parse options, and use default measured event type if not given.
254 std::vector<std::string> workload_args;
255 if (!ParseOptions(args, &workload_args)) {
258 if (!app_package_name_.empty() && !in_app_context_) {
259 // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
260 // it will be impossible when using --app. So don't switch to app's context when we are
263 return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
267 if (event_selection_set_.empty()) {
268 if (!event_selection_set_.AddEventType(default_measured_event_type)) {
272 if (!SetEventSelectionFlags()) {
275 ScopedCurrentArch scoped_arch(GetMachineArch());
277 // 2. Create workload.
278 std::unique_ptr<Workload> workload;
279 if (!workload_args.empty()) {
280 workload = Workload::CreateWorkload(workload_args);
281 if (workload == nullptr) {
// Target selection: system-wide monitoring uses tid -1; otherwise, when no
// target was given by -p/-t, fall back to the workload or the app process.
285 bool need_to_check_targets = false;
286 if (system_wide_collection_) {
287 event_selection_set_.AddMonitoredThreads({-1});
288 } else if (!event_selection_set_.HasMonitoredTarget()) {
289 if (workload != nullptr) {
290 event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
291 event_selection_set_.SetEnableOnExec(true);
292 if (event_selection_set_.HasInplaceSampler()) {
293 // Start worker early, because the worker process has to setup inplace-sampler server
294 // before we try to connect it.
295 if (!workload->Start()) {
299 } else if (!app_package_name_.empty()) {
300 // If app process is not created, wait for it. This allows simpleperf starts before
301 // app process. In this way, we can have a better support of app start-up time profiling.
302 int pid = WaitForAppProcess(app_package_name_);
303 event_selection_set_.AddMonitoredProcesses({pid});
306 << "No threads to monitor. Try `simpleperf help record` for help";
310 need_to_check_targets = true;
313 // 3. Open perf_event_files, create mapped buffers for perf_event_files.
314 if (!event_selection_set_.OpenEventFiles(cpus_)) {
317 if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first,
318 mmap_page_range_.second)) {
322 // 4. Create perf.data.
323 if (!CreateAndInitRecordFile()) {
327 // 5. Add read/signal/periodic Events.
329 std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
330 if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
333 if (!event_selection_set_.HandleCpuHotplugEvents(cpus_)) {
336 if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
339 IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
// Any of these signals makes the event loop exit.
340 if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
341 [&]() { return loop->ExitLoop(); })) {
// With --duration, a periodic event exits the loop.
344 if (duration_in_sec_ != 0) {
345 if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
346 [&]() { return loop->ExitLoop(); })) {
351 // 6. Write records in mapped buffers of perf_event_files to output file while
352 // workload is running.
353 start_sampling_time_in_ns_ = GetPerfClock();
354 LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_
// The workload may already have been started above for the inplace sampler.
356 if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
359 if (start_profiling_fd_ != -1) {
360 if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
361 PLOG(ERROR) << "failed to write to start_profiling_fd_";
363 close(start_profiling_fd_);
365 if (!loop->RunLoop()) {
368 if (!event_selection_set_.FinishReadMmapEventData()) {
372 // 7. Dump additional features, and close record file.
373 if (!DumpAdditionalFeatures(args)) {
376 if (!record_file_writer_->Close()) {
380 // 8. Unwind dwarf callchain.
382 if (!PostUnwind(args)) {
387 // 9. Show brief record result.
388 LOG(INFO) << "Samples recorded: " << sample_record_count_
389 << ". Samples lost: " << lost_record_count_ << ".";
// Guard against dividing by zero when nothing was recorded or lost.
390 if (sample_record_count_ + lost_record_count_ != 0) {
391 double lost_percent = static_cast<double>(lost_record_count_) /
392 (lost_record_count_ + sample_record_count_);
// Despite its name, lost_percent is a fraction in [0, 1]; the bar is 10%.
393 constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
394 if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
395 LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
396 << "consider increasing mmap_pages(-m), "
397 << "or decreasing sample frequency(-f), "
398 << "or increasing sample period(-c).";
// Parses the leading arguments that start with '-' into member state, then
// validates option combinations and copies the remaining arguments (the
// command to run, if any) into *non_option_args.
404 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
405 std::vector<std::string>* non_option_args) {
// Option scanning stops at the first empty argument or one not starting with '-'.
407 for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
408 if (args[i] == "-a") {
409 system_wide_collection_ = true;
410 } else if (args[i] == "--app") {
411 if (!NextArgumentOrError(args, &i)) {
414 app_package_name_ = args[i];
415 } else if (args[i] == "-b") {
// This assigns rather than ORs, so flags set by an earlier -j are replaced.
416 branch_sampling_ = branch_sampling_type_map["any"];
417 } else if (args[i] == "-c") {
418 if (!NextArgumentOrError(args, &i)) {
422 sample_period_ = strtoull(args[i].c_str(), &endptr, 0);
// Trailing characters or a zero period are rejected.
423 if (*endptr != '\0' || sample_period_ == 0) {
424 LOG(ERROR) << "Invalid sample period: '" << args[i] << "'";
427 use_sample_period_ = true;
428 } else if (args[i] == "--call-graph") {
429 if (!NextArgumentOrError(args, &i)) {
// Expected form: "fp" or "dwarf[,<dump_stack_size>]".
432 std::vector<std::string> strs = android::base::Split(args[i], ",");
433 if (strs[0] == "fp") {
434 fp_callchain_sampling_ = true;
435 dwarf_callchain_sampling_ = false;
436 } else if (strs[0] == "dwarf") {
437 fp_callchain_sampling_ = false;
438 dwarf_callchain_sampling_ = true;
439 if (strs.size() > 1) {
441 uint64_t size = strtoull(strs[1].c_str(), &endptr, 0);
442 if (*endptr != '\0' || size > UINT_MAX) {
443 LOG(ERROR) << "invalid dump stack size in --call-graph option: "
447 if ((size & 7) != 0) {
448 LOG(ERROR) << "dump stack size " << size
449 << " is not 8-byte aligned.";
// NOTE(review): `>=` also rejects size == MAX_DUMP_STACK_SIZE, although the
// message says "bigger than" and the constructor default equals
// MAX_DUMP_STACK_SIZE - confirm whether `>` was intended.
452 if (size >= MAX_DUMP_STACK_SIZE) {
453 LOG(ERROR) << "dump stack size " << size
454 << " is bigger than max allowed size "
455 << MAX_DUMP_STACK_SIZE << ".";
458 dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
461 LOG(ERROR) << "unexpected argument for --call-graph option: "
465 } else if (args[i] == "--cpu") {
466 if (!NextArgumentOrError(args, &i)) {
469 cpus_ = GetCpusFromString(args[i]);
470 } else if (args[i] == "--duration") {
471 if (!NextArgumentOrError(args, &i)) {
474 if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_,
476 LOG(ERROR) << "Invalid duration: " << args[i].c_str();
479 } else if (args[i] == "-e") {
480 if (!NextArgumentOrError(args, &i)) {
// Each comma-separated event is added individually (not as a group).
483 std::vector<std::string> event_types = android::base::Split(args[i], ",");
484 for (auto& event_type : event_types) {
485 if (!event_selection_set_.AddEventType(event_type)) {
489 } else if (args[i] == "-f" || args[i] == "-F") {
490 if (!NextArgumentOrError(args, &i)) {
493 if (!android::base::ParseUint(args[i].c_str(), &sample_freq_)) {
494 LOG(ERROR) << "Invalid sample frequency: " << args[i];
497 if (!CheckSampleFrequency(sample_freq_)) {
500 use_sample_freq_ = true;
501 } else if (args[i] == "-g") {
502 fp_callchain_sampling_ = false;
503 dwarf_callchain_sampling_ = true;
504 } else if (args[i] == "--group") {
505 if (!NextArgumentOrError(args, &i)) {
// The whole comma-separated list is added as one event group.
508 std::vector<std::string> event_types = android::base::Split(args[i], ",");
509 if (!event_selection_set_.AddEventGroup(event_types)) {
512 } else if (args[i] == "--in-app") {
513 in_app_context_ = true;
514 } else if (args[i] == "-j") {
515 if (!NextArgumentOrError(args, &i)) {
518 std::vector<std::string> branch_sampling_types =
519 android::base::Split(args[i], ",");
520 for (auto& type : branch_sampling_types) {
521 auto it = branch_sampling_type_map.find(type);
522 if (it == branch_sampling_type_map.end()) {
523 LOG(ERROR) << "unrecognized branch sampling filter: " << type;
526 branch_sampling_ |= it->second;
528 } else if (args[i] == "-m") {
529 if (!NextArgumentOrError(args, &i)) {
533 uint64_t pages = strtoull(args[i].c_str(), &endptr, 0);
534 if (*endptr != '\0' || !IsPowerOfTwo(pages)) {
535 LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
// An explicit -m pins both ends of the range to the requested page count.
538 mmap_page_range_.first = mmap_page_range_.second = pages;
539 } else if (args[i] == "--no-dump-kernel-symbols") {
540 can_dump_kernel_symbols_ = false;
541 } else if (args[i] == "--no-dump-symbols") {
542 dump_symbols_ = false;
543 } else if (args[i] == "--no-inherit") {
544 child_inherit_ = false;
545 } else if (args[i] == "--no-unwind") {
546 unwind_dwarf_callchain_ = false;
547 } else if (args[i] == "-o") {
548 if (!NextArgumentOrError(args, &i)) {
551 record_filename_ = args[i];
552 } else if (args[i] == "-p") {
553 if (!NextArgumentOrError(args, &i)) {
556 std::set<pid_t> pids;
557 if (!GetValidThreadsFromThreadString(args[i], &pids)) {
560 event_selection_set_.AddMonitoredProcesses(pids);
561 } else if (args[i] == "--post-unwind") {
563 } else if (args[i] == "--start_profiling_fd") {
564 if (!NextArgumentOrError(args, &i)) {
567 if (!android::base::ParseInt(args[i].c_str(), &start_profiling_fd_, 0)) {
568 LOG(ERROR) << "Invalid start_profiling_fd: " << args[i];
571 } else if (args[i] == "--symfs") {
572 if (!NextArgumentOrError(args, &i)) {
575 if (!Dso::SetSymFsDir(args[i])) {
578 } else if (args[i] == "-t") {
579 if (!NextArgumentOrError(args, &i)) {
582 std::set<pid_t> tids;
583 if (!GetValidThreadsFromThreadString(args[i], &tids)) {
586 event_selection_set_.AddMonitoredThreads(tids);
588 ReportUnknownOption(args, i);
// Cross-option validation starts here.
593 if (use_sample_freq_ && use_sample_period_) {
594 LOG(ERROR) << "-f option can't be used with -c option.";
598 if (!dwarf_callchain_sampling_) {
// unwind_dwarf_callchain_ defaults to true, so false here means --no-unwind
// was passed without dwarf call-graph sampling.
599 if (!unwind_dwarf_callchain_) {
601 << "--no-unwind is only used with `--call-graph dwarf` option.";
604 unwind_dwarf_callchain_ = false;
607 if (!dwarf_callchain_sampling_) {
609 << "--post-unwind is only used with `--call-graph dwarf` option.";
612 if (!unwind_dwarf_callchain_) {
613 LOG(ERROR) << "--post-unwind can't be used with `--no-unwind` option.";
618 if (fp_callchain_sampling_) {
619 if (GetBuildArch() == ARCH_ARM) {
620 LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
621 << "consider using `-g` option or profiling on aarch64 architecture.";
625 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
626 LOG(ERROR) << "Record system wide and existing processes/threads can't be "
627 "used at the same time.";
631 if (system_wide_collection_ && !IsRoot()) {
632 LOG(ERROR) << "System wide profiling needs root privilege.";
636 if (dump_symbols_ && can_dump_kernel_symbols_) {
637 // No need to dump kernel symbols as we will dump all required symbols.
638 can_dump_kernel_symbols_ = false;
// Everything after the options is handed back to the caller.
641 non_option_args->clear();
642 for (; i < args.size(); ++i) {
643 non_option_args->push_back(args[i]);
// Applies the parsed sampling options to event_selection_set_: sample
// frequency or period (or the default frequency), sample-id-all, branch
// sampling, the call-chain sampling mode, and child inheritance.
648 bool RecordCommand::SetEventSelectionFlags() {
649 if (use_sample_freq_) {
650 event_selection_set_.SetSampleFreq(sample_freq_);
651 } else if (use_sample_period_) {
652 event_selection_set_.SetSamplePeriod(sample_period_);
654 event_selection_set_.UseDefaultSampleFreq();
656 event_selection_set_.SampleIdAll();
657 if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
// Frame-pointer sampling takes precedence when both flags are set.
660 if (fp_callchain_sampling_) {
661 event_selection_set_.EnableFpCallChainSampling();
662 } else if (dwarf_callchain_sampling_) {
663 if (!event_selection_set_.EnableDwarfCallChainSampling(
664 dump_stack_size_in_dwarf_sampling_)) {
668 event_selection_set_.SetInherit(child_inherit_);
// Creates record_file_writer_ for record_filename_, then writes the initial
// records: kernel symbols, tracing data, kernel/module mmaps, and per-thread
// comm/mmap records.
672 bool RecordCommand::CreateAndInitRecordFile() {
673 record_file_writer_ = CreateRecordFile(record_filename_);
674 if (record_file_writer_ == nullptr) {
677 // Use first perf_event_attr and first event id to dump mmap and comm records.
678 EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
679 if (!DumpKernelSymbol()) {
682 if (!DumpTracingData()) {
685 if (!DumpKernelAndModuleMmaps(*attr_id.attr, attr_id.ids[0])) {
688 if (!DumpThreadCommAndMmaps(*attr_id.attr, attr_id.ids[0])) {
// Creates a RecordFileWriter for `filename` and writes the attr section
// using the event attrs/ids of event_selection_set_.
694 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
695 const std::string& filename) {
696 std::unique_ptr<RecordFileWriter> writer =
697 RecordFileWriter::CreateInstance(filename);
698 if (writer == nullptr) {
702 if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
// When kernel symbol dumping is enabled, the selected events need kernel
// symbols, and CheckKernelSymbolAddresses() succeeds, reads /proc/kallsyms
// and passes its content to ProcessRecord() as a KernelSymbolRecord.
708 bool RecordCommand::DumpKernelSymbol() {
709 if (can_dump_kernel_symbols_) {
710 std::string kallsyms;
711 if (event_selection_set_.NeedKernelSymbol() &&
712 CheckKernelSymbolAddresses()) {
713 if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
714 PLOG(ERROR) << "failed to read /proc/kallsyms";
717 KernelSymbolRecord r(kallsyms);
718 if (!ProcessRecord(&r)) {
// If any tracepoint events are selected, collects their tracing data and
// passes it to ProcessRecord() as a TracingDataRecord.
726 bool RecordCommand::DumpTracingData() {
727 std::vector<const EventType*> tracepoint_event_types =
728 event_selection_set_.GetTracepointEvents();
729 if (tracepoint_event_types.empty()) {
730 return true; // No need to dump tracing data.
732 std::vector<char> tracing_data;
733 if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
736 TracingDataRecord record(tracing_data);
737 if (!ProcessRecord(&record)) {
// Emits one MmapRecord for the kernel and one for each kernel module, as
// reported by GetKernelAndModuleMmaps(). All of them are built with the
// in-kernel flag set, pid UINT_MAX and tid 0.
743 bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr,
745 KernelMmap kernel_mmap;
746 std::vector<KernelMmap> module_mmaps;
747 GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);
749 MmapRecord mmap_record(attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
750 kernel_mmap.len, 0, kernel_mmap.filepath, event_id);
751 if (!ProcessRecord(&mmap_record)) {
754 for (auto& module_mmap : module_mmaps) {
755 MmapRecord mmap_record(attr, true, UINT_MAX, 0, module_mmap.start_addr,
756 module_mmap.len, 0, module_mmap.filepath, event_id);
757 if (!ProcessRecord(&mmap_record)) {
// Emits records describing the monitored processes and threads. For each
// process it writes executable mmap records and a comm record; for each of
// its threads that is selected it writes a fork record and a comm record.
764 bool RecordCommand::DumpThreadCommAndMmaps(const perf_event_attr& attr,
766 // Decide which processes and threads to dump.
767 // For system_wide profiling, dump all threads.
768 // For non system wide profiling, build dump_threads.
769 bool all_threads = system_wide_collection_;
// Start from the explicitly monitored threads, then add every thread of each
// monitored process.
770 std::set<pid_t> dump_threads = event_selection_set_.GetMonitoredThreads();
771 for (const auto& pid : event_selection_set_.GetMonitoredProcesses()) {
772 std::vector<pid_t> tids = GetThreadsInProcess(pid);
773 dump_threads.insert(tids.begin(), tids.end());
776 // Collect processes to dump.
777 std::vector<pid_t> processes;
779 processes = GetAllProcesses();
// A set is used so each owning process is listed only once.
781 std::set<pid_t> process_set;
782 for (const auto& tid : dump_threads) {
784 if (!GetProcessForThread(tid, &pid)) {
787 process_set.insert(pid);
789 processes.insert(processes.end(), process_set.begin(), process_set.end());
792 // Dump each process and its threads.
793 for (auto& pid : processes) {
794 // Dump mmap records.
795 std::vector<ThreadMmap> thread_mmaps;
796 if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) {
797 // The process may exit before we get its info.
800 for (const auto& map : thread_mmaps) {
801 if (map.executable == 0) {
802 continue; // No need to dump non-executable mmap info.
804 MmapRecord record(attr, false, pid, pid, map.start_addr, map.len,
805 map.pgoff, map.name, event_id);
806 if (!ProcessRecord(&record)) {
810 // Dump process name.
812 if (GetThreadName(pid, &name)) {
813 CommRecord record(attr, pid, pid, name, event_id, 0);
814 if (!ProcessRecord(&record)) {
// Dump a fork and comm record for each selected thread of this process.
819 std::vector<pid_t> threads = GetThreadsInProcess(pid);
820 for (const auto& tid : threads) {
824 if (all_threads || dump_threads.find(tid) != dump_threads.end()) {
825 ForkRecord fork_record(attr, pid, tid, pid, pid, event_id);
826 if (!ProcessRecord(&fork_record)) {
829 if (GetThreadName(tid, &name)) {
830 CommRecord comm_record(attr, pid, tid, name, event_id, 0);
831 if (!ProcessRecord(&comm_record)) {
// Handles one record before it is written to the record file. For
// system-wide collection it checks sample time against the sampling start
// time. It rewrites mmap paths for ELF files embedded in APKs, unwinds when
// unwinding while recording is enabled, and updates the sample/lost counters.
841 bool RecordCommand::ProcessRecord(Record* record) {
842 if (system_wide_collection_ && record->type() == PERF_RECORD_SAMPLE) {
843 auto& r = *static_cast<SampleRecord*>(record);
844 // Omit samples get before start sampling time.
845 if (r.time_data.time < start_sampling_time_in_ns_) {
849 UpdateRecordForEmbeddedElfPath(record);
// With post-unwind, unwinding is deferred to PostUnwind().
850 if (unwind_dwarf_callchain_ && !post_unwind_) {
851 thread_tree_.Update(*record);
852 if (!UnwindRecord(record)) {
856 if (record->type() == PERF_RECORD_SAMPLE) {
857 sample_record_count_++;
858 } else if (record->type() == PERF_RECORD_LOST) {
// A single lost record carries a count of lost samples, so add that count.
859 lost_record_count_ += static_cast<LostRecord*>(record)->lost;
861 bool result = record_file_writer_->WriteRecord(*record);
// Rewrites a user-space mmap record with a non-zero page offset so that it
// names the ELF file embedded in an APK. RecordType is MmapRecord or
// Mmap2Record (see UpdateRecordForEmbeddedElfPath()).
865 template <class RecordType>
866 void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) {
867 RecordType& r = *record;
868 if (!r.InKernel() && r.data->pgoff != 0) {
869 // For the case of a shared library "foobar.so" embedded
870 // inside an APK, we rewrite the original MMAP from
871 // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
872 // so as to make the library name explicit. This update is
873 // done here (as part of the record operation) as opposed to
874 // on the host during the report, since we want to report
875 // the correct library name even if the APK in question
876 // is not present on the host. The new offset W is
877 // calculated to be with respect to the start of foobar.so,
878 // not to the start of path.apk.
880 ApkInspector::FindElfInApkByOffset(r.filename, r.data->pgoff);
882 // Compute new offset relative to start of elf in APK.
884 data.pgoff -= ee->entry_offset();
885 r.SetDataAndFilename(data, GetUrlInApk(r.filename, ee->entry_name()));
// Forwards MMAP and MMAP2 records to UpdateMmapRecordForEmbeddedElfPath()
// after casting them to their concrete record types.
890 void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) {
891 if (record->type() == PERF_RECORD_MMAP) {
892 UpdateMmapRecordForEmbeddedElfPath(static_cast<MmapRecord*>(record));
893 } else if (record->type() == PERF_RECORD_MMAP2) {
894 UpdateMmapRecordForEmbeddedElfPath(static_cast<Mmap2Record*>(record));
// For a sample record that carries a callchain, user registers (non-zero
// mask) and a non-empty user stack, unwinds the stack and replaces the
// register and stack data in the record with the resulting call chain.
898 bool RecordCommand::UnwindRecord(Record* record) {
899 if (record->type() == PERF_RECORD_SAMPLE) {
900 SampleRecord& r = *static_cast<SampleRecord*>(record);
901 if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) &&
902 (r.sample_type & PERF_SAMPLE_REGS_USER) &&
903 (r.regs_user_data.reg_mask != 0) &&
904 (r.sample_type & PERF_SAMPLE_STACK_USER) &&
905 (r.GetValidStackSize() > 0)) {
906 ThreadEntry* thread =
907 thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
908 RegSet regs = CreateRegSet(r.regs_user_data.abi,
909 r.regs_user_data.reg_mask,
910 r.regs_user_data.regs);
911 // Normally do strict arch check when unwinding stack. But allow unwinding
912 // 32-bit processes on 64-bit devices for system wide profiling.
913 bool strict_arch_check = !system_wide_collection_;
914 std::vector<uint64_t> unwind_ips =
915 UnwindCallChain(r.regs_user_data.abi, *thread, regs,
916 r.stack_user_data.data,
917 r.GetValidStackSize(), strict_arch_check);
918 r.ReplaceRegAndStackWithCallChain(unwind_ips);
// Unwinds after recording: reads the data section of the recorded file,
// unwinds each record and writes it to "<record_filename_>.tmp", dumps the
// additional features again, and then replaces the original file with the
// temporary one.
924 bool RecordCommand::PostUnwind(const std::vector<std::string>& args) {
// Clear the thread tree so it can be updated again from the records read below.
925 thread_tree_.ClearThreadAndMap();
926 std::unique_ptr<RecordFileReader> reader =
927 RecordFileReader::CreateInstance(record_filename_);
928 if (reader == nullptr) {
931 std::string tmp_filename = record_filename_ + ".tmp";
932 record_file_writer_ = CreateRecordFile(tmp_filename);
933 if (record_file_writer_ == nullptr) {
936 bool result = reader->ReadDataSection(
937 [this](std::unique_ptr<Record> record) {
938 thread_tree_.Update(*record);
939 if (!UnwindRecord(record.get())) {
942 return record_file_writer_->WriteRecord(*record);
948 if (!DumpAdditionalFeatures(args)) {
951 if (!record_file_writer_->Close()) {
// Swap the unwound file into place: remove the original, then rename.
955 if (unlink(record_filename_.c_str()) != 0) {
956 PLOG(ERROR) << "failed to remove " << record_filename_;
959 if (rename(tmp_filename.c_str(), record_filename_.c_str()) != 0) {
960 PLOG(ERROR) << "failed to rename " << tmp_filename << " to "
// Writes the feature sections of the record file: build ids, file/symbol
// info (only when dump_symbols_ is set), OS release, arch, command line,
// branch stack (only when branch sampling is on), and meta info.
// `args` are the record command arguments stored in the cmdline feature.
967 bool RecordCommand::DumpAdditionalFeatures(
968 const std::vector<std::string>& args) {
969 // Read data section of perf.data to collect hit file information.
970 thread_tree_.ClearThreadAndMap();
971 if (CheckKernelSymbolAddresses()) {
972 Dso::ReadKernelSymbolsFromProc();
974 auto callback = [&](const Record* r) {
975 thread_tree_.Update(*r);
976 if (r->type() == PERF_RECORD_SAMPLE) {
977 CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r));
980 if (!record_file_writer_->ReadDataSection(callback)) {
// NOTE(review): feature_count is passed to BeginWriteFeatures() and
// presumably must match the number of features written below - confirm.
984 size_t feature_count = 5;
985 if (branch_sampling_) {
991 if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
994 if (!DumpBuildIdFeature()) {
997 if (dump_symbols_ && !DumpFileFeature()) {
1001 if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
1002 PLOG(ERROR) << "uname() failed";
1005 if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
1006 uname_buf.release)) {
1009 if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
1010 uname_buf.machine)) {
// Recorded command line: executable path (or "simpleperf" when unknown),
// "record", then the original arguments.
1014 std::string exec_path = android::base::GetExecutablePath();
1015 if (exec_path.empty()) exec_path = "simpleperf";
1016 std::vector<std::string> cmdline;
1017 cmdline.push_back(exec_path);
1018 cmdline.push_back("record");
1019 cmdline.insert(cmdline.end(), args.begin(), args.end());
1020 if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
1023 if (branch_sampling_ != 0 &&
1024 !record_file_writer_->WriteBranchStackFeature()) {
1028 std::unordered_map<std::string, std::string> info_map;
1029 info_map["simpleperf_version"] = GetSimpleperfVersion();
1030 if (!record_file_writer_->WriteMetaInfoFeature(info_map)) {
1034 if (!record_file_writer_->EndWriteFeatures()) {
// Builds a BuildIdRecord for each DSO that has a dump id and writes them as
// the build-id feature. The build id comes from the kernel, from a kernel
// module, from an ELF file inside an APK, or from a plain ELF file,
// depending on the DSO type and path.
1040 bool RecordCommand::DumpBuildIdFeature() {
1041 std::vector<BuildIdRecord> build_id_records;
1043 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1044 for (Dso* dso : dso_v) {
1045 if (!dso->HasDumpId()) {
1048 if (dso->type() == DSO_KERNEL) {
1049 if (!GetKernelBuildId(&build_id)) {
1052 build_id_records.push_back(
1053 BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
1054 } else if (dso->type() == DSO_KERNEL_MODULE) {
// The module name is the file name of the path without a trailing ".ko".
1055 std::string path = dso->Path();
1056 std::string module_name = basename(&path[0]);
1057 if (android::base::EndsWith(module_name, ".ko")) {
1058 module_name = module_name.substr(0, module_name.size() - 3);
1060 if (!GetModuleBuildId(module_name, &build_id)) {
1061 LOG(DEBUG) << "can't read build_id for module " << module_name;
1064 build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path));
1066 if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
// If the path splits as a URL in an APK, read the build id from the APK
// entry; otherwise read it from the ELF file itself.
1069 auto tuple = SplitUrlInApk(dso->Path());
1070 if (std::get<0>(tuple)) {
1071 ElfStatus result = GetBuildIdFromApkFile(std::get<1>(tuple),
1072 std::get<2>(tuple), &build_id);
1073 if (result != ElfStatus::NO_ERROR) {
1074 LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
1079 ElfStatus result = GetBuildIdFromElfFile(dso->Path(), &build_id);
1080 if (result != ElfStatus::NO_ERROR) {
1081 LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
1086 build_id_records.push_back(
1087 BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
1090 if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
// For each DSO that has a dump id, writes a file feature containing its
// path, type, minimum virtual address, and those of its symbols that have a
// dump id, sorted by address.
1096 bool RecordCommand::DumpFileFeature() {
1097 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1098 for (Dso* dso : dso_v) {
1099 if (!dso->HasDumpId()) {
1102 uint32_t dso_type = dso->type();
1103 uint64_t min_vaddr = dso->MinVirtualAddress();
1105 // Dumping all symbols in hit files takes too much space, so only dump
1107 const std::vector<Symbol>& symbols = dso->GetSymbols();
1108 std::vector<const Symbol*> dump_symbols;
1109 for (const auto& sym : symbols) {
1110 if (sym.HasDumpId()) {
1111 dump_symbols.push_back(&sym);
1114 std::sort(dump_symbols.begin(), dump_symbols.end(), Symbol::CompareByAddr);
1116 if (!record_file_writer_->WriteFileFeature(dso->Path(), dso_type, min_vaddr,
// Marks what a sample hit: the DSO containing the sample ip and the DSO
// containing each callchain ip get a dump id, and when dump_symbols_ is set
// the matching symbols get one too. DumpBuildIdFeature() and
// DumpFileFeature() later select entries by these dump ids.
1124 void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
1125 const ThreadEntry* thread =
1126 thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
1127 const MapEntry* map =
1128 thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel());
1129 Dso* dso = map->dso;
1130 const Symbol* symbol;
1131 if (dump_symbols_) {
// FindSymbol() receives &dso, so it may change which DSO is marked below.
1132 symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso);
1133 if (!symbol->HasDumpId()) {
1134 dso->CreateSymbolDumpId(symbol);
1137 if (!dso->HasDumpId()) {
1138 dso->CreateDumpId();
1140 if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
1141 bool in_kernel = r.InKernel();
1142 bool first_ip = true;
1143 for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
1144 uint64_t ip = r.callchain_data.ips[i];
// Values >= PERF_CONTEXT_MAX are context markers, not addresses.
1145 if (ip >= PERF_CONTEXT_MAX) {
1147 case PERF_CONTEXT_KERNEL:
1150 case PERF_CONTEXT_USER:
1154 LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
1160 // Remove duplication with sample ip.
1161 if (ip == r.ip_data.ip) {
1165 map = thread_tree_.FindMap(thread, ip, in_kernel);
1167 if (dump_symbols_) {
1168 symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso);
1169 if (!symbol->HasDumpId()) {
1170 dso->CreateSymbolDumpId(symbol);
1173 if (!dso->HasDumpId()) {
1174 dso->CreateDumpId();
// Registers a factory under the name "record" that creates a new
// RecordCommand each time it is invoked.
1181 void RegisterRecordCommand() {
1182 RegisterCommand("record",
1183 [] { return std::unique_ptr<Command>(new RecordCommand()); });