2 * Copyright (C) 2015 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef SIMPLE_PERF_SAMPLE_TREE_H_
18 #define SIMPLE_PERF_SAMPLE_TREE_H_
20 #include "callchain.h"
21 #include "dwarf_unwind.h"
22 #include "perf_regs.h"
24 #include "SampleComparator.h"
25 #include "SampleDisplayer.h"
26 #include "thread_tree.h"
28 // A SampleTree is a collection of samples. A profiling report is mainly about
29 // constructing a SampleTree and display it. There are three steps involved:
30 // build the tree, sort the tree, and display it. For example, if we want to
31 // show how many cpu-cycles are spent in different functions, we should do as
33 // 1. Build a SampleTree from SampleRecords with each sample containing
34 // (cpu-cycles, function name). When building the tree, we should merge
35 // samples containing the same function name.
36 // 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the
37 // samples in a decreasing order of cpu-cycles, we should sort it like this.
38 // 3. Display the SampleTree, each sample prints its (cpu-cycles, function name)
41 // We represent the three steps with three template classes.
42 // 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in
43 // SampleTreeBuilder's constructor decides the property of samples should be
45 // 2. After a SampleTree is built and got from SampleTreeBuilder, it should be
46 // sorted by SampleTreeSorter. The sort result decides the order to show
48 // 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which
49 // displays each sample in the SampleTree.
51 template <typename EntryT, typename AccumulateInfoT>
52 class SampleTreeBuilder {
54 explicit SampleTreeBuilder(SampleComparator<EntryT> comparator)
55 : sample_set_(comparator),
56 accumulate_callchain_(false),
57 sample_comparator_(comparator),
58 callchain_sample_set_(comparator),
59 use_branch_address_(false),
60 build_callchain_(false),
61 use_caller_as_callchain_root_(false),
62 strict_unwind_arch_check_(false) {}
64 virtual ~SampleTreeBuilder() {}
66 void SetBranchSampleOption(bool use_branch_address) {
67 use_branch_address_ = use_branch_address;
70 void SetCallChainSampleOptions(bool accumulate_callchain,
72 bool use_caller_as_callchain_root,
73 bool strict_unwind_arch_check) {
74 accumulate_callchain_ = accumulate_callchain;
75 build_callchain_ = build_callchain;
76 use_caller_as_callchain_root_ = use_caller_as_callchain_root;
77 strict_unwind_arch_check_ = strict_unwind_arch_check;
80 void ProcessSampleRecord(const SampleRecord& r) {
81 if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) {
82 for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
83 auto& item = r.branch_stack_data.stack[i];
84 if (item.from != 0 && item.to != 0) {
85 CreateBranchSample(r, item);
90 bool in_kernel = r.InKernel();
91 AccumulateInfoT acc_info;
92 EntryT* sample = CreateSample(r, in_kernel, &acc_info);
93 if (sample == nullptr) {
96 if (accumulate_callchain_) {
97 std::vector<uint64_t> ips;
98 if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
99 ips.insert(ips.end(), r.callchain_data.ips,
100 r.callchain_data.ips + r.callchain_data.ip_nr);
102 const ThreadEntry* thread = GetThreadOfSample(sample);
103 // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to
104 // make up for the missing kernel patch in N9. See b/22612370.
105 if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
106 (r.regs_user_data.reg_mask != 0) &&
107 (r.sample_type & PERF_SAMPLE_STACK_USER) &&
108 (r.GetValidStackSize() > 0)) {
109 RegSet regs = CreateRegSet(r.regs_user_data.abi,
110 r.regs_user_data.reg_mask,
111 r.regs_user_data.regs);
112 std::vector<uint64_t> unwind_ips =
113 UnwindCallChain(r.regs_user_data.abi, *thread, regs,
114 r.stack_user_data.data,
115 r.GetValidStackSize(), strict_unwind_arch_check_);
116 if (!unwind_ips.empty()) {
117 ips.push_back(PERF_CONTEXT_USER);
118 ips.insert(ips.end(), unwind_ips.begin(), unwind_ips.end());
122 std::vector<EntryT*> callchain;
123 callchain.push_back(sample);
125 bool first_ip = true;
126 for (auto& ip : ips) {
127 if (ip >= PERF_CONTEXT_MAX) {
129 case PERF_CONTEXT_KERNEL:
132 case PERF_CONTEXT_USER:
136 LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip;
141 // Remove duplication with sampled ip.
142 if (ip == r.ip_data.ip) {
146 EntryT* callchain_sample =
147 CreateCallChainSample(sample, ip, in_kernel, callchain, acc_info);
148 if (callchain_sample == nullptr) {
151 callchain.push_back(callchain_sample);
155 if (build_callchain_) {
156 std::set<EntryT*> added_set;
157 if (use_caller_as_callchain_root_) {
158 std::reverse(callchain.begin(), callchain.end());
160 while (callchain.size() >= 2) {
161 EntryT* sample = callchain[0];
162 callchain.erase(callchain.begin());
163 // Add only once for recursive calls on callchain.
164 if (added_set.find(sample) != added_set.end()) {
167 added_set.insert(sample);
168 InsertCallChainForSample(sample, callchain, acc_info);
174 std::vector<EntryT*> GetSamples() const {
175 std::vector<EntryT*> result;
176 for (auto& entry : sample_set_) {
177 result.push_back(entry);
183 virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel,
184 AccumulateInfoT* acc_info) = 0;
185 virtual EntryT* CreateBranchSample(const SampleRecord& r,
186 const BranchStackItemType& item) = 0;
187 virtual EntryT* CreateCallChainSample(const EntryT* sample, uint64_t ip,
189 const std::vector<EntryT*>& callchain,
190 const AccumulateInfoT& acc_info) = 0;
191 virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0;
192 virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0;
193 virtual bool FilterSample(const EntryT*) { return true; }
195 virtual void UpdateSummary(const EntryT*) {}
197 virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0;
199 EntryT* InsertSample(std::unique_ptr<EntryT> sample) {
200 if (sample == nullptr || !FilterSample(sample.get())) {
203 UpdateSummary(sample.get());
205 auto it = sample_set_.find(sample.get());
206 if (it == sample_set_.end()) {
207 result = sample.get();
208 sample_set_.insert(sample.get());
209 sample_storage_.push_back(std::move(sample));
212 MergeSample(*it, sample.get());
217 EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample,
218 const std::vector<EntryT*>& callchain) {
219 if (sample == nullptr) {
222 if (!FilterSample(sample.get())) {
223 // Store in callchain_sample_set_ for use in other EntryT's callchain.
224 auto it = callchain_sample_set_.find(sample.get());
225 if (it != callchain_sample_set_.end()) {
228 EntryT* result = sample.get();
229 callchain_sample_set_.insert(sample.get());
230 sample_storage_.push_back(std::move(sample));
234 auto it = sample_set_.find(sample.get());
235 if (it != sample_set_.end()) {
236 EntryT* sample = *it;
237 // Process only once for recursive function call.
238 if (std::find(callchain.begin(), callchain.end(), sample) !=
243 return InsertSample(std::move(sample));
246 void InsertCallChainForSample(EntryT* sample,
247 const std::vector<EntryT*>& callchain,
248 const AccumulateInfoT& acc_info) {
249 uint64_t period = GetPeriodForCallChain(acc_info);
250 sample->callchain.AddCallChain(
251 callchain, period, [&](const EntryT* s1, const EntryT* s2) {
252 return sample_comparator_.IsSameSample(s1, s2);
256 std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
257 bool accumulate_callchain_;
260 const SampleComparator<EntryT> sample_comparator_;
261 // If a CallChainSample is filtered out, it is stored in callchain_sample_set_
262 // and only used in other EntryT's callchain.
263 std::set<EntryT*, SampleComparator<EntryT>> callchain_sample_set_;
264 std::vector<std::unique_ptr<EntryT>> sample_storage_;
266 bool use_branch_address_;
267 bool build_callchain_;
268 bool use_caller_as_callchain_root_;
269 bool strict_unwind_arch_check_;
272 template <typename EntryT>
273 class SampleTreeSorter {
275 explicit SampleTreeSorter(SampleComparator<EntryT> comparator)
276 : comparator_(comparator) {}
278 virtual ~SampleTreeSorter() {}
280 void Sort(std::vector<EntryT*>& v, bool sort_callchain) {
281 if (sort_callchain) {
282 for (auto& sample : v) {
283 SortCallChain(sample);
286 if (!comparator_.empty()) {
287 std::sort(v.begin(), v.end(), [this](const EntryT* s1, const EntryT* s2) {
288 return comparator_(s1, s2);
294 void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); }
297 SampleComparator<EntryT> comparator_;
300 template <typename EntryT, typename InfoT>
301 class SampleTreeDisplayer {
303 explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer)
304 : displayer_(displayer) {}
306 virtual ~SampleTreeDisplayer() {}
308 void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples,
310 displayer_.SetInfo(info);
311 for (const auto& sample : samples) {
312 displayer_.AdjustWidth(sample);
314 displayer_.PrintNames(fp);
315 for (const auto& sample : samples) {
316 displayer_.PrintSample(fp, sample);
321 SampleDisplayer<EntryT, InfoT> displayer_;
324 #endif // SIMPLE_PERF_SAMPLE_TREE_H_