OSDN Git Service

Merge "libf2fs_sparseblock: Android.mk->Android.bp"
[android-x86/system-extras.git] / simpleperf / cmd_kmem.cpp
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "command.h"
18
19 #include <unordered_map>
20
21 #include <android-base/logging.h>
22 #include <android-base/strings.h>
23
24 #include "callchain.h"
25 #include "event_attr.h"
26 #include "event_type.h"
27 #include "record_file.h"
28 #include "sample_tree.h"
29 #include "tracing.h"
30 #include "utils.h"
31
32 namespace {
33
34 struct SlabSample {
35   const Symbol* symbol;            // the function making allocation
36   uint64_t ptr;                    // the start address of the allocated space
37   uint64_t bytes_req;              // requested space size
38   uint64_t bytes_alloc;            // allocated space size
39   uint64_t sample_count;           // count of allocations
40   uint64_t gfp_flags;              // flags used for allocation
41   uint64_t cross_cpu_allocations;  // count of allocations freed not on the
42                                    // cpu allocating them
43   CallChainRoot<SlabSample> callchain;  // a callchain tree representing all
44                                         // callchains in this sample
45   SlabSample(const Symbol* symbol, uint64_t ptr, uint64_t bytes_req,
46              uint64_t bytes_alloc, uint64_t sample_count, uint64_t gfp_flags,
47              uint64_t cross_cpu_allocations)
48       : symbol(symbol),
49         ptr(ptr),
50         bytes_req(bytes_req),
51         bytes_alloc(bytes_alloc),
52         sample_count(sample_count),
53         gfp_flags(gfp_flags),
54         cross_cpu_allocations(cross_cpu_allocations) {}
55
56   uint64_t GetPeriod() const {
57     return sample_count;
58   }
59 };
60
// Sizes of one allocation event. SlabSampleTreeBuilder::CreateSample() fills
// this in for every allocation record and CreateCallChainSample() copies the
// values into each callchain sample built for that record.
struct SlabAccumulateInfo {
  uint64_t bytes_req;    // requested space size of the allocation
  uint64_t bytes_alloc;  // allocated space size of the allocation
};
65
66 BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr);
67 BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req);
68 BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc);
69 BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags);
70 BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations,
71                                      cross_cpu_allocations);
72
73 BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr);
74 BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req);
75 BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc);
76 BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags);
77 BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations,
78                               cross_cpu_allocations);
79
80 static int CompareFragment(const SlabSample* sample1,
81                            const SlabSample* sample2) {
82   uint64_t frag1 = sample1->bytes_alloc - sample1->bytes_req;
83   uint64_t frag2 = sample2->bytes_alloc - sample2->bytes_req;
84   return Compare(frag2, frag1);
85 }
86
87 static std::string DisplayFragment(const SlabSample* sample) {
88   return android::base::StringPrintf("%" PRIu64,
89                                      sample->bytes_alloc - sample->bytes_req);
90 }
91
92 struct SlabSampleTree {
93   std::vector<SlabSample*> samples;
94   uint64_t total_requested_bytes;
95   uint64_t total_allocated_bytes;
96   uint64_t nr_allocations;
97   uint64_t nr_frees;
98   uint64_t nr_cross_cpu_allocations;
99 };
100
101 struct SlabFormat {
102   enum {
103     KMEM_ALLOC,
104     KMEM_FREE,
105   } type;
106   TracingFieldPlace call_site;
107   TracingFieldPlace ptr;
108   TracingFieldPlace bytes_req;
109   TracingFieldPlace bytes_alloc;
110   TracingFieldPlace gfp_flags;
111 };
112
113 class SlabSampleTreeBuilder
114     : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> {
115  public:
116   SlabSampleTreeBuilder(SampleComparator<SlabSample> sample_comparator,
117                         ThreadTree* thread_tree)
118       : SampleTreeBuilder(sample_comparator),
119         thread_tree_(thread_tree),
120         total_requested_bytes_(0),
121         total_allocated_bytes_(0),
122         nr_allocations_(0),
123         nr_cross_cpu_allocations_(0) {}
124
125   SlabSampleTree GetSampleTree() const {
126     SlabSampleTree sample_tree;
127     sample_tree.samples = GetSamples();
128     sample_tree.total_requested_bytes = total_requested_bytes_;
129     sample_tree.total_allocated_bytes = total_allocated_bytes_;
130     sample_tree.nr_allocations = nr_allocations_;
131     sample_tree.nr_frees = nr_frees_;
132     sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_;
133     return sample_tree;
134   }
135
136   void AddSlabFormat(const std::vector<uint64_t>& event_ids,
137                      SlabFormat format) {
138     std::unique_ptr<SlabFormat> p(new SlabFormat(format));
139     for (auto id : event_ids) {
140       event_id_to_format_map_[id] = p.get();
141     }
142     formats_.push_back(std::move(p));
143   }
144
145  protected:
146   SlabSample* CreateSample(const SampleRecord& r, bool in_kernel,
147                            SlabAccumulateInfo* acc_info) override {
148     if (!in_kernel) {
149       // Normally we don't parse records in user space because tracepoint
150       // events all happen in kernel. But if r.ip_data.ip == 0, it may be
151       // a kernel record failed to dump ip register and is still useful.
152       if (r.ip_data.ip == 0) {
153         // It seems we are on a kernel can't dump regset for tracepoint events
154         // because of lacking perf_arch_fetch_caller_regs(). We can't get
155         // callchain, but we can still do a normal report.
156         static bool first = true;
157         if (first) {
158           first = false;
159           if (accumulate_callchain_) {
160             // The kernel doesn't seem to support dumping registers for
161             // tracepoint events because of lacking
162             // perf_arch_fetch_caller_regs().
163             LOG(WARNING) << "simpleperf may not get callchains for tracepoint"
164                          << " events because of lacking kernel support.";
165           }
166         }
167       } else {
168         return nullptr;
169       }
170     }
171     uint64_t id = r.id_data.id;
172     auto it = event_id_to_format_map_.find(id);
173     if (it == event_id_to_format_map_.end()) {
174       return nullptr;
175     }
176     const char* raw_data = r.raw_data.data;
177     SlabFormat* format = it->second;
178     if (format->type == SlabFormat::KMEM_ALLOC) {
179       uint64_t call_site = format->call_site.ReadFromData(raw_data);
180       const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site);
181       uint64_t ptr = format->ptr.ReadFromData(raw_data);
182       uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data);
183       uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data);
184       uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data);
185       SlabSample* sample =
186           InsertSample(std::unique_ptr<SlabSample>(new SlabSample(
187               symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0)));
188       alloc_cpu_record_map_.insert(
189           std::make_pair(ptr, std::make_pair(r.cpu_data.cpu, sample)));
190       acc_info->bytes_req = bytes_req;
191       acc_info->bytes_alloc = bytes_alloc;
192       return sample;
193     } else if (format->type == SlabFormat::KMEM_FREE) {
194       uint64_t ptr = format->ptr.ReadFromData(raw_data);
195       auto it = alloc_cpu_record_map_.find(ptr);
196       if (it != alloc_cpu_record_map_.end()) {
197         SlabSample* sample = it->second.second;
198         if (r.cpu_data.cpu != it->second.first) {
199           sample->cross_cpu_allocations++;
200           nr_cross_cpu_allocations_++;
201         }
202         alloc_cpu_record_map_.erase(it);
203       }
204       nr_frees_++;
205     }
206     return nullptr;
207   }
208
209   SlabSample* CreateBranchSample(const SampleRecord&,
210                                  const BranchStackItemType&) override {
211     return nullptr;
212   }
213
214   SlabSample* CreateCallChainSample(
215       const SlabSample* sample, uint64_t ip, bool in_kernel,
216       const std::vector<SlabSample*>& callchain,
217       const SlabAccumulateInfo& acc_info) override {
218     if (!in_kernel) {
219       return nullptr;
220     }
221     const Symbol* symbol = thread_tree_->FindKernelSymbol(ip);
222     return InsertCallChainSample(
223         std::unique_ptr<SlabSample>(
224             new SlabSample(symbol, sample->ptr, acc_info.bytes_req,
225                            acc_info.bytes_alloc, 1, sample->gfp_flags, 0)),
226         callchain);
227   }
228
229   const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; }
230
231   uint64_t GetPeriodForCallChain(const SlabAccumulateInfo&) override {
232     // Decide the percentage of callchain by the sample_count, so use 1 as the
233     // period when calling AddCallChain().
234     return 1;
235   }
236
237   void UpdateSummary(const SlabSample* sample) override {
238     total_requested_bytes_ += sample->bytes_req;
239     total_allocated_bytes_ += sample->bytes_alloc;
240     nr_allocations_++;
241   }
242
243   void MergeSample(SlabSample* sample1, SlabSample* sample2) override {
244     sample1->bytes_req += sample2->bytes_req;
245     sample1->bytes_alloc += sample2->bytes_alloc;
246     sample1->sample_count += sample2->sample_count;
247   }
248
249  private:
250   ThreadTree* thread_tree_;
251   uint64_t total_requested_bytes_;
252   uint64_t total_allocated_bytes_;
253   uint64_t nr_allocations_;
254   uint64_t nr_frees_;
255   uint64_t nr_cross_cpu_allocations_;
256
257   std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_;
258   std::vector<std::unique_ptr<SlabFormat>> formats_;
259   std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>>
260       alloc_cpu_record_map_;
261 };
262
263 using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>;
264 using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>;
265 using SlabSampleCallgraphDisplayer =
266     CallgraphDisplayer<SlabSample, CallChainNode<SlabSample>>;
267
268 struct EventAttrWithName {
269   perf_event_attr attr;
270   std::string name;
271   std::vector<uint64_t> event_ids;
272 };
273
274 class KmemCommand : public Command {
275  public:
276   KmemCommand()
277       : Command(
278             "kmem", "collect kernel memory allocation information",
279             // clang-format off
280 "Usage: kmem (record [record options] | report [report options])\n"
281 "kmem record\n"
282 "-g        Enable call graph recording. Same as '--call-graph fp'.\n"
283 "--slab    Collect slab allocation information. Default option.\n"
284 "Other record options provided by simpleperf record command are also available.\n"
285 "kmem report\n"
286 "--children  Print the accumulated allocation info appeared in the callchain.\n"
287 "            Can be used on perf.data recorded with `--call-graph fp` option.\n"
288 "-g [callee|caller]  Print call graph for perf.data recorded with\n"
289 "                    `--call-graph fp` option. If callee mode is used, the graph\n"
290 "                     shows how functions are called from others. Otherwise, the\n"
291 "                     graph shows how functions call others. Default is callee\n"
292 "                     mode. The percentage shown in the graph is determined by\n"
293 "                     the hit count of the callchain.\n"
294 "-i          Specify path of record file, default is perf.data\n"
295 "-o report_file_name  Set report file name, default is stdout.\n"
296 "--slab      Report slab allocation information. Default option.\n"
297 "--slab-sort key1,key2,...\n"
298 "            Select the keys to sort and print slab allocation information.\n"
299 "            Should be used with --slab option. Possible keys include:\n"
300 "              hit         -- the allocation count.\n"
301 "              caller      -- the function calling allocation.\n"
302 "              ptr         -- the address of the allocated space.\n"
303 "              bytes_req   -- the total requested space size.\n"
304 "              bytes_alloc -- the total allocated space size.\n"
305 "              fragment    -- the extra allocated space size\n"
306 "                             (bytes_alloc - bytes_req).\n"
307 "              gfp_flags   -- the flags used for allocation.\n"
308 "              pingpong    -- the count of allocations that are freed not on\n"
309 "                             the cpu allocating them.\n"
310 "            The default slab sort keys are:\n"
311 "              hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n"
312             // clang-format on
313             ),
314         is_record_(false),
315         use_slab_(false),
316         accumulate_callchain_(false),
317         print_callgraph_(false),
318         callgraph_show_callee_(false),
319         record_filename_("perf.data"),
320         record_file_arch_(GetBuildArch()) {}
321
322   bool Run(const std::vector<std::string>& args);
323
324  private:
325   bool ParseOptions(const std::vector<std::string>& args,
326                     std::vector<std::string>* left_args);
327   bool RecordKmemInfo(const std::vector<std::string>& record_args);
328   bool ReportKmemInfo();
329   bool PrepareToBuildSampleTree();
330   void ReadEventAttrsFromRecordFile();
331   bool ReadFeaturesFromRecordFile();
332   bool ReadSampleTreeFromRecordFile();
333   bool ProcessRecord(std::unique_ptr<Record> record);
334   void ProcessTracingData(const std::vector<char>& data);
335   bool PrintReport();
336   void PrintReportContext(FILE* fp);
337   void PrintSlabReportContext(FILE* fp);
338
339   bool is_record_;
340   bool use_slab_;
341   std::vector<std::string> slab_sort_keys_;
342   bool accumulate_callchain_;
343   bool print_callgraph_;
344   bool callgraph_show_callee_;
345
346   std::string record_filename_;
347   std::unique_ptr<RecordFileReader> record_file_reader_;
348   std::vector<EventAttrWithName> event_attrs_;
349   std::string record_cmdline_;
350   ArchType record_file_arch_;
351
352   ThreadTree thread_tree_;
353   SlabSampleTree slab_sample_tree_;
354   std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_;
355   std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_;
356   std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_;
357
358   std::string report_filename_;
359 };
360
361 bool KmemCommand::Run(const std::vector<std::string>& args) {
362   std::vector<std::string> left_args;
363   if (!ParseOptions(args, &left_args)) {
364     return false;
365   }
366   if (!use_slab_) {
367     use_slab_ = true;
368   }
369   if (is_record_) {
370     return RecordKmemInfo(left_args);
371   }
372   return ReportKmemInfo();
373 }
374
375 bool KmemCommand::ParseOptions(const std::vector<std::string>& args,
376                                std::vector<std::string>* left_args) {
377   if (args.empty()) {
378     LOG(ERROR) << "No subcommand specified";
379     return false;
380   }
381   if (args[0] == "record") {
382     if (!IsRoot()) {
383       LOG(ERROR) << "simpleperf kmem record command needs root privilege";
384       return false;
385     }
386     is_record_ = true;
387     size_t i;
388     for (i = 1; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
389       if (args[i] == "-g") {
390         left_args->push_back("--call-graph");
391         left_args->push_back("fp");
392       } else if (args[i] == "--slab") {
393         use_slab_ = true;
394       } else {
395         left_args->push_back(args[i]);
396       }
397     }
398     left_args->insert(left_args->end(), args.begin() + i, args.end());
399   } else if (args[0] == "report") {
400     is_record_ = false;
401     for (size_t i = 1; i < args.size(); ++i) {
402       if (args[i] == "--children") {
403         accumulate_callchain_ = true;
404       } else if (args[i] == "-g") {
405         print_callgraph_ = true;
406         accumulate_callchain_ = true;
407         callgraph_show_callee_ = true;
408         if (i + 1 < args.size() && args[i + 1][0] != '-') {
409           ++i;
410           if (args[i] == "callee") {
411             callgraph_show_callee_ = true;
412           } else if (args[i] == "caller") {
413             callgraph_show_callee_ = false;
414           } else {
415             LOG(ERROR) << "Unknown argument with -g option: " << args[i];
416             return false;
417           }
418         }
419       } else if (args[i] == "-i") {
420         if (!NextArgumentOrError(args, &i)) {
421           return false;
422         }
423         record_filename_ = args[i];
424       } else if (args[i] == "-o") {
425         if (!NextArgumentOrError(args, &i)) {
426           return false;
427         }
428         report_filename_ = args[i];
429       } else if (args[i] == "--slab") {
430         use_slab_ = true;
431       } else if (args[i] == "--slab-sort") {
432         if (!NextArgumentOrError(args, &i)) {
433           return false;
434         }
435         slab_sort_keys_ = android::base::Split(args[i], ",");
436       } else {
437         ReportUnknownOption(args, i);
438         return false;
439       }
440     }
441   } else {
442     LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << args[0]
443                << ". Try `simpleperf help " << Name() << "`";
444     return false;
445   }
446   return true;
447 }
448
449 bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) {
450   std::vector<std::string> args;
451   if (use_slab_) {
452     std::vector<std::string> trace_events = {
453         "kmem:kmalloc",      "kmem:kmem_cache_alloc",
454         "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node",
455         "kmem:kfree",        "kmem:kmem_cache_free"};
456     for (const auto& name : trace_events) {
457       if (ParseEventType(name)) {
458         args.insert(args.end(), {"-e", name});
459       }
460     }
461   }
462   if (args.empty()) {
463     LOG(ERROR) << "Kernel allocation related trace events are not supported.";
464     return false;
465   }
466   args.push_back("-a");
467   args.insert(args.end(), record_args.begin(), record_args.end());
468   std::unique_ptr<Command> record_cmd = CreateCommandInstance("record");
469   if (record_cmd == nullptr) {
470     LOG(ERROR) << "record command isn't available";
471     return false;
472   }
473   return record_cmd->Run(args);
474 }
475
476 bool KmemCommand::ReportKmemInfo() {
477   if (!PrepareToBuildSampleTree()) {
478     return false;
479   }
480   record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
481   if (record_file_reader_ == nullptr) {
482     return false;
483   }
484   ReadEventAttrsFromRecordFile();
485   if (!ReadFeaturesFromRecordFile()) {
486     return false;
487   }
488   if (!ReadSampleTreeFromRecordFile()) {
489     return false;
490   }
491   if (!PrintReport()) {
492     return false;
493   }
494   return true;
495 }
496
497 bool KmemCommand::PrepareToBuildSampleTree() {
498   if (use_slab_) {
499     if (slab_sort_keys_.empty()) {
500       slab_sort_keys_ = {"hit",         "caller",   "bytes_req",
501                          "bytes_alloc", "fragment", "pingpong"};
502     }
503     SampleComparator<SlabSample> comparator;
504     SampleComparator<SlabSample> sort_comparator;
505     SampleDisplayer<SlabSample, SlabSampleTree> displayer;
506     std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : "";
507
508     if (print_callgraph_) {
509       displayer.AddExclusiveDisplayFunction(SlabSampleCallgraphDisplayer());
510     }
511
512     for (const auto& key : slab_sort_keys_) {
513       if (key == "hit") {
514         sort_comparator.AddCompareFunction(CompareSampleCount);
515         displayer.AddDisplayFunction(accumulated_name + "Hit",
516                                      DisplaySampleCount);
517       } else if (key == "caller") {
518         comparator.AddCompareFunction(CompareSymbol);
519         displayer.AddDisplayFunction("Caller", DisplaySymbol);
520       } else if (key == "ptr") {
521         comparator.AddCompareFunction(ComparePtr);
522         displayer.AddDisplayFunction("Ptr", DisplayPtr);
523       } else if (key == "bytes_req") {
524         sort_comparator.AddCompareFunction(CompareBytesReq);
525         displayer.AddDisplayFunction(accumulated_name + "BytesReq",
526                                      DisplayBytesReq);
527       } else if (key == "bytes_alloc") {
528         sort_comparator.AddCompareFunction(CompareBytesAlloc);
529         displayer.AddDisplayFunction(accumulated_name + "BytesAlloc",
530                                      DisplayBytesAlloc);
531       } else if (key == "fragment") {
532         sort_comparator.AddCompareFunction(CompareFragment);
533         displayer.AddDisplayFunction(accumulated_name + "Fragment",
534                                      DisplayFragment);
535       } else if (key == "gfp_flags") {
536         comparator.AddCompareFunction(CompareGfpFlags);
537         displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags);
538       } else if (key == "pingpong") {
539         sort_comparator.AddCompareFunction(CompareCrossCpuAllocations);
540         displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations);
541       } else {
542         LOG(ERROR) << "Unknown sort key for slab allocation: " << key;
543         return false;
544       }
545       slab_sample_tree_builder_.reset(
546           new SlabSampleTreeBuilder(comparator, &thread_tree_));
547       slab_sample_tree_builder_->SetCallChainSampleOptions(
548           accumulate_callchain_, print_callgraph_, !callgraph_show_callee_,
549           false);
550       sort_comparator.AddComparator(comparator);
551       slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator));
552       slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer));
553     }
554   }
555   return true;
556 }
557
558 void KmemCommand::ReadEventAttrsFromRecordFile() {
559   std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection();
560   for (const auto& attr_with_id : attrs) {
561     EventAttrWithName attr;
562     attr.attr = *attr_with_id.attr;
563     attr.event_ids = attr_with_id.ids;
564     attr.name = GetEventNameByAttr(attr.attr);
565     event_attrs_.push_back(attr);
566   }
567 }
568
569 bool KmemCommand::ReadFeaturesFromRecordFile() {
570   record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_);
571   std::string arch =
572       record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH);
573   if (!arch.empty()) {
574     record_file_arch_ = GetArchType(arch);
575     if (record_file_arch_ == ARCH_UNSUPPORTED) {
576       return false;
577     }
578   }
579   std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
580   if (!cmdline.empty()) {
581     record_cmdline_ = android::base::Join(cmdline, ' ');
582   }
583   if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) {
584     std::vector<char> tracing_data;
585     if (!record_file_reader_->ReadFeatureSection(
586             PerfFileFormat::FEAT_TRACING_DATA, &tracing_data)) {
587       return false;
588     }
589     ProcessTracingData(tracing_data);
590   }
591   return true;
592 }
593
594 bool KmemCommand::ReadSampleTreeFromRecordFile() {
595   if (!record_file_reader_->ReadDataSection(
596           [this](std::unique_ptr<Record> record) {
597             return ProcessRecord(std::move(record));
598           })) {
599     return false;
600   }
601   if (use_slab_) {
602     slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree();
603     slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_);
604   }
605   return true;
606 }
607
608 bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) {
609   thread_tree_.Update(*record);
610   if (record->type() == PERF_RECORD_SAMPLE) {
611     if (use_slab_) {
612       slab_sample_tree_builder_->ProcessSampleRecord(
613           *static_cast<const SampleRecord*>(record.get()));
614     }
615   } else if (record->type() == PERF_RECORD_TRACING_DATA) {
616     const auto& r = *static_cast<TracingDataRecord*>(record.get());
617     ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size));
618   }
619   return true;
620 }
621
622 void KmemCommand::ProcessTracingData(const std::vector<char>& data) {
623   Tracing tracing(data);
624   for (auto& attr : event_attrs_) {
625     if (attr.attr.type == PERF_TYPE_TRACEPOINT) {
626       uint64_t trace_event_id = attr.attr.config;
627       attr.name = tracing.GetTracingEventNameHavingId(trace_event_id);
628       TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id);
629       if (use_slab_) {
630         if (format.name == "kmalloc" || format.name == "kmem_cache_alloc" ||
631             format.name == "kmalloc_node" ||
632             format.name == "kmem_cache_alloc_node") {
633           SlabFormat f;
634           f.type = SlabFormat::KMEM_ALLOC;
635           format.GetField("call_site", f.call_site);
636           format.GetField("ptr", f.ptr);
637           format.GetField("bytes_req", f.bytes_req);
638           format.GetField("bytes_alloc", f.bytes_alloc);
639           format.GetField("gfp_flags", f.gfp_flags);
640           slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
641         } else if (format.name == "kfree" || format.name == "kmem_cache_free") {
642           SlabFormat f;
643           f.type = SlabFormat::KMEM_FREE;
644           format.GetField("call_site", f.call_site);
645           format.GetField("ptr", f.ptr);
646           slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
647         }
648       }
649     }
650   }
651 }
652
653 bool KmemCommand::PrintReport() {
654   std::unique_ptr<FILE, decltype(&fclose)> file_handler(nullptr, fclose);
655   FILE* report_fp = stdout;
656   if (!report_filename_.empty()) {
657     file_handler.reset(fopen(report_filename_.c_str(), "w"));
658     if (file_handler == nullptr) {
659       PLOG(ERROR) << "failed to open " << report_filename_;
660       return false;
661     }
662     report_fp = file_handler.get();
663   }
664   PrintReportContext(report_fp);
665   if (use_slab_) {
666     fprintf(report_fp, "\n\n");
667     PrintSlabReportContext(report_fp);
668     slab_sample_tree_displayer_->DisplaySamples(
669         report_fp, slab_sample_tree_.samples, &slab_sample_tree_);
670   }
671   return true;
672 }
673
674 void KmemCommand::PrintReportContext(FILE* fp) {
675   if (!record_cmdline_.empty()) {
676     fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str());
677   }
678   fprintf(fp, "Arch: %s\n", GetArchString(record_file_arch_).c_str());
679   for (const auto& attr : event_attrs_) {
680     fprintf(fp, "Event: %s (type %u, config %llu)\n", attr.name.c_str(),
681             attr.attr.type, attr.attr.config);
682   }
683 }
684
685 void KmemCommand::PrintSlabReportContext(FILE* fp) {
686   fprintf(fp, "Slab allocation information:\n");
687   fprintf(fp, "Total requested bytes: %" PRIu64 "\n",
688           slab_sample_tree_.total_requested_bytes);
689   fprintf(fp, "Total allocated bytes: %" PRIu64 "\n",
690           slab_sample_tree_.total_allocated_bytes);
691   uint64_t fragment = slab_sample_tree_.total_allocated_bytes -
692                       slab_sample_tree_.total_requested_bytes;
693   double percentage = 0.0;
694   if (slab_sample_tree_.total_allocated_bytes != 0) {
695     percentage = 100.0 * fragment / slab_sample_tree_.total_allocated_bytes;
696   }
697   fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment, percentage);
698   fprintf(fp, "Total allocations: %" PRIu64 "\n",
699           slab_sample_tree_.nr_allocations);
700   fprintf(fp, "Total frees: %" PRIu64 "\n", slab_sample_tree_.nr_frees);
701   percentage = 0.0;
702   if (slab_sample_tree_.nr_allocations != 0) {
703     percentage = 100.0 * slab_sample_tree_.nr_cross_cpu_allocations /
704                  slab_sample_tree_.nr_allocations;
705   }
706   fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n",
707           slab_sample_tree_.nr_cross_cpu_allocations, percentage);
708   fprintf(fp, "\n");
709 }
710
711 }  // namespace
712
713 void RegisterKmemCommand() {
714   RegisterCommand("kmem",
715                   [] { return std::unique_ptr<Command>(new KmemCommand()); });
716 }