3 # Copyright (C) 2017 The Android Open Source Project
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 # Super simplistic printer of a perfprofd output proto. Illustrates
18 # how to parse and traverse a perfprofd output proto in Python.
# This relies on libunwindstack's unwind_symbols tool. Build with
#   mmma system/core/libunwindstack
28 logging.basicConfig(format = "%(message)s")
33 from sorted_collection import SortedCollection
36 # aprotoc -I=external/perf_data_converter/src/quipper \
37 # --python_out=system/extras/perfprofd/scripts \
38 # external/perf_data_converter/src/quipper/perf_data.proto
39 # aprotoc -I=external/perf_data_converter/src/quipper -I=system/extras/perfprofd \
40 # --python_out=system/extras/perfprofd/scripts \
41 # system/extras/perfprofd/perfprofd_record.proto
42 import perfprofd_record_pb2
44 # Make sure that symbol is on the PYTHONPATH, e.g., run as
45 # PYTHONPATH=$PYTHONPATH:$ANDROID_BUILD_TOP/development/scripts python ...
47 from symbol import SymbolInformation
49 # This is wrong. But then the symbol module is a bad quagmire.
50 # TODO: Check build IDs.
51 symbol.SetAbi(["ABI: 'arm64'"])
class MmapState(object):
    """Address-sorted table of memory mappings.

    Every entry is a ``(start, length, pgoff, name)`` tuple, kept in a
    SortedCollection ordered by ``start``.
    """

    def __init__(self):
        # Order entries by their first element, the mapping start address.
        self._list = SortedCollection((), lambda x: x[0])

    def add_map(self, start, length, pgoff, name):
        """Record a mapping of `length` bytes that begins at `start`."""
        self._list.insert((start, length, pgoff, name))

    def find(self, addr):
        """Return the mapping tuple that contains `addr`, or None."""
        try:
            entry = self._list.find_le(addr)
        except ValueError:
            # NOTE(review): assumes find_le raises ValueError when no entry
            # starts at or below addr, as in the SortedCollection recipe.
            return None
        # The closest mapping below addr only counts if addr is inside it.
        if addr < entry[0] + entry[1]:
            return entry
        return None

    def copy(self):
        """Return a new MmapState holding a copy of the current entries."""
        ret = MmapState()
        ret._list = self._list.copy()
        return ret

    def __str__(self):
        return "MmapState: " + self._list.__str__()

    def __repr__(self):
        return self.__str__()
class SymbolMap(object):
    """Address-sorted table of the symbols of one file.

    Every entry is a ``(start, length, name)`` tuple, kept in a
    SortedCollection ordered by ``start``. `min_v` is stored as
    ``_min_vaddr`` alongside the table.
    """

    def __init__(self, min_v):
        # Order entries by their first element, the symbol start address.
        self._list = SortedCollection((), lambda x: x[0])
        self._min_vaddr = min_v

    def add_symbol(self, start, length, name):
        """Record a symbol of `length` bytes that begins at `start`."""
        self._list.insert((start, length, name))

    def find(self, addr):
        """Return the name of the symbol that contains `addr`, or None."""
        try:
            entry = self._list.find_le(addr)
        except ValueError:
            # NOTE(review): assumes find_le raises ValueError when no entry
            # starts at or below addr, as in the SortedCollection recipe.
            return None
        if addr < entry[0] + entry[1]:
            # NOTE(review): callers pair the result with a file name and
            # print it, so the symbol name is returned - confirm.
            return entry[2]
        return None

    def copy(self):
        """Return a new SymbolMap with the same min_vaddr and entries."""
        # The constructor requires min_v, so carry the stored value over.
        ret = SymbolMap(self._min_vaddr)
        ret._list = self._list.copy()
        return ret

    def __str__(self):
        return "SymbolMap: " + self._list.__str__()

    def __repr__(self):
        return self.__str__()
def intern_uni(u):
    """Return an interned native string for the unicode string `u`.

    Characters that cannot be encoded as ASCII are replaced with '?'.
    """
    import sys

    text = u.encode('ascii', 'replace')
    if not isinstance(text, str):
        # Where encode() yields bytes, convert back to a native string;
        # the content is pure ASCII at this point.
        text = text.decode('ascii')
    # Prefer sys.intern when it exists; otherwise use the builtin intern.
    intern_fn = getattr(sys, 'intern', None) or intern
    return intern_fn(text)
def collect_tid_names(perf_data):
    """Return a dict mapping thread id to interned name.

    Names come from the comm events in `perf_data.events`; a later comm
    event for the same tid replaces the earlier name.
    """
    tid_name_map = {}
    for event in perf_data.events:
        if not event.HasField('comm_event'):
            continue
        comm = event.comm_event
        tid_name_map[comm.tid] = intern_uni(comm.comm)
    return tid_name_map
def create_symbol_maps(profile):
    """Return a dict mapping file name to a SymbolMap.

    One SymbolMap is built per entry of `profile.symbol_info` and filled
    with that entry's symbols (address, size, interned name).
    """
    symbol_maps = {}
    for si in profile.symbol_info:
        sym_map = SymbolMap(si.min_vaddr)
        symbol_maps[si.filename] = sym_map
        for sym in si.symbols:
            sym_map.add_symbol(sym.addr, sym.size, intern_uni(sym.name))
    return symbol_maps
def update_mmap_states(event, state_map):
    """Apply an mmap or fork event to `state_map` (pid -> MmapState).

    An mmap event adds a mapping to the state of its pid, creating the
    state on first use. A fork event gives the child pid a copy of the
    parent's state. Any other event leaves `state_map` untouched.
    """
    if event.HasField('mmap_event'):
        mmap_event = event.mmap_event
        # Events with tid 0 are not tracked.
        if mmap_event.tid == 0:
            return
        # Create new map, if necessary.
        if mmap_event.pid not in state_map:
            state_map[mmap_event.pid] = MmapState()
        state_map[mmap_event.pid].add_map(mmap_event.start, mmap_event.len,
                                          mmap_event.pgoff,
                                          intern_uni(mmap_event.filename))
    elif event.HasField('fork_event'):
        fork_event = event.fork_event
        # pid == ppid means nothing has to be copied.
        if fork_event.pid == fork_event.ppid:
            return
        if fork_event.ppid not in state_map:
            logging.warning("fork from %d without map", fork_event.ppid)
            return
        state_map[fork_event.pid] = state_map[fork_event.ppid].copy()
def find_vaddr(vaddr_map, filename):
    """Return the minimum LOAD virtual address of `filename`, with caching.

    `vaddr_map` is the cache (filename -> address) and is updated in place.
    The file is looked up below symbol.SYMBOLS_DIR and inspected with
    ``readelf -lW``. When the file is missing, readelf fails, or no usable
    LOAD header is found, 0 is cached and returned.
    """
    if filename in vaddr_map:
        return vaddr_map[filename]

    path = "%s/%s" % (symbol.SYMBOLS_DIR, filename)
    if not os.path.isfile(path):
        logging.warning('Cannot find %s for min_vaddr', filename)
        vaddr_map[filename] = 0
        return 0

    try:
        # Use "-W" to have single-line format. Text mode makes the output a
        # native string so it can be split on "\n".
        res = subprocess.check_output(['readelf', '-lW', path],
                                      universal_newlines=True)
    except subprocess.CalledProcessError:
        logging.warning('Error finding min_vaddr for %s', filename)
        vaddr_map[filename] = 0
        return 0

    min_vaddr = None
    reading_headers = False
    for raw_line in res.split("\n"):
        line = raw_line.strip()
        if not reading_headers:
            if line == "Program Headers:":
                reading_headers = True
            continue
        if line == "":
            # Block is done, won't find anything else.
            break
        if not line.startswith("LOAD"):
            continue
        fields = line.split()
        if len(fields) < 8:
            logging.warning('Could not parse readelf line %s', line)
            continue
        # NOTE(review): the condition guarding "Found something expected"
        # was not visible in the reviewed text; restricting to executable
        # ("R E") segments is an assumption to confirm.
        if " R E " not in line:
            continue
        # Found something expected. So parse VirtAddr (third column).
        try:
            vaddr_value = int(fields[2], 0)
        except ValueError:
            logging.warning('Could not parse readelf line %s', line)
            continue
        min_vaddr = vaddr_value if min_vaddr is None else min(min_vaddr, vaddr_value)

    if min_vaddr is None:
        min_vaddr = 0
    logging.debug("min_vaddr for %s is %d", filename, min_vaddr)
    vaddr_map[filename] = min_vaddr
    return min_vaddr
# (path, offset_hex) -> interned symbol, or None for a lookup that failed.
unwind_symbols_cache = {}
# Paths already reported as missing, so each is warned about only once.
unwind_symbols_warn_missing_cache = set()


def run_unwind_symbols(filename, offset_hex):
    """Symbolize `offset_hex` in `filename` with the unwind_symbols tool.

    Returns a one-element list ``[(symbol, filename)]`` on success, and
    None when the file does not exist below symbol.SYMBOLS_DIR, the tool
    fails, or its output has no usable ``<0x...>`` line. Results of
    successful and failed tool runs are cached per (path, offset).
    """
    path = "%s/%s" % (symbol.SYMBOLS_DIR, filename)
    if not os.path.isfile(path):
        if path not in unwind_symbols_warn_missing_cache:
            logging.warning('Cannot find %s for unwind_symbols', filename)
            unwind_symbols_warn_missing_cache.add(path)
        return None

    key = (path, offset_hex)
    if key in unwind_symbols_cache:
        cached = unwind_symbols_cache[key]
        # A failed run is cached as None. Report it as a failure again
        # rather than handing back a [(None, filename)] pseudo-result.
        if cached is None:
            return None
        return [(cached, filename)]

    try:
        # Text mode makes the output a native string for split("\n").
        res = subprocess.check_output(['unwind_symbols', path, offset_hex],
                                      universal_newlines=True)
    except subprocess.CalledProcessError:
        logging.warning('Failed running unwind_symbols for %s', filename)
        unwind_symbols_cache[key] = None
        return None

    for line in res.split("\n"):
        if not line.startswith('<0x'):
            continue
        parts = line.split(' ', 1)
        if len(parts) == 2:
            # TODO C++ demangling necessary.
            logging.debug('unwind_symbols: %s %s -> %s', filename, offset_hex, parts[1])
            sym = intern(parts[1])
            unwind_symbols_cache[key] = sym
            return [(sym, filename)]
    return None
def decode_with_symbol_lib(name, addr_rel_hex):
    """Symbolize `addr_rel_hex` in `name` through SymbolInformation.

    Returns a list of ``(symbol, name)`` pairs. Entries found for inlined
    frames are inserted in front of the object symbol.
    """
    info = SymbolInformation(name, addr_rel_hex)
    # As-is, only info[0] (inner-most inlined function) is recognized.
    (source_symbol, _source_location, object_symbol_with_offset) = info[0]
    ret = []
    if object_symbol_with_offset is not None:
        ret.append((intern(object_symbol_with_offset), name))
    if source_symbol is not None:
        iterinfo = iter(info)
        # NOTE(review): the explicit iterator suggests info[0], already
        # handled above, is skipped here - confirm against the original.
        next(iterinfo)
        for (sym_inlined, _loc_inlined, _) in iterinfo:
            # TODO: Figure out what's going on here:
            # NOTE(review): any handling of entries without a symbol was not
            # visible in the reviewed text; such entries are skipped here.
            if sym_inlined is not None:
                ret.insert(0, (intern(sym_inlined), name))
    return ret
def decode_addr(addr, mmap_state, device_symbols):
    """Try to decode the given address against the current mmap table and device symbols.

    First, look up the address in the mmap state. If none is found, use a simple address
    heuristic to guess kernel frames on 64-bit devices.

    Next, check on-device symbolization for a hit.

    Last, try to symbolize against host information. First try the symbol module. However,
    as it is based on addr2line, it will not work for pure-gnu_debugdata DSOs (e.g., ART
    preopt artifacts). For that case, use libunwindstack's unwind_symbols.

    Returns a non-empty list of ``(symbol, dso_name)`` pairs. Host symbolization
    uses the module-level `vaddr` cache and the `skip_dso` set of known failures.
    """
    mapping = mmap_state.find(addr)
    if mapping is None:
        # If it looks large enough, assume it's from the kernel.
        if addr > 18000000000000000000:
            return [("[kernel]", "[kernel]")]
        return [("%d (no mapped segment)" % addr, None)]

    # Mapping tuples are (start, length, pgoff, name).
    name = mapping[3]
    logging.debug('%d is %s (%d +%d)', addr, name, mapping[0], mapping[1])

    # Once relocation packer is off, it would be:
    #   offset = addr - map.start + map.pgoff
    # For now it is:
    #   offset = addr - map.start (+ min_vaddr)
    # Note that on-device symbolization doesn't include min_vaddr but
    # does include pgoff.
    offset = addr - mapping[0]

    if name in device_symbols:
        offset = offset + mapping[2]
        device_symbol = device_symbols[name].find(offset)
        if device_symbol is None:
            return [("%s +%d (missing on-device symbol)" % (name, offset), name)]
        return [(device_symbol, name)]

    offset = offset + find_vaddr(vaddr, name)
    if (name, offset) in skip_dso:
        # We already failed, skip symbol finding.
        return [("%s +%d" % (name, offset), name)]

    addr_rel_hex = intern("%x" % offset)
    ret = decode_with_symbol_lib(name, addr_rel_hex)
    if ret is not None and len(ret) != 0:
        # Addr2line may report oatexec+xyz. Let unwind_symbols take care of that.
        if len(ret) != 1 or not ret[0][0].startswith("oatexec+"):
            logging.debug('Got result from symbol module: %s', str(ret))
            return ret

    ret = run_unwind_symbols(name, addr_rel_hex)
    if ret is not None and len(ret) != 0:
        return ret

    logging.warning("Failed to find symbol for %s +%d (%d)", name, offset, addr)
    # Remember the failure so this (dso, offset) is not retried.
    skip_dso.add((name, offset))
    return [("%s +%d" % (name, offset), name)]
def _sample_id_label(ident, tid_name_map):
    """Format a pid or tid as 'name (id)', with kernel/unknown fallbacks."""
    if ident in tid_name_map:
        return "%s (%d)" % (tid_name_map[ident], ident)
    if ident == 0:
        return "kernel (0)"
    return "unknown (%d)" % (ident)


def print_sample(sample, tid_name_map):
    """Print one sample: a 'pid - tid' header line, then one line per frame.

    `sample` is a ``(pid, tid, stack)`` tuple whose stack is a list of
    ``(symbol, dso)`` pairs.
    """
    pid_name = _sample_id_label(sample[0], tid_name_map)
    tid_name = _sample_id_label(sample[1], tid_name_map)
    print(" %s - %s:" % (pid_name, tid_name))
    for sym in sample[2]:
        print(" %s (%s)" % (sym[0], sym[1]))
def print_samples(samples, tid_name_map):
    """Print every sample in `samples`, in order, using print_sample."""
    for sample in samples:
        print_sample(sample, tid_name_map)
def symbolize_events(perf_data, device_symbols, tid_name_map, printSamples=False,
                     removeKernelTop=False):
    """Walk `perf_data.events` and return the list of symbolized samples.

    Every event first updates the per-pid mmap states. Each sample event
    then becomes a ``(pid, tid, stack)`` tuple, where stack is a list of
    ``(symbol, dso)`` pairs: the sampled ip first, then its callchain.

    Samples of a pid without mmap state get a one-frame placeholder stack:
    "[kernel]" for pid 0, the recorded name if the pid is in `tid_name_map`,
    "[unknown]" otherwise.

    printSamples: also print each symbolized (non-placeholder) sample.
    removeKernelTop: drop leading "[kernel]" frames, keeping at least one frame.
    """
    samples = []
    mmap_states = {}
    for event in perf_data.events:
        update_mmap_states(event, mmap_states)
        if not event.HasField('sample_event'):
            continue
        sample_ev = event.sample_event

        if sample_ev.pid not in mmap_states:
            # Handle kernel symbols specially.
            if sample_ev.pid == 0:
                samples.append((0, sample_ev.tid, [("[kernel]", "[kernel]")]))
            elif sample_ev.pid in tid_name_map:
                samples.append((sample_ev.pid, sample_ev.tid,
                                [(tid_name_map[sample_ev.pid], None)]))
            else:
                samples.append((sample_ev.pid, sample_ev.tid, [("[unknown]", None)]))
            continue

        mmap_state = mmap_states[sample_ev.pid]
        # Copy so the list returned by decode_addr is never extended in place.
        stack = list(decode_addr(sample_ev.ip, mmap_state, device_symbols))
        for cc_ip in sample_ev.callchain:
            stack.extend(decode_addr(cc_ip, mmap_state, device_symbols))
        if removeKernelTop:
            while len(stack) > 1 and stack[0][0] == "[kernel]":
                stack.pop(0)

        new_sample = (sample_ev.pid, sample_ev.tid, stack)
        samples.append(new_sample)
        if printSamples:
            print_sample(new_sample, tid_name_map)
    return samples
def count_key_reduce_function(x, y, key_fn):
    """Reduce step that counts items per key.

    `x` is the accumulator dict (key -> count), `y` the next item and
    `key_fn` maps an item to its key. The accumulator is updated in place
    and returned so the function can be folded with reduce().
    """
    # NOTE(review): the body was not visible in the reviewed text; this is
    # reconstructed from print_histogram, which folds samples into a dict
    # and ranks its items by count.
    key = key_fn(y)
    x[key] = x.get(key, 0) + 1
    return x
def print_histogram(samples, reduce_key_fn, label_key_fn, size):
    """Print the `size` most frequent keys among `samples`.

    `reduce_key_fn` maps a sample to the key it is counted under and
    `label_key_fn` turns a key into the text that is printed. Output is a
    header line followed by one ranked line per key with its count.
    """
    from collections import Counter

    counts = Counter(reduce_key_fn(sample) for sample in samples)
    # Most frequent first. sorted() is stable, so keys with equal counts
    # stay in the order the counter yields them.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    # Print top-size samples.
    print('Histogram top-%d:' % (size))
    for rank, (key, count) in enumerate(ranked[:size], 1):
        print(' %d: %s (%s)' % (rank, label_key_fn(key), count))
def get_name(pid):
    """Return a display name for `pid` using the module-level `tid_name_map`."""
    if pid in tid_name_map:
        return tid_name_map[pid]
    # NOTE(review): the fallback for ids without a recorded name was not
    # visible in the reviewed text; these labels mirror the placeholders
    # used in symbolize_events and should be confirmed.
    if pid == 0:
        return "[kernel]"
    return "[unknown]"
# Command-line entry: parse a perfprofd record, symbolize its samples, and
# print or dump whatever was requested.
parser = argparse.ArgumentParser(description='Process a perfprofd record.')

parser.add_argument('file', help='proto file to parse', metavar='file', nargs=1)
parser.add_argument('--syms', help='directory for symbols', nargs=1)
parser.add_argument('--json-out', help='output file for JSON', nargs=1)
parser.add_argument('--print-samples', help='print samples', action='store_const', const=True)
parser.add_argument('--skip-kernel-syms', help='skip kernel symbols at the top of stack',
                    action='store_const', const=True)
parser.add_argument('--print-pid-histogram', help='print a top-25 histogram of processes',
                    action='store_const', const=True)
parser.add_argument('--print-sym-histogram', help='print a top-100 histogram of symbols',
                    action='store_const', const=True)
parser.add_argument('--print-dso-histogram', help='print a top-25 histogram of maps',
                    action='store_const', const=True)

args = parser.parse_args()

# store_const flags are None unless given, hence the "is not None" tests.
if args.syms is not None:
    symbol.SYMBOLS_DIR = args.syms[0]
print_symbols = args.print_samples is not None
skip_kernel_syms = args.skip_kernel_syms is not None

# TODO: accept argument for parsing.
with open(args.file[0], 'rb') as proto_file:
    data = proto_file.read()

profile = perfprofd_record_pb2.PerfprofdRecord()
profile.ParseFromString(data)

perf_data = profile.perf_data

print("%s %s" % ("Stats: ", perf_data.stats))

# get_name() reads tid_name_map, so it has to stay a module-level name.
tid_name_map = collect_tid_names(perf_data)
symbol_maps = create_symbol_maps(profile)

samples = symbolize_events(perf_data, symbol_maps, tid_name_map, printSamples=print_symbols,
                           removeKernelTop=skip_kernel_syms)

# Samples are (pid, tid, stack); stack[0] is the top (symbol, dso) frame.
if args.print_pid_histogram is not None:
    print_histogram(samples, lambda x: x[0], lambda x: get_name(x), 25)
if args.print_sym_histogram is not None:
    print_histogram(samples, lambda x: x[2][0][0], lambda x: x, 100)
if args.print_dso_histogram is not None:
    print_histogram(samples, lambda x: x[2][0][1], lambda x: x, 25)

if args.json_out is not None:
    # The with-block flushes and closes the file once the dump is done.
    with open(args.json_out[0], 'w') as json_file:
        json.dump(samples, json_file)