OSDN Git Service

perf trace: Add support for pagefault tracing
[android-x86/kernel.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * Fallback values for constants that the system headers of older
 * distros may not provide.
 */
#if !defined(MAP_STACK)
#define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
#define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
#define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
#define MADV_UNMERGEABLE	13
#endif

#if !defined(EFD_SEMAPHORE)
#define EFD_SEMAPHORE		1
#endif
43
44 struct tp_field {
45         int offset;
46         union {
47                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49         };
50 };
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67         return bswap_##bits(value);\
68 }
69
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73
74 static int tp_field__init_uint(struct tp_field *field,
75                                struct format_field *format_field,
76                                bool needs_swap)
77 {
78         field->offset = format_field->offset;
79
80         switch (format_field->size) {
81         case 1:
82                 field->integer = tp_field__u8;
83                 break;
84         case 2:
85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86                 break;
87         case 4:
88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89                 break;
90         case 8:
91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92                 break;
93         default:
94                 return -1;
95         }
96
97         return 0;
98 }
99
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111
112 struct syscall_tp {
113         struct tp_field id;
114         union {
115                 struct tp_field args, ret;
116         };
117 };
118
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124
125         if (format_field == NULL)
126                 return -1;
127
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130
/*
 * Initialise member @name of the struct syscall_tp hanging off
 * evsel->priv as an integer accessor for the tracepoint field of the
 * same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ \
		struct syscall_tp *sc = evsel->priv; \
		perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); \
	})
134
/*
 * Look up the tracepoint field called @name on @evsel and initialise
 * @field as a pointer accessor for it.
 *
 * Returns -1 if the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}
146
/*
 * Initialise member @name of the struct syscall_tp hanging off
 * evsel->priv as a pointer accessor for the tracepoint field of the
 * same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ \
		struct syscall_tp *sc = evsel->priv; \
		perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); \
	})
150
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
152 {
153         zfree(&evsel->priv);
154         perf_evsel__delete(evsel);
155 }
156
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163
164                 evsel->handler = handler;
165                 return 0;
166         }
167
168         return -ENOMEM;
169
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174
/*
 * Create an evsel for the syscall tracepoint named @direction and set it
 * up with @handler.
 *
 * The "raw_syscalls" subsystem is tried first, falling back to "syscalls".
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the evsel could not be initialised (in which case it is deleted).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (!evsel)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (!evsel)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
194
/*
 * Read member @name of the evsel's struct syscall_tp from @sample through
 * its integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.integer(&fields->name, sample); \
	})
198
/*
 * Get the address of member @name of the evsel's struct syscall_tp inside
 * @sample through its pointer accessor.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.pointer(&fields->name, sample); \
	})
202
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226
227         ret = 0;
228 out:
229         return ret;
230
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237
238
239 struct syscall_arg {
240         unsigned long val;
241         struct thread *thread;
242         struct trace  *trace;
243         void          *parm;
244         u8            idx;
245         u8            mask;
246 };
247
/*
 * Table of names for a contiguous range of integer values.
 *
 * @offset:     value that maps to entries[0]
 * @nr_entries: number of slots in @entries
 * @entries:    the names, indexed by (value - offset)
 */
struct strarray {
	int		offset;
	int		nr_entries;
	const char	**entries;
};
253
/*
 * Define strarray__<array>, a struct strarray describing the string table
 * @array, whose first entry corresponds to value 0.
 */
#define DEFINE_STRARRAY(array) \
	struct strarray strarray__##array = { \
		.nr_entries = ARRAY_SIZE(array), \
		.entries    = array, \
	}

/*
 * Same as DEFINE_STRARRAY(), but the first entry of @array corresponds to
 * value @off instead of 0.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) \
	struct strarray strarray__##array = { \
		.offset     = off, \
		.nr_entries = ARRAY_SIZE(array), \
		.entries    = array, \
	}
264
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277
/*
 * String-table beautifier: prints the name for arg->val, or the value in
 * decimal when the table has no name for it.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * String-table beautifier that prints the value in hexadecimal when the
 * table has no name for it.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299
/* File descriptor beautifier; declared here, defined elsewhere in this file. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
304
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306                                            struct syscall_arg *arg)
307 {
308         int fd = arg->val;
309
310         if (fd == AT_FDCWD)
311                 return scnprintf(bf, size, "CWD");
312
313         return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317
/* Beautifier for the fd argument of close(); declared here, defined elsewhere in this file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324                                          struct syscall_arg *arg)
325 {
326         return scnprintf(bf, size, "%#lx", arg->val);
327 }
328
329 #define SCA_HEX syscall_arg__scnprintf_hex
330
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332                                                struct syscall_arg *arg)
333 {
334         int printed = 0, prot = arg->val;
335
336         if (prot == PROT_NONE)
337                 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339         if (prot & PROT_##n) { \
340                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341                 prot &= ~PROT_##n; \
342         }
343
344         P_MMAP_PROT(EXEC);
345         P_MMAP_PROT(READ);
346         P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348         P_MMAP_PROT(SEM);
349 #endif
350         P_MMAP_PROT(GROWSDOWN);
351         P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353
354         if (prot)
355                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356
357         return printed;
358 }
359
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363                                                 struct syscall_arg *arg)
364 {
365         int printed = 0, flags = arg->val;
366
367 #define P_MMAP_FLAG(n) \
368         if (flags & MAP_##n) { \
369                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370                 flags &= ~MAP_##n; \
371         }
372
373         P_MMAP_FLAG(SHARED);
374         P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376         P_MMAP_FLAG(32BIT);
377 #endif
378         P_MMAP_FLAG(ANONYMOUS);
379         P_MMAP_FLAG(DENYWRITE);
380         P_MMAP_FLAG(EXECUTABLE);
381         P_MMAP_FLAG(FILE);
382         P_MMAP_FLAG(FIXED);
383         P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385         P_MMAP_FLAG(HUGETLB);
386 #endif
387         P_MMAP_FLAG(LOCKED);
388         P_MMAP_FLAG(NONBLOCK);
389         P_MMAP_FLAG(NORESERVE);
390         P_MMAP_FLAG(POPULATE);
391         P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393         P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396
397         if (flags)
398                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399
400         return printed;
401 }
402
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406                                                       struct syscall_arg *arg)
407 {
408         int behavior = arg->val;
409
410         switch (behavior) {
411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
412         P_MADV_BHV(NORMAL);
413         P_MADV_BHV(RANDOM);
414         P_MADV_BHV(SEQUENTIAL);
415         P_MADV_BHV(WILLNEED);
416         P_MADV_BHV(DONTNEED);
417         P_MADV_BHV(REMOVE);
418         P_MADV_BHV(DONTFORK);
419         P_MADV_BHV(DOFORK);
420         P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422         P_MADV_BHV(SOFT_OFFLINE);
423 #endif
424         P_MADV_BHV(MERGEABLE);
425         P_MADV_BHV(UNMERGEABLE);
426 #ifdef MADV_HUGEPAGE
427         P_MADV_BHV(HUGEPAGE);
428 #endif
429 #ifdef MADV_NOHUGEPAGE
430         P_MADV_BHV(NOHUGEPAGE);
431 #endif
432 #ifdef MADV_DONTDUMP
433         P_MADV_BHV(DONTDUMP);
434 #endif
435 #ifdef MADV_DODUMP
436         P_MADV_BHV(DODUMP);
437 #endif
438 #undef P_MADV_PHV
439         default: break;
440         }
441
442         return scnprintf(bf, size, "%#x", behavior);
443 }
444
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
446
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448                                            struct syscall_arg *arg)
449 {
450         int printed = 0, op = arg->val;
451
452         if (op == 0)
453                 return scnprintf(bf, size, "NONE");
454 #define P_CMD(cmd) \
455         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
457                 op &= ~LOCK_##cmd; \
458         }
459
460         P_CMD(SH);
461         P_CMD(EX);
462         P_CMD(NB);
463         P_CMD(UN);
464         P_CMD(MAND);
465         P_CMD(RW);
466         P_CMD(READ);
467         P_CMD(WRITE);
468 #undef P_OP
469
470         if (op)
471                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
472
473         return printed;
474 }
475
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
477
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
479 {
480         enum syscall_futex_args {
481                 SCF_UADDR   = (1 << 0),
482                 SCF_OP      = (1 << 1),
483                 SCF_VAL     = (1 << 2),
484                 SCF_TIMEOUT = (1 << 3),
485                 SCF_UADDR2  = (1 << 4),
486                 SCF_VAL3    = (1 << 5),
487         };
488         int op = arg->val;
489         int cmd = op & FUTEX_CMD_MASK;
490         size_t printed = 0;
491
492         switch (cmd) {
493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
495         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
498         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
499         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
500         P_FUTEX_OP(WAKE_OP);                                                      break;
501         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
504         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
505         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
506         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
507         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
508         }
509
510         if (op & FUTEX_PRIVATE_FLAG)
511                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
512
513         if (op & FUTEX_CLOCK_REALTIME)
514                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
515
516         return printed;
517 }
518
519 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
520
521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
523
524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
525 static DEFINE_STRARRAY(itimers);
526
527 static const char *whences[] = { "SET", "CUR", "END",
528 #ifdef SEEK_DATA
529 "DATA",
530 #endif
531 #ifdef SEEK_HOLE
532 "HOLE",
533 #endif
534 };
535 static DEFINE_STRARRAY(whences);
536
537 static const char *fcntl_cmds[] = {
538         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
539         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
540         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
541         "F_GETOWNER_UIDS",
542 };
543 static DEFINE_STRARRAY(fcntl_cmds);
544
545 static const char *rlimit_resources[] = {
546         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
547         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
548         "RTTIME",
549 };
550 static DEFINE_STRARRAY(rlimit_resources);
551
552 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
553 static DEFINE_STRARRAY(sighow);
554
555 static const char *clockid[] = {
556         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
557         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
558 };
559 static DEFINE_STRARRAY(clockid);
560
/*
 * Socket address family names, indexed by the AF_* value (AF_UNSPEC == 0).
 *
 * Slot 28 must be occupied: without it every name from "CAN" onwards sits
 * one position too low (AF_CAN is 29, AF_TIPC is 30, ... AF_VSOCK is 40).
 * Linux assigns 28 to AF_MPLS.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
570
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
573 #endif
574
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576                                                       struct syscall_arg *arg)
577 {
578         size_t printed;
579         int type = arg->val,
580             flags = type & ~SOCK_TYPE_MASK;
581
582         type &= SOCK_TYPE_MASK;
583         /*
584          * Can't use a strarray, MIPS may override for ABI reasons.
585          */
586         switch (type) {
587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
588         P_SK_TYPE(STREAM);
589         P_SK_TYPE(DGRAM);
590         P_SK_TYPE(RAW);
591         P_SK_TYPE(RDM);
592         P_SK_TYPE(SEQPACKET);
593         P_SK_TYPE(DCCP);
594         P_SK_TYPE(PACKET);
595 #undef P_SK_TYPE
596         default:
597                 printed = scnprintf(bf, size, "%#x", type);
598         }
599
600 #define P_SK_FLAG(n) \
601         if (flags & SOCK_##n) { \
602                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603                 flags &= ~SOCK_##n; \
604         }
605
606         P_SK_FLAG(CLOEXEC);
607         P_SK_FLAG(NONBLOCK);
608 #undef P_SK_FLAG
609
610         if (flags)
611                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
612
613         return printed;
614 }
615
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
617
618 #ifndef MSG_PROBE
619 #define MSG_PROBE            0x10
620 #endif
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE  0x10000
623 #endif
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
626 #endif
627 #ifndef MSG_FASTOPEN
628 #define MSG_FASTOPEN         0x20000000
629 #endif
630
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632                                                struct syscall_arg *arg)
633 {
634         int printed = 0, flags = arg->val;
635
636         if (flags == 0)
637                 return scnprintf(bf, size, "NONE");
638 #define P_MSG_FLAG(n) \
639         if (flags & MSG_##n) { \
640                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
641                 flags &= ~MSG_##n; \
642         }
643
644         P_MSG_FLAG(OOB);
645         P_MSG_FLAG(PEEK);
646         P_MSG_FLAG(DONTROUTE);
647         P_MSG_FLAG(TRYHARD);
648         P_MSG_FLAG(CTRUNC);
649         P_MSG_FLAG(PROBE);
650         P_MSG_FLAG(TRUNC);
651         P_MSG_FLAG(DONTWAIT);
652         P_MSG_FLAG(EOR);
653         P_MSG_FLAG(WAITALL);
654         P_MSG_FLAG(FIN);
655         P_MSG_FLAG(SYN);
656         P_MSG_FLAG(CONFIRM);
657         P_MSG_FLAG(RST);
658         P_MSG_FLAG(ERRQUEUE);
659         P_MSG_FLAG(NOSIGNAL);
660         P_MSG_FLAG(MORE);
661         P_MSG_FLAG(WAITFORONE);
662         P_MSG_FLAG(SENDPAGE_NOTLAST);
663         P_MSG_FLAG(FASTOPEN);
664         P_MSG_FLAG(CMSG_CLOEXEC);
665 #undef P_MSG_FLAG
666
667         if (flags)
668                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
669
670         return printed;
671 }
672
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
674
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676                                                  struct syscall_arg *arg)
677 {
678         size_t printed = 0;
679         int mode = arg->val;
680
681         if (mode == F_OK) /* 0 */
682                 return scnprintf(bf, size, "F");
683 #define P_MODE(n) \
684         if (mode & n##_OK) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
686                 mode &= ~n##_OK; \
687         }
688
689         P_MODE(R);
690         P_MODE(W);
691         P_MODE(X);
692 #undef P_MODE
693
694         if (mode)
695                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
696
697         return printed;
698 }
699
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
701
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703                                                struct syscall_arg *arg)
704 {
705         int printed = 0, flags = arg->val;
706
707         if (!(flags & O_CREAT))
708                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
709
710         if (flags == 0)
711                 return scnprintf(bf, size, "RDONLY");
712 #define P_FLAG(n) \
713         if (flags & O_##n) { \
714                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715                 flags &= ~O_##n; \
716         }
717
718         P_FLAG(APPEND);
719         P_FLAG(ASYNC);
720         P_FLAG(CLOEXEC);
721         P_FLAG(CREAT);
722         P_FLAG(DIRECT);
723         P_FLAG(DIRECTORY);
724         P_FLAG(EXCL);
725         P_FLAG(LARGEFILE);
726         P_FLAG(NOATIME);
727         P_FLAG(NOCTTY);
728 #ifdef O_NONBLOCK
729         P_FLAG(NONBLOCK);
730 #elif O_NDELAY
731         P_FLAG(NDELAY);
732 #endif
733 #ifdef O_PATH
734         P_FLAG(PATH);
735 #endif
736         P_FLAG(RDWR);
737 #ifdef O_DSYNC
738         if ((flags & O_SYNC) == O_SYNC)
739                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
740         else {
741                 P_FLAG(DSYNC);
742         }
743 #else
744         P_FLAG(SYNC);
745 #endif
746         P_FLAG(TRUNC);
747         P_FLAG(WRONLY);
748 #undef P_FLAG
749
750         if (flags)
751                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753         return printed;
754 }
755
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
757
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759                                                    struct syscall_arg *arg)
760 {
761         int printed = 0, flags = arg->val;
762
763         if (flags == 0)
764                 return scnprintf(bf, size, "NONE");
765 #define P_FLAG(n) \
766         if (flags & EFD_##n) { \
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768                 flags &= ~EFD_##n; \
769         }
770
771         P_FLAG(SEMAPHORE);
772         P_FLAG(CLOEXEC);
773         P_FLAG(NONBLOCK);
774 #undef P_FLAG
775
776         if (flags)
777                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
778
779         return printed;
780 }
781
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
783
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785                                                 struct syscall_arg *arg)
786 {
787         int printed = 0, flags = arg->val;
788
789 #define P_FLAG(n) \
790         if (flags & O_##n) { \
791                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
792                 flags &= ~O_##n; \
793         }
794
795         P_FLAG(CLOEXEC);
796         P_FLAG(NONBLOCK);
797 #undef P_FLAG
798
799         if (flags)
800                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801
802         return printed;
803 }
804
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
806
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
808 {
809         int sig = arg->val;
810
811         switch (sig) {
812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
813         P_SIGNUM(HUP);
814         P_SIGNUM(INT);
815         P_SIGNUM(QUIT);
816         P_SIGNUM(ILL);
817         P_SIGNUM(TRAP);
818         P_SIGNUM(ABRT);
819         P_SIGNUM(BUS);
820         P_SIGNUM(FPE);
821         P_SIGNUM(KILL);
822         P_SIGNUM(USR1);
823         P_SIGNUM(SEGV);
824         P_SIGNUM(USR2);
825         P_SIGNUM(PIPE);
826         P_SIGNUM(ALRM);
827         P_SIGNUM(TERM);
828         P_SIGNUM(CHLD);
829         P_SIGNUM(CONT);
830         P_SIGNUM(STOP);
831         P_SIGNUM(TSTP);
832         P_SIGNUM(TTIN);
833         P_SIGNUM(TTOU);
834         P_SIGNUM(URG);
835         P_SIGNUM(XCPU);
836         P_SIGNUM(XFSZ);
837         P_SIGNUM(VTALRM);
838         P_SIGNUM(PROF);
839         P_SIGNUM(WINCH);
840         P_SIGNUM(IO);
841         P_SIGNUM(PWR);
842         P_SIGNUM(SYS);
843 #ifdef SIGEMT
844         P_SIGNUM(EMT);
845 #endif
846 #ifdef SIGSTKFLT
847         P_SIGNUM(STKFLT);
848 #endif
849 #ifdef SIGSWI
850         P_SIGNUM(SWI);
851 #endif
852         default: break;
853         }
854
855         return scnprintf(bf, size, "%#x", sig);
856 }
857
858 #define SCA_SIGNUM syscall_arg__scnprintf_signum
859
860 #if defined(__i386__) || defined(__x86_64__)
861 /*
862  * FIXME: Make this available to all arches.
863  */
864 #define TCGETS          0x5401
865
866 static const char *tioctls[] = {
867         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
868         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
869         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
870         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
871         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
872         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
873         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
874         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
875         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
876         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
877         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
878         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
879         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
880         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
881         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
882 };
883
884 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
885 #endif /* defined(__i386__) || defined(__x86_64__) */
886
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg with the string-table beautifier, using strarray__<array>
 * as its table.  @name is not used by the expansion; callers pass the
 * argument's name there.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	.arg_parm      = { [arg] = &strarray__##array, }
890
/*
 * Per-syscall pretty-printing overrides.
 *
 * name          - syscall name; this is the key compared by syscall_fmt__cmp().
 * alias         - alternative name tried for the "sys_enter_<alias>" tracepoint
 *                 when no tracepoint format is found under @name.
 * arg_scnprintf - optional formatter per argument position (up to 6); a NULL
 *                 slot leaves the choice to syscall__set_arg_fmts().
 * arg_parm      - opaque per-argument parameter handed to the formatter
 *                 through syscall_arg::parm (e.g. a strarray).
 * errmsg, timeout, hexret
 *               - NOTE(review): presumably select how the return value is
 *                 rendered on syscall exit; the consumer is not in this part
 *                 of the file -- confirm there.
 *
 * syscall_fmts[] is searched with bsearch() in syscall_fmt__find(), so the
 * entries MUST stay sorted by .name in strcmp() order.
 */
static struct syscall_fmt {
        const char *name;
        const char *alias;
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        void       *arg_parm[6];
        bool       errmsg;
        bool       timeout;
        bool       hexret;
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "dup",        .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup2",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup3",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "faccessat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fadvise64",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fallocate",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchdir",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmod",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmodat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "fchown",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchownat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fdatasync",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fstatfs",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fsync",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "ftruncate", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "getdents",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
                             [1] = SCA_STRHEXARRAY, /* cmd */
                             [2] = SCA_HEX, /* arg */ },
          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
                             [2] = SCA_HEX, /* arg */ }, },
#endif
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mkdirat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "mknodat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mlockall",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* flags (mlockall takes no address) */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */
                             [4] = SCA_FD,        /* fd */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munlock",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "preadv",     .errmsg = true, .alias = "pread",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "pwritev",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "read",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "readlinkat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "readv",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "shutdown",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
        { .name     = "symlinkat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "utimensat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "writev",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
};
1094
1095 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1096 {
1097         const struct syscall_fmt *fmt = fmtp;
1098         return strcmp(name, fmt->name);
1099 }
1100
1101 static struct syscall_fmt *syscall_fmt__find(const char *name)
1102 {
1103         const int nmemb = ARRAY_SIZE(syscall_fmts);
1104         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1105 }
1106
/*
 * Cached description of one syscall, stored in trace->syscalls.table[id].
 *
 * tp_format     - format of the "syscalls:sys_enter_<name>" tracepoint (or of
 *                 the alias); NULL when none was found, in which case the
 *                 arguments are printed as raw arg0..arg5.
 * name          - name from audit_syscall_to_name(); a non-NULL name is what
 *                 marks the slot as already read.
 * filtered      - set when the event qualifier excludes this syscall.
 * is_exit       - true for "exit" and "exit_group".
 * fmt           - matching syscall_fmts[] entry, or NULL.
 * arg_scnprintf - array allocated by syscall__set_arg_fmts(), one formatter
 *                 slot per argument; a NULL slot means "print as %ld".
 * arg_parm      - points at fmt->arg_parm when @fmt is set.
 */
struct syscall {
        struct event_format *tp_format;
        const char          *name;
        bool                filtered;
        bool                is_exit;
        struct syscall_fmt  *fmt;
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void                **arg_parm;
};
1116
1117 static size_t fprintf_duration(unsigned long t, FILE *fp)
1118 {
1119         double duration = (double)t / NSEC_PER_MSEC;
1120         size_t printed = fprintf(fp, "(");
1121
1122         if (duration >= 1.0)
1123                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1124         else if (duration >= 0.01)
1125                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1126         else
1127                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1128         return printed + fprintf(fp, "): ");
1129 }
1130
/*
 * Per-thread tracing state, hung off thread->priv.
 *
 * entry_time    - sample time of the last sys_enter seen for the thread.
 * exit_time     - sample time of the last sys_exit seen for the thread.
 * entry_pending - set by trace__sys_enter() for syscalls other than
 *                 exit/exit_group, i.e. the formatted entry still waits for
 *                 its exit.
 * nr_events     - bumped on every thread__trace() lookup.
 * entry_str     - lazily malloc'ed 1024-byte buffer holding the formatted
 *                 "name(args" text of the current syscall.
 * runtime_ms    - NOTE(review): not touched in this part of the file;
 *                 presumably accumulated from sched events -- confirm.
 * paths         - fd -> pathname cache: @table is indexed by fd and holds
 *                 strdup'ed strings (NULL when unknown), @max is the highest
 *                 valid index, -1 while the table is empty.
 * syscall_stats - intlist keyed by syscall id; each node's priv is a
 *                 struct stats fed by thread__update_stats().
 */
struct thread_trace {
        u64               entry_time;
        u64               exit_time;
        bool              entry_pending;
        unsigned long     nr_events;
        char              *entry_str;
        double            runtime_ms;
        struct {
                int       max;
                char      **table;
        } paths;

        struct intlist *syscall_stats;
};
1145
1146 static struct thread_trace *thread_trace__new(void)
1147 {
1148         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1149
1150         if (ttrace)
1151                 ttrace->paths.max = -1;
1152
1153         ttrace->syscall_stats = intlist__new(NULL);
1154
1155         return ttrace;
1156 }
1157
1158 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1159 {
1160         struct thread_trace *ttrace;
1161
1162         if (thread == NULL)
1163                 goto fail;
1164
1165         if (thread->priv == NULL)
1166                 thread->priv = thread_trace__new();
1167                 
1168         if (thread->priv == NULL)
1169                 goto fail;
1170
1171         ttrace = thread->priv;
1172         ++ttrace->nr_events;
1173
1174         return ttrace;
1175 fail:
1176         color_fprintf(fp, PERF_COLOR_RED,
1177                       "WARNING: not enough memory, dropping samples!\n");
1178         return NULL;
1179 }
1180
/*
 * Page fault kinds that can be traced.
 * NOTE(review): presumably OR-ed together in trace::trace_pgfaults -- the
 * users are outside this part of the file, confirm there.
 */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)

/*
 * Global state of one 'perf trace' session.  Fields whose use is visible in
 * this part of the file:
 *
 * tool             - embedded perf_tool; trace__tool_process() gets back to
 *                    the struct trace from it with container_of().
 * audit.machine    - machine type passed to audit_syscall_to_name().
 * audit.open_id    - syscall id whose successful return binds
 *                    last_vfs_getname to the returned fd.
 * syscalls         - table of struct syscall indexed by syscall id; @max is
 *                    the highest valid index, -1 while the table is empty.
 * opts             - record options; opts.target is used when synthesizing
 *                    the already running threads.
 * host             - host machine created in trace__symbols_init().
 * base_time        - subtracted from sample timestamps before printing.
 * output           - stream all trace output and warnings are written to.
 * ev_qualifier,
 * not_ev_qualifier - list of syscall names and whether the list is negated;
 *                    together they decide syscall::filtered.
 * last_vfs_getname - pathname consumed by the next successful open exit.
 * duration_filter  - threshold in milliseconds, see trace__filter_duration().
 * stats            - counters of how often fd paths came from vfs_getname
 *                    versus from reading /proc.
 * live             - when false, fd paths are never looked up in /proc.
 * multiple_threads,
 * show_comm        - control the "comm/tid " prefix of each printed line.
 * summary          - enables the per-thread syscall statistics.
 * summary_only     - suppresses the per-syscall output lines.
 */
struct trace {
        struct perf_tool        tool;
        struct {
                int             machine;
                int             open_id;
        }                       audit;
        struct {
                int             max;
                struct syscall  *table;
        } syscalls;
        struct record_opts      opts;
        struct machine          *host;
        u64                     base_time;
        FILE                    *output;
        unsigned long           nr_events;
        struct strlist          *ev_qualifier;
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        double                  duration_filter;
        double                  runtime_ms;
        struct {
                u64             vfs_getname,
                                proc_getname;
        } stats;
        bool                    not_ev_qualifier;
        bool                    live;
        bool                    full_time;
        bool                    sched;
        bool                    multiple_threads;
        bool                    summary;
        bool                    summary_only;
        bool                    show_comm;
        bool                    show_tool_stats;
        int                     trace_pgfaults;
};
1220
1221 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1222 {
1223         struct thread_trace *ttrace = thread->priv;
1224
1225         if (fd > ttrace->paths.max) {
1226                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1227
1228                 if (npath == NULL)
1229                         return -1;
1230
1231                 if (ttrace->paths.max != -1) {
1232                         memset(npath + ttrace->paths.max + 1, 0,
1233                                (fd - ttrace->paths.max) * sizeof(char *));
1234                 } else {
1235                         memset(npath, 0, (fd + 1) * sizeof(char *));
1236                 }
1237
1238                 ttrace->paths.table = npath;
1239                 ttrace->paths.max   = fd;
1240         }
1241
1242         ttrace->paths.table[fd] = strdup(pathname);
1243
1244         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1245 }
1246
1247 static int thread__read_fd_path(struct thread *thread, int fd)
1248 {
1249         char linkname[PATH_MAX], pathname[PATH_MAX];
1250         struct stat st;
1251         int ret;
1252
1253         if (thread->pid_ == thread->tid) {
1254                 scnprintf(linkname, sizeof(linkname),
1255                           "/proc/%d/fd/%d", thread->pid_, fd);
1256         } else {
1257                 scnprintf(linkname, sizeof(linkname),
1258                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1259         }
1260
1261         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1262                 return -1;
1263
1264         ret = readlink(linkname, pathname, sizeof(pathname));
1265
1266         if (ret < 0 || ret > st.st_size)
1267                 return -1;
1268
1269         pathname[ret] = '\0';
1270         return trace__set_fd_pathname(thread, fd, pathname);
1271 }
1272
1273 static const char *thread__fd_path(struct thread *thread, int fd,
1274                                    struct trace *trace)
1275 {
1276         struct thread_trace *ttrace = thread->priv;
1277
1278         if (ttrace == NULL)
1279                 return NULL;
1280
1281         if (fd < 0)
1282                 return NULL;
1283
1284         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1285                 if (!trace->live)
1286                         return NULL;
1287                 ++trace->stats.proc_getname;
1288                 if (thread__read_fd_path(thread, fd))
1289                         return NULL;
1290         }
1291
1292         return ttrace->paths.table[fd];
1293 }
1294
1295 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1296                                         struct syscall_arg *arg)
1297 {
1298         int fd = arg->val;
1299         size_t printed = scnprintf(bf, size, "%d", fd);
1300         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1301
1302         if (path)
1303                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1304
1305         return printed;
1306 }
1307
1308 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1309                                               struct syscall_arg *arg)
1310 {
1311         int fd = arg->val;
1312         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1313         struct thread_trace *ttrace = arg->thread->priv;
1314
1315         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1316                 zfree(&ttrace->paths.table[fd]);
1317
1318         return printed;
1319 }
1320
1321 static bool trace__filter_duration(struct trace *trace, double t)
1322 {
1323         return t < (trace->duration_filter * NSEC_PER_MSEC);
1324 }
1325
1326 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1327 {
1328         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1329
1330         return fprintf(fp, "%10.3f ", ts);
1331 }
1332
/*
 * Flags shared between the signal handler and the rest of the program.
 * They are volatile sig_atomic_t because that is the object type a signal
 * handler may portably write to.
 *
 * done        - set once any handled signal arrived.
 * interrupted - whether the most recent handled signal was SIGINT.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/* Signal handler: request termination and remember whether it was SIGINT. */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1341
1342 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1343                                         u64 duration, u64 tstamp, FILE *fp)
1344 {
1345         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1346         printed += fprintf_duration(duration, fp);
1347
1348         if (trace->multiple_threads) {
1349                 if (trace->show_comm)
1350                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1351                 printed += fprintf(fp, "%d ", thread->tid);
1352         }
1353
1354         return printed;
1355 }
1356
1357 static int trace__process_event(struct trace *trace, struct machine *machine,
1358                                 union perf_event *event, struct perf_sample *sample)
1359 {
1360         int ret = 0;
1361
1362         switch (event->header.type) {
1363         case PERF_RECORD_LOST:
1364                 color_fprintf(trace->output, PERF_COLOR_RED,
1365                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1366                 ret = machine__process_lost_event(machine, event, sample);
1367         default:
1368                 ret = machine__process_event(machine, event, sample);
1369                 break;
1370         }
1371
1372         return ret;
1373 }
1374
1375 static int trace__tool_process(struct perf_tool *tool,
1376                                union perf_event *event,
1377                                struct perf_sample *sample,
1378                                struct machine *machine)
1379 {
1380         struct trace *trace = container_of(tool, struct trace, tool);
1381         return trace__process_event(trace, machine, event, sample);
1382 }
1383
1384 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1385 {
1386         int err = symbol__init();
1387
1388         if (err)
1389                 return err;
1390
1391         trace->host = machine__new_host();
1392         if (trace->host == NULL)
1393                 return -ENOMEM;
1394
1395         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1396                                             evlist->threads, trace__tool_process, false);
1397         if (err)
1398                 symbol__exit();
1399
1400         return err;
1401 }
1402
1403 static int syscall__set_arg_fmts(struct syscall *sc)
1404 {
1405         struct format_field *field;
1406         int idx = 0;
1407
1408         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1409         if (sc->arg_scnprintf == NULL)
1410                 return -1;
1411
1412         if (sc->fmt)
1413                 sc->arg_parm = sc->fmt->arg_parm;
1414
1415         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1416                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1417                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1418                 else if (field->flags & FIELD_IS_POINTER)
1419                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1420                 ++idx;
1421         }
1422
1423         return 0;
1424 }
1425
1426 static int trace__read_syscall_info(struct trace *trace, int id)
1427 {
1428         char tp_name[128];
1429         struct syscall *sc;
1430         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1431
1432         if (name == NULL)
1433                 return -1;
1434
1435         if (id > trace->syscalls.max) {
1436                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1437
1438                 if (nsyscalls == NULL)
1439                         return -1;
1440
1441                 if (trace->syscalls.max != -1) {
1442                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1443                                (id - trace->syscalls.max) * sizeof(*sc));
1444                 } else {
1445                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1446                 }
1447
1448                 trace->syscalls.table = nsyscalls;
1449                 trace->syscalls.max   = id;
1450         }
1451
1452         sc = trace->syscalls.table + id;
1453         sc->name = name;
1454
1455         if (trace->ev_qualifier) {
1456                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1457
1458                 if (!(in ^ trace->not_ev_qualifier)) {
1459                         sc->filtered = true;
1460                         /*
1461                          * No need to do read tracepoint information since this will be
1462                          * filtered out.
1463                          */
1464                         return 0;
1465                 }
1466         }
1467
1468         sc->fmt  = syscall_fmt__find(sc->name);
1469
1470         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1471         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1472
1473         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1474                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1475                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1476         }
1477
1478         if (sc->tp_format == NULL)
1479                 return -1;
1480
1481         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1482
1483         return syscall__set_arg_fmts(sc);
1484 }
1485
1486 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1487                                       unsigned long *args, struct trace *trace,
1488                                       struct thread *thread)
1489 {
1490         size_t printed = 0;
1491
1492         if (sc->tp_format != NULL) {
1493                 struct format_field *field;
1494                 u8 bit = 1;
1495                 struct syscall_arg arg = {
1496                         .idx    = 0,
1497                         .mask   = 0,
1498                         .trace  = trace,
1499                         .thread = thread,
1500                 };
1501
1502                 for (field = sc->tp_format->format.fields->next; field;
1503                      field = field->next, ++arg.idx, bit <<= 1) {
1504                         if (arg.mask & bit)
1505                                 continue;
1506                         /*
1507                          * Suppress this argument if its value is zero and
1508                          * and we don't have a string associated in an
1509                          * strarray for it.
1510                          */
1511                         if (args[arg.idx] == 0 &&
1512                             !(sc->arg_scnprintf &&
1513                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1514                               sc->arg_parm[arg.idx]))
1515                                 continue;
1516
1517                         printed += scnprintf(bf + printed, size - printed,
1518                                              "%s%s: ", printed ? ", " : "", field->name);
1519                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1520                                 arg.val = args[arg.idx];
1521                                 if (sc->arg_parm)
1522                                         arg.parm = sc->arg_parm[arg.idx];
1523                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1524                                                                       size - printed, &arg);
1525                         } else {
1526                                 printed += scnprintf(bf + printed, size - printed,
1527                                                      "%ld", args[arg.idx]);
1528                         }
1529                 }
1530         } else {
1531                 int i = 0;
1532
1533                 while (i < 6) {
1534                         printed += scnprintf(bf + printed, size - printed,
1535                                              "%sarg%d: %ld",
1536                                              printed ? ", " : "", i, args[i]);
1537                         ++i;
1538                 }
1539         }
1540
1541         return printed;
1542 }
1543
/*
 * Signature shared by the per-tracepoint sample handlers in this file
 * (e.g. trace__sys_enter() and trace__sys_exit()).
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1547
1548 static struct syscall *trace__syscall_info(struct trace *trace,
1549                                            struct perf_evsel *evsel, int id)
1550 {
1551
1552         if (id < 0) {
1553
1554                 /*
1555                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1556                  * before that, leaving at a higher verbosity level till that is
1557                  * explained. Reproduced with plain ftrace with:
1558                  *
1559                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1560                  * grep "NR -1 " /t/trace_pipe
1561                  *
1562                  * After generating some load on the machine.
1563                  */
1564                 if (verbose > 1) {
1565                         static u64 n;
1566                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1567                                 id, perf_evsel__name(evsel), ++n);
1568                 }
1569                 return NULL;
1570         }
1571
1572         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1573             trace__read_syscall_info(trace, id))
1574                 goto out_cant_read;
1575
1576         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1577                 goto out_cant_read;
1578
1579         return &trace->syscalls.table[id];
1580
1581 out_cant_read:
1582         if (verbose) {
1583                 fprintf(trace->output, "Problems reading syscall %d", id);
1584                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1585                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1586                 fputs(" information\n", trace->output);
1587         }
1588         return NULL;
1589 }
1590
1591 static void thread__update_stats(struct thread_trace *ttrace,
1592                                  int id, struct perf_sample *sample)
1593 {
1594         struct int_node *inode;
1595         struct stats *stats;
1596         u64 duration = 0;
1597
1598         inode = intlist__findnew(ttrace->syscall_stats, id);
1599         if (inode == NULL)
1600                 return;
1601
1602         stats = inode->priv;
1603         if (stats == NULL) {
1604                 stats = malloc(sizeof(struct stats));
1605                 if (stats == NULL)
1606                         return;
1607                 init_stats(stats);
1608                 inode->priv = stats;
1609         }
1610
1611         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1612                 duration = sample->time - ttrace->entry_time;
1613
1614         update_stats(stats, duration);
1615 }
1616
1617 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1618                             union perf_event *event __maybe_unused,
1619                             struct perf_sample *sample)
1620 {
1621         char *msg;
1622         void *args;
1623         size_t printed = 0;
1624         struct thread *thread;
1625         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1626         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1627         struct thread_trace *ttrace;
1628
1629         if (sc == NULL)
1630                 return -1;
1631
1632         if (sc->filtered)
1633                 return 0;
1634
1635         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1636         ttrace = thread__trace(thread, trace->output);
1637         if (ttrace == NULL)
1638                 return -1;
1639
1640         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1641
1642         if (ttrace->entry_str == NULL) {
1643                 ttrace->entry_str = malloc(1024);
1644                 if (!ttrace->entry_str)
1645                         return -1;
1646         }
1647
1648         ttrace->entry_time = sample->time;
1649         msg = ttrace->entry_str;
1650         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1651
1652         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1653                                            args, trace, thread);
1654
1655         if (sc->is_exit) {
1656                 if (!trace->duration_filter && !trace->summary_only) {
1657                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1658                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1659                 }
1660         } else
1661                 ttrace->entry_pending = true;
1662
1663         return 0;
1664 }
1665
1666 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1667                            union perf_event *event __maybe_unused,
1668                            struct perf_sample *sample)
1669 {
1670         int ret;
1671         u64 duration = 0;
1672         struct thread *thread;
1673         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1674         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1675         struct thread_trace *ttrace;
1676
1677         if (sc == NULL)
1678                 return -1;
1679
1680         if (sc->filtered)
1681                 return 0;
1682
1683         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1684         ttrace = thread__trace(thread, trace->output);
1685         if (ttrace == NULL)
1686                 return -1;
1687
1688         if (trace->summary)
1689                 thread__update_stats(ttrace, id, sample);
1690
1691         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1692
1693         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1694                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1695                 trace->last_vfs_getname = NULL;
1696                 ++trace->stats.vfs_getname;
1697         }
1698
1699         ttrace->exit_time = sample->time;
1700
1701         if (ttrace->entry_time) {
1702                 duration = sample->time - ttrace->entry_time;
1703                 if (trace__filter_duration(trace, duration))
1704                         goto out;
1705         } else if (trace->duration_filter)
1706                 goto out;
1707
1708         if (trace->summary_only)
1709                 goto out;
1710
1711         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1712
1713         if (ttrace->entry_pending) {
1714                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1715         } else {
1716                 fprintf(trace->output, " ... [");
1717                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1718                 fprintf(trace->output, "]: %s()", sc->name);
1719         }
1720
1721         if (sc->fmt == NULL) {
1722 signed_print:
1723                 fprintf(trace->output, ") = %d", ret);
1724         } else if (ret < 0 && sc->fmt->errmsg) {
1725                 char bf[256];
1726                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1727                            *e = audit_errno_to_name(-ret);
1728
1729                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1730         } else if (ret == 0 && sc->fmt->timeout)
1731                 fprintf(trace->output, ") = 0 Timeout");
1732         else if (sc->fmt->hexret)
1733                 fprintf(trace->output, ") = %#x", ret);
1734         else
1735                 goto signed_print;
1736
1737         fputc('\n', trace->output);
1738 out:
1739         ttrace->entry_pending = false;
1740
1741         return 0;
1742 }
1743
1744 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1745                               union perf_event *event __maybe_unused,
1746                               struct perf_sample *sample)
1747 {
1748         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1749         return 0;
1750 }
1751
1752 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1753                                      union perf_event *event __maybe_unused,
1754                                      struct perf_sample *sample)
1755 {
1756         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1757         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1758         struct thread *thread = machine__findnew_thread(trace->host,
1759                                                         sample->pid,
1760                                                         sample->tid);
1761         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1762
1763         if (ttrace == NULL)
1764                 goto out_dump;
1765
1766         ttrace->runtime_ms += runtime_ms;
1767         trace->runtime_ms += runtime_ms;
1768         return 0;
1769
1770 out_dump:
1771         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1772                evsel->name,
1773                perf_evsel__strval(evsel, sample, "comm"),
1774                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1775                runtime,
1776                perf_evsel__intval(evsel, sample, "vruntime"));
1777         return 0;
1778 }
1779
1780 static void print_location(FILE *f, struct perf_sample *sample,
1781                            struct addr_location *al,
1782                            bool print_dso, bool print_sym)
1783 {
1784
1785         if ((verbose || print_dso) && al->map)
1786                 fprintf(f, "%s@", al->map->dso->long_name);
1787
1788         if ((verbose || print_sym) && al->sym)
1789                 fprintf(f, "%s+0x%lx", al->sym->name,
1790                         al->addr - al->sym->start);
1791         else if (al->map)
1792                 fprintf(f, "0x%lx", al->addr);
1793         else
1794                 fprintf(f, "0x%lx", sample->addr);
1795 }
1796
1797 static int trace__pgfault(struct trace *trace,
1798                           struct perf_evsel *evsel,
1799                           union perf_event *event,
1800                           struct perf_sample *sample)
1801 {
1802         struct thread *thread;
1803         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1804         struct addr_location al;
1805         char map_type = 'd';
1806
1807         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1808
1809         thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
1810                               sample->ip, &al);
1811
1812         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1813
1814         fprintf(trace->output, "%sfault [",
1815                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1816                 "maj" : "min");
1817
1818         print_location(trace->output, sample, &al, false, true);
1819
1820         fprintf(trace->output, "] => ");
1821
1822         thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
1823                                    sample->addr, &al);
1824
1825         if (!al.map) {
1826                 thread__find_addr_location(thread, trace->host, cpumode,
1827                                            MAP__FUNCTION, sample->addr, &al);
1828
1829                 if (al.map)
1830                         map_type = 'x';
1831                 else
1832                         map_type = '?';
1833         }
1834
1835         print_location(trace->output, sample, &al, true, false);
1836
1837         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1838
1839         return 0;
1840 }
1841
1842 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1843 {
1844         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1845             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1846                 return false;
1847
1848         if (trace->pid_list || trace->tid_list)
1849                 return true;
1850
1851         return false;
1852 }
1853
1854 static int trace__process_sample(struct perf_tool *tool,
1855                                  union perf_event *event,
1856                                  struct perf_sample *sample,
1857                                  struct perf_evsel *evsel,
1858                                  struct machine *machine __maybe_unused)
1859 {
1860         struct trace *trace = container_of(tool, struct trace, tool);
1861         int err = 0;
1862
1863         tracepoint_handler handler = evsel->handler;
1864
1865         if (skip_sample(trace, sample))
1866                 return 0;
1867
1868         if (!trace->full_time && trace->base_time == 0)
1869                 trace->base_time = sample->time;
1870
1871         if (handler) {
1872                 ++trace->nr_events;
1873                 handler(trace, evsel, event, sample);
1874         }
1875
1876         return err;
1877 }
1878
1879 static int parse_target_str(struct trace *trace)
1880 {
1881         if (trace->opts.target.pid) {
1882                 trace->pid_list = intlist__new(trace->opts.target.pid);
1883                 if (trace->pid_list == NULL) {
1884                         pr_err("Error parsing process id string\n");
1885                         return -EINVAL;
1886                 }
1887         }
1888
1889         if (trace->opts.target.tid) {
1890                 trace->tid_list = intlist__new(trace->opts.target.tid);
1891                 if (trace->tid_list == NULL) {
1892                         pr_err("Error parsing thread id string\n");
1893                         return -EINVAL;
1894                 }
1895         }
1896
1897         return 0;
1898 }
1899
/*
 * Implements "perf trace record": builds an argv for cmd_record() made of a
 * fixed set of record options, the syscall enter/exit event string and the
 * caller's remaining arguments, then hands it to cmd_record().
 *
 * Returns -ENOMEM if the argument vector cannot be allocated, -1 if neither
 * syscall tracepoint flavour exists, otherwise cmd_record()'s return value.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		/* nothing was handed to cmd_record(), so release the vector */
		free(rec_argv);
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1938
1939 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1940
1941 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1942 {
1943         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1944         if (evsel == NULL)
1945                 return;
1946
1947         if (perf_evsel__field(evsel, "pathname") == NULL) {
1948                 perf_evsel__delete(evsel);
1949                 return;
1950         }
1951
1952         evsel->handler = trace__vfs_getname;
1953         perf_evlist__add(evlist, evsel);
1954 }
1955
1956 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
1957                                     u64 config)
1958 {
1959         struct perf_evsel *evsel;
1960         struct perf_event_attr attr = {
1961                 .type = PERF_TYPE_SOFTWARE,
1962                 .mmap_data = 1,
1963                 .sample_period = 1,
1964         };
1965
1966         attr.config = config;
1967
1968         event_attr_init(&attr);
1969
1970         evsel = perf_evsel__new(&attr);
1971         if (!evsel)
1972                 return -ENOMEM;
1973
1974         evsel->handler = trace__pgfault;
1975         perf_evlist__add(evlist, evsel);
1976
1977         return 0;
1978 }
1979
1980 static int trace__run(struct trace *trace, int argc, const char **argv)
1981 {
1982         struct perf_evlist *evlist = perf_evlist__new();
1983         struct perf_evsel *evsel;
1984         int err = -1, i;
1985         unsigned long before;
1986         const bool forks = argc > 0;
1987
1988         trace->live = true;
1989
1990         if (evlist == NULL) {
1991                 fprintf(trace->output, "Not enough memory to run!\n");
1992                 goto out;
1993         }
1994
1995         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1996                 goto out_error_tp;
1997
1998         perf_evlist__add_vfs_getname(evlist);
1999
2000         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2001             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
2002                 goto out_error_tp;
2003
2004         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2005             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2006                 goto out_error_tp;
2007
2008         if (trace->sched &&
2009                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2010                                 trace__sched_stat_runtime))
2011                 goto out_error_tp;
2012
2013         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2014         if (err < 0) {
2015                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2016                 goto out_delete_evlist;
2017         }
2018
2019         err = trace__symbols_init(trace, evlist);
2020         if (err < 0) {
2021                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2022                 goto out_delete_evlist;
2023         }
2024
2025         perf_evlist__config(evlist, &trace->opts);
2026
2027         signal(SIGCHLD, sig_handler);
2028         signal(SIGINT, sig_handler);
2029
2030         if (forks) {
2031                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2032                                                     argv, false, NULL);
2033                 if (err < 0) {
2034                         fprintf(trace->output, "Couldn't run the workload!\n");
2035                         goto out_delete_evlist;
2036                 }
2037         }
2038
2039         err = perf_evlist__open(evlist);
2040         if (err < 0)
2041                 goto out_error_open;
2042
2043         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2044         if (err < 0) {
2045                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
2046                 goto out_delete_evlist;
2047         }
2048
2049         perf_evlist__enable(evlist);
2050
2051         if (forks)
2052                 perf_evlist__start_workload(evlist);
2053
2054         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
2055 again:
2056         before = trace->nr_events;
2057
2058         for (i = 0; i < evlist->nr_mmaps; i++) {
2059                 union perf_event *event;
2060
2061                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2062                         const u32 type = event->header.type;
2063                         tracepoint_handler handler;
2064                         struct perf_sample sample;
2065
2066                         ++trace->nr_events;
2067
2068                         err = perf_evlist__parse_sample(evlist, event, &sample);
2069                         if (err) {
2070                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2071                                 goto next_event;
2072                         }
2073
2074                         if (!trace->full_time && trace->base_time == 0)
2075                                 trace->base_time = sample.time;
2076
2077                         if (type != PERF_RECORD_SAMPLE) {
2078                                 trace__process_event(trace, trace->host, event, &sample);
2079                                 continue;
2080                         }
2081
2082                         evsel = perf_evlist__id2evsel(evlist, sample.id);
2083                         if (evsel == NULL) {
2084                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2085                                 goto next_event;
2086                         }
2087
2088                         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2089                             sample.raw_data == NULL) {
2090                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2091                                        perf_evsel__name(evsel), sample.tid,
2092                                        sample.cpu, sample.raw_size);
2093                                 goto next_event;
2094                         }
2095
2096                         handler = evsel->handler;
2097                         handler(trace, evsel, event, &sample);
2098 next_event:
2099                         perf_evlist__mmap_consume(evlist, i);
2100
2101                         if (interrupted)
2102                                 goto out_disable;
2103                 }
2104         }
2105
2106         if (trace->nr_events == before) {
2107                 int timeout = done ? 100 : -1;
2108
2109                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2110                         goto again;
2111         } else {
2112                 goto again;
2113         }
2114
2115 out_disable:
2116         perf_evlist__disable(evlist);
2117
2118         if (!err) {
2119                 if (trace->summary)
2120                         trace__fprintf_thread_summary(trace, trace->output);
2121
2122                 if (trace->show_tool_stats) {
2123                         fprintf(trace->output, "Stats:\n "
2124                                                " vfs_getname : %" PRIu64 "\n"
2125                                                " proc_getname: %" PRIu64 "\n",
2126                                 trace->stats.vfs_getname,
2127                                 trace->stats.proc_getname);
2128                 }
2129         }
2130
2131 out_delete_evlist:
2132         perf_evlist__delete(evlist);
2133 out:
2134         trace->live = false;
2135         return err;
2136 {
2137         char errbuf[BUFSIZ];
2138
2139 out_error_tp:
2140         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2141         goto out_error;
2142
2143 out_error_open:
2144         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2145
2146 out_error:
2147         fprintf(trace->output, "%s\n", errbuf);
2148         goto out_delete_evlist;
2149 }
2150 }
2151
2152 static int trace__replay(struct trace *trace)
2153 {
2154         const struct perf_evsel_str_handler handlers[] = {
2155                 { "probe:vfs_getname",       trace__vfs_getname, },
2156         };
2157         struct perf_data_file file = {
2158                 .path  = input_name,
2159                 .mode  = PERF_DATA_MODE_READ,
2160         };
2161         struct perf_session *session;
2162         struct perf_evsel *evsel;
2163         int err = -1;
2164
2165         trace->tool.sample        = trace__process_sample;
2166         trace->tool.mmap          = perf_event__process_mmap;
2167         trace->tool.mmap2         = perf_event__process_mmap2;
2168         trace->tool.comm          = perf_event__process_comm;
2169         trace->tool.exit          = perf_event__process_exit;
2170         trace->tool.fork          = perf_event__process_fork;
2171         trace->tool.attr          = perf_event__process_attr;
2172         trace->tool.tracing_data = perf_event__process_tracing_data;
2173         trace->tool.build_id      = perf_event__process_build_id;
2174
2175         trace->tool.ordered_samples = true;
2176         trace->tool.ordering_requires_timestamps = true;
2177
2178         /* add tid to output */
2179         trace->multiple_threads = true;
2180
2181         if (symbol__init() < 0)
2182                 return -1;
2183
2184         session = perf_session__new(&file, false, &trace->tool);
2185         if (session == NULL)
2186                 return -ENOMEM;
2187
2188         trace->host = &session->machines.host;
2189
2190         err = perf_session__set_tracepoints_handlers(session, handlers);
2191         if (err)
2192                 goto out;
2193
2194         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2195                                                      "raw_syscalls:sys_enter");
2196         /* older kernels have syscalls tp versus raw_syscalls */
2197         if (evsel == NULL)
2198                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2199                                                              "syscalls:sys_enter");
2200         if (evsel == NULL) {
2201                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2202                 goto out;
2203         }
2204
2205         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2206             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2207                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2208                 goto out;
2209         }
2210
2211         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2212                                                      "raw_syscalls:sys_exit");
2213         if (evsel == NULL)
2214                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2215                                                              "syscalls:sys_exit");
2216         if (evsel == NULL) {
2217                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2218                 goto out;
2219         }
2220
2221         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2222             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2223                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2224                 goto out;
2225         }
2226
2227         err = parse_target_str(trace);
2228         if (err != 0)
2229                 goto out;
2230
2231         setup_pager();
2232
2233         err = perf_session__process_events(session, &trace->tool);
2234         if (err)
2235                 pr_err("Failed to process events, error %d", err);
2236
2237         else if (trace->summary)
2238                 trace__fprintf_thread_summary(trace, trace->output);
2239
2240 out:
2241         perf_session__delete(session);
2242
2243         return err;
2244 }
2245
/* Print the heading of the per-thread summary; returns fprintf()'s result. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2254
2255 static size_t thread__dump_stats(struct thread_trace *ttrace,
2256                                  struct trace *trace, FILE *fp)
2257 {
2258         struct stats *stats;
2259         size_t printed = 0;
2260         struct syscall *sc;
2261         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2262
2263         if (inode == NULL)
2264                 return 0;
2265
2266         printed += fprintf(fp, "\n");
2267
2268         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2269         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2270         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2271
2272         /* each int_node is a syscall */
2273         while (inode) {
2274                 stats = inode->priv;
2275                 if (stats) {
2276                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2277                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2278                         double avg = avg_stats(stats);
2279                         double pct;
2280                         u64 n = (u64) stats->n;
2281
2282                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2283                         avg /= NSEC_PER_MSEC;
2284
2285                         sc = &trace->syscalls.table[inode->i];
2286                         printed += fprintf(fp, "   %-15s", sc->name);
2287                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2288                                            n, min, avg);
2289                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2290                 }
2291
2292                 inode = intlist__next(inode);
2293         }
2294
2295         printed += fprintf(fp, "\n\n");
2296
2297         return printed;
2298 }
2299
/*
 * Context handed to the per-thread summary callback through its 'priv'
 * argument.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session the threads belong to */
	size_t printed;		/* running count of characters printed */
};
2306
2307 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2308 {
2309         struct summary_data *data = priv;
2310         FILE *fp = data->fp;
2311         size_t printed = data->printed;
2312         struct trace *trace = data->trace;
2313         struct thread_trace *ttrace = thread->priv;
2314         double ratio;
2315
2316         if (ttrace == NULL)
2317                 return 0;
2318
2319         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2320
2321         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2322         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2323         printed += fprintf(fp, "%.1f%%", ratio);
2324         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2325         printed += thread__dump_stats(ttrace, trace, fp);
2326
2327         data->printed += printed;
2328
2329         return 0;
2330 }
2331
2332 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2333 {
2334         struct summary_data data = {
2335                 .fp = fp,
2336                 .trace = trace
2337         };
2338         data.printed = trace__fprintf_threads_header(fp);
2339
2340         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2341
2342         return data.printed;
2343 }
2344
2345 static int trace__set_duration(const struct option *opt, const char *str,
2346                                int unset __maybe_unused)
2347 {
2348         struct trace *trace = opt->value;
2349
2350         trace->duration_filter = atof(str);
2351         return 0;
2352 }
2353
2354 static int trace__open_output(struct trace *trace, const char *filename)
2355 {
2356         struct stat st;
2357
2358         if (!stat(filename, &st) && st.st_size) {
2359                 char oldname[PATH_MAX];
2360
2361                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2362                 unlink(oldname);
2363                 rename(filename, oldname);
2364         }
2365
2366         trace->output = fopen(filename, "w");
2367
2368         return trace->output == NULL ? -errno : 0;
2369 }
2370
2371 static int parse_pagefaults(const struct option *opt, const char *str,
2372                             int unset __maybe_unused)
2373 {
2374         int *trace_pgfaults = opt->value;
2375
2376         if (strcmp(str, "all") == 0)
2377                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2378         else if (strcmp(str, "maj") == 0)
2379                 *trace_pgfaults |= TRACE_PFMAJ;
2380         else if (strcmp(str, "min") == 0)
2381                 *trace_pgfaults |= TRACE_PFMIN;
2382         else
2383                 return -1;
2384
2385         return 0;
2386 }
2387
2388 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2389 {
2390         const char * const trace_usage[] = {
2391                 "perf trace [<options>] [<command>]",
2392                 "perf trace [<options>] -- <command> [<options>]",
2393                 "perf trace record [<options>] [<command>]",
2394                 "perf trace record [<options>] -- <command> [<options>]",
2395                 NULL
2396         };
2397         struct trace trace = {
2398                 .audit = {
2399                         .machine = audit_detect_machine(),
2400                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2401                 },
2402                 .syscalls = {
2403                         . max = -1,
2404                 },
2405                 .opts = {
2406                         .target = {
2407                                 .uid       = UINT_MAX,
2408                                 .uses_mmap = true,
2409                         },
2410                         .user_freq     = UINT_MAX,
2411                         .user_interval = ULLONG_MAX,
2412                         .no_buffering  = true,
2413                         .mmap_pages    = 1024,
2414                 },
2415                 .output = stdout,
2416                 .show_comm = true,
2417         };
2418         const char *output_name = NULL;
2419         const char *ev_qualifier_str = NULL;
2420         const struct option trace_options[] = {
2421         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2422                     "show the thread COMM next to its id"),
2423         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2424         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2425                     "list of events to trace"),
2426         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2427         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2428         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2429                     "trace events on existing process id"),
2430         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2431                     "trace events on existing thread id"),
2432         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2433                     "system-wide collection from all CPUs"),
2434         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2435                     "list of cpus to monitor"),
2436         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2437                     "child tasks do not inherit counters"),
2438         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2439                      "number of mmap data pages",
2440                      perf_evlist__parse_mmap_pages),
2441         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2442                    "user to profile"),
2443         OPT_CALLBACK(0, "duration", &trace, "float",
2444                      "show only events with duration > N.M ms",
2445                      trace__set_duration),
2446         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2447         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2448         OPT_BOOLEAN('T', "time", &trace.full_time,
2449                     "Show full timestamp, not time relative to first start"),
2450         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2451                     "Show only syscall summary with statistics"),
2452         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2453                     "Show all syscalls and summary with statistics"),
2454         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2455                      "Trace pagefaults", parse_pagefaults, "maj"),
2456         OPT_END()
2457         };
2458         int err;
2459         char bf[BUFSIZ];
2460
2461         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2462                 return trace__record(argc-2, &argv[2]);
2463
2464         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2465
2466         /* summary_only implies summary option, but don't overwrite summary if set */
2467         if (trace.summary_only)
2468                 trace.summary = trace.summary_only;
2469
2470         if (trace.trace_pgfaults) {
2471                 trace.opts.sample_address = true;
2472                 trace.opts.sample_time = true;
2473         }
2474
2475         if (output_name != NULL) {
2476                 err = trace__open_output(&trace, output_name);
2477                 if (err < 0) {
2478                         perror("failed to create output file");
2479                         goto out;
2480                 }
2481         }
2482
2483         if (ev_qualifier_str != NULL) {
2484                 const char *s = ev_qualifier_str;
2485
2486                 trace.not_ev_qualifier = *s == '!';
2487                 if (trace.not_ev_qualifier)
2488                         ++s;
2489                 trace.ev_qualifier = strlist__new(true, s);
2490                 if (trace.ev_qualifier == NULL) {
2491                         fputs("Not enough memory to parse event qualifier",
2492                               trace.output);
2493                         err = -ENOMEM;
2494                         goto out_close;
2495                 }
2496         }
2497
2498         err = target__validate(&trace.opts.target);
2499         if (err) {
2500                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2501                 fprintf(trace.output, "%s", bf);
2502                 goto out_close;
2503         }
2504
2505         err = target__parse_uid(&trace.opts.target);
2506         if (err) {
2507                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2508                 fprintf(trace.output, "%s", bf);
2509                 goto out_close;
2510         }
2511
2512         if (!argc && target__none(&trace.opts.target))
2513                 trace.opts.target.system_wide = true;
2514
2515         if (input_name)
2516                 err = trace__replay(&trace);
2517         else
2518                 err = trace__run(&trace, argc, argv);
2519
2520 out_close:
2521         if (output_name != NULL)
2522                 fclose(trace.output);
2523 out:
2524         return err;
2525 }