OSDN Git Service

93b80f12f35e6797f76cfe201e89fbae82016b36
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / tools / perf / builtin-trace.c
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include "util/exec_cmd.h"
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include "util/parse-options.h"
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36
37 #include <libaudit.h>
38 #include <stdlib.h>
39 #include <sys/mman.h>
40 #include <linux/futex.h>
41 #include <linux/err.h>
42
/*
 * Fallbacks for constants that the headers of older distros may not
 * provide. Every value is only defined when nothing defined it before.
 */

/* mmap() flags */
#ifndef MAP_STACK
#define MAP_STACK		0x20000
#endif

/* madvise() behaviours */
#ifndef MADV_HWPOISON
#define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE	13
#endif

/* eventfd() flags */
#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
#define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
#define EFD_CLOEXEC		02000000
#endif

/* open() flags */
#ifndef O_CLOEXEC
#define O_CLOEXEC		02000000
#endif

/* socket() types and flags */
#ifndef SOCK_DCCP
#define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK		00004000
#endif

/* recvmsg() flags */
#ifndef MSG_CMSG_CLOEXEC
#define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open() flags */
#ifndef PERF_FLAG_FD_NO_GROUP
#define PERF_FLAG_FD_NO_GROUP	(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
#define PERF_FLAG_FD_OUTPUT	(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
#define PERF_FLAG_PID_CGROUP	(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
#define PERF_FLAG_FD_CLOEXEC	(1UL << 3) /* O_CLOEXEC */
#endif
108
109
110 struct tp_field {
111         int offset;
112         union {
113                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
114                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
115         };
116 };
117
118 #define TP_UINT_FIELD(bits) \
119 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
120 { \
121         u##bits value; \
122         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123         return value;  \
124 }
125
126 TP_UINT_FIELD(8);
127 TP_UINT_FIELD(16);
128 TP_UINT_FIELD(32);
129 TP_UINT_FIELD(64);
130
131 #define TP_UINT_FIELD__SWAPPED(bits) \
132 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
133 { \
134         u##bits value; \
135         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136         return bswap_##bits(value);\
137 }
138
139 TP_UINT_FIELD__SWAPPED(16);
140 TP_UINT_FIELD__SWAPPED(32);
141 TP_UINT_FIELD__SWAPPED(64);
142
143 static int tp_field__init_uint(struct tp_field *field,
144                                struct format_field *format_field,
145                                bool needs_swap)
146 {
147         field->offset = format_field->offset;
148
149         switch (format_field->size) {
150         case 1:
151                 field->integer = tp_field__u8;
152                 break;
153         case 2:
154                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155                 break;
156         case 4:
157                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158                 break;
159         case 8:
160                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161                 break;
162         default:
163                 return -1;
164         }
165
166         return 0;
167 }
168
169 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
170 {
171         return sample->raw_data + field->offset;
172 }
173
174 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
175 {
176         field->offset = format_field->offset;
177         field->pointer = tp_field__ptr;
178         return 0;
179 }
180
181 struct syscall_tp {
182         struct tp_field id;
183         union {
184                 struct tp_field args, ret;
185         };
186 };
187
188 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189                                           struct tp_field *field,
190                                           const char *name)
191 {
192         struct format_field *format_field = perf_evsel__field(evsel, name);
193
194         if (format_field == NULL)
195                 return -1;
196
197         return tp_field__init_uint(field, format_field, evsel->needs_swap);
198 }
199
/*
 * Initialise member @name of the struct syscall_tp hanging off evsel->priv
 * as an unsigned integer reader for the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
203
204 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
205                                          struct tp_field *field,
206                                          const char *name)
207 {
208         struct format_field *format_field = perf_evsel__field(evsel, name);
209
210         if (format_field == NULL)
211                 return -1;
212
213         return tp_field__init_ptr(field, format_field);
214 }
215
/*
 * Initialise member @name of the struct syscall_tp hanging off evsel->priv
 * as a pointer reader for the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
219
220 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
221 {
222         zfree(&evsel->priv);
223         perf_evsel__delete(evsel);
224 }
225
226 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
227 {
228         evsel->priv = malloc(sizeof(struct syscall_tp));
229         if (evsel->priv != NULL) {
230                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231                         goto out_delete;
232
233                 evsel->handler = handler;
234                 return 0;
235         }
236
237         return -ENOMEM;
238
239 out_delete:
240         zfree(&evsel->priv);
241         return -ENOENT;
242 }
243
244 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
245 {
246         struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
247
248         /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
249         if (IS_ERR(evsel))
250                 evsel = perf_evsel__newtp("syscalls", direction);
251
252         if (IS_ERR(evsel))
253                 return NULL;
254
255         if (perf_evsel__init_syscall_tp(evsel, handler))
256                 goto out_delete;
257
258         return evsel;
259
260 out_delete:
261         perf_evsel__delete_priv(evsel);
262         return NULL;
263 }
264
/*
 * Read member @name of the struct syscall_tp in evsel->priv from @sample
 * as an integer, through the reader installed in that member.
 *
 * Macro arguments are parenthesized so that arbitrary expressions can be
 * passed for @evsel and @sample.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })

/* Same as perf_evsel__sc_tp_uint(), but through the pointer reader. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
272
273 struct syscall_arg {
274         unsigned long val;
275         struct thread *thread;
276         struct trace  *trace;
277         void          *parm;
278         u8            idx;
279         u8            mask;
280 };
281
/*
 * Table translating integer values to names: value 'offset' maps to
 * entries[0], 'offset + 1' to entries[1] and so on.
 */
struct strarray {
	int         offset;	/* value of the first entry */
	int         nr_entries;	/* number of slots in 'entries' */
	const char **entries;	/* the names, indexed by value - offset */
};
287
/*
 * Define 'strarray__<array>', a struct strarray wrapping the string table
 * @array, whose first entry corresponds to value 0.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.offset	    = 0, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same as DEFINE_STRARRAY(), but the first entry corresponds to value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
298
299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300                                                 const char *intfmt,
301                                                 struct syscall_arg *arg)
302 {
303         struct strarray *sa = arg->parm;
304         int idx = arg->val - sa->offset;
305
306         if (idx < 0 || idx >= sa->nr_entries)
307                 return scnprintf(bf, size, intfmt, arg->val);
308
309         return scnprintf(bf, size, "%s", sa->entries[idx]);
310 }
311
/*
 * String table beautifier: prints the name for arg->val, or the value in
 * decimal when the table has no name for it.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
319
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */

/*
 * String table beautifier: prints the name for arg->val, or the value in
 * hexadecimal when the table has no name for it.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
333
334 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
335                                         struct syscall_arg *arg);
336
337 #define SCA_FD syscall_arg__scnprintf_fd
338
339 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340                                            struct syscall_arg *arg)
341 {
342         int fd = arg->val;
343
344         if (fd == AT_FDCWD)
345                 return scnprintf(bf, size, "CWD");
346
347         return syscall_arg__scnprintf_fd(bf, size, arg);
348 }
349
350 #define SCA_FDAT syscall_arg__scnprintf_fd_at
351
352 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
353                                               struct syscall_arg *arg);
354
355 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
356
357 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358                                          struct syscall_arg *arg)
359 {
360         return scnprintf(bf, size, "%#lx", arg->val);
361 }
362
363 #define SCA_HEX syscall_arg__scnprintf_hex
364
365 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366                                          struct syscall_arg *arg)
367 {
368         return scnprintf(bf, size, "%d", arg->val);
369 }
370
371 #define SCA_INT syscall_arg__scnprintf_int
372
373 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374                                                struct syscall_arg *arg)
375 {
376         int printed = 0, prot = arg->val;
377
378         if (prot == PROT_NONE)
379                 return scnprintf(bf, size, "NONE");
380 #define P_MMAP_PROT(n) \
381         if (prot & PROT_##n) { \
382                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383                 prot &= ~PROT_##n; \
384         }
385
386         P_MMAP_PROT(EXEC);
387         P_MMAP_PROT(READ);
388         P_MMAP_PROT(WRITE);
389 #ifdef PROT_SEM
390         P_MMAP_PROT(SEM);
391 #endif
392         P_MMAP_PROT(GROWSDOWN);
393         P_MMAP_PROT(GROWSUP);
394 #undef P_MMAP_PROT
395
396         if (prot)
397                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
398
399         return printed;
400 }
401
402 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
403
404 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405                                                 struct syscall_arg *arg)
406 {
407         int printed = 0, flags = arg->val;
408
409 #define P_MMAP_FLAG(n) \
410         if (flags & MAP_##n) { \
411                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412                 flags &= ~MAP_##n; \
413         }
414
415         P_MMAP_FLAG(SHARED);
416         P_MMAP_FLAG(PRIVATE);
417 #ifdef MAP_32BIT
418         P_MMAP_FLAG(32BIT);
419 #endif
420         P_MMAP_FLAG(ANONYMOUS);
421         P_MMAP_FLAG(DENYWRITE);
422         P_MMAP_FLAG(EXECUTABLE);
423         P_MMAP_FLAG(FILE);
424         P_MMAP_FLAG(FIXED);
425         P_MMAP_FLAG(GROWSDOWN);
426 #ifdef MAP_HUGETLB
427         P_MMAP_FLAG(HUGETLB);
428 #endif
429         P_MMAP_FLAG(LOCKED);
430         P_MMAP_FLAG(NONBLOCK);
431         P_MMAP_FLAG(NORESERVE);
432         P_MMAP_FLAG(POPULATE);
433         P_MMAP_FLAG(STACK);
434 #ifdef MAP_UNINITIALIZED
435         P_MMAP_FLAG(UNINITIALIZED);
436 #endif
437 #undef P_MMAP_FLAG
438
439         if (flags)
440                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
441
442         return printed;
443 }
444
445 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
446
447 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448                                                   struct syscall_arg *arg)
449 {
450         int printed = 0, flags = arg->val;
451
452 #define P_MREMAP_FLAG(n) \
453         if (flags & MREMAP_##n) { \
454                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455                 flags &= ~MREMAP_##n; \
456         }
457
458         P_MREMAP_FLAG(MAYMOVE);
459 #ifdef MREMAP_FIXED
460         P_MREMAP_FLAG(FIXED);
461 #endif
462 #undef P_MREMAP_FLAG
463
464         if (flags)
465                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466
467         return printed;
468 }
469
470 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
471
472 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473                                                       struct syscall_arg *arg)
474 {
475         int behavior = arg->val;
476
477         switch (behavior) {
478 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479         P_MADV_BHV(NORMAL);
480         P_MADV_BHV(RANDOM);
481         P_MADV_BHV(SEQUENTIAL);
482         P_MADV_BHV(WILLNEED);
483         P_MADV_BHV(DONTNEED);
484         P_MADV_BHV(REMOVE);
485         P_MADV_BHV(DONTFORK);
486         P_MADV_BHV(DOFORK);
487         P_MADV_BHV(HWPOISON);
488 #ifdef MADV_SOFT_OFFLINE
489         P_MADV_BHV(SOFT_OFFLINE);
490 #endif
491         P_MADV_BHV(MERGEABLE);
492         P_MADV_BHV(UNMERGEABLE);
493 #ifdef MADV_HUGEPAGE
494         P_MADV_BHV(HUGEPAGE);
495 #endif
496 #ifdef MADV_NOHUGEPAGE
497         P_MADV_BHV(NOHUGEPAGE);
498 #endif
499 #ifdef MADV_DONTDUMP
500         P_MADV_BHV(DONTDUMP);
501 #endif
502 #ifdef MADV_DODUMP
503         P_MADV_BHV(DODUMP);
504 #endif
505 #undef P_MADV_PHV
506         default: break;
507         }
508
509         return scnprintf(bf, size, "%#x", behavior);
510 }
511
512 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
513
514 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515                                            struct syscall_arg *arg)
516 {
517         int printed = 0, op = arg->val;
518
519         if (op == 0)
520                 return scnprintf(bf, size, "NONE");
521 #define P_CMD(cmd) \
522         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524                 op &= ~LOCK_##cmd; \
525         }
526
527         P_CMD(SH);
528         P_CMD(EX);
529         P_CMD(NB);
530         P_CMD(UN);
531         P_CMD(MAND);
532         P_CMD(RW);
533         P_CMD(READ);
534         P_CMD(WRITE);
535 #undef P_OP
536
537         if (op)
538                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
539
540         return printed;
541 }
542
543 #define SCA_FLOCK syscall_arg__scnprintf_flock
544
545 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
546 {
547         enum syscall_futex_args {
548                 SCF_UADDR   = (1 << 0),
549                 SCF_OP      = (1 << 1),
550                 SCF_VAL     = (1 << 2),
551                 SCF_TIMEOUT = (1 << 3),
552                 SCF_UADDR2  = (1 << 4),
553                 SCF_VAL3    = (1 << 5),
554         };
555         int op = arg->val;
556         int cmd = op & FUTEX_CMD_MASK;
557         size_t printed = 0;
558
559         switch (cmd) {
560 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
562         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
565         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
566         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
567         P_FUTEX_OP(WAKE_OP);                                                      break;
568         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
571         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
572         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
573         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
574         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
575         }
576
577         if (op & FUTEX_PRIVATE_FLAG)
578                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
579
580         if (op & FUTEX_CLOCK_REALTIME)
581                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
582
583         return printed;
584 }
585
586 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
587
588 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
589 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
590
591 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
592 static DEFINE_STRARRAY(itimers);
593
/* Names of the keyctl() options, indexed from 0. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE",
	"CHOWN", "SETPERM", "DESCRIBE", "CLEAR",
	"LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
602
/*
 * Names of the lseek() 'whence' values, indexed from 0. "DATA" and "HOLE"
 * are only present when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
612
/*
 * Names of the fcntl() commands, indexed from 0. All names are spelled
 * without the "F_" prefix, so the whole table is consistent.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
620
/* Names of the resource limits, indexed from 0. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK",
	"CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING",
	"MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
627
/* Names of the signal mask 'how' values, indexed from 0. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);

/* Names of the clock ids, indexed from 0. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID", "MONOTONIC_RAW", "REALTIME_COARSE",
	"MONOTONIC_COARSE", "BOOTTIME", "REALTIME_ALARM",
	"BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);
637
/* Names of the socket address families, indexed from 0. */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX",
	"APPLETALK", "NETROM", "BRIDGE", "ATMPVC", "X25",
	"INET6", "ROSE", "DECnet", "NETBEUI", "SECURITY",
	"KEY", "NETLINK", "PACKET", "ASH", "ECONET",
	"ATMSVC", "RDS", "SNA", "IRDA", "PPPOX",
	"WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET",
	"IEEE802154", "CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
647
648 #ifndef SOCK_TYPE_MASK
649 #define SOCK_TYPE_MASK 0xf
650 #endif
651
652 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
653                                                       struct syscall_arg *arg)
654 {
655         size_t printed;
656         int type = arg->val,
657             flags = type & ~SOCK_TYPE_MASK;
658
659         type &= SOCK_TYPE_MASK;
660         /*
661          * Can't use a strarray, MIPS may override for ABI reasons.
662          */
663         switch (type) {
664 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
665         P_SK_TYPE(STREAM);
666         P_SK_TYPE(DGRAM);
667         P_SK_TYPE(RAW);
668         P_SK_TYPE(RDM);
669         P_SK_TYPE(SEQPACKET);
670         P_SK_TYPE(DCCP);
671         P_SK_TYPE(PACKET);
672 #undef P_SK_TYPE
673         default:
674                 printed = scnprintf(bf, size, "%#x", type);
675         }
676
677 #define P_SK_FLAG(n) \
678         if (flags & SOCK_##n) { \
679                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
680                 flags &= ~SOCK_##n; \
681         }
682
683         P_SK_FLAG(CLOEXEC);
684         P_SK_FLAG(NONBLOCK);
685 #undef P_SK_FLAG
686
687         if (flags)
688                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
689
690         return printed;
691 }
692
693 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
694
695 #ifndef MSG_PROBE
696 #define MSG_PROBE            0x10
697 #endif
698 #ifndef MSG_WAITFORONE
699 #define MSG_WAITFORONE  0x10000
700 #endif
701 #ifndef MSG_SENDPAGE_NOTLAST
702 #define MSG_SENDPAGE_NOTLAST 0x20000
703 #endif
704 #ifndef MSG_FASTOPEN
705 #define MSG_FASTOPEN         0x20000000
706 #endif
707
708 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
709                                                struct syscall_arg *arg)
710 {
711         int printed = 0, flags = arg->val;
712
713         if (flags == 0)
714                 return scnprintf(bf, size, "NONE");
715 #define P_MSG_FLAG(n) \
716         if (flags & MSG_##n) { \
717                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
718                 flags &= ~MSG_##n; \
719         }
720
721         P_MSG_FLAG(OOB);
722         P_MSG_FLAG(PEEK);
723         P_MSG_FLAG(DONTROUTE);
724         P_MSG_FLAG(TRYHARD);
725         P_MSG_FLAG(CTRUNC);
726         P_MSG_FLAG(PROBE);
727         P_MSG_FLAG(TRUNC);
728         P_MSG_FLAG(DONTWAIT);
729         P_MSG_FLAG(EOR);
730         P_MSG_FLAG(WAITALL);
731         P_MSG_FLAG(FIN);
732         P_MSG_FLAG(SYN);
733         P_MSG_FLAG(CONFIRM);
734         P_MSG_FLAG(RST);
735         P_MSG_FLAG(ERRQUEUE);
736         P_MSG_FLAG(NOSIGNAL);
737         P_MSG_FLAG(MORE);
738         P_MSG_FLAG(WAITFORONE);
739         P_MSG_FLAG(SENDPAGE_NOTLAST);
740         P_MSG_FLAG(FASTOPEN);
741         P_MSG_FLAG(CMSG_CLOEXEC);
742 #undef P_MSG_FLAG
743
744         if (flags)
745                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
746
747         return printed;
748 }
749
750 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
751
752 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
753                                                  struct syscall_arg *arg)
754 {
755         size_t printed = 0;
756         int mode = arg->val;
757
758         if (mode == F_OK) /* 0 */
759                 return scnprintf(bf, size, "F");
760 #define P_MODE(n) \
761         if (mode & n##_OK) { \
762                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
763                 mode &= ~n##_OK; \
764         }
765
766         P_MODE(R);
767         P_MODE(W);
768         P_MODE(X);
769 #undef P_MODE
770
771         if (mode)
772                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
773
774         return printed;
775 }
776
777 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
778
779 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
780                                               struct syscall_arg *arg);
781
782 #define SCA_FILENAME syscall_arg__scnprintf_filename
783
784 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
785                                                struct syscall_arg *arg)
786 {
787         int printed = 0, flags = arg->val;
788
789         if (!(flags & O_CREAT))
790                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
791
792         if (flags == 0)
793                 return scnprintf(bf, size, "RDONLY");
794 #define P_FLAG(n) \
795         if (flags & O_##n) { \
796                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
797                 flags &= ~O_##n; \
798         }
799
800         P_FLAG(APPEND);
801         P_FLAG(ASYNC);
802         P_FLAG(CLOEXEC);
803         P_FLAG(CREAT);
804         P_FLAG(DIRECT);
805         P_FLAG(DIRECTORY);
806         P_FLAG(EXCL);
807         P_FLAG(LARGEFILE);
808         P_FLAG(NOATIME);
809         P_FLAG(NOCTTY);
810 #ifdef O_NONBLOCK
811         P_FLAG(NONBLOCK);
812 #elif O_NDELAY
813         P_FLAG(NDELAY);
814 #endif
815 #ifdef O_PATH
816         P_FLAG(PATH);
817 #endif
818         P_FLAG(RDWR);
819 #ifdef O_DSYNC
820         if ((flags & O_SYNC) == O_SYNC)
821                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
822         else {
823                 P_FLAG(DSYNC);
824         }
825 #else
826         P_FLAG(SYNC);
827 #endif
828         P_FLAG(TRUNC);
829         P_FLAG(WRONLY);
830 #undef P_FLAG
831
832         if (flags)
833                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
834
835         return printed;
836 }
837
838 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
839
840 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
841                                                 struct syscall_arg *arg)
842 {
843         int printed = 0, flags = arg->val;
844
845         if (flags == 0)
846                 return 0;
847
848 #define P_FLAG(n) \
849         if (flags & PERF_FLAG_##n) { \
850                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
851                 flags &= ~PERF_FLAG_##n; \
852         }
853
854         P_FLAG(FD_NO_GROUP);
855         P_FLAG(FD_OUTPUT);
856         P_FLAG(PID_CGROUP);
857         P_FLAG(FD_CLOEXEC);
858 #undef P_FLAG
859
860         if (flags)
861                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
862
863         return printed;
864 }
865
866 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
867
868 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
869                                                    struct syscall_arg *arg)
870 {
871         int printed = 0, flags = arg->val;
872
873         if (flags == 0)
874                 return scnprintf(bf, size, "NONE");
875 #define P_FLAG(n) \
876         if (flags & EFD_##n) { \
877                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
878                 flags &= ~EFD_##n; \
879         }
880
881         P_FLAG(SEMAPHORE);
882         P_FLAG(CLOEXEC);
883         P_FLAG(NONBLOCK);
884 #undef P_FLAG
885
886         if (flags)
887                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
888
889         return printed;
890 }
891
892 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
893
894 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
895                                                 struct syscall_arg *arg)
896 {
897         int printed = 0, flags = arg->val;
898
899 #define P_FLAG(n) \
900         if (flags & O_##n) { \
901                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
902                 flags &= ~O_##n; \
903         }
904
905         P_FLAG(CLOEXEC);
906         P_FLAG(NONBLOCK);
907 #undef P_FLAG
908
909         if (flags)
910                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
911
912         return printed;
913 }
914
915 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
916
917 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
918 {
919         int sig = arg->val;
920
921         switch (sig) {
922 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
923         P_SIGNUM(HUP);
924         P_SIGNUM(INT);
925         P_SIGNUM(QUIT);
926         P_SIGNUM(ILL);
927         P_SIGNUM(TRAP);
928         P_SIGNUM(ABRT);
929         P_SIGNUM(BUS);
930         P_SIGNUM(FPE);
931         P_SIGNUM(KILL);
932         P_SIGNUM(USR1);
933         P_SIGNUM(SEGV);
934         P_SIGNUM(USR2);
935         P_SIGNUM(PIPE);
936         P_SIGNUM(ALRM);
937         P_SIGNUM(TERM);
938         P_SIGNUM(CHLD);
939         P_SIGNUM(CONT);
940         P_SIGNUM(STOP);
941         P_SIGNUM(TSTP);
942         P_SIGNUM(TTIN);
943         P_SIGNUM(TTOU);
944         P_SIGNUM(URG);
945         P_SIGNUM(XCPU);
946         P_SIGNUM(XFSZ);
947         P_SIGNUM(VTALRM);
948         P_SIGNUM(PROF);
949         P_SIGNUM(WINCH);
950         P_SIGNUM(IO);
951         P_SIGNUM(PWR);
952         P_SIGNUM(SYS);
953 #ifdef SIGEMT
954         P_SIGNUM(EMT);
955 #endif
956 #ifdef SIGSTKFLT
957         P_SIGNUM(STKFLT);
958 #endif
959 #ifdef SIGSWI
960         P_SIGNUM(SWI);
961 #endif
962         default: break;
963         }
964
965         return scnprintf(bf, size, "%#x", sig);
966 }
967
968 #define SCA_SIGNUM syscall_arg__scnprintf_signum
969
970 #if defined(__i386__) || defined(__x86_64__)
971 /*
972  * FIXME: Make this available to all arches.
973  */
974 #define TCGETS          0x5401
975
976 static const char *tioctls[] = {
977         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
978         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
979         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
980         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
981         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
982         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
983         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
984         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
985         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
986         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
987         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
988         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
989         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
990         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
991         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
992 };
993
994 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
995 #endif /* defined(__i386__) || defined(__x86_64__) */
996
997 #define STRARRAY(arg, name, array) \
998           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
999           .arg_parm      = { [arg] = &strarray__##array, }
1000
1001 static struct syscall_fmt {
1002         const char *name;
1003         const char *alias;
1004         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1005         void       *arg_parm[6];
1006         bool       errmsg;
1007         bool       timeout;
1008         bool       hexret;
1009 } syscall_fmts[] = {
1010         { .name     = "access",     .errmsg = true,
1011           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1012                              [1] = SCA_ACCMODE,  /* mode */ }, },
1013         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
1014         { .name     = "brk",        .hexret = true,
1015           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1016         { .name     = "chdir",      .errmsg = true,
1017           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1018         { .name     = "chmod",      .errmsg = true,
1019           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1020         { .name     = "chroot",     .errmsg = true,
1021           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1022         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1023         { .name     = "close",      .errmsg = true,
1024           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1025         { .name     = "connect",    .errmsg = true, },
1026         { .name     = "creat",      .errmsg = true,
1027           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1028         { .name     = "dup",        .errmsg = true,
1029           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030         { .name     = "dup2",       .errmsg = true,
1031           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1032         { .name     = "dup3",       .errmsg = true,
1033           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1034         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1035         { .name     = "eventfd2",   .errmsg = true,
1036           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1037         { .name     = "faccessat",  .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1039                              [1] = SCA_FILENAME, /* filename */ }, },
1040         { .name     = "fadvise64",  .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1042         { .name     = "fallocate",  .errmsg = true,
1043           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1044         { .name     = "fchdir",     .errmsg = true,
1045           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1046         { .name     = "fchmod",     .errmsg = true,
1047           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1048         { .name     = "fchmodat",   .errmsg = true,
1049           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1050                              [1] = SCA_FILENAME, /* filename */ }, },
1051         { .name     = "fchown",     .errmsg = true,
1052           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053         { .name     = "fchownat",   .errmsg = true,
1054           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1055                              [1] = SCA_FILENAME, /* filename */ }, },
1056         { .name     = "fcntl",      .errmsg = true,
1057           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1058                              [1] = SCA_STRARRAY, /* cmd */ },
1059           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1060         { .name     = "fdatasync",  .errmsg = true,
1061           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1062         { .name     = "flock",      .errmsg = true,
1063           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1064                              [1] = SCA_FLOCK, /* cmd */ }, },
1065         { .name     = "fsetxattr",  .errmsg = true,
1066           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1067         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1068           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1070           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1071                              [1] = SCA_FILENAME, /* filename */ }, },
1072         { .name     = "fstatfs",    .errmsg = true,
1073           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1074         { .name     = "fsync",    .errmsg = true,
1075           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1076         { .name     = "ftruncate", .errmsg = true,
1077           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1078         { .name     = "futex",      .errmsg = true,
1079           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1080         { .name     = "futimesat", .errmsg = true,
1081           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1082                              [1] = SCA_FILENAME, /* filename */ }, },
1083         { .name     = "getdents",   .errmsg = true,
1084           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1085         { .name     = "getdents64", .errmsg = true,
1086           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1087         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1088         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1089         { .name     = "getxattr",    .errmsg = true,
1090           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1091         { .name     = "inotify_add_watch",          .errmsg = true,
1092           .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1093         { .name     = "ioctl",      .errmsg = true,
1094           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1095 #if defined(__i386__) || defined(__x86_64__)
1096 /*
1097  * FIXME: Make this available to all arches.
1098  */
1099                              [1] = SCA_STRHEXARRAY, /* cmd */
1100                              [2] = SCA_HEX, /* arg */ },
1101           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1102 #else
1103                              [2] = SCA_HEX, /* arg */ }, },
1104 #endif
1105         { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
1106         { .name     = "kill",       .errmsg = true,
1107           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1108         { .name     = "lchown",    .errmsg = true,
1109           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1110         { .name     = "lgetxattr",  .errmsg = true,
1111           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1112         { .name     = "linkat",     .errmsg = true,
1113           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1114         { .name     = "listxattr",  .errmsg = true,
1115           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1116         { .name     = "llistxattr", .errmsg = true,
1117           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1118         { .name     = "lremovexattr",  .errmsg = true,
1119           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1120         { .name     = "lseek",      .errmsg = true,
1121           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1122                              [2] = SCA_STRARRAY, /* whence */ },
1123           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1124         { .name     = "lsetxattr",  .errmsg = true,
1125           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1126         { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
1127           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1128         { .name     = "lsxattr",    .errmsg = true,
1129           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1130         { .name     = "madvise",    .errmsg = true,
1131           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1132                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1133         { .name     = "mkdir",    .errmsg = true,
1134           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1135         { .name     = "mkdirat",    .errmsg = true,
1136           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1137                              [1] = SCA_FILENAME, /* pathname */ }, },
1138         { .name     = "mknod",      .errmsg = true,
1139           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1140         { .name     = "mknodat",    .errmsg = true,
1141           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1142                              [1] = SCA_FILENAME, /* filename */ }, },
1143         { .name     = "mlock",      .errmsg = true,
1144           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1145         { .name     = "mlockall",   .errmsg = true,
1146           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1147         { .name     = "mmap",       .hexret = true,
1148           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1149                              [2] = SCA_MMAP_PROT, /* prot */
1150                              [3] = SCA_MMAP_FLAGS, /* flags */
1151                              [4] = SCA_FD,        /* fd */ }, },
1152         { .name     = "mprotect",   .errmsg = true,
1153           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1154                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1155         { .name     = "mq_unlink", .errmsg = true,
1156           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1157         { .name     = "mremap",     .hexret = true,
1158           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1159                              [3] = SCA_MREMAP_FLAGS, /* flags */
1160                              [4] = SCA_HEX, /* new_addr */ }, },
1161         { .name     = "munlock",    .errmsg = true,
1162           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1163         { .name     = "munmap",     .errmsg = true,
1164           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1165         { .name     = "name_to_handle_at", .errmsg = true,
1166           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1167         { .name     = "newfstatat", .errmsg = true,
1168           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1169                              [1] = SCA_FILENAME, /* filename */ }, },
1170         { .name     = "open",       .errmsg = true,
1171           .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1172                              [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1173         { .name     = "open_by_handle_at", .errmsg = true,
1174           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1175                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1176         { .name     = "openat",     .errmsg = true,
1177           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1178                              [1] = SCA_FILENAME, /* filename */
1179                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1180         { .name     = "perf_event_open", .errmsg = true,
1181           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1182                              [2] = SCA_INT, /* cpu */
1183                              [3] = SCA_FD,  /* group_fd */
1184                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1185         { .name     = "pipe2",      .errmsg = true,
1186           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1187         { .name     = "poll",       .errmsg = true, .timeout = true, },
1188         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1189         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1190           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1191         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1192           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1193         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1194         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1195           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1196         { .name     = "pwritev",    .errmsg = true,
1197           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1198         { .name     = "read",       .errmsg = true,
1199           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1200         { .name     = "readlink",   .errmsg = true,
1201           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1202         { .name     = "readlinkat", .errmsg = true,
1203           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1204                              [1] = SCA_FILENAME, /* pathname */ }, },
1205         { .name     = "readv",      .errmsg = true,
1206           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207         { .name     = "recvfrom",   .errmsg = true,
1208           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1209                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1210         { .name     = "recvmmsg",   .errmsg = true,
1211           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1212                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1213         { .name     = "recvmsg",    .errmsg = true,
1214           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1215                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1216         { .name     = "removexattr", .errmsg = true,
1217           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1218         { .name     = "renameat",   .errmsg = true,
1219           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1220         { .name     = "rmdir",    .errmsg = true,
1221           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1222         { .name     = "rt_sigaction", .errmsg = true,
1223           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1224         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1225         { .name     = "rt_sigqueueinfo", .errmsg = true,
1226           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1227         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1228           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1229         { .name     = "select",     .errmsg = true, .timeout = true, },
1230         { .name     = "sendmmsg",    .errmsg = true,
1231           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1232                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1233         { .name     = "sendmsg",    .errmsg = true,
1234           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1235                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1236         { .name     = "sendto",     .errmsg = true,
1237           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1238                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1239         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1240         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1241         { .name     = "setxattr",   .errmsg = true,
1242           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1243         { .name     = "shutdown",   .errmsg = true,
1244           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1245         { .name     = "socket",     .errmsg = true,
1246           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1247                              [1] = SCA_SK_TYPE, /* type */ },
1248           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1249         { .name     = "socketpair", .errmsg = true,
1250           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1251                              [1] = SCA_SK_TYPE, /* type */ },
1252           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1253         { .name     = "stat",       .errmsg = true, .alias = "newstat",
1254           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1255         { .name     = "statfs",     .errmsg = true,
1256           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1257         { .name     = "swapoff",    .errmsg = true,
1258           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1259         { .name     = "swapon",     .errmsg = true,
1260           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1261         { .name     = "symlinkat",  .errmsg = true,
1262           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1263         { .name     = "tgkill",     .errmsg = true,
1264           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1265         { .name     = "tkill",      .errmsg = true,
1266           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1267         { .name     = "truncate",   .errmsg = true,
1268           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1269         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1270         { .name     = "unlinkat",   .errmsg = true,
1271           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1272                              [1] = SCA_FILENAME, /* pathname */ }, },
1273         { .name     = "utime",  .errmsg = true,
1274           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1275         { .name     = "utimensat",  .errmsg = true,
1276           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1277                              [1] = SCA_FILENAME, /* filename */ }, },
1278         { .name     = "utimes",  .errmsg = true,
1279           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1280         { .name     = "vmsplice",  .errmsg = true,
1281           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1282         { .name     = "write",      .errmsg = true,
1283           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1284         { .name     = "writev",     .errmsg = true,
1285           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1286 };
1287
1288 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1289 {
1290         const struct syscall_fmt *fmt = fmtp;
1291         return strcmp(name, fmt->name);
1292 }
1293
1294 static struct syscall_fmt *syscall_fmt__find(const char *name)
1295 {
1296         const int nmemb = ARRAY_SIZE(syscall_fmts);
1297         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1298 }
1299
/*
 * What is known about one syscall.
 */
struct syscall {
        struct event_format *tp_format;

        /* number of entries allocated for arg_scnprintf */
        int nr_args;

        /* argument descriptions, a list chained through ->next */
        struct format_field *args;

        const char *name;
        bool is_exit;

        /* matching syscall_fmts[] entry, NULL when the syscall has none */
        struct syscall_fmt *fmt;

        /* formatter chosen for each argument, see syscall__set_arg_fmts() */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);

        /* per-argument formatter parameters, taken from fmt->arg_parm */
        void **arg_parm;
};
1310
1311 static size_t fprintf_duration(unsigned long t, FILE *fp)
1312 {
1313         double duration = (double)t / NSEC_PER_MSEC;
1314         size_t printed = fprintf(fp, "(");
1315
1316         if (duration >= 1.0)
1317                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1318         else if (duration >= 0.01)
1319                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1320         else
1321                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1322         return printed + fprintf(fp, "): ");
1323 }
1324
1325 /**
1326  * filename.ptr: The filename char pointer that will be vfs_getname'd
1327  * filename.entry_str_pos: Where to insert the string translated from
1328  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1329  */
1330 struct thread_trace {
1331         u64               entry_time;
1332         u64               exit_time;
1333         bool              entry_pending;
1334         unsigned long     nr_events;
1335         unsigned long     pfmaj, pfmin;
1336         char              *entry_str;
1337         double            runtime_ms;
1338         struct {
1339                 unsigned long ptr;
1340                 short int     entry_str_pos;
1341                 bool          pending_open;
1342                 unsigned int  namelen;
1343                 char          *name;
1344         } filename;
1345         struct {
1346                 int       max;
1347                 char      **table;
1348         } paths;
1349
1350         struct intlist *syscall_stats;
1351 };
1352
1353 static struct thread_trace *thread_trace__new(void)
1354 {
1355         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1356
1357         if (ttrace)
1358                 ttrace->paths.max = -1;
1359
1360         ttrace->syscall_stats = intlist__new(NULL);
1361
1362         return ttrace;
1363 }
1364
1365 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1366 {
1367         struct thread_trace *ttrace;
1368
1369         if (thread == NULL)
1370                 goto fail;
1371
1372         if (thread__priv(thread) == NULL)
1373                 thread__set_priv(thread, thread_trace__new());
1374
1375         if (thread__priv(thread) == NULL)
1376                 goto fail;
1377
1378         ttrace = thread__priv(thread);
1379         ++ttrace->nr_events;
1380
1381         return ttrace;
1382 fail:
1383         color_fprintf(fp, PERF_COLOR_RED,
1384                       "WARNING: not enough memory, dropping samples!\n");
1385         return NULL;
1386 }
1387
1388 #define TRACE_PFMAJ             (1 << 0)
1389 #define TRACE_PFMIN             (1 << 1)
1390
1391 static const size_t trace__entry_str_size = 2048;
1392
1393 struct trace {
1394         struct perf_tool        tool;
1395         struct {
1396                 int             machine;
1397                 int             open_id;
1398         }                       audit;
1399         struct {
1400                 int             max;
1401                 struct syscall  *table;
1402                 struct {
1403                         struct perf_evsel *sys_enter,
1404                                           *sys_exit;
1405                 }               events;
1406         } syscalls;
1407         struct record_opts      opts;
1408         struct perf_evlist      *evlist;
1409         struct machine          *host;
1410         struct thread           *current;
1411         u64                     base_time;
1412         FILE                    *output;
1413         unsigned long           nr_events;
1414         struct strlist          *ev_qualifier;
1415         struct {
1416                 size_t          nr;
1417                 int             *entries;
1418         }                       ev_qualifier_ids;
1419         struct intlist          *tid_list;
1420         struct intlist          *pid_list;
1421         struct {
1422                 size_t          nr;
1423                 pid_t           *entries;
1424         }                       filter_pids;
1425         double                  duration_filter;
1426         double                  runtime_ms;
1427         struct {
1428                 u64             vfs_getname,
1429                                 proc_getname;
1430         } stats;
1431         bool                    not_ev_qualifier;
1432         bool                    live;
1433         bool                    full_time;
1434         bool                    sched;
1435         bool                    multiple_threads;
1436         bool                    summary;
1437         bool                    summary_only;
1438         bool                    show_comm;
1439         bool                    show_tool_stats;
1440         bool                    trace_syscalls;
1441         bool                    force;
1442         bool                    vfs_getname;
1443         int                     trace_pgfaults;
1444 };
1445
1446 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1447 {
1448         struct thread_trace *ttrace = thread__priv(thread);
1449
1450         if (fd > ttrace->paths.max) {
1451                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1452
1453                 if (npath == NULL)
1454                         return -1;
1455
1456                 if (ttrace->paths.max != -1) {
1457                         memset(npath + ttrace->paths.max + 1, 0,
1458                                (fd - ttrace->paths.max) * sizeof(char *));
1459                 } else {
1460                         memset(npath, 0, (fd + 1) * sizeof(char *));
1461                 }
1462
1463                 ttrace->paths.table = npath;
1464                 ttrace->paths.max   = fd;
1465         }
1466
1467         ttrace->paths.table[fd] = strdup(pathname);
1468
1469         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1470 }
1471
1472 static int thread__read_fd_path(struct thread *thread, int fd)
1473 {
1474         char linkname[PATH_MAX], pathname[PATH_MAX];
1475         struct stat st;
1476         int ret;
1477
1478         if (thread->pid_ == thread->tid) {
1479                 scnprintf(linkname, sizeof(linkname),
1480                           "/proc/%d/fd/%d", thread->pid_, fd);
1481         } else {
1482                 scnprintf(linkname, sizeof(linkname),
1483                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1484         }
1485
1486         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1487                 return -1;
1488
1489         ret = readlink(linkname, pathname, sizeof(pathname));
1490
1491         if (ret < 0 || ret > st.st_size)
1492                 return -1;
1493
1494         pathname[ret] = '\0';
1495         return trace__set_fd_pathname(thread, fd, pathname);
1496 }
1497
1498 static const char *thread__fd_path(struct thread *thread, int fd,
1499                                    struct trace *trace)
1500 {
1501         struct thread_trace *ttrace = thread__priv(thread);
1502
1503         if (ttrace == NULL)
1504                 return NULL;
1505
1506         if (fd < 0)
1507                 return NULL;
1508
1509         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1510                 if (!trace->live)
1511                         return NULL;
1512                 ++trace->stats.proc_getname;
1513                 if (thread__read_fd_path(thread, fd))
1514                         return NULL;
1515         }
1516
1517         return ttrace->paths.table[fd];
1518 }
1519
1520 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1521                                         struct syscall_arg *arg)
1522 {
1523         int fd = arg->val;
1524         size_t printed = scnprintf(bf, size, "%d", fd);
1525         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1526
1527         if (path)
1528                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1529
1530         return printed;
1531 }
1532
1533 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1534                                               struct syscall_arg *arg)
1535 {
1536         int fd = arg->val;
1537         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1538         struct thread_trace *ttrace = thread__priv(arg->thread);
1539
1540         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1541                 zfree(&ttrace->paths.table[fd]);
1542
1543         return printed;
1544 }
1545
1546 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1547                                      unsigned long ptr)
1548 {
1549         struct thread_trace *ttrace = thread__priv(thread);
1550
1551         ttrace->filename.ptr = ptr;
1552         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1553 }
1554
1555 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1556                                               struct syscall_arg *arg)
1557 {
1558         unsigned long ptr = arg->val;
1559
1560         if (!arg->trace->vfs_getname)
1561                 return scnprintf(bf, size, "%#x", ptr);
1562
1563         thread__set_filename_pos(arg->thread, bf, ptr);
1564         return 0;
1565 }
1566
1567 static bool trace__filter_duration(struct trace *trace, double t)
1568 {
1569         return t < (trace->duration_filter * NSEC_PER_MSEC);
1570 }
1571
1572 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1573 {
1574         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1575
1576         return fprintf(fp, "%10.3f ", ts);
1577 }
1578
/*
 * Set asynchronously by sig_handler().  They are volatile so that code
 * polling them re-reads the flag on every check instead of working on a
 * value cached before the signal arrived.
 */
static volatile bool done = false;
static volatile bool interrupted = false;

/*
 * Signal handler: ask the tracing loop to stop and note whether the request
 * came from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1587
1588 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1589                                         u64 duration, u64 tstamp, FILE *fp)
1590 {
1591         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1592         printed += fprintf_duration(duration, fp);
1593
1594         if (trace->multiple_threads) {
1595                 if (trace->show_comm)
1596                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1597                 printed += fprintf(fp, "%d ", thread->tid);
1598         }
1599
1600         return printed;
1601 }
1602
1603 static int trace__process_event(struct trace *trace, struct machine *machine,
1604                                 union perf_event *event, struct perf_sample *sample)
1605 {
1606         int ret = 0;
1607
1608         switch (event->header.type) {
1609         case PERF_RECORD_LOST:
1610                 color_fprintf(trace->output, PERF_COLOR_RED,
1611                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1612                 ret = machine__process_lost_event(machine, event, sample);
1613         default:
1614                 ret = machine__process_event(machine, event, sample);
1615                 break;
1616         }
1617
1618         return ret;
1619 }
1620
1621 static int trace__tool_process(struct perf_tool *tool,
1622                                union perf_event *event,
1623                                struct perf_sample *sample,
1624                                struct machine *machine)
1625 {
1626         struct trace *trace = container_of(tool, struct trace, tool);
1627         return trace__process_event(trace, machine, event, sample);
1628 }
1629
1630 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1631 {
1632         int err = symbol__init(NULL);
1633
1634         if (err)
1635                 return err;
1636
1637         trace->host = machine__new_host();
1638         if (trace->host == NULL)
1639                 return -ENOMEM;
1640
1641         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1642                 return -errno;
1643
1644         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1645                                             evlist->threads, trace__tool_process, false,
1646                                             trace->opts.proc_map_timeout);
1647         if (err)
1648                 symbol__exit();
1649
1650         return err;
1651 }
1652
1653 static int syscall__set_arg_fmts(struct syscall *sc)
1654 {
1655         struct format_field *field;
1656         int idx = 0;
1657
1658         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1659         if (sc->arg_scnprintf == NULL)
1660                 return -1;
1661
1662         if (sc->fmt)
1663                 sc->arg_parm = sc->fmt->arg_parm;
1664
1665         for (field = sc->args; field; field = field->next) {
1666                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1667                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1668                 else if (field->flags & FIELD_IS_POINTER)
1669                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1670                 ++idx;
1671         }
1672
1673         return 0;
1674 }
1675
1676 static int trace__read_syscall_info(struct trace *trace, int id)
1677 {
1678         char tp_name[128];
1679         struct syscall *sc;
1680         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1681
1682         if (name == NULL)
1683                 return -1;
1684
1685         if (id > trace->syscalls.max) {
1686                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1687
1688                 if (nsyscalls == NULL)
1689                         return -1;
1690
1691                 if (trace->syscalls.max != -1) {
1692                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1693                                (id - trace->syscalls.max) * sizeof(*sc));
1694                 } else {
1695                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1696                 }
1697
1698                 trace->syscalls.table = nsyscalls;
1699                 trace->syscalls.max   = id;
1700         }
1701
1702         sc = trace->syscalls.table + id;
1703         sc->name = name;
1704
1705         sc->fmt  = syscall_fmt__find(sc->name);
1706
1707         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1708         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1709
1710         if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1711                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1712                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1713         }
1714
1715         if (IS_ERR(sc->tp_format))
1716                 return -1;
1717
1718         sc->args = sc->tp_format->format.fields;
1719         sc->nr_args = sc->tp_format->format.nr_fields;
1720         /* drop nr field - not relevant here; does not exist on older kernels */
1721         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1722                 sc->args = sc->args->next;
1723                 --sc->nr_args;
1724         }
1725
1726         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1727
1728         return syscall__set_arg_fmts(sc);
1729 }
1730
1731 static int trace__validate_ev_qualifier(struct trace *trace)
1732 {
1733         int err = 0, i;
1734         struct str_node *pos;
1735
1736         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1737         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1738                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1739
1740         if (trace->ev_qualifier_ids.entries == NULL) {
1741                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1742                        trace->output);
1743                 err = -EINVAL;
1744                 goto out;
1745         }
1746
1747         i = 0;
1748
1749         strlist__for_each(pos, trace->ev_qualifier) {
1750                 const char *sc = pos->s;
1751                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1752
1753                 if (id < 0) {
1754                         if (err == 0) {
1755                                 fputs("Error:\tInvalid syscall ", trace->output);
1756                                 err = -EINVAL;
1757                         } else {
1758                                 fputs(", ", trace->output);
1759                         }
1760
1761                         fputs(sc, trace->output);
1762                 }
1763
1764                 trace->ev_qualifier_ids.entries[i++] = id;
1765         }
1766
1767         if (err < 0) {
1768                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1769                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1770                 zfree(&trace->ev_qualifier_ids.entries);
1771                 trace->ev_qualifier_ids.nr = 0;
1772         }
1773 out:
1774         return err;
1775 }
1776
1777 /*
1778  * args is to be interpreted as a series of longs but we need to handle
1779  * 8-byte unaligned accesses. args points to raw_data within the event
1780  * and raw_data is guaranteed to be 8-byte unaligned because it is
1781  * preceded by raw_size which is a u32. So we need to copy args to a temp
1782  * variable to read it. Most notably this avoids extended load instructions
1783  * on unaligned addresses
1784  */
1785
1786 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1787                                       unsigned char *args, struct trace *trace,
1788                                       struct thread *thread)
1789 {
1790         size_t printed = 0;
1791         unsigned char *p;
1792         unsigned long val;
1793
1794         if (sc->args != NULL) {
1795                 struct format_field *field;
1796                 u8 bit = 1;
1797                 struct syscall_arg arg = {
1798                         .idx    = 0,
1799                         .mask   = 0,
1800                         .trace  = trace,
1801                         .thread = thread,
1802                 };
1803
1804                 for (field = sc->args; field;
1805                      field = field->next, ++arg.idx, bit <<= 1) {
1806                         if (arg.mask & bit)
1807                                 continue;
1808
1809                         /* special care for unaligned accesses */
1810                         p = args + sizeof(unsigned long) * arg.idx;
1811                         memcpy(&val, p, sizeof(val));
1812
1813                         /*
1814                          * Suppress this argument if its value is zero and
1815                          * and we don't have a string associated in an
1816                          * strarray for it.
1817                          */
1818                         if (val == 0 &&
1819                             !(sc->arg_scnprintf &&
1820                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1821                               sc->arg_parm[arg.idx]))
1822                                 continue;
1823
1824                         printed += scnprintf(bf + printed, size - printed,
1825                                              "%s%s: ", printed ? ", " : "", field->name);
1826                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1827                                 arg.val = val;
1828                                 if (sc->arg_parm)
1829                                         arg.parm = sc->arg_parm[arg.idx];
1830                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1831                                                                       size - printed, &arg);
1832                         } else {
1833                                 printed += scnprintf(bf + printed, size - printed,
1834                                                      "%ld", val);
1835                         }
1836                 }
1837         } else {
1838                 int i = 0;
1839
1840                 while (i < 6) {
1841                         /* special care for unaligned accesses */
1842                         p = args + sizeof(unsigned long) * i;
1843                         memcpy(&val, p, sizeof(val));
1844                         printed += scnprintf(bf + printed, size - printed,
1845                                              "%sarg%d: %ld",
1846                                              printed ? ", " : "", i, val);
1847                         ++i;
1848                 }
1849         }
1850
1851         return printed;
1852 }
1853
/*
 * Signature of the per-event callbacks stored in evsel->handler and invoked
 * for each sample of that event.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1857
1858 static struct syscall *trace__syscall_info(struct trace *trace,
1859                                            struct perf_evsel *evsel, int id)
1860 {
1861
1862         if (id < 0) {
1863
1864                 /*
1865                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1866                  * before that, leaving at a higher verbosity level till that is
1867                  * explained. Reproduced with plain ftrace with:
1868                  *
1869                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1870                  * grep "NR -1 " /t/trace_pipe
1871                  *
1872                  * After generating some load on the machine.
1873                  */
1874                 if (verbose > 1) {
1875                         static u64 n;
1876                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1877                                 id, perf_evsel__name(evsel), ++n);
1878                 }
1879                 return NULL;
1880         }
1881
1882         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1883             trace__read_syscall_info(trace, id))
1884                 goto out_cant_read;
1885
1886         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1887                 goto out_cant_read;
1888
1889         return &trace->syscalls.table[id];
1890
1891 out_cant_read:
1892         if (verbose) {
1893                 fprintf(trace->output, "Problems reading syscall %d", id);
1894                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1895                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1896                 fputs(" information\n", trace->output);
1897         }
1898         return NULL;
1899 }
1900
1901 static void thread__update_stats(struct thread_trace *ttrace,
1902                                  int id, struct perf_sample *sample)
1903 {
1904         struct int_node *inode;
1905         struct stats *stats;
1906         u64 duration = 0;
1907
1908         inode = intlist__findnew(ttrace->syscall_stats, id);
1909         if (inode == NULL)
1910                 return;
1911
1912         stats = inode->priv;
1913         if (stats == NULL) {
1914                 stats = malloc(sizeof(struct stats));
1915                 if (stats == NULL)
1916                         return;
1917                 init_stats(stats);
1918                 inode->priv = stats;
1919         }
1920
1921         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1922                 duration = sample->time - ttrace->entry_time;
1923
1924         update_stats(stats, duration);
1925 }
1926
1927 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1928 {
1929         struct thread_trace *ttrace;
1930         u64 duration;
1931         size_t printed;
1932
1933         if (trace->current == NULL)
1934                 return 0;
1935
1936         ttrace = thread__priv(trace->current);
1937
1938         if (!ttrace->entry_pending)
1939                 return 0;
1940
1941         duration = sample->time - ttrace->entry_time;
1942
1943         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1944         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1945         ttrace->entry_pending = false;
1946
1947         return printed;
1948 }
1949
1950 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1951                             union perf_event *event __maybe_unused,
1952                             struct perf_sample *sample)
1953 {
1954         char *msg;
1955         void *args;
1956         size_t printed = 0;
1957         struct thread *thread;
1958         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1959         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1960         struct thread_trace *ttrace;
1961
1962         if (sc == NULL)
1963                 return -1;
1964
1965         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1966         ttrace = thread__trace(thread, trace->output);
1967         if (ttrace == NULL)
1968                 goto out_put;
1969
1970         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1971
1972         if (ttrace->entry_str == NULL) {
1973                 ttrace->entry_str = malloc(trace__entry_str_size);
1974                 if (!ttrace->entry_str)
1975                         goto out_put;
1976         }
1977
1978         if (!trace->summary_only)
1979                 trace__printf_interrupted_entry(trace, sample);
1980
1981         ttrace->entry_time = sample->time;
1982         msg = ttrace->entry_str;
1983         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1984
1985         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1986                                            args, trace, thread);
1987
1988         if (sc->is_exit) {
1989                 if (!trace->duration_filter && !trace->summary_only) {
1990                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1991                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1992                 }
1993         } else {
1994                 ttrace->entry_pending = true;
1995                 /* See trace__vfs_getname & trace__sys_exit */
1996                 ttrace->filename.pending_open = false;
1997         }
1998
1999         if (trace->current != thread) {
2000                 thread__put(trace->current);
2001                 trace->current = thread__get(thread);
2002         }
2003         err = 0;
2004 out_put:
2005         thread__put(thread);
2006         return err;
2007 }
2008
2009 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2010                            union perf_event *event __maybe_unused,
2011                            struct perf_sample *sample)
2012 {
2013         long ret;
2014         u64 duration = 0;
2015         struct thread *thread;
2016         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2017         struct syscall *sc = trace__syscall_info(trace, evsel, id);
2018         struct thread_trace *ttrace;
2019
2020         if (sc == NULL)
2021                 return -1;
2022
2023         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2024         ttrace = thread__trace(thread, trace->output);
2025         if (ttrace == NULL)
2026                 goto out_put;
2027
2028         if (trace->summary)
2029                 thread__update_stats(ttrace, id, sample);
2030
2031         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2032
2033         if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2034                 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2035                 ttrace->filename.pending_open = false;
2036                 ++trace->stats.vfs_getname;
2037         }
2038
2039         ttrace->exit_time = sample->time;
2040
2041         if (ttrace->entry_time) {
2042                 duration = sample->time - ttrace->entry_time;
2043                 if (trace__filter_duration(trace, duration))
2044                         goto out;
2045         } else if (trace->duration_filter)
2046                 goto out;
2047
2048         if (trace->summary_only)
2049                 goto out;
2050
2051         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2052
2053         if (ttrace->entry_pending) {
2054                 fprintf(trace->output, "%-70s", ttrace->entry_str);
2055         } else {
2056                 fprintf(trace->output, " ... [");
2057                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2058                 fprintf(trace->output, "]: %s()", sc->name);
2059         }
2060
2061         if (sc->fmt == NULL) {
2062 signed_print:
2063                 fprintf(trace->output, ") = %ld", ret);
2064         } else if (ret < 0 && sc->fmt->errmsg) {
2065                 char bf[STRERR_BUFSIZE];
2066                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2067                            *e = audit_errno_to_name(-ret);
2068
2069                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2070         } else if (ret == 0 && sc->fmt->timeout)
2071                 fprintf(trace->output, ") = 0 Timeout");
2072         else if (sc->fmt->hexret)
2073                 fprintf(trace->output, ") = %#lx", ret);
2074         else
2075                 goto signed_print;
2076
2077         fputc('\n', trace->output);
2078 out:
2079         ttrace->entry_pending = false;
2080         err = 0;
2081 out_put:
2082         thread__put(thread);
2083         return err;
2084 }
2085
2086 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2087                               union perf_event *event __maybe_unused,
2088                               struct perf_sample *sample)
2089 {
2090         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2091         struct thread_trace *ttrace;
2092         size_t filename_len, entry_str_len, to_move;
2093         ssize_t remaining_space;
2094         char *pos;
2095         const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2096
2097         if (!thread)
2098                 goto out;
2099
2100         ttrace = thread__priv(thread);
2101         if (!ttrace)
2102                 goto out;
2103
2104         filename_len = strlen(filename);
2105
2106         if (ttrace->filename.namelen < filename_len) {
2107                 char *f = realloc(ttrace->filename.name, filename_len + 1);
2108
2109                 if (f == NULL)
2110                                 goto out;
2111
2112                 ttrace->filename.namelen = filename_len;
2113                 ttrace->filename.name = f;
2114         }
2115
2116         strcpy(ttrace->filename.name, filename);
2117         ttrace->filename.pending_open = true;
2118
2119         if (!ttrace->filename.ptr)
2120                 goto out;
2121
2122         entry_str_len = strlen(ttrace->entry_str);
2123         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2124         if (remaining_space <= 0)
2125                 goto out;
2126
2127         if (filename_len > (size_t)remaining_space) {
2128                 filename += filename_len - remaining_space;
2129                 filename_len = remaining_space;
2130         }
2131
2132         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2133         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2134         memmove(pos + filename_len, pos, to_move);
2135         memcpy(pos, filename, filename_len);
2136
2137         ttrace->filename.ptr = 0;
2138         ttrace->filename.entry_str_pos = 0;
2139 out:
2140         return 0;
2141 }
2142
2143 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2144                                      union perf_event *event __maybe_unused,
2145                                      struct perf_sample *sample)
2146 {
2147         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2148         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2149         struct thread *thread = machine__findnew_thread(trace->host,
2150                                                         sample->pid,
2151                                                         sample->tid);
2152         struct thread_trace *ttrace = thread__trace(thread, trace->output);
2153
2154         if (ttrace == NULL)
2155                 goto out_dump;
2156
2157         ttrace->runtime_ms += runtime_ms;
2158         trace->runtime_ms += runtime_ms;
2159         thread__put(thread);
2160         return 0;
2161
2162 out_dump:
2163         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2164                evsel->name,
2165                perf_evsel__strval(evsel, sample, "comm"),
2166                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2167                runtime,
2168                perf_evsel__intval(evsel, sample, "vruntime"));
2169         thread__put(thread);
2170         return 0;
2171 }
2172
2173 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2174                                 union perf_event *event __maybe_unused,
2175                                 struct perf_sample *sample)
2176 {
2177         trace__printf_interrupted_entry(trace, sample);
2178         trace__fprintf_tstamp(trace, sample->time, trace->output);
2179
2180         if (trace->trace_syscalls)
2181                 fprintf(trace->output, "(         ): ");
2182
2183         fprintf(trace->output, "%s:", evsel->name);
2184
2185         if (evsel->tp_format) {
2186                 event_format__fprintf(evsel->tp_format, sample->cpu,
2187                                       sample->raw_data, sample->raw_size,
2188                                       trace->output);
2189         }
2190
2191         fprintf(trace->output, ")\n");
2192         return 0;
2193 }
2194
2195 static void print_location(FILE *f, struct perf_sample *sample,
2196                            struct addr_location *al,
2197                            bool print_dso, bool print_sym)
2198 {
2199
2200         if ((verbose || print_dso) && al->map)
2201                 fprintf(f, "%s@", al->map->dso->long_name);
2202
2203         if ((verbose || print_sym) && al->sym)
2204                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2205                         al->addr - al->sym->start);
2206         else if (al->map)
2207                 fprintf(f, "0x%" PRIx64, al->addr);
2208         else
2209                 fprintf(f, "0x%" PRIx64, sample->addr);
2210 }
2211
2212 static int trace__pgfault(struct trace *trace,
2213                           struct perf_evsel *evsel,
2214                           union perf_event *event,
2215                           struct perf_sample *sample)
2216 {
2217         struct thread *thread;
2218         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2219         struct addr_location al;
2220         char map_type = 'd';
2221         struct thread_trace *ttrace;
2222         int err = -1;
2223
2224         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2225         ttrace = thread__trace(thread, trace->output);
2226         if (ttrace == NULL)
2227                 goto out_put;
2228
2229         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2230                 ttrace->pfmaj++;
2231         else
2232                 ttrace->pfmin++;
2233
2234         if (trace->summary_only)
2235                 goto out;
2236
2237         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2238                               sample->ip, &al);
2239
2240         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2241
2242         fprintf(trace->output, "%sfault [",
2243                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2244                 "maj" : "min");
2245
2246         print_location(trace->output, sample, &al, false, true);
2247
2248         fprintf(trace->output, "] => ");
2249
2250         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2251                                    sample->addr, &al);
2252
2253         if (!al.map) {
2254                 thread__find_addr_location(thread, cpumode,
2255                                            MAP__FUNCTION, sample->addr, &al);
2256
2257                 if (al.map)
2258                         map_type = 'x';
2259                 else
2260                         map_type = '?';
2261         }
2262
2263         print_location(trace->output, sample, &al, true, false);
2264
2265         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2266 out:
2267         err = 0;
2268 out_put:
2269         thread__put(thread);
2270         return err;
2271 }
2272
2273 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2274 {
2275         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2276             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2277                 return false;
2278
2279         if (trace->pid_list || trace->tid_list)
2280                 return true;
2281
2282         return false;
2283 }
2284
2285 static int trace__process_sample(struct perf_tool *tool,
2286                                  union perf_event *event,
2287                                  struct perf_sample *sample,
2288                                  struct perf_evsel *evsel,
2289                                  struct machine *machine __maybe_unused)
2290 {
2291         struct trace *trace = container_of(tool, struct trace, tool);
2292         int err = 0;
2293
2294         tracepoint_handler handler = evsel->handler;
2295
2296         if (skip_sample(trace, sample))
2297                 return 0;
2298
2299         if (!trace->full_time && trace->base_time == 0)
2300                 trace->base_time = sample->time;
2301
2302         if (handler) {
2303                 ++trace->nr_events;
2304                 handler(trace, evsel, event, sample);
2305         }
2306
2307         return err;
2308 }
2309
2310 static int parse_target_str(struct trace *trace)
2311 {
2312         if (trace->opts.target.pid) {
2313                 trace->pid_list = intlist__new(trace->opts.target.pid);
2314                 if (trace->pid_list == NULL) {
2315                         pr_err("Error parsing process id string\n");
2316                         return -EINVAL;
2317                 }
2318         }
2319
2320         if (trace->opts.target.tid) {
2321                 trace->tid_list = intlist__new(trace->opts.target.tid);
2322                 if (trace->tid_list == NULL) {
2323                         pr_err("Error parsing thread id string\n");
2324                         return -EINVAL;
2325                 }
2326         }
2327
2328         return 0;
2329 }
2330
2331 static int trace__record(struct trace *trace, int argc, const char **argv)
2332 {
2333         unsigned int rec_argc, i, j;
2334         const char **rec_argv;
2335         const char * const record_args[] = {
2336                 "record",
2337                 "-R",
2338                 "-m", "1024",
2339                 "-c", "1",
2340         };
2341
2342         const char * const sc_args[] = { "-e", };
2343         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2344         const char * const majpf_args[] = { "-e", "major-faults" };
2345         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2346         const char * const minpf_args[] = { "-e", "minor-faults" };
2347         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2348
2349         /* +1 is for the event string below */
2350         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2351                 majpf_args_nr + minpf_args_nr + argc;
2352         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2353
2354         if (rec_argv == NULL)
2355                 return -ENOMEM;
2356
2357         j = 0;
2358         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2359                 rec_argv[j++] = record_args[i];
2360
2361         if (trace->trace_syscalls) {
2362                 for (i = 0; i < sc_args_nr; i++)
2363                         rec_argv[j++] = sc_args[i];
2364
2365                 /* event string may be different for older kernels - e.g., RHEL6 */
2366                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2367                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2368                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2369                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2370                 else {
2371                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2372                         return -1;
2373                 }
2374         }
2375
2376         if (trace->trace_pgfaults & TRACE_PFMAJ)
2377                 for (i = 0; i < majpf_args_nr; i++)
2378                         rec_argv[j++] = majpf_args[i];
2379
2380         if (trace->trace_pgfaults & TRACE_PFMIN)
2381                 for (i = 0; i < minpf_args_nr; i++)
2382                         rec_argv[j++] = minpf_args[i];
2383
2384         for (i = 0; i < (unsigned int)argc; i++)
2385                 rec_argv[j++] = argv[i];
2386
2387         return cmd_record(j, rec_argv, NULL);
2388 }
2389
/* Forward declaration; the definition is not part of this section. */
static size_t trace__fprintf_thread_summary(struct trace *trace,
                                            FILE *fp);
2391
2392 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2393 {
2394         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2395
2396         if (IS_ERR(evsel))
2397                 return false;
2398
2399         if (perf_evsel__field(evsel, "pathname") == NULL) {
2400                 perf_evsel__delete(evsel);
2401                 return false;
2402         }
2403
2404         evsel->handler = trace__vfs_getname;
2405         perf_evlist__add(evlist, evsel);
2406         return true;
2407 }
2408
2409 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2410                                     u64 config)
2411 {
2412         struct perf_evsel *evsel;
2413         struct perf_event_attr attr = {
2414                 .type = PERF_TYPE_SOFTWARE,
2415                 .mmap_data = 1,
2416         };
2417
2418         attr.config = config;
2419         attr.sample_period = 1;
2420
2421         event_attr_init(&attr);
2422
2423         evsel = perf_evsel__new(&attr);
2424         if (!evsel)
2425                 return -ENOMEM;
2426
2427         evsel->handler = trace__pgfault;
2428         perf_evlist__add(evlist, evsel);
2429
2430         return 0;
2431 }
2432
2433 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2434 {
2435         const u32 type = event->header.type;
2436         struct perf_evsel *evsel;
2437
2438         if (!trace->full_time && trace->base_time == 0)
2439                 trace->base_time = sample->time;
2440
2441         if (type != PERF_RECORD_SAMPLE) {
2442                 trace__process_event(trace, trace->host, event, sample);
2443                 return;
2444         }
2445
2446         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2447         if (evsel == NULL) {
2448                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2449                 return;
2450         }
2451
2452         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2453             sample->raw_data == NULL) {
2454                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2455                        perf_evsel__name(evsel), sample->tid,
2456                        sample->cpu, sample->raw_size);
2457         } else {
2458                 tracepoint_handler handler = evsel->handler;
2459                 handler(trace, evsel, event, sample);
2460         }
2461 }
2462
2463 static int trace__add_syscall_newtp(struct trace *trace)
2464 {
2465         int ret = -1;
2466         struct perf_evlist *evlist = trace->evlist;
2467         struct perf_evsel *sys_enter, *sys_exit;
2468
2469         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2470         if (sys_enter == NULL)
2471                 goto out;
2472
2473         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2474                 goto out_delete_sys_enter;
2475
2476         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2477         if (sys_exit == NULL)
2478                 goto out_delete_sys_enter;
2479
2480         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2481                 goto out_delete_sys_exit;
2482
2483         perf_evlist__add(evlist, sys_enter);
2484         perf_evlist__add(evlist, sys_exit);
2485
2486         trace->syscalls.events.sys_enter = sys_enter;
2487         trace->syscalls.events.sys_exit  = sys_exit;
2488
2489         ret = 0;
2490 out:
2491         return ret;
2492
2493 out_delete_sys_exit:
2494         perf_evsel__delete_priv(sys_exit);
2495 out_delete_sys_enter:
2496         perf_evsel__delete_priv(sys_enter);
2497         goto out;
2498 }
2499
2500 static int trace__set_ev_qualifier_filter(struct trace *trace)
2501 {
2502         int err = -1;
2503         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2504                                                 trace->ev_qualifier_ids.nr,
2505                                                 trace->ev_qualifier_ids.entries);
2506
2507         if (filter == NULL)
2508                 goto out_enomem;
2509
2510         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2511                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2512
2513         free(filter);
2514 out:
2515         return err;
2516 out_enomem:
2517         errno = ENOMEM;
2518         goto out;
2519 }
2520
/*
 * Live tracing mode.
 *
 * Adds the events selected in @trace to trace->evlist, optionally prepares
 * the workload given in @argv (only when @argc > 0), opens/filters/mmaps the
 * events and then reads and dispatches events from the mmaps until there is
 * nothing left to read or the run is interrupted.
 *
 * Returns the value of err at exit: 0 when everything succeeded, otherwise
 * whatever the failing step left there (-1 for the early setup failures).
 * On every path the evlist is deleted, trace->evlist is reset to NULL and
 * trace->live is cleared before returning.
 */
2521 static int trace__run(struct trace *trace, int argc, const char **argv)
2522 {
2523         struct perf_evlist *evlist = trace->evlist;
2524         struct perf_evsel *evsel;
2525         int err = -1, i;
2526         unsigned long before;
        /* A command was passed on the command line: it is run as the workload. */
2527         const bool forks = argc > 0;
2528         bool draining = false;
2529
2530         trace->live = true;
2531
        /*
         * Add the events selected by the options: syscall enter/exit, the
         * vfs_getname probe, major/minor page faults and sched_stat_runtime.
         */
2532         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2533                 goto out_error_raw_syscalls;
2534
        /* Not being able to add the vfs_getname probe is not an error. */
2535         if (trace->trace_syscalls)
2536                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2537
2538         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2539             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2540                 goto out_error_mem;
2541         }
2542
2543         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2544             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2545                 goto out_error_mem;
2546
2547         if (trace->sched &&
2548             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2549                                    trace__sched_stat_runtime))
2550                 goto out_error_sched_stat_runtime;
2551
2552         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2553         if (err < 0) {
2554                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2555                 goto out_delete_evlist;
2556         }
2557
2558         err = trace__symbols_init(trace, evlist);
2559         if (err < 0) {
2560                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2561                 goto out_delete_evlist;
2562         }
2563
2564         perf_evlist__config(evlist, &trace->opts);
2565
2566         signal(SIGCHLD, sig_handler);
2567         signal(SIGINT, sig_handler);
2568
2569         if (forks) {
2570                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2571                                                     argv, false, NULL);
2572                 if (err < 0) {
2573                         fprintf(trace->output, "Couldn't run the workload!\n");
2574                         goto out_delete_evlist;
2575                 }
2576         }
2577
2578         err = perf_evlist__open(evlist);
2579         if (err < 0)
2580                 goto out_error_open;
2581
2582         /*
2583          * Better not use !target__has_task() here because we need to cover the
2584          * case where no threads were specified in the command line, but a
2585          * workload was, and in that case we will fill in the thread_map when
2586          * we fork the workload in perf_evlist__prepare_workload.
2587          */
2588         if (trace->filter_pids.nr > 0)
2589                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2590         else if (thread_map__pid(evlist->threads, 0) == -1)
2591                 err = perf_evlist__set_filter_pid(evlist, getpid());
2592
        /* NOTE(review): any filter-pid failure is reported via the out-of-memory message. */
2593         if (err < 0)
2594                 goto out_error_mem;
2595
2596         if (trace->ev_qualifier_ids.nr > 0) {
2597                 err = trace__set_ev_qualifier_filter(trace);
2598                 if (err < 0)
2599                         goto out_errno;
2600
2601                 pr_debug("event qualifier tracepoint filter: %s\n",
2602                          trace->syscalls.events.sys_exit->filter);
2603         }
2604
        /* evsel is passed by address and is what the error path below reports on. */
2605         err = perf_evlist__apply_filters(evlist, &evsel);
2606         if (err < 0)
2607                 goto out_error_apply_filters;
2608
2609         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2610         if (err < 0)
2611                 goto out_error_mmap;
2612
2613         if (!target__none(&trace->opts.target))
2614                 perf_evlist__enable(evlist);
2615
2616         if (forks)
2617                 perf_evlist__start_workload(evlist);
2618
2619         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2620                                   evlist->threads->nr > 1 ||
2621                                   perf_evlist__first(evlist)->attr.inherit;
        /*
         * Main loop: one pass over every mmap.  'before' snapshots the event
         * count so we can tell afterwards whether this pass read anything.
         */
2622 again:
2623         before = trace->nr_events;
2624
2625         for (i = 0; i < evlist->nr_mmaps; i++) {
2626                 union perf_event *event;
2627
2628                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2629                         struct perf_sample sample;
2630
2631                         ++trace->nr_events;
2632
2633                         err = perf_evlist__parse_sample(evlist, event, &sample);
2634                         if (err) {
2635                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2636                                 goto next_event;
2637                         }
2638
2639                         trace__handle_event(trace, event, &sample);
                        /* The event is consumed whether or not it could be parsed. */
2640 next_event:
2641                         perf_evlist__mmap_consume(evlist, i);
2642
2643                         if (interrupted)
2644                                 goto out_disable;
2645
                        /*
                         * 'done' and 'interrupted' are set outside this
                         * function (presumably by sig_handler - not visible
                         * here).  Once done, disable the events a single time
                         * and keep reading what is already in the buffers.
                         */
2646                         if (done && !draining) {
2647                                 perf_evlist__disable(evlist);
2648                                 draining = true;
2649                         }
2650                 }
2651         }
2652
        /*
         * Nothing read in this pass: unless already draining, poll (no
         * timeout, or 100 once done) and retry.  Otherwise fall through and
         * finish.  If events were read, just go for another pass.
         */
2653         if (trace->nr_events == before) {
2654                 int timeout = done ? 100 : -1;
2655
2656                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
                        /* presumably 0 means no pollable fd is left - TODO confirm */
2657                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2658                                 draining = true;
2659
2660                         goto again;
2661                 }
2662         } else {
2663                 goto again;
2664         }
2665
2666 out_disable:
2667         thread__zput(trace->current);
2668
2669         perf_evlist__disable(evlist);
2670
        /* The summary and tool stats are only printed when err is 0 at this point. */
2671         if (!err) {
2672                 if (trace->summary)
2673                         trace__fprintf_thread_summary(trace, trace->output);
2674
2675                 if (trace->show_tool_stats) {
2676                         fprintf(trace->output, "Stats:\n "
2677                                                " vfs_getname : %" PRIu64 "\n"
2678                                                " proc_getname: %" PRIu64 "\n",
2679                                 trace->stats.vfs_getname,
2680                                 trace->stats.proc_getname);
2681                 }
2682         }
2683
        /* Common exit: every path, successful or not, ends up here. */
2684 out_delete_evlist:
2685         perf_evlist__delete(evlist);
2686         trace->evlist = NULL;
2687         trace->live = false;
2688         return err;
/*
 * The error labels that need a scratch buffer live in this block placed after
 * the return above; it is only ever entered through a goto.
 */
2689 {
2690         char errbuf[BUFSIZ];
2691
2692 out_error_sched_stat_runtime:
2693         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2694         goto out_error;
2695
2696 out_error_raw_syscalls:
2697         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2698         goto out_error;
2699
2700 out_error_mmap:
2701         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2702         goto out_error;
2703
        /* Falls through into out_error to print the message. */
2704 out_error_open:
2705         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2706
2707 out_error:
2708         fprintf(trace->output, "%s\n", errbuf);
2709         goto out_delete_evlist;
2710
2711 out_error_apply_filters:
2712         fprintf(trace->output,
2713                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2714                 evsel->filter, perf_evsel__name(evsel), errno,
2715                 strerror_r(errno, errbuf, sizeof(errbuf)));
2716         goto out_delete_evlist;
2717 }
2718 out_error_mem:
2719         fprintf(trace->output, "Not enough memory to run!\n");
2720         goto out_delete_evlist;
2721
2722 out_errno:
2723         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2724         goto out_delete_evlist;
2725 }
2726
2727 static int trace__replay(struct trace *trace)
2728 {
2729         const struct perf_evsel_str_handler handlers[] = {
2730                 { "probe:vfs_getname",       trace__vfs_getname, },
2731         };
2732         struct perf_data_file file = {
2733                 .path  = input_name,
2734                 .mode  = PERF_DATA_MODE_READ,
2735                 .force = trace->force,
2736         };
2737         struct perf_session *session;
2738         struct perf_evsel *evsel;
2739         int err = -1;
2740
2741         trace->tool.sample        = trace__process_sample;
2742         trace->tool.mmap          = perf_event__process_mmap;
2743         trace->tool.mmap2         = perf_event__process_mmap2;
2744         trace->tool.comm          = perf_event__process_comm;
2745         trace->tool.exit          = perf_event__process_exit;
2746         trace->tool.fork          = perf_event__process_fork;
2747         trace->tool.attr          = perf_event__process_attr;
2748         trace->tool.tracing_data = perf_event__process_tracing_data;
2749         trace->tool.build_id      = perf_event__process_build_id;
2750
2751         trace->tool.ordered_events = true;
2752         trace->tool.ordering_requires_timestamps = true;
2753
2754         /* add tid to output */
2755         trace->multiple_threads = true;
2756
2757         session = perf_session__new(&file, false, &trace->tool);
2758         if (session == NULL)
2759                 return -1;
2760
2761         if (symbol__init(&session->header.env) < 0)
2762                 goto out;
2763
2764         trace->host = &session->machines.host;
2765
2766         err = perf_session__set_tracepoints_handlers(session, handlers);
2767         if (err)
2768                 goto out;
2769
2770         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2771                                                      "raw_syscalls:sys_enter");
2772         /* older kernels have syscalls tp versus raw_syscalls */
2773         if (evsel == NULL)
2774                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2775                                                              "syscalls:sys_enter");
2776
2777         if (evsel &&
2778             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2779             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2780                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2781                 goto out;
2782         }
2783
2784         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2785                                                      "raw_syscalls:sys_exit");
2786         if (evsel == NULL)
2787                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2788                                                              "syscalls:sys_exit");
2789         if (evsel &&
2790             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2791             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2792                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2793                 goto out;
2794         }
2795
2796         evlist__for_each(session->evlist, evsel) {
2797                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2798                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2799                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2800                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2801                         evsel->handler = trace__pgfault;
2802         }
2803
2804         err = parse_target_str(trace);
2805         if (err != 0)
2806                 goto out;
2807
2808         setup_pager();
2809
2810         err = perf_session__process_events(session);
2811         if (err)
2812                 pr_err("Failed to process events, error %d", err);
2813
2814         else if (trace->summary)
2815                 trace__fprintf_thread_summary(trace, trace->output);
2816
2817 out:
2818         perf_session__delete(session);
2819
2820         return err;
2821 }
2822
/*
 * Print the heading that precedes the per-thread summary.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2831
2832 static size_t thread__dump_stats(struct thread_trace *ttrace,
2833                                  struct trace *trace, FILE *fp)
2834 {
2835         struct stats *stats;
2836         size_t printed = 0;
2837         struct syscall *sc;
2838         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2839
2840         if (inode == NULL)
2841                 return 0;
2842
2843         printed += fprintf(fp, "\n");
2844
2845         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2846         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2847         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2848
2849         /* each int_node is a syscall */
2850         while (inode) {
2851                 stats = inode->priv;
2852                 if (stats) {
2853                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2854                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2855                         double avg = avg_stats(stats);
2856                         double pct;
2857                         u64 n = (u64) stats->n;
2858
2859                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2860                         avg /= NSEC_PER_MSEC;
2861
2862                         sc = &trace->syscalls.table[inode->i];
2863                         printed += fprintf(fp, "   %-15s", sc->name);
2864                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2865                                            n, avg * n, min, avg);
2866                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2867                 }
2868
2869                 inode = intlist__next(inode);
2870         }
2871
2872         printed += fprintf(fp, "\n\n");
2873
2874         return printed;
2875 }
2876
2877 /* Context passed through machine__for_each_thread() to trace__fprintf_one_thread() */
2878 struct summary_data {
2879         FILE *fp;               /* stream the summary is written to */
2880         struct trace *trace;    /* tracer state read while printing (event total, syscall table) */
2881         size_t printed;         /* running total of the fprintf() results */
2882 };
2883
2884 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2885 {
2886         struct summary_data *data = priv;
2887         FILE *fp = data->fp;
2888         size_t printed = data->printed;
2889         struct trace *trace = data->trace;
2890         struct thread_trace *ttrace = thread__priv(thread);
2891         double ratio;
2892
2893         if (ttrace == NULL)
2894                 return 0;
2895
2896         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2897
2898         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2899         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2900         printed += fprintf(fp, "%.1f%%", ratio);
2901         if (ttrace->pfmaj)
2902                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2903         if (ttrace->pfmin)
2904                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2905         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2906         printed += thread__dump_stats(ttrace, trace, fp);
2907
2908         data->printed += printed;
2909
2910         return 0;
2911 }
2912
2913 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2914 {
2915         struct summary_data data = {
2916                 .fp = fp,
2917                 .trace = trace
2918         };
2919         data.printed = trace__fprintf_threads_header(fp);
2920
2921         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2922
2923         return data.printed;
2924 }
2925
2926 static int trace__set_duration(const struct option *opt, const char *str,
2927                                int unset __maybe_unused)
2928 {
2929         struct trace *trace = opt->value;
2930
2931         trace->duration_filter = atof(str);
2932         return 0;
2933 }
2934
2935 static int trace__set_filter_pids(const struct option *opt, const char *str,
2936                                   int unset __maybe_unused)
2937 {
2938         int ret = -1;
2939         size_t i;
2940         struct trace *trace = opt->value;
2941         /*
2942          * FIXME: introduce a intarray class, plain parse csv and create a
2943          * { int nr, int entries[] } struct...
2944          */
2945         struct intlist *list = intlist__new(str);
2946
2947         if (list == NULL)
2948                 return -1;
2949
2950         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2951         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2952
2953         if (trace->filter_pids.entries == NULL)
2954                 goto out;
2955
2956         trace->filter_pids.entries[0] = getpid();
2957
2958         for (i = 1; i < trace->filter_pids.nr; ++i)
2959                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2960
2961         intlist__delete(list);
2962         ret = 0;
2963 out:
2964         return ret;
2965 }
2966
2967 static int trace__open_output(struct trace *trace, const char *filename)
2968 {
2969         struct stat st;
2970
2971         if (!stat(filename, &st) && st.st_size) {
2972                 char oldname[PATH_MAX];
2973
2974                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2975                 unlink(oldname);
2976                 rename(filename, oldname);
2977         }
2978
2979         trace->output = fopen(filename, "w");
2980
2981         return trace->output == NULL ? -errno : 0;
2982 }
2983
2984 static int parse_pagefaults(const struct option *opt, const char *str,
2985                             int unset __maybe_unused)
2986 {
2987         int *trace_pgfaults = opt->value;
2988
2989         if (strcmp(str, "all") == 0)
2990                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2991         else if (strcmp(str, "maj") == 0)
2992                 *trace_pgfaults |= TRACE_PFMAJ;
2993         else if (strcmp(str, "min") == 0)
2994                 *trace_pgfaults |= TRACE_PFMIN;
2995         else
2996                 return -1;
2997
2998         return 0;
2999 }
3000
3001 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3002 {
3003         struct perf_evsel *evsel;
3004
3005         evlist__for_each(evlist, evsel)
3006                 evsel->handler = handler;
3007 }
3008
/*
 * Entry point of 'perf trace'.
 *
 * Parses the command line into a struct trace and then dispatches to one of:
 *   - trace__record() when the first remaining argument is "record";
 *   - trace__replay() when input_name is set (-i/--input);
 *   - trace__run() otherwise (live tracing, optionally of a given command).
 *
 * Returns the result of whichever step ran last, or -1 when nothing was
 * selected to be traced.
 */
3009 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3010 {
3011         const char *trace_usage[] = {
3012                 "perf trace [<options>] [<command>]",
3013                 "perf trace [<options>] -- <command> [<options>]",
3014                 "perf trace record [<options>] [<command>]",
3015                 "perf trace record [<options>] -- <command> [<options>]",
3016                 NULL
3017         };
        /* Defaults: output goes to stderr, COMM is shown and syscalls are traced. */
3018         struct trace trace = {
3019                 .audit = {
3020                         .machine = audit_detect_machine(),
3021                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
3022                 },
3023                 .syscalls = {
3024                         . max = -1,
3025                 },
3026                 .opts = {
3027                         .target = {
3028                                 .uid       = UINT_MAX,
3029                                 .uses_mmap = true,
3030                         },
3031                         .user_freq     = UINT_MAX,
3032                         .user_interval = ULLONG_MAX,
3033                         .no_buffering  = true,
3034                         .mmap_pages    = UINT_MAX,
3035                         .proc_map_timeout  = 500,
3036                 },
3037                 .output = stderr,
3038                 .show_comm = true,
3039                 .trace_syscalls = true,
3040         };
3041         const char *output_name = NULL;
3042         const char *ev_qualifier_str = NULL;
3043         const struct option trace_options[] = {
3044         OPT_CALLBACK(0, "event", &trace.evlist, "event",
3045                      "event selector. use 'perf list' to list available events",
3046                      parse_events_option),
3047         OPT_BOOLEAN(0, "comm", &trace.show_comm,
3048                     "show the thread COMM next to its id"),
3049         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3050         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3051         OPT_STRING('o', "output", &output_name, "file", "output file name"),
3052         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3053         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3054                     "trace events on existing process id"),
3055         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3056                     "trace events on existing thread id"),
3057         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3058                      "pids to filter (by the kernel)", trace__set_filter_pids),
3059         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3060                     "system-wide collection from all CPUs"),
3061         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3062                     "list of cpus to monitor"),
3063         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3064                     "child tasks do not inherit counters"),
3065         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3066                      "number of mmap data pages",
3067                      perf_evlist__parse_mmap_pages),
3068         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3069                    "user to profile"),
3070         OPT_CALLBACK(0, "duration", &trace, "float",
3071                      "show only events with duration > N.M ms",
3072                      trace__set_duration),
3073         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3074         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3075         OPT_BOOLEAN('T', "time", &trace.full_time,
3076                     "Show full timestamp, not time relative to first start"),
3077         OPT_BOOLEAN('s', "summary", &trace.summary_only,
3078                     "Show only syscall summary with statistics"),
3079         OPT_BOOLEAN('S', "with-summary", &trace.summary,
3080                     "Show all syscalls and summary with statistics"),
3081         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3082                      "Trace pagefaults", parse_pagefaults, "maj"),
3083         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3084         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3085         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3086                         "per thread proc mmap processing timeout in ms"),
3087         OPT_END()
3088         };
3089         const char * const trace_subcommands[] = { "record", NULL };
3090         int err;
3091         char bf[BUFSIZ];
3092
3093         signal(SIGSEGV, sighandler_dump_stack);
3094         signal(SIGFPE, sighandler_dump_stack);
3095
        /* The evlist must exist before option parsing: --event stores into it. */
3096         trace.evlist = perf_evlist__new();
3097
3098         if (trace.evlist == NULL) {
3099                 pr_err("Not enough memory to run!\n");
3100                 err = -ENOMEM;
3101                 goto out;
3102         }
3103
3104         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3105                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3106
        /* Page fault tracing asks for the sample address and time to be recorded. */
3107         if (trace.trace_pgfaults) {
3108                 trace.opts.sample_address = true;
3109                 trace.opts.sample_time = true;
3110         }
3111
        /* Events already in the evlist at this point all get the generic handler. */
3112         if (trace.evlist->nr_entries > 0)
3113                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3114
        /* 'perf trace record ...': hand the remaining arguments to trace__record(). */
3115         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3116                 return trace__record(&trace, argc-1, &argv[1]);
3117
3118         /* summary_only implies summary option, but don't overwrite summary if set */
3119         if (trace.summary_only)
3120                 trace.summary = trace.summary_only;
3121
3122         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3123             trace.evlist->nr_entries == 0 /* Was --events used? */) {
3124                 pr_err("Please specify something to trace.\n");
3125                 return -1;
3126         }
3127
        /* -o given: replace the default stderr output with the named file. */
3128         if (output_name != NULL) {
3129                 err = trace__open_output(&trace, output_name);
3130                 if (err < 0) {
3131                         perror("failed to create output file");
3132                         goto out;
3133                 }
3134         }
3135
3136         if (ev_qualifier_str != NULL) {
3137                 const char *s = ev_qualifier_str;
3138                 struct strlist_config slist_config = {
3139                         .dirname = system_path(STRACE_GROUPS_DIR),
3140                 };
3141
                /* A leading '!' sets not_ev_qualifier and is skipped before parsing the list. */
3142                 trace.not_ev_qualifier = *s == '!';
3143                 if (trace.not_ev_qualifier)
3144                         ++s;
3145                 trace.ev_qualifier = strlist__new(s, &slist_config);
3146                 if (trace.ev_qualifier == NULL) {
3147                         fputs("Not enough memory to parse event qualifier",
3148                               trace.output);
3149                         err = -ENOMEM;
3150                         goto out_close;
3151                 }
3152
3153                 err = trace__validate_ev_qualifier(&trace);
3154                 if (err)
3155                         goto out_close;
3156         }
3157
3158         err = target__validate(&trace.opts.target);
3159         if (err) {
3160                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3161                 fprintf(trace.output, "%s", bf);
3162                 goto out_close;
3163         }
3164
3165         err = target__parse_uid(&trace.opts.target);
3166         if (err) {
3167                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3168                 fprintf(trace.output, "%s", bf);
3169                 goto out_close;
3170         }
3171
        /* Neither a command nor an explicit target: trace system wide. */
3172         if (!argc && target__none(&trace.opts.target))
3173                 trace.opts.target.system_wide = true;
3174
3175         if (input_name)
3176                 err = trace__replay(&trace);
3177         else
3178                 err = trace__run(&trace, argc, argv);
3179
        /* Only close the stream if it was opened here; otherwise it is still stderr. */
3180 out_close:
3181         if (output_name != NULL)
3182                 fclose(trace.output);
3183 out:
3184         return err;
3185 }