OSDN Git Service

Merge "icnss: Correct condition to check invalid address range"
[sagit-ice-cold/kernel_xiaomi_msm8998.git] / tools / perf / builtin-trace.c
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include "util/exec_cmd.h"
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include "util/parse-options.h"
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36
37 #include <libaudit.h>
38 #include <stdlib.h>
39 #include <sys/mman.h>
40 #include <linux/futex.h>
41 #include <linux/err.h>
42
/*
 * Fallbacks for constants that the headers of older distros may not provide.
 * Every definition is guarded, so a value supplied by the system headers wins.
 */

/* mmap() / madvise() */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

/* eventfd() */
#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
# define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
# define EFD_CLOEXEC		02000000
#endif

/* open() */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

/* socket() / recvmsg() */
#if !defined(SOCK_DCCP)
# define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
# define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
# define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open() */
#if !defined(PERF_FLAG_FD_NO_GROUP)
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
108
109
110 struct tp_field {
111         int offset;
112         union {
113                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
114                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
115         };
116 };
117
118 #define TP_UINT_FIELD(bits) \
119 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
120 { \
121         u##bits value; \
122         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123         return value;  \
124 }
125
126 TP_UINT_FIELD(8);
127 TP_UINT_FIELD(16);
128 TP_UINT_FIELD(32);
129 TP_UINT_FIELD(64);
130
131 #define TP_UINT_FIELD__SWAPPED(bits) \
132 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
133 { \
134         u##bits value; \
135         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136         return bswap_##bits(value);\
137 }
138
139 TP_UINT_FIELD__SWAPPED(16);
140 TP_UINT_FIELD__SWAPPED(32);
141 TP_UINT_FIELD__SWAPPED(64);
142
143 static int tp_field__init_uint(struct tp_field *field,
144                                struct format_field *format_field,
145                                bool needs_swap)
146 {
147         field->offset = format_field->offset;
148
149         switch (format_field->size) {
150         case 1:
151                 field->integer = tp_field__u8;
152                 break;
153         case 2:
154                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155                 break;
156         case 4:
157                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158                 break;
159         case 8:
160                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161                 break;
162         default:
163                 return -1;
164         }
165
166         return 0;
167 }
168
169 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
170 {
171         return sample->raw_data + field->offset;
172 }
173
174 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
175 {
176         field->offset = format_field->offset;
177         field->pointer = tp_field__ptr;
178         return 0;
179 }
180
181 struct syscall_tp {
182         struct tp_field id;
183         union {
184                 struct tp_field args, ret;
185         };
186 };
187
188 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189                                           struct tp_field *field,
190                                           const char *name)
191 {
192         struct format_field *format_field = perf_evsel__field(evsel, name);
193
194         if (format_field == NULL)
195                 return -1;
196
197         return tp_field__init_uint(field, format_field, evsel->needs_swap);
198 }
199
200 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
201         ({ struct syscall_tp *sc = evsel->priv;\
202            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
203
/*
 * Look up the tracepoint field called @name in @evsel and bind @field to it
 * as a pointer into the raw payload.
 *
 * Returns -1 if the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}

/*
 * Convenience wrapper: initialises member @name of the struct syscall_tp kept
 * in evsel->priv from the tracepoint field with the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ \
		struct syscall_tp *sc = evsel->priv; \
		perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); \
	})
219
220 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
221 {
222         zfree(&evsel->priv);
223         perf_evsel__delete(evsel);
224 }
225
226 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
227 {
228         evsel->priv = malloc(sizeof(struct syscall_tp));
229         if (evsel->priv != NULL) {
230                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231                         goto out_delete;
232
233                 evsel->handler = handler;
234                 return 0;
235         }
236
237         return -ENOMEM;
238
239 out_delete:
240         zfree(&evsel->priv);
241         return -ENOENT;
242 }
243
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare it
 * with perf_evsel__init_syscall_tp().
 *
 * The "raw_syscalls" subsystem is tried first, then "syscalls".
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialisation failed (the evsel is destroyed in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel;

	evsel = perf_evsel__newtp("raw_syscalls", direction);
	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
264
/*
 * Read member @name of the struct syscall_tp stored in evsel->priv from
 * @sample, using the integer reader bound to that member.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.integer(&fields->name, sample); \
	})

/* Same as above, but through the pointer reader bound to member @name. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.pointer(&fields->name, sample); \
	})
272
273 struct syscall_arg {
274         unsigned long val;
275         struct thread *thread;
276         struct trace  *trace;
277         void          *parm;
278         u8            idx;
279         u8            mask;
280 };
281
/*
 * Table mapping small integers to names: value v is named by
 * entries[v - offset] as long as that index is below nr_entries.
 */
struct strarray {
	int         offset;
	int         nr_entries;
	const char **entries;
};

/* Defines strarray__<array> covering all of @array, with offset 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}

/* Same, but the first entry of @array names the value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
	.offset     = off, \
}
298
299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300                                                 const char *intfmt,
301                                                 struct syscall_arg *arg)
302 {
303         struct strarray *sa = arg->parm;
304         int idx = arg->val - sa->offset;
305
306         if (idx < 0 || idx >= sa->nr_entries)
307                 return scnprintf(bf, size, intfmt, arg->val);
308
309         return scnprintf(bf, size, "%s", sa->entries[idx]);
310 }
311
/* String-array formatter that prints values missing from the table in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
319
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 *
 * String-array formatter that prints values missing from the table in hex.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
333
334 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
335                                         struct syscall_arg *arg);
336
337 #define SCA_FD syscall_arg__scnprintf_fd
338
339 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340                                            struct syscall_arg *arg)
341 {
342         int fd = arg->val;
343
344         if (fd == AT_FDCWD)
345                 return scnprintf(bf, size, "CWD");
346
347         return syscall_arg__scnprintf_fd(bf, size, arg);
348 }
349
350 #define SCA_FDAT syscall_arg__scnprintf_fd_at
351
352 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
353                                               struct syscall_arg *arg);
354
355 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
356
357 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358                                          struct syscall_arg *arg)
359 {
360         return scnprintf(bf, size, "%#lx", arg->val);
361 }
362
363 #define SCA_HEX syscall_arg__scnprintf_hex
364
365 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366                                          struct syscall_arg *arg)
367 {
368         return scnprintf(bf, size, "%d", arg->val);
369 }
370
371 #define SCA_INT syscall_arg__scnprintf_int
372
373 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374                                                struct syscall_arg *arg)
375 {
376         int printed = 0, prot = arg->val;
377
378         if (prot == PROT_NONE)
379                 return scnprintf(bf, size, "NONE");
380 #define P_MMAP_PROT(n) \
381         if (prot & PROT_##n) { \
382                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383                 prot &= ~PROT_##n; \
384         }
385
386         P_MMAP_PROT(EXEC);
387         P_MMAP_PROT(READ);
388         P_MMAP_PROT(WRITE);
389 #ifdef PROT_SEM
390         P_MMAP_PROT(SEM);
391 #endif
392         P_MMAP_PROT(GROWSDOWN);
393         P_MMAP_PROT(GROWSUP);
394 #undef P_MMAP_PROT
395
396         if (prot)
397                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
398
399         return printed;
400 }
401
402 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
403
404 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405                                                 struct syscall_arg *arg)
406 {
407         int printed = 0, flags = arg->val;
408
409 #define P_MMAP_FLAG(n) \
410         if (flags & MAP_##n) { \
411                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412                 flags &= ~MAP_##n; \
413         }
414
415         P_MMAP_FLAG(SHARED);
416         P_MMAP_FLAG(PRIVATE);
417 #ifdef MAP_32BIT
418         P_MMAP_FLAG(32BIT);
419 #endif
420         P_MMAP_FLAG(ANONYMOUS);
421         P_MMAP_FLAG(DENYWRITE);
422         P_MMAP_FLAG(EXECUTABLE);
423         P_MMAP_FLAG(FILE);
424         P_MMAP_FLAG(FIXED);
425         P_MMAP_FLAG(GROWSDOWN);
426 #ifdef MAP_HUGETLB
427         P_MMAP_FLAG(HUGETLB);
428 #endif
429         P_MMAP_FLAG(LOCKED);
430         P_MMAP_FLAG(NONBLOCK);
431         P_MMAP_FLAG(NORESERVE);
432         P_MMAP_FLAG(POPULATE);
433         P_MMAP_FLAG(STACK);
434 #ifdef MAP_UNINITIALIZED
435         P_MMAP_FLAG(UNINITIALIZED);
436 #endif
437 #undef P_MMAP_FLAG
438
439         if (flags)
440                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
441
442         return printed;
443 }
444
445 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
446
447 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448                                                   struct syscall_arg *arg)
449 {
450         int printed = 0, flags = arg->val;
451
452 #define P_MREMAP_FLAG(n) \
453         if (flags & MREMAP_##n) { \
454                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455                 flags &= ~MREMAP_##n; \
456         }
457
458         P_MREMAP_FLAG(MAYMOVE);
459 #ifdef MREMAP_FIXED
460         P_MREMAP_FLAG(FIXED);
461 #endif
462 #undef P_MREMAP_FLAG
463
464         if (flags)
465                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466
467         return printed;
468 }
469
470 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
471
472 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473                                                       struct syscall_arg *arg)
474 {
475         int behavior = arg->val;
476
477         switch (behavior) {
478 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479         P_MADV_BHV(NORMAL);
480         P_MADV_BHV(RANDOM);
481         P_MADV_BHV(SEQUENTIAL);
482         P_MADV_BHV(WILLNEED);
483         P_MADV_BHV(DONTNEED);
484         P_MADV_BHV(REMOVE);
485         P_MADV_BHV(DONTFORK);
486         P_MADV_BHV(DOFORK);
487         P_MADV_BHV(HWPOISON);
488 #ifdef MADV_SOFT_OFFLINE
489         P_MADV_BHV(SOFT_OFFLINE);
490 #endif
491         P_MADV_BHV(MERGEABLE);
492         P_MADV_BHV(UNMERGEABLE);
493 #ifdef MADV_HUGEPAGE
494         P_MADV_BHV(HUGEPAGE);
495 #endif
496 #ifdef MADV_NOHUGEPAGE
497         P_MADV_BHV(NOHUGEPAGE);
498 #endif
499 #ifdef MADV_DONTDUMP
500         P_MADV_BHV(DONTDUMP);
501 #endif
502 #ifdef MADV_DODUMP
503         P_MADV_BHV(DODUMP);
504 #endif
505 #undef P_MADV_PHV
506         default: break;
507         }
508
509         return scnprintf(bf, size, "%#x", behavior);
510 }
511
512 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
513
514 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515                                            struct syscall_arg *arg)
516 {
517         int printed = 0, op = arg->val;
518
519         if (op == 0)
520                 return scnprintf(bf, size, "NONE");
521 #define P_CMD(cmd) \
522         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524                 op &= ~LOCK_##cmd; \
525         }
526
527         P_CMD(SH);
528         P_CMD(EX);
529         P_CMD(NB);
530         P_CMD(UN);
531         P_CMD(MAND);
532         P_CMD(RW);
533         P_CMD(READ);
534         P_CMD(WRITE);
535 #undef P_OP
536
537         if (op)
538                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
539
540         return printed;
541 }
542
543 #define SCA_FLOCK syscall_arg__scnprintf_flock
544
545 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
546 {
547         enum syscall_futex_args {
548                 SCF_UADDR   = (1 << 0),
549                 SCF_OP      = (1 << 1),
550                 SCF_VAL     = (1 << 2),
551                 SCF_TIMEOUT = (1 << 3),
552                 SCF_UADDR2  = (1 << 4),
553                 SCF_VAL3    = (1 << 5),
554         };
555         int op = arg->val;
556         int cmd = op & FUTEX_CMD_MASK;
557         size_t printed = 0;
558
559         switch (cmd) {
560 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
562         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
565         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
566         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
567         P_FUTEX_OP(WAKE_OP);                                                      break;
568         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
571         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
572         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
573         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
574         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
575         }
576
577         if (op & FUTEX_PRIVATE_FLAG)
578                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
579
580         if (op & FUTEX_CLOCK_REALTIME)
581                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
582
583         return printed;
584 }
585
586 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
587
588 static const char *bpf_cmd[] = {
589         "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
590         "MAP_GET_NEXT_KEY", "PROG_LOAD",
591 };
592 static DEFINE_STRARRAY(bpf_cmd);
593
594 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
595 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
596
597 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
598 static DEFINE_STRARRAY(itimers);
599
600 static const char *keyctl_options[] = {
601         "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
602         "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
603         "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
604         "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
605         "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
606 };
607 static DEFINE_STRARRAY(keyctl_options);
608
609 static const char *whences[] = { "SET", "CUR", "END",
610 #ifdef SEEK_DATA
611 "DATA",
612 #endif
613 #ifdef SEEK_HOLE
614 "HOLE",
615 #endif
616 };
617 static DEFINE_STRARRAY(whences);
618
619 static const char *fcntl_cmds[] = {
620         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
621         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
622         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
623         "F_GETOWNER_UIDS",
624 };
625 static DEFINE_STRARRAY(fcntl_cmds);
626
627 static const char *rlimit_resources[] = {
628         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
629         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
630         "RTTIME",
631 };
632 static DEFINE_STRARRAY(rlimit_resources);
633
634 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
635 static DEFINE_STRARRAY(sighow);
636
637 static const char *clockid[] = {
638         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
639         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
640         "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
641 };
642 static DEFINE_STRARRAY(clockid);
643
644 static const char *socket_families[] = {
645         "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
646         "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
647         "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
648         "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
649         "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
650         "ALG", "NFC", "VSOCK",
651 };
652 static DEFINE_STRARRAY(socket_families);
653
654 #ifndef SOCK_TYPE_MASK
655 #define SOCK_TYPE_MASK 0xf
656 #endif
657
658 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
659                                                       struct syscall_arg *arg)
660 {
661         size_t printed;
662         int type = arg->val,
663             flags = type & ~SOCK_TYPE_MASK;
664
665         type &= SOCK_TYPE_MASK;
666         /*
667          * Can't use a strarray, MIPS may override for ABI reasons.
668          */
669         switch (type) {
670 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
671         P_SK_TYPE(STREAM);
672         P_SK_TYPE(DGRAM);
673         P_SK_TYPE(RAW);
674         P_SK_TYPE(RDM);
675         P_SK_TYPE(SEQPACKET);
676         P_SK_TYPE(DCCP);
677         P_SK_TYPE(PACKET);
678 #undef P_SK_TYPE
679         default:
680                 printed = scnprintf(bf, size, "%#x", type);
681         }
682
683 #define P_SK_FLAG(n) \
684         if (flags & SOCK_##n) { \
685                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
686                 flags &= ~SOCK_##n; \
687         }
688
689         P_SK_FLAG(CLOEXEC);
690         P_SK_FLAG(NONBLOCK);
691 #undef P_SK_FLAG
692
693         if (flags)
694                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
695
696         return printed;
697 }
698
699 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
700
701 #ifndef MSG_PROBE
702 #define MSG_PROBE            0x10
703 #endif
704 #ifndef MSG_WAITFORONE
705 #define MSG_WAITFORONE  0x10000
706 #endif
707 #ifndef MSG_SENDPAGE_NOTLAST
708 #define MSG_SENDPAGE_NOTLAST 0x20000
709 #endif
710 #ifndef MSG_FASTOPEN
711 #define MSG_FASTOPEN         0x20000000
712 #endif
713
714 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
715                                                struct syscall_arg *arg)
716 {
717         int printed = 0, flags = arg->val;
718
719         if (flags == 0)
720                 return scnprintf(bf, size, "NONE");
721 #define P_MSG_FLAG(n) \
722         if (flags & MSG_##n) { \
723                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
724                 flags &= ~MSG_##n; \
725         }
726
727         P_MSG_FLAG(OOB);
728         P_MSG_FLAG(PEEK);
729         P_MSG_FLAG(DONTROUTE);
730         P_MSG_FLAG(TRYHARD);
731         P_MSG_FLAG(CTRUNC);
732         P_MSG_FLAG(PROBE);
733         P_MSG_FLAG(TRUNC);
734         P_MSG_FLAG(DONTWAIT);
735         P_MSG_FLAG(EOR);
736         P_MSG_FLAG(WAITALL);
737         P_MSG_FLAG(FIN);
738         P_MSG_FLAG(SYN);
739         P_MSG_FLAG(CONFIRM);
740         P_MSG_FLAG(RST);
741         P_MSG_FLAG(ERRQUEUE);
742         P_MSG_FLAG(NOSIGNAL);
743         P_MSG_FLAG(MORE);
744         P_MSG_FLAG(WAITFORONE);
745         P_MSG_FLAG(SENDPAGE_NOTLAST);
746         P_MSG_FLAG(FASTOPEN);
747         P_MSG_FLAG(CMSG_CLOEXEC);
748 #undef P_MSG_FLAG
749
750         if (flags)
751                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753         return printed;
754 }
755
756 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
757
758 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
759                                                  struct syscall_arg *arg)
760 {
761         size_t printed = 0;
762         int mode = arg->val;
763
764         if (mode == F_OK) /* 0 */
765                 return scnprintf(bf, size, "F");
766 #define P_MODE(n) \
767         if (mode & n##_OK) { \
768                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
769                 mode &= ~n##_OK; \
770         }
771
772         P_MODE(R);
773         P_MODE(W);
774         P_MODE(X);
775 #undef P_MODE
776
777         if (mode)
778                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
779
780         return printed;
781 }
782
783 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
784
785 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
786                                               struct syscall_arg *arg);
787
788 #define SCA_FILENAME syscall_arg__scnprintf_filename
789
790 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
791                                                struct syscall_arg *arg)
792 {
793         int printed = 0, flags = arg->val;
794
795         if (!(flags & O_CREAT))
796                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
797
798         if (flags == 0)
799                 return scnprintf(bf, size, "RDONLY");
800 #define P_FLAG(n) \
801         if (flags & O_##n) { \
802                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
803                 flags &= ~O_##n; \
804         }
805
806         P_FLAG(APPEND);
807         P_FLAG(ASYNC);
808         P_FLAG(CLOEXEC);
809         P_FLAG(CREAT);
810         P_FLAG(DIRECT);
811         P_FLAG(DIRECTORY);
812         P_FLAG(EXCL);
813         P_FLAG(LARGEFILE);
814         P_FLAG(NOATIME);
815         P_FLAG(NOCTTY);
816 #ifdef O_NONBLOCK
817         P_FLAG(NONBLOCK);
818 #elif O_NDELAY
819         P_FLAG(NDELAY);
820 #endif
821 #ifdef O_PATH
822         P_FLAG(PATH);
823 #endif
824         P_FLAG(RDWR);
825 #ifdef O_DSYNC
826         if ((flags & O_SYNC) == O_SYNC)
827                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
828         else {
829                 P_FLAG(DSYNC);
830         }
831 #else
832         P_FLAG(SYNC);
833 #endif
834         P_FLAG(TRUNC);
835         P_FLAG(WRONLY);
836 #undef P_FLAG
837
838         if (flags)
839                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
840
841         return printed;
842 }
843
844 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
845
846 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
847                                                 struct syscall_arg *arg)
848 {
849         int printed = 0, flags = arg->val;
850
851         if (flags == 0)
852                 return 0;
853
854 #define P_FLAG(n) \
855         if (flags & PERF_FLAG_##n) { \
856                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
857                 flags &= ~PERF_FLAG_##n; \
858         }
859
860         P_FLAG(FD_NO_GROUP);
861         P_FLAG(FD_OUTPUT);
862         P_FLAG(PID_CGROUP);
863         P_FLAG(FD_CLOEXEC);
864 #undef P_FLAG
865
866         if (flags)
867                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
868
869         return printed;
870 }
871
872 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
873
874 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
875                                                    struct syscall_arg *arg)
876 {
877         int printed = 0, flags = arg->val;
878
879         if (flags == 0)
880                 return scnprintf(bf, size, "NONE");
881 #define P_FLAG(n) \
882         if (flags & EFD_##n) { \
883                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
884                 flags &= ~EFD_##n; \
885         }
886
887         P_FLAG(SEMAPHORE);
888         P_FLAG(CLOEXEC);
889         P_FLAG(NONBLOCK);
890 #undef P_FLAG
891
892         if (flags)
893                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
894
895         return printed;
896 }
897
898 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
899
900 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
901                                                 struct syscall_arg *arg)
902 {
903         int printed = 0, flags = arg->val;
904
905 #define P_FLAG(n) \
906         if (flags & O_##n) { \
907                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
908                 flags &= ~O_##n; \
909         }
910
911         P_FLAG(CLOEXEC);
912         P_FLAG(NONBLOCK);
913 #undef P_FLAG
914
915         if (flags)
916                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
917
918         return printed;
919 }
920
921 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
922
923 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
924 {
925         int sig = arg->val;
926
927         switch (sig) {
928 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
929         P_SIGNUM(HUP);
930         P_SIGNUM(INT);
931         P_SIGNUM(QUIT);
932         P_SIGNUM(ILL);
933         P_SIGNUM(TRAP);
934         P_SIGNUM(ABRT);
935         P_SIGNUM(BUS);
936         P_SIGNUM(FPE);
937         P_SIGNUM(KILL);
938         P_SIGNUM(USR1);
939         P_SIGNUM(SEGV);
940         P_SIGNUM(USR2);
941         P_SIGNUM(PIPE);
942         P_SIGNUM(ALRM);
943         P_SIGNUM(TERM);
944         P_SIGNUM(CHLD);
945         P_SIGNUM(CONT);
946         P_SIGNUM(STOP);
947         P_SIGNUM(TSTP);
948         P_SIGNUM(TTIN);
949         P_SIGNUM(TTOU);
950         P_SIGNUM(URG);
951         P_SIGNUM(XCPU);
952         P_SIGNUM(XFSZ);
953         P_SIGNUM(VTALRM);
954         P_SIGNUM(PROF);
955         P_SIGNUM(WINCH);
956         P_SIGNUM(IO);
957         P_SIGNUM(PWR);
958         P_SIGNUM(SYS);
959 #ifdef SIGEMT
960         P_SIGNUM(EMT);
961 #endif
962 #ifdef SIGSTKFLT
963         P_SIGNUM(STKFLT);
964 #endif
965 #ifdef SIGSWI
966         P_SIGNUM(SWI);
967 #endif
968         default: break;
969         }
970
971         return scnprintf(bf, size, "%#x", sig);
972 }
973
974 #define SCA_SIGNUM syscall_arg__scnprintf_signum
975
976 #if defined(__i386__) || defined(__x86_64__)
977 /*
978  * FIXME: Make this available to all arches.
979  */
980 #define TCGETS          0x5401
981
982 static const char *tioctls[] = {
983         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
984         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
985         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
986         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
987         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
988         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
989         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
990         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
991         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
992         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
993         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
994         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
995         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
996         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
997         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
998 };
999
1000 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1001 #endif /* defined(__i386__) || defined(__x86_64__) */
1002
/*
 * Shorthand for a syscall_fmts[] entry whose argument number 'arg' is printed
 * through SCA_STRARRAY using the table strarray__<array>.
 *
 * 'name' is not used in the expansion; it only records the name of the
 * syscall argument at the point of use.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
1006
/*
 * Per-syscall formatting overrides.
 *
 * name:          syscall name; this is the key syscall_fmt__find() searches on.
 * alias:         alternate name for the syscall.
 *                NOTE(review): presumably tried when no tracepoint exists
 *                under 'name' -- confirm against the lookup code.
 * arg_scnprintf: optional formatter per argument position (up to 6); a NULL
 *                slot means no special formatter for that argument.
 * arg_parm:      optional per-argument parameter that goes with the formatter
 *                in the same slot (e.g. the strarray used by SCA_STRARRAY).
 * errmsg, timeout, hexret:
 *                flags describing how the return value is shown.
 *                NOTE(review): exact semantics live in the exit path, which is
 *                outside this part of the file -- confirm there.
 *
 * IMPORTANT: syscall_fmt__find() uses bsearch() with strcmp() on 'name', so
 * the entries below must stay sorted by name in strcmp() order (note that
 * '_' sorts before lowercase letters, e.g. "open_by_handle_at" < "openat").
 */
static struct syscall_fmt {
        const char *name;
        const char *alias;
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        void       *arg_parm[6];
        bool       errmsg;
        bool       timeout;
        bool       hexret;
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
                             [1] = SCA_ACCMODE,  /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "bpf",        .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "chdir",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "chmod",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "chroot",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "creat",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "dup",        .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup2",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup3",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "faccessat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fadvise64",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fallocate",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchdir",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmod",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmodat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fchown",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchownat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fdatasync",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fstatfs",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fsync",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "ftruncate", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "getdents",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "getxattr",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "inotify_add_watch",          .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
                             [1] = SCA_STRHEXARRAY, /* cmd */
                             [2] = SCA_HEX, /* arg */ },
          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
                             [2] = SCA_HEX, /* arg */ }, },
#endif
        { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "lchown",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "lgetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "listxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "llistxattr", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lremovexattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "lsxattr",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mkdir",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "mkdirat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "mknod",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "mknodat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mlockall",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* flags */ }, },
        { .name     = "mmap",       .hexret = true,
/* The standard mmap maps to old_mmap on s390x */
#if defined(__s390x__)
        .alias = "old_mmap",
#endif
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */
                             [4] = SCA_FD,        /* fd */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mq_unlink", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [3] = SCA_MREMAP_FLAGS, /* flags */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munlock",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
                             [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "perf_event_open", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_INT, /* pid */
                             [2] = SCA_INT, /* cpu */
                             [3] = SCA_FD,  /* group_fd */
                             [4] = SCA_PERF_FLAGS,  /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "preadv",     .errmsg = true, .alias = "pread",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "pwritev",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "read",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "readlink",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
        { .name     = "readlinkat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "readv",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "removexattr", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "rmdir",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "setxattr",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "shutdown",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat",
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "statfs",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "swapoff",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
        { .name     = "swapon",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
        { .name     = "symlinkat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "truncate",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "utime",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "utimensat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "utimes",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "vmsplice",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "writev",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
};
1298
1299 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1300 {
1301         const struct syscall_fmt *fmt = fmtp;
1302         return strcmp(name, fmt->name);
1303 }
1304
1305 static struct syscall_fmt *syscall_fmt__find(const char *name)
1306 {
1307         const int nmemb = ARRAY_SIZE(syscall_fmts);
1308         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1309 }
1310
/*
 * Description of one syscall as used by the tracer.
 *
 * NOTE(review): entries presumably live in trace->syscalls.table, indexed by
 * syscall id -- confirm against the code that fills the table.
 */
struct syscall {
        struct event_format *tp_format;
        /* Number of arguments; sizes the arg_scnprintf array. */
        int                 nr_args;
        /* List of argument fields, walked through ->next. */
        struct format_field *args;
        const char          *name;
        bool                is_exit;
        /* Optional formatting overrides (a syscall_fmts[] entry), may be NULL. */
        struct syscall_fmt  *fmt;
        /* Per-argument formatter array, calloc'ed by syscall__set_arg_fmts(). */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* Per-argument formatter parameters; set to fmt->arg_parm when fmt is set. */
        void                **arg_parm;
};
1321
1322 static size_t fprintf_duration(unsigned long t, FILE *fp)
1323 {
1324         double duration = (double)t / NSEC_PER_MSEC;
1325         size_t printed = fprintf(fp, "(");
1326
1327         if (duration >= 1.0)
1328                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1329         else if (duration >= 0.01)
1330                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1331         else
1332                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1333         return printed + fprintf(fp, "): ");
1334 }
1335
/**
 * Per-thread tracing state, stored as the thread's private data.
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 *                         Stored as an offset from the start of entry_str.
 */
struct thread_trace {
        u64               entry_time;
        u64               exit_time;
        bool              entry_pending;
        /* Bumped by thread__trace() each time it hands out this state. */
        unsigned long     nr_events;
        unsigned long     pfmaj, pfmin;
        /* Buffer holding the formatted syscall entry line. */
        char              *entry_str;
        double            runtime_ms;
        struct {
                unsigned long ptr;
                short int     entry_str_pos;
                bool          pending_open;
                unsigned int  namelen;
                char          *name;
        } filename;
        /*
         * fd -> pathname cache: table[fd] is a strdup'ed path or NULL, and
         * max is the highest valid index (-1 while the table is empty).
         */
        struct {
                int       max;
                char      **table;
        } paths;

        struct intlist *syscall_stats;
};
1363
1364 static struct thread_trace *thread_trace__new(void)
1365 {
1366         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1367
1368         if (ttrace)
1369                 ttrace->paths.max = -1;
1370
1371         ttrace->syscall_stats = intlist__new(NULL);
1372
1373         return ttrace;
1374 }
1375
1376 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1377 {
1378         struct thread_trace *ttrace;
1379
1380         if (thread == NULL)
1381                 goto fail;
1382
1383         if (thread__priv(thread) == NULL)
1384                 thread__set_priv(thread, thread_trace__new());
1385
1386         if (thread__priv(thread) == NULL)
1387                 goto fail;
1388
1389         ttrace = thread__priv(thread);
1390         ++ttrace->nr_events;
1391
1392         return ttrace;
1393 fail:
1394         color_fprintf(fp, PERF_COLOR_RED,
1395                       "WARNING: not enough memory, dropping samples!\n");
1396         return NULL;
1397 }
1398
/*
 * Page-fault selection bits.
 * NOTE(review): presumably OR'ed into struct trace's trace_pgfaults to pick
 * major and/or minor faults -- confirm against the option parsing code.
 */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)

/*
 * NOTE(review): presumably the capacity of thread_trace.entry_str -- confirm
 * against the place where that buffer is allocated.
 */
static const size_t trace__entry_str_size = 2048;
1403
/*
 * Global state of one 'perf trace' session.
 */
struct trace {
        /* Embedded tool; callbacks recover the struct trace via container_of(). */
        struct perf_tool        tool;
        struct {
                int             machine;
                int             open_id;
        }                       audit;
        struct {
                int             max;
                struct syscall  *table;
                struct {
                        struct perf_evsel *sys_enter,
                                          *sys_exit;
                }               events;
        } syscalls;
        struct record_opts      opts;
        struct perf_evlist      *evlist;
        /* Host machine, created by trace__symbols_init(). */
        struct machine          *host;
        struct thread           *current;
        /* Subtracted from sample timestamps before they are printed. */
        u64                     base_time;
        /* Stream that trace output and warnings are written to. */
        FILE                    *output;
        unsigned long           nr_events;
        struct strlist          *ev_qualifier;
        struct {
                size_t          nr;
                int             *entries;
        }                       ev_qualifier_ids;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        struct {
                size_t          nr;
                pid_t           *entries;
        }                       filter_pids;
        /* In milliseconds: trace__filter_duration() scales it by NSEC_PER_MSEC. */
        double                  duration_filter;
        double                  runtime_ms;
        struct {
                u64             vfs_getname,
                                /* Number of fd paths resolved by reading /proc. */
                                proc_getname;
        } stats;
        bool                    not_ev_qualifier;
        /* When false, fd paths are never looked up under /proc. */
        bool                    live;
        bool                    full_time;
        bool                    sched;
        /* When set, each line is prefixed with the tid (and comm if show_comm). */
        bool                    multiple_threads;
        bool                    summary;
        bool                    summary_only;
        bool                    show_comm;
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        bool                    force;
        /* When set, filename arguments are filled in later instead of printed as pointers. */
        bool                    vfs_getname;
        int                     trace_pgfaults;
};
1456
1457 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1458 {
1459         struct thread_trace *ttrace = thread__priv(thread);
1460
1461         if (fd > ttrace->paths.max) {
1462                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1463
1464                 if (npath == NULL)
1465                         return -1;
1466
1467                 if (ttrace->paths.max != -1) {
1468                         memset(npath + ttrace->paths.max + 1, 0,
1469                                (fd - ttrace->paths.max) * sizeof(char *));
1470                 } else {
1471                         memset(npath, 0, (fd + 1) * sizeof(char *));
1472                 }
1473
1474                 ttrace->paths.table = npath;
1475                 ttrace->paths.max   = fd;
1476         }
1477
1478         ttrace->paths.table[fd] = strdup(pathname);
1479
1480         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1481 }
1482
1483 static int thread__read_fd_path(struct thread *thread, int fd)
1484 {
1485         char linkname[PATH_MAX], pathname[PATH_MAX];
1486         struct stat st;
1487         int ret;
1488
1489         if (thread->pid_ == thread->tid) {
1490                 scnprintf(linkname, sizeof(linkname),
1491                           "/proc/%d/fd/%d", thread->pid_, fd);
1492         } else {
1493                 scnprintf(linkname, sizeof(linkname),
1494                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1495         }
1496
1497         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1498                 return -1;
1499
1500         ret = readlink(linkname, pathname, sizeof(pathname));
1501
1502         if (ret < 0 || ret > st.st_size)
1503                 return -1;
1504
1505         pathname[ret] = '\0';
1506         return trace__set_fd_pathname(thread, fd, pathname);
1507 }
1508
1509 static const char *thread__fd_path(struct thread *thread, int fd,
1510                                    struct trace *trace)
1511 {
1512         struct thread_trace *ttrace = thread__priv(thread);
1513
1514         if (ttrace == NULL)
1515                 return NULL;
1516
1517         if (fd < 0)
1518                 return NULL;
1519
1520         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1521                 if (!trace->live)
1522                         return NULL;
1523                 ++trace->stats.proc_getname;
1524                 if (thread__read_fd_path(thread, fd))
1525                         return NULL;
1526         }
1527
1528         return ttrace->paths.table[fd];
1529 }
1530
1531 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1532                                         struct syscall_arg *arg)
1533 {
1534         int fd = arg->val;
1535         size_t printed = scnprintf(bf, size, "%d", fd);
1536         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1537
1538         if (path)
1539                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1540
1541         return printed;
1542 }
1543
1544 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1545                                               struct syscall_arg *arg)
1546 {
1547         int fd = arg->val;
1548         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1549         struct thread_trace *ttrace = thread__priv(arg->thread);
1550
1551         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1552                 zfree(&ttrace->paths.table[fd]);
1553
1554         return printed;
1555 }
1556
1557 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1558                                      unsigned long ptr)
1559 {
1560         struct thread_trace *ttrace = thread__priv(thread);
1561
1562         ttrace->filename.ptr = ptr;
1563         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1564 }
1565
1566 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1567                                               struct syscall_arg *arg)
1568 {
1569         unsigned long ptr = arg->val;
1570
1571         if (!arg->trace->vfs_getname)
1572                 return scnprintf(bf, size, "%#x", ptr);
1573
1574         thread__set_filename_pos(arg->thread, bf, ptr);
1575         return 0;
1576 }
1577
1578 static bool trace__filter_duration(struct trace *trace, double t)
1579 {
1580         return t < (trace->duration_filter * NSEC_PER_MSEC);
1581 }
1582
1583 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1584 {
1585         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1586
1587         return fprintf(fp, "%10.3f ", ts);
1588 }
1589
/*
 * Flags set from the signal handler. They are volatile sig_atomic_t rather
 * than plain bool so that the stores in the handler are well defined and the
 * readers cannot have the load optimized away.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination, and remember whether the signal was
 * SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1598
1599 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1600                                         u64 duration, u64 tstamp, FILE *fp)
1601 {
1602         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1603         printed += fprintf_duration(duration, fp);
1604
1605         if (trace->multiple_threads) {
1606                 if (trace->show_comm)
1607                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1608                 printed += fprintf(fp, "%d ", thread->tid);
1609         }
1610
1611         return printed;
1612 }
1613
1614 static int trace__process_event(struct trace *trace, struct machine *machine,
1615                                 union perf_event *event, struct perf_sample *sample)
1616 {
1617         int ret = 0;
1618
1619         switch (event->header.type) {
1620         case PERF_RECORD_LOST:
1621                 color_fprintf(trace->output, PERF_COLOR_RED,
1622                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1623                 ret = machine__process_lost_event(machine, event, sample);
1624                 break;
1625         default:
1626                 ret = machine__process_event(machine, event, sample);
1627                 break;
1628         }
1629
1630         return ret;
1631 }
1632
1633 static int trace__tool_process(struct perf_tool *tool,
1634                                union perf_event *event,
1635                                struct perf_sample *sample,
1636                                struct machine *machine)
1637 {
1638         struct trace *trace = container_of(tool, struct trace, tool);
1639         return trace__process_event(trace, machine, event, sample);
1640 }
1641
1642 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1643 {
1644         int err = symbol__init(NULL);
1645
1646         if (err)
1647                 return err;
1648
1649         trace->host = machine__new_host();
1650         if (trace->host == NULL)
1651                 return -ENOMEM;
1652
1653         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1654                 return -errno;
1655
1656         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1657                                             evlist->threads, trace__tool_process, false,
1658                                             trace->opts.proc_map_timeout);
1659         if (err)
1660                 symbol__exit();
1661
1662         return err;
1663 }
1664
1665 static int syscall__set_arg_fmts(struct syscall *sc)
1666 {
1667         struct format_field *field;
1668         int idx = 0;
1669
1670         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1671         if (sc->arg_scnprintf == NULL)
1672                 return -1;
1673
1674         if (sc->fmt)
1675                 sc->arg_parm = sc->fmt->arg_parm;
1676
1677         for (field = sc->args; field; field = field->next) {
1678                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1679                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1680                 else if (field->flags & FIELD_IS_POINTER)
1681                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1682                 ++idx;
1683         }
1684
1685         return 0;
1686 }
1687
1688 static int trace__read_syscall_info(struct trace *trace, int id)
1689 {
1690         char tp_name[128];
1691         struct syscall *sc;
1692         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1693
1694         if (name == NULL)
1695                 return -1;
1696
1697         if (id > trace->syscalls.max) {
1698                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1699
1700                 if (nsyscalls == NULL)
1701                         return -1;
1702
1703                 if (trace->syscalls.max != -1) {
1704                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1705                                (id - trace->syscalls.max) * sizeof(*sc));
1706                 } else {
1707                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1708                 }
1709
1710                 trace->syscalls.table = nsyscalls;
1711                 trace->syscalls.max   = id;
1712         }
1713
1714         sc = trace->syscalls.table + id;
1715         sc->name = name;
1716
1717         sc->fmt  = syscall_fmt__find(sc->name);
1718
1719         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1720         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1721
1722         if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1723                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1724                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1725         }
1726
1727         if (IS_ERR(sc->tp_format))
1728                 return -1;
1729
1730         sc->args = sc->tp_format->format.fields;
1731         sc->nr_args = sc->tp_format->format.nr_fields;
1732         /* drop nr field - not relevant here; does not exist on older kernels */
1733         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1734                 sc->args = sc->args->next;
1735                 --sc->nr_args;
1736         }
1737
1738         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1739
1740         return syscall__set_arg_fmts(sc);
1741 }
1742
1743 static int trace__validate_ev_qualifier(struct trace *trace)
1744 {
1745         int err = 0, i;
1746         struct str_node *pos;
1747
1748         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1749         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1750                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1751
1752         if (trace->ev_qualifier_ids.entries == NULL) {
1753                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1754                        trace->output);
1755                 err = -EINVAL;
1756                 goto out;
1757         }
1758
1759         i = 0;
1760
1761         strlist__for_each(pos, trace->ev_qualifier) {
1762                 const char *sc = pos->s;
1763                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1764
1765                 if (id < 0) {
1766                         if (err == 0) {
1767                                 fputs("Error:\tInvalid syscall ", trace->output);
1768                                 err = -EINVAL;
1769                         } else {
1770                                 fputs(", ", trace->output);
1771                         }
1772
1773                         fputs(sc, trace->output);
1774                 }
1775
1776                 trace->ev_qualifier_ids.entries[i++] = id;
1777         }
1778
1779         if (err < 0) {
1780                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1781                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1782                 zfree(&trace->ev_qualifier_ids.entries);
1783                 trace->ev_qualifier_ids.nr = 0;
1784         }
1785 out:
1786         return err;
1787 }
1788
1789 /*
1790  * args is to be interpreted as a series of longs but we need to handle
1791  * 8-byte unaligned accesses. args points to raw_data within the event
1792  * and raw_data is guaranteed to be 8-byte unaligned because it is
1793  * preceded by raw_size which is a u32. So we need to copy args to a temp
1794  * variable to read it. Most notably this avoids extended load instructions
1795  * on unaligned addresses
1796  */
1797
/*
 * Format the arguments of syscall @sc into @bf (at most @size bytes).
 *
 * @args points at the raw argument area of the sample, read as consecutive
 * unsigned long values via memcpy (see the alignment note above).
 *
 * When the tracepoint format provided an argument list, each argument is
 * printed as "name: value", using the per-argument formatter when there is
 * one and "%ld" otherwise. Without an argument list, six raw values are
 * printed as "argN: value".
 *
 * Returns the number of characters accumulated in @bf.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
                                      unsigned char *args, struct trace *trace,
                                      struct thread *thread)
{
        size_t printed = 0;
        unsigned char *p;
        unsigned long val;

        if (sc->args != NULL) {
                struct format_field *field;
                /* bit tracks the current argument's position within arg.mask */
                u8 bit = 1;
                struct syscall_arg arg = {
                        .idx    = 0,
                        .mask   = 0,
                        .trace  = trace,
                        .thread = thread,
                };

                for (field = sc->args; field;
                     field = field->next, ++arg.idx, bit <<= 1) {
                        /*
                         * Skip arguments whose bit is set in arg.mask.
                         * NOTE(review): the mask starts at 0 and is not
                         * written in this function, so presumably the
                         * formatter callbacks set it through &arg - confirm.
                         */
                        if (arg.mask & bit)
                                continue;

                        /* special care for unaligned accesses */
                        p = args + sizeof(unsigned long) * arg.idx;
                        memcpy(&val, p, sizeof(val));

                        /*
                         * Suppress this argument if its value is zero and
                         * we don't have a string associated in an
                         * strarray for it.
                         */
                        if (val == 0 &&
                            !(sc->arg_scnprintf &&
                              sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
                              sc->arg_parm[arg.idx]))
                                continue;

                        /* ", " separates arguments once something was printed */
                        printed += scnprintf(bf + printed, size - printed,
                                             "%s%s: ", printed ? ", " : "", field->name);
                        if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
                                arg.val = val;
                                if (sc->arg_parm)
                                        arg.parm = sc->arg_parm[arg.idx];
                                printed += sc->arg_scnprintf[arg.idx](bf + printed,
                                                                      size - printed, &arg);
                        } else {
                                printed += scnprintf(bf + printed, size - printed,
                                                     "%ld", val);
                        }
                }
        } else {
                int i = 0;

                /* no format information: dump six raw argument slots */
                while (i < 6) {
                        /* special care for unaligned accesses */
                        p = args + sizeof(unsigned long) * i;
                        memcpy(&val, p, sizeof(val));
                        printed += scnprintf(bf + printed, size - printed,
                                             "%sarg%d: %ld",
                                             printed ? ", " : "", i, val);
                        ++i;
                }
        }

        return printed;
}
1865
/*
 * Signature of the per-event callbacks stored in evsel->handler and invoked
 * for each sample by trace__process_sample() and trace__handle_event().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1869
1870 static struct syscall *trace__syscall_info(struct trace *trace,
1871                                            struct perf_evsel *evsel, int id)
1872 {
1873
1874         if (id < 0) {
1875
1876                 /*
1877                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1878                  * before that, leaving at a higher verbosity level till that is
1879                  * explained. Reproduced with plain ftrace with:
1880                  *
1881                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1882                  * grep "NR -1 " /t/trace_pipe
1883                  *
1884                  * After generating some load on the machine.
1885                  */
1886                 if (verbose > 1) {
1887                         static u64 n;
1888                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1889                                 id, perf_evsel__name(evsel), ++n);
1890                 }
1891                 return NULL;
1892         }
1893
1894         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1895             trace__read_syscall_info(trace, id))
1896                 goto out_cant_read;
1897
1898         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1899                 goto out_cant_read;
1900
1901         return &trace->syscalls.table[id];
1902
1903 out_cant_read:
1904         if (verbose) {
1905                 fprintf(trace->output, "Problems reading syscall %d", id);
1906                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1907                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1908                 fputs(" information\n", trace->output);
1909         }
1910         return NULL;
1911 }
1912
1913 static void thread__update_stats(struct thread_trace *ttrace,
1914                                  int id, struct perf_sample *sample)
1915 {
1916         struct int_node *inode;
1917         struct stats *stats;
1918         u64 duration = 0;
1919
1920         inode = intlist__findnew(ttrace->syscall_stats, id);
1921         if (inode == NULL)
1922                 return;
1923
1924         stats = inode->priv;
1925         if (stats == NULL) {
1926                 stats = malloc(sizeof(struct stats));
1927                 if (stats == NULL)
1928                         return;
1929                 init_stats(stats);
1930                 inode->priv = stats;
1931         }
1932
1933         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1934                 duration = sample->time - ttrace->entry_time;
1935
1936         update_stats(stats, duration);
1937 }
1938
1939 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1940 {
1941         struct thread_trace *ttrace;
1942         u64 duration;
1943         size_t printed;
1944
1945         if (trace->current == NULL)
1946                 return 0;
1947
1948         ttrace = thread__priv(trace->current);
1949
1950         if (!ttrace->entry_pending)
1951                 return 0;
1952
1953         duration = sample->time - ttrace->entry_time;
1954
1955         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1956         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1957         ttrace->entry_pending = false;
1958
1959         return printed;
1960 }
1961
/*
 * Handler for syscall entry samples.
 *
 * Formats "name(args" into the thread's entry_str buffer. For syscalls
 * flagged is_exit the line is printed right away (unless a duration filter
 * or summary-only mode is active); for all others it is only marked as
 * pending.
 *
 * Returns 0 on success, -1 when the syscall information or the per-thread
 * state cannot be obtained.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
                            union perf_event *event __maybe_unused,
                            struct perf_sample *sample)
{
        char *msg;
        void *args;
        size_t printed = 0;
        struct thread *thread;
        int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
        struct syscall *sc = trace__syscall_info(trace, evsel, id);
        struct thread_trace *ttrace;

        if (sc == NULL)
                return -1;

        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
        ttrace = thread__trace(thread, trace->output);
        if (ttrace == NULL)
                goto out_put;

        args = perf_evsel__sc_tp_ptr(evsel, args, sample);

        /* the per-thread entry buffer is allocated on first use */
        if (ttrace->entry_str == NULL) {
                ttrace->entry_str = malloc(trace__entry_str_size);
                if (!ttrace->entry_str)
                        goto out_put;
        }

        /*
         * Flush a still pending entry of the current thread before this
         * thread's entry_str is overwritten below.
         */
        if (!trace->summary_only)
                trace__printf_interrupted_entry(trace, sample);

        ttrace->entry_time = sample->time;
        msg = ttrace->entry_str;
        printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

        printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
                                           args, trace, thread);

        if (sc->is_exit) {
                /* print immediately instead of waiting for an exit sample */
                if (!trace->duration_filter && !trace->summary_only) {
                        trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
                        fprintf(trace->output, "%-70s\n", ttrace->entry_str);
                }
        } else {
                ttrace->entry_pending = true;
                /* See trace__vfs_getname & trace__sys_exit */
                ttrace->filename.pending_open = false;
        }

        /* remember this thread as the current one, swapping the held reference */
        if (trace->current != thread) {
                thread__put(trace->current);
                trace->current = thread__get(thread);
        }
        err = 0;
out_put:
        /* drop the reference taken by machine__findnew_thread() */
        thread__put(thread);
        return err;
}
2020
/*
 * Handler for syscall exit samples.
 *
 * Updates the per-thread statistics when a summary was requested, records
 * the pathname for the returned fd of a successful open, and, unless
 * filtered out by duration or summary-only mode, prints the pending entry
 * (or a "continued" marker) followed by the formatted return value.
 *
 * Returns 0 on success, -1 when the syscall information or the per-thread
 * state cannot be obtained.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
                           union perf_event *event __maybe_unused,
                           struct perf_sample *sample)
{
        long ret;
        u64 duration = 0;
        struct thread *thread;
        int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
        struct syscall *sc = trace__syscall_info(trace, evsel, id);
        struct thread_trace *ttrace;

        if (sc == NULL)
                return -1;

        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
        ttrace = thread__trace(thread, trace->output);
        if (ttrace == NULL)
                goto out_put;

        if (trace->summary)
                thread__update_stats(ttrace, id, sample);

        ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

        /*
         * A successful open with a pathname captured by trace__vfs_getname():
         * associate that pathname with the returned fd.
         */
        if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
                trace__set_fd_pathname(thread, ret, ttrace->filename.name);
                ttrace->filename.pending_open = false;
                ++trace->stats.vfs_getname;
        }

        ttrace->exit_time = sample->time;

        /*
         * With a known entry time the duration filter decides; without one
         * the line is dropped whenever a duration filter is set.
         */
        if (ttrace->entry_time) {
                duration = sample->time - ttrace->entry_time;
                if (trace__filter_duration(trace, duration))
                        goto out;
        } else if (trace->duration_filter)
                goto out;

        if (trace->summary_only)
                goto out;

        trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

        if (ttrace->entry_pending) {
                fprintf(trace->output, "%-70s", ttrace->entry_str);
        } else {
                /* the entry line was already printed, mark this as its continuation */
                fprintf(trace->output, " ... [");
                color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
                fprintf(trace->output, "]: %s()", sc->name);
        }

        /* pick the return value representation requested by the fmt entry */
        if (sc->fmt == NULL) {
signed_print:
                fprintf(trace->output, ") = %ld", ret);
        } else if (ret < 0 && sc->fmt->errmsg) {
                char bf[STRERR_BUFSIZE];
                const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
                           *e = audit_errno_to_name(-ret);

                fprintf(trace->output, ") = -1 %s %s", e, emsg);
        } else if (ret == 0 && sc->fmt->timeout)
                fprintf(trace->output, ") = 0 Timeout");
        else if (sc->fmt->hexret)
                fprintf(trace->output, ") = %#lx", ret);
        else
                goto signed_print;

        fputc('\n', trace->output);
out:
        ttrace->entry_pending = false;
        err = 0;
out_put:
        /* drop the reference taken by machine__findnew_thread() */
        thread__put(thread);
        return err;
}
2097
2098 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2099                               union perf_event *event __maybe_unused,
2100                               struct perf_sample *sample)
2101 {
2102         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2103         struct thread_trace *ttrace;
2104         size_t filename_len, entry_str_len, to_move;
2105         ssize_t remaining_space;
2106         char *pos;
2107         const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2108
2109         if (!thread)
2110                 goto out;
2111
2112         ttrace = thread__priv(thread);
2113         if (!ttrace)
2114                 goto out;
2115
2116         filename_len = strlen(filename);
2117
2118         if (ttrace->filename.namelen < filename_len) {
2119                 char *f = realloc(ttrace->filename.name, filename_len + 1);
2120
2121                 if (f == NULL)
2122                                 goto out;
2123
2124                 ttrace->filename.namelen = filename_len;
2125                 ttrace->filename.name = f;
2126         }
2127
2128         strcpy(ttrace->filename.name, filename);
2129         ttrace->filename.pending_open = true;
2130
2131         if (!ttrace->filename.ptr)
2132                 goto out;
2133
2134         entry_str_len = strlen(ttrace->entry_str);
2135         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2136         if (remaining_space <= 0)
2137                 goto out;
2138
2139         if (filename_len > (size_t)remaining_space) {
2140                 filename += filename_len - remaining_space;
2141                 filename_len = remaining_space;
2142         }
2143
2144         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2145         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2146         memmove(pos + filename_len, pos, to_move);
2147         memcpy(pos, filename, filename_len);
2148
2149         ttrace->filename.ptr = 0;
2150         ttrace->filename.entry_str_pos = 0;
2151 out:
2152         return 0;
2153 }
2154
2155 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2156                                      union perf_event *event __maybe_unused,
2157                                      struct perf_sample *sample)
2158 {
2159         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2160         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2161         struct thread *thread = machine__findnew_thread(trace->host,
2162                                                         sample->pid,
2163                                                         sample->tid);
2164         struct thread_trace *ttrace = thread__trace(thread, trace->output);
2165
2166         if (ttrace == NULL)
2167                 goto out_dump;
2168
2169         ttrace->runtime_ms += runtime_ms;
2170         trace->runtime_ms += runtime_ms;
2171         thread__put(thread);
2172         return 0;
2173
2174 out_dump:
2175         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2176                evsel->name,
2177                perf_evsel__strval(evsel, sample, "comm"),
2178                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2179                runtime,
2180                perf_evsel__intval(evsel, sample, "vruntime"));
2181         thread__put(thread);
2182         return 0;
2183 }
2184
2185 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2186                                 union perf_event *event __maybe_unused,
2187                                 struct perf_sample *sample)
2188 {
2189         trace__printf_interrupted_entry(trace, sample);
2190         trace__fprintf_tstamp(trace, sample->time, trace->output);
2191
2192         if (trace->trace_syscalls)
2193                 fprintf(trace->output, "(         ): ");
2194
2195         fprintf(trace->output, "%s:", evsel->name);
2196
2197         if (evsel->tp_format) {
2198                 event_format__fprintf(evsel->tp_format, sample->cpu,
2199                                       sample->raw_data, sample->raw_size,
2200                                       trace->output);
2201         }
2202
2203         fprintf(trace->output, ")\n");
2204         return 0;
2205 }
2206
2207 static void print_location(FILE *f, struct perf_sample *sample,
2208                            struct addr_location *al,
2209                            bool print_dso, bool print_sym)
2210 {
2211
2212         if ((verbose || print_dso) && al->map)
2213                 fprintf(f, "%s@", al->map->dso->long_name);
2214
2215         if ((verbose || print_sym) && al->sym)
2216                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2217                         al->addr - al->sym->start);
2218         else if (al->map)
2219                 fprintf(f, "0x%" PRIx64, al->addr);
2220         else
2221                 fprintf(f, "0x%" PRIx64, sample->addr);
2222 }
2223
2224 static int trace__pgfault(struct trace *trace,
2225                           struct perf_evsel *evsel,
2226                           union perf_event *event,
2227                           struct perf_sample *sample)
2228 {
2229         struct thread *thread;
2230         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2231         struct addr_location al;
2232         char map_type = 'd';
2233         struct thread_trace *ttrace;
2234         int err = -1;
2235
2236         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2237         ttrace = thread__trace(thread, trace->output);
2238         if (ttrace == NULL)
2239                 goto out_put;
2240
2241         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2242                 ttrace->pfmaj++;
2243         else
2244                 ttrace->pfmin++;
2245
2246         if (trace->summary_only)
2247                 goto out;
2248
2249         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2250                               sample->ip, &al);
2251
2252         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2253
2254         fprintf(trace->output, "%sfault [",
2255                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2256                 "maj" : "min");
2257
2258         print_location(trace->output, sample, &al, false, true);
2259
2260         fprintf(trace->output, "] => ");
2261
2262         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2263                                    sample->addr, &al);
2264
2265         if (!al.map) {
2266                 thread__find_addr_location(thread, cpumode,
2267                                            MAP__FUNCTION, sample->addr, &al);
2268
2269                 if (al.map)
2270                         map_type = 'x';
2271                 else
2272                         map_type = '?';
2273         }
2274
2275         print_location(trace->output, sample, &al, true, false);
2276
2277         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2278 out:
2279         err = 0;
2280 out_put:
2281         thread__put(thread);
2282         return err;
2283 }
2284
2285 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2286 {
2287         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2288             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2289                 return false;
2290
2291         if (trace->pid_list || trace->tid_list)
2292                 return true;
2293
2294         return false;
2295 }
2296
2297 static int trace__process_sample(struct perf_tool *tool,
2298                                  union perf_event *event,
2299                                  struct perf_sample *sample,
2300                                  struct perf_evsel *evsel,
2301                                  struct machine *machine __maybe_unused)
2302 {
2303         struct trace *trace = container_of(tool, struct trace, tool);
2304         int err = 0;
2305
2306         tracepoint_handler handler = evsel->handler;
2307
2308         if (skip_sample(trace, sample))
2309                 return 0;
2310
2311         if (!trace->full_time && trace->base_time == 0)
2312                 trace->base_time = sample->time;
2313
2314         if (handler) {
2315                 ++trace->nr_events;
2316                 handler(trace, evsel, event, sample);
2317         }
2318
2319         return err;
2320 }
2321
2322 static int parse_target_str(struct trace *trace)
2323 {
2324         if (trace->opts.target.pid) {
2325                 trace->pid_list = intlist__new(trace->opts.target.pid);
2326                 if (trace->pid_list == NULL) {
2327                         pr_err("Error parsing process id string\n");
2328                         return -EINVAL;
2329                 }
2330         }
2331
2332         if (trace->opts.target.tid) {
2333                 trace->tid_list = intlist__new(trace->opts.target.tid);
2334                 if (trace->tid_list == NULL) {
2335                         pr_err("Error parsing thread id string\n");
2336                         return -EINVAL;
2337                 }
2338         }
2339
2340         return 0;
2341 }
2342
2343 static int trace__record(struct trace *trace, int argc, const char **argv)
2344 {
2345         unsigned int rec_argc, i, j;
2346         const char **rec_argv;
2347         const char * const record_args[] = {
2348                 "record",
2349                 "-R",
2350                 "-m", "1024",
2351                 "-c", "1",
2352         };
2353
2354         const char * const sc_args[] = { "-e", };
2355         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2356         const char * const majpf_args[] = { "-e", "major-faults" };
2357         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2358         const char * const minpf_args[] = { "-e", "minor-faults" };
2359         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2360
2361         /* +1 is for the event string below */
2362         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2363                 majpf_args_nr + minpf_args_nr + argc;
2364         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2365
2366         if (rec_argv == NULL)
2367                 return -ENOMEM;
2368
2369         j = 0;
2370         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2371                 rec_argv[j++] = record_args[i];
2372
2373         if (trace->trace_syscalls) {
2374                 for (i = 0; i < sc_args_nr; i++)
2375                         rec_argv[j++] = sc_args[i];
2376
2377                 /* event string may be different for older kernels - e.g., RHEL6 */
2378                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2379                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2380                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2381                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2382                 else {
2383                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2384                         return -1;
2385                 }
2386         }
2387
2388         if (trace->trace_pgfaults & TRACE_PFMAJ)
2389                 for (i = 0; i < majpf_args_nr; i++)
2390                         rec_argv[j++] = majpf_args[i];
2391
2392         if (trace->trace_pgfaults & TRACE_PFMIN)
2393                 for (i = 0; i < minpf_args_nr; i++)
2394                         rec_argv[j++] = minpf_args[i];
2395
2396         for (i = 0; i < (unsigned int)argc; i++)
2397                 rec_argv[j++] = argv[i];
2398
2399         return cmd_record(j, rec_argv, NULL);
2400 }
2401
2402 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2403
2404 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2405 {
2406         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2407
2408         if (IS_ERR(evsel))
2409                 return false;
2410
2411         if (perf_evsel__field(evsel, "pathname") == NULL) {
2412                 perf_evsel__delete(evsel);
2413                 return false;
2414         }
2415
2416         evsel->handler = trace__vfs_getname;
2417         perf_evlist__add(evlist, evsel);
2418         return true;
2419 }
2420
2421 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2422                                     u64 config)
2423 {
2424         struct perf_evsel *evsel;
2425         struct perf_event_attr attr = {
2426                 .type = PERF_TYPE_SOFTWARE,
2427                 .mmap_data = 1,
2428         };
2429
2430         attr.config = config;
2431         attr.sample_period = 1;
2432
2433         event_attr_init(&attr);
2434
2435         evsel = perf_evsel__new(&attr);
2436         if (!evsel)
2437                 return -ENOMEM;
2438
2439         evsel->handler = trace__pgfault;
2440         perf_evlist__add(evlist, evsel);
2441
2442         return 0;
2443 }
2444
2445 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2446 {
2447         const u32 type = event->header.type;
2448         struct perf_evsel *evsel;
2449
2450         if (!trace->full_time && trace->base_time == 0)
2451                 trace->base_time = sample->time;
2452
2453         if (type != PERF_RECORD_SAMPLE) {
2454                 trace__process_event(trace, trace->host, event, sample);
2455                 return;
2456         }
2457
2458         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2459         if (evsel == NULL) {
2460                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2461                 return;
2462         }
2463
2464         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2465             sample->raw_data == NULL) {
2466                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2467                        perf_evsel__name(evsel), sample->tid,
2468                        sample->cpu, sample->raw_size);
2469         } else {
2470                 tracepoint_handler handler = evsel->handler;
2471                 handler(trace, evsel, event, sample);
2472         }
2473 }
2474
2475 static int trace__add_syscall_newtp(struct trace *trace)
2476 {
2477         int ret = -1;
2478         struct perf_evlist *evlist = trace->evlist;
2479         struct perf_evsel *sys_enter, *sys_exit;
2480
2481         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2482         if (sys_enter == NULL)
2483                 goto out;
2484
2485         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2486                 goto out_delete_sys_enter;
2487
2488         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2489         if (sys_exit == NULL)
2490                 goto out_delete_sys_enter;
2491
2492         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2493                 goto out_delete_sys_exit;
2494
2495         perf_evlist__add(evlist, sys_enter);
2496         perf_evlist__add(evlist, sys_exit);
2497
2498         trace->syscalls.events.sys_enter = sys_enter;
2499         trace->syscalls.events.sys_exit  = sys_exit;
2500
2501         ret = 0;
2502 out:
2503         return ret;
2504
2505 out_delete_sys_exit:
2506         perf_evsel__delete_priv(sys_exit);
2507 out_delete_sys_enter:
2508         perf_evsel__delete_priv(sys_enter);
2509         goto out;
2510 }
2511
2512 static int trace__set_ev_qualifier_filter(struct trace *trace)
2513 {
2514         int err = -1;
2515         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2516                                                 trace->ev_qualifier_ids.nr,
2517                                                 trace->ev_qualifier_ids.entries);
2518
2519         if (filter == NULL)
2520                 goto out_enomem;
2521
2522         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2523                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2524
2525         free(filter);
2526 out:
2527         return err;
2528 out_enomem:
2529         errno = ENOMEM;
2530         goto out;
2531 }
2532
2533 static int trace__run(struct trace *trace, int argc, const char **argv)
2534 {
2535         struct perf_evlist *evlist = trace->evlist;
2536         struct perf_evsel *evsel;
2537         int err = -1, i;
2538         unsigned long before;
2539         const bool forks = argc > 0;
2540         bool draining = false;
2541
2542         trace->live = true;
2543
2544         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2545                 goto out_error_raw_syscalls;
2546
2547         if (trace->trace_syscalls)
2548                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2549
2550         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2551             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2552                 goto out_error_mem;
2553         }
2554
2555         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2556             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2557                 goto out_error_mem;
2558
2559         if (trace->sched &&
2560             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2561                                    trace__sched_stat_runtime))
2562                 goto out_error_sched_stat_runtime;
2563
2564         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2565         if (err < 0) {
2566                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2567                 goto out_delete_evlist;
2568         }
2569
2570         err = trace__symbols_init(trace, evlist);
2571         if (err < 0) {
2572                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2573                 goto out_delete_evlist;
2574         }
2575
2576         perf_evlist__config(evlist, &trace->opts);
2577
2578         signal(SIGCHLD, sig_handler);
2579         signal(SIGINT, sig_handler);
2580
2581         if (forks) {
2582                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2583                                                     argv, false, NULL);
2584                 if (err < 0) {
2585                         fprintf(trace->output, "Couldn't run the workload!\n");
2586                         goto out_delete_evlist;
2587                 }
2588         }
2589
2590         err = perf_evlist__open(evlist);
2591         if (err < 0)
2592                 goto out_error_open;
2593
2594         /*
2595          * Better not use !target__has_task() here because we need to cover the
2596          * case where no threads were specified in the command line, but a
2597          * workload was, and in that case we will fill in the thread_map when
2598          * we fork the workload in perf_evlist__prepare_workload.
2599          */
2600         if (trace->filter_pids.nr > 0)
2601                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2602         else if (thread_map__pid(evlist->threads, 0) == -1)
2603                 err = perf_evlist__set_filter_pid(evlist, getpid());
2604
2605         if (err < 0)
2606                 goto out_error_mem;
2607
2608         if (trace->ev_qualifier_ids.nr > 0) {
2609                 err = trace__set_ev_qualifier_filter(trace);
2610                 if (err < 0)
2611                         goto out_errno;
2612
2613                 pr_debug("event qualifier tracepoint filter: %s\n",
2614                          trace->syscalls.events.sys_exit->filter);
2615         }
2616
2617         err = perf_evlist__apply_filters(evlist, &evsel);
2618         if (err < 0)
2619                 goto out_error_apply_filters;
2620
2621         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2622         if (err < 0)
2623                 goto out_error_mmap;
2624
2625         if (!target__none(&trace->opts.target))
2626                 perf_evlist__enable(evlist);
2627
2628         if (forks)
2629                 perf_evlist__start_workload(evlist);
2630
2631         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2632                                   evlist->threads->nr > 1 ||
2633                                   perf_evlist__first(evlist)->attr.inherit;
2634 again:
2635         before = trace->nr_events;
2636
2637         for (i = 0; i < evlist->nr_mmaps; i++) {
2638                 union perf_event *event;
2639
2640                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2641                         struct perf_sample sample;
2642
2643                         ++trace->nr_events;
2644
2645                         err = perf_evlist__parse_sample(evlist, event, &sample);
2646                         if (err) {
2647                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2648                                 goto next_event;
2649                         }
2650
2651                         trace__handle_event(trace, event, &sample);
2652 next_event:
2653                         perf_evlist__mmap_consume(evlist, i);
2654
2655                         if (interrupted)
2656                                 goto out_disable;
2657
2658                         if (done && !draining) {
2659                                 perf_evlist__disable(evlist);
2660                                 draining = true;
2661                         }
2662                 }
2663         }
2664
2665         if (trace->nr_events == before) {
2666                 int timeout = done ? 100 : -1;
2667
2668                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2669                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2670                                 draining = true;
2671
2672                         goto again;
2673                 }
2674         } else {
2675                 goto again;
2676         }
2677
2678 out_disable:
2679         thread__zput(trace->current);
2680
2681         perf_evlist__disable(evlist);
2682
2683         if (!err) {
2684                 if (trace->summary)
2685                         trace__fprintf_thread_summary(trace, trace->output);
2686
2687                 if (trace->show_tool_stats) {
2688                         fprintf(trace->output, "Stats:\n "
2689                                                " vfs_getname : %" PRIu64 "\n"
2690                                                " proc_getname: %" PRIu64 "\n",
2691                                 trace->stats.vfs_getname,
2692                                 trace->stats.proc_getname);
2693                 }
2694         }
2695
2696 out_delete_evlist:
2697         perf_evlist__delete(evlist);
2698         trace->evlist = NULL;
2699         trace->live = false;
2700         return err;
2701 {
2702         char errbuf[BUFSIZ];
2703
2704 out_error_sched_stat_runtime:
2705         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2706         goto out_error;
2707
2708 out_error_raw_syscalls:
2709         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2710         goto out_error;
2711
2712 out_error_mmap:
2713         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2714         goto out_error;
2715
2716 out_error_open:
2717         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2718
2719 out_error:
2720         fprintf(trace->output, "%s\n", errbuf);
2721         goto out_delete_evlist;
2722
2723 out_error_apply_filters:
2724         fprintf(trace->output,
2725                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2726                 evsel->filter, perf_evsel__name(evsel), errno,
2727                 strerror_r(errno, errbuf, sizeof(errbuf)));
2728         goto out_delete_evlist;
2729 }
2730 out_error_mem:
2731         fprintf(trace->output, "Not enough memory to run!\n");
2732         goto out_delete_evlist;
2733
2734 out_errno:
2735         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2736         goto out_delete_evlist;
2737 }
2738
2739 static int trace__replay(struct trace *trace)
2740 {
2741         const struct perf_evsel_str_handler handlers[] = {
2742                 { "probe:vfs_getname",       trace__vfs_getname, },
2743         };
2744         struct perf_data_file file = {
2745                 .path  = input_name,
2746                 .mode  = PERF_DATA_MODE_READ,
2747                 .force = trace->force,
2748         };
2749         struct perf_session *session;
2750         struct perf_evsel *evsel;
2751         int err = -1;
2752
2753         trace->tool.sample        = trace__process_sample;
2754         trace->tool.mmap          = perf_event__process_mmap;
2755         trace->tool.mmap2         = perf_event__process_mmap2;
2756         trace->tool.comm          = perf_event__process_comm;
2757         trace->tool.exit          = perf_event__process_exit;
2758         trace->tool.fork          = perf_event__process_fork;
2759         trace->tool.attr          = perf_event__process_attr;
2760         trace->tool.tracing_data = perf_event__process_tracing_data;
2761         trace->tool.build_id      = perf_event__process_build_id;
2762
2763         trace->tool.ordered_events = true;
2764         trace->tool.ordering_requires_timestamps = true;
2765
2766         /* add tid to output */
2767         trace->multiple_threads = true;
2768
2769         session = perf_session__new(&file, false, &trace->tool);
2770         if (session == NULL)
2771                 return -1;
2772
2773         if (symbol__init(&session->header.env) < 0)
2774                 goto out;
2775
2776         trace->host = &session->machines.host;
2777
2778         err = perf_session__set_tracepoints_handlers(session, handlers);
2779         if (err)
2780                 goto out;
2781
2782         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2783                                                      "raw_syscalls:sys_enter");
2784         /* older kernels have syscalls tp versus raw_syscalls */
2785         if (evsel == NULL)
2786                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2787                                                              "syscalls:sys_enter");
2788
2789         if (evsel &&
2790             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2791             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2792                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2793                 goto out;
2794         }
2795
2796         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2797                                                      "raw_syscalls:sys_exit");
2798         if (evsel == NULL)
2799                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2800                                                              "syscalls:sys_exit");
2801         if (evsel &&
2802             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2803             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2804                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2805                 goto out;
2806         }
2807
2808         evlist__for_each(session->evlist, evsel) {
2809                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2810                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2811                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2812                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2813                         evsel->handler = trace__pgfault;
2814         }
2815
2816         err = parse_target_str(trace);
2817         if (err != 0)
2818                 goto out;
2819
2820         setup_pager();
2821
2822         err = perf_session__process_events(session);
2823         if (err)
2824                 pr_err("Failed to process events, error %d", err);
2825
2826         else if (trace->summary)
2827                 trace__fprintf_thread_summary(trace, trace->output);
2828
2829 out:
2830         perf_session__delete(session);
2831
2832         return err;
2833 }
2834
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2843
2844 static size_t thread__dump_stats(struct thread_trace *ttrace,
2845                                  struct trace *trace, FILE *fp)
2846 {
2847         struct stats *stats;
2848         size_t printed = 0;
2849         struct syscall *sc;
2850         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2851
2852         if (inode == NULL)
2853                 return 0;
2854
2855         printed += fprintf(fp, "\n");
2856
2857         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2858         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2859         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2860
2861         /* each int_node is a syscall */
2862         while (inode) {
2863                 stats = inode->priv;
2864                 if (stats) {
2865                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2866                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2867                         double avg = avg_stats(stats);
2868                         double pct;
2869                         u64 n = (u64) stats->n;
2870
2871                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2872                         avg /= NSEC_PER_MSEC;
2873
2874                         sc = &trace->syscalls.table[inode->i];
2875                         printed += fprintf(fp, "   %-15s", sc->name);
2876                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2877                                            n, avg * n, min, avg);
2878                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2879                 }
2880
2881                 inode = intlist__next(inode);
2882         }
2883
2884         printed += fprintf(fp, "\n\n");
2885
2886         return printed;
2887 }
2888
/*
 * Context passed through machine__for_each_thread() to the per-thread
 * summary callback.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarised */
	size_t printed;		/* running total of the fprintf() results */
};
2895
2896 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2897 {
2898         struct summary_data *data = priv;
2899         FILE *fp = data->fp;
2900         size_t printed = data->printed;
2901         struct trace *trace = data->trace;
2902         struct thread_trace *ttrace = thread__priv(thread);
2903         double ratio;
2904
2905         if (ttrace == NULL)
2906                 return 0;
2907
2908         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2909
2910         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2911         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2912         printed += fprintf(fp, "%.1f%%", ratio);
2913         if (ttrace->pfmaj)
2914                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2915         if (ttrace->pfmin)
2916                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2917         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2918         printed += thread__dump_stats(ttrace, trace, fp);
2919
2920         data->printed += printed;
2921
2922         return 0;
2923 }
2924
2925 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2926 {
2927         struct summary_data data = {
2928                 .fp = fp,
2929                 .trace = trace
2930         };
2931         data.printed = trace__fprintf_threads_header(fp);
2932
2933         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2934
2935         return data.printed;
2936 }
2937
2938 static int trace__set_duration(const struct option *opt, const char *str,
2939                                int unset __maybe_unused)
2940 {
2941         struct trace *trace = opt->value;
2942
2943         trace->duration_filter = atof(str);
2944         return 0;
2945 }
2946
2947 static int trace__set_filter_pids(const struct option *opt, const char *str,
2948                                   int unset __maybe_unused)
2949 {
2950         int ret = -1;
2951         size_t i;
2952         struct trace *trace = opt->value;
2953         /*
2954          * FIXME: introduce a intarray class, plain parse csv and create a
2955          * { int nr, int entries[] } struct...
2956          */
2957         struct intlist *list = intlist__new(str);
2958
2959         if (list == NULL)
2960                 return -1;
2961
2962         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2963         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2964
2965         if (trace->filter_pids.entries == NULL)
2966                 goto out;
2967
2968         trace->filter_pids.entries[0] = getpid();
2969
2970         for (i = 1; i < trace->filter_pids.nr; ++i)
2971                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2972
2973         intlist__delete(list);
2974         ret = 0;
2975 out:
2976         return ret;
2977 }
2978
2979 static int trace__open_output(struct trace *trace, const char *filename)
2980 {
2981         struct stat st;
2982
2983         if (!stat(filename, &st) && st.st_size) {
2984                 char oldname[PATH_MAX];
2985
2986                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2987                 unlink(oldname);
2988                 rename(filename, oldname);
2989         }
2990
2991         trace->output = fopen(filename, "w");
2992
2993         return trace->output == NULL ? -errno : 0;
2994 }
2995
2996 static int parse_pagefaults(const struct option *opt, const char *str,
2997                             int unset __maybe_unused)
2998 {
2999         int *trace_pgfaults = opt->value;
3000
3001         if (strcmp(str, "all") == 0)
3002                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3003         else if (strcmp(str, "maj") == 0)
3004                 *trace_pgfaults |= TRACE_PFMAJ;
3005         else if (strcmp(str, "min") == 0)
3006                 *trace_pgfaults |= TRACE_PFMIN;
3007         else
3008                 return -1;
3009
3010         return 0;
3011 }
3012
3013 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3014 {
3015         struct perf_evsel *evsel;
3016
3017         evlist__for_each(evlist, evsel)
3018                 evsel->handler = handler;
3019 }
3020
3021 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3022 {
3023         const char *trace_usage[] = {
3024                 "perf trace [<options>] [<command>]",
3025                 "perf trace [<options>] -- <command> [<options>]",
3026                 "perf trace record [<options>] [<command>]",
3027                 "perf trace record [<options>] -- <command> [<options>]",
3028                 NULL
3029         };
3030         struct trace trace = {
3031                 .audit = {
3032                         .machine = audit_detect_machine(),
3033                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
3034                 },
3035                 .syscalls = {
3036                         . max = -1,
3037                 },
3038                 .opts = {
3039                         .target = {
3040                                 .uid       = UINT_MAX,
3041                                 .uses_mmap = true,
3042                         },
3043                         .user_freq     = UINT_MAX,
3044                         .user_interval = ULLONG_MAX,
3045                         .no_buffering  = true,
3046                         .mmap_pages    = UINT_MAX,
3047                         .proc_map_timeout  = 500,
3048                 },
3049                 .output = stderr,
3050                 .show_comm = true,
3051                 .trace_syscalls = true,
3052         };
3053         const char *output_name = NULL;
3054         const char *ev_qualifier_str = NULL;
3055         const struct option trace_options[] = {
3056         OPT_CALLBACK(0, "event", &trace.evlist, "event",
3057                      "event selector. use 'perf list' to list available events",
3058                      parse_events_option),
3059         OPT_BOOLEAN(0, "comm", &trace.show_comm,
3060                     "show the thread COMM next to its id"),
3061         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3062         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3063         OPT_STRING('o', "output", &output_name, "file", "output file name"),
3064         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3065         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3066                     "trace events on existing process id"),
3067         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3068                     "trace events on existing thread id"),
3069         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3070                      "pids to filter (by the kernel)", trace__set_filter_pids),
3071         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3072                     "system-wide collection from all CPUs"),
3073         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3074                     "list of cpus to monitor"),
3075         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3076                     "child tasks do not inherit counters"),
3077         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3078                      "number of mmap data pages",
3079                      perf_evlist__parse_mmap_pages),
3080         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3081                    "user to profile"),
3082         OPT_CALLBACK(0, "duration", &trace, "float",
3083                      "show only events with duration > N.M ms",
3084                      trace__set_duration),
3085         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3086         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3087         OPT_BOOLEAN('T', "time", &trace.full_time,
3088                     "Show full timestamp, not time relative to first start"),
3089         OPT_BOOLEAN('s', "summary", &trace.summary_only,
3090                     "Show only syscall summary with statistics"),
3091         OPT_BOOLEAN('S', "with-summary", &trace.summary,
3092                     "Show all syscalls and summary with statistics"),
3093         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3094                      "Trace pagefaults", parse_pagefaults, "maj"),
3095         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3096         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3097         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3098                         "per thread proc mmap processing timeout in ms"),
3099         OPT_END()
3100         };
3101         const char * const trace_subcommands[] = { "record", NULL };
3102         int err;
3103         char bf[BUFSIZ];
3104
3105         signal(SIGSEGV, sighandler_dump_stack);
3106         signal(SIGFPE, sighandler_dump_stack);
3107
3108         trace.evlist = perf_evlist__new();
3109
3110         if (trace.evlist == NULL) {
3111                 pr_err("Not enough memory to run!\n");
3112                 err = -ENOMEM;
3113                 goto out;
3114         }
3115
3116         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3117                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3118
3119         if (trace.trace_pgfaults) {
3120                 trace.opts.sample_address = true;
3121                 trace.opts.sample_time = true;
3122         }
3123
3124         if (trace.evlist->nr_entries > 0)
3125                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3126
3127         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3128                 return trace__record(&trace, argc-1, &argv[1]);
3129
3130         /* summary_only implies summary option, but don't overwrite summary if set */
3131         if (trace.summary_only)
3132                 trace.summary = trace.summary_only;
3133
3134         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3135             trace.evlist->nr_entries == 0 /* Was --events used? */) {
3136                 pr_err("Please specify something to trace.\n");
3137                 return -1;
3138         }
3139
3140         if (output_name != NULL) {
3141                 err = trace__open_output(&trace, output_name);
3142                 if (err < 0) {
3143                         perror("failed to create output file");
3144                         goto out;
3145                 }
3146         }
3147
3148         if (ev_qualifier_str != NULL) {
3149                 const char *s = ev_qualifier_str;
3150                 struct strlist_config slist_config = {
3151                         .dirname = system_path(STRACE_GROUPS_DIR),
3152                 };
3153
3154                 trace.not_ev_qualifier = *s == '!';
3155                 if (trace.not_ev_qualifier)
3156                         ++s;
3157                 trace.ev_qualifier = strlist__new(s, &slist_config);
3158                 if (trace.ev_qualifier == NULL) {
3159                         fputs("Not enough memory to parse event qualifier",
3160                               trace.output);
3161                         err = -ENOMEM;
3162                         goto out_close;
3163                 }
3164
3165                 err = trace__validate_ev_qualifier(&trace);
3166                 if (err)
3167                         goto out_close;
3168         }
3169
3170         err = target__validate(&trace.opts.target);
3171         if (err) {
3172                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3173                 fprintf(trace.output, "%s", bf);
3174                 goto out_close;
3175         }
3176
3177         err = target__parse_uid(&trace.opts.target);
3178         if (err) {
3179                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3180                 fprintf(trace.output, "%s", bf);
3181                 goto out_close;
3182         }
3183
3184         if (!argc && target__none(&trace.opts.target))
3185                 trace.opts.target.system_wide = true;
3186
3187         if (input_name)
3188                 err = trace__replay(&trace);
3189         else
3190                 err = trace__run(&trace, argc, argv);
3191
3192 out_close:
3193         if (output_name != NULL)
3194                 fclose(trace.output);
3195 out:
3196         return err;
3197 }