004f5f99e943925607110f8df145060230c9d185
tomoyo/tomoyo-test1.git: kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will look into the ring-buffer to count the
67  * entries inserted during the selftest, although concurrent
68  * insertions into the ring-buffer, such as trace_printk(), could occur
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78
79 void __init disable_tracing_selftest(const char *reason)
80 {
81         if (!tracing_selftest_disabled) {
82                 tracing_selftest_disabled = true;
83                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84         }
85 }
86 #else
87 #define tracing_selftest_running        0
88 #define tracing_selftest_disabled       0
89 #endif
90
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99         { }
100 };
101
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105         return 0;
106 }
107
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will turn to zero if the initialization
118  * of the tracer is successful. But that is the only place that sets
119  * this back to zero.
120  */
121 static int tracing_disabled = 1;
122
123 cpumask_var_t __read_mostly     tracing_buffer_mask;
124
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputting them to a
132  * serial console.
133  *
134  * It is off by default, but you can enable it either by specifying
135  * "ftrace_dump_on_oops" on the kernel command line, or by setting
136  * /proc/sys/kernel/ftrace_dump_on_oops.
137  * Set it to 1 to dump the buffers of all CPUs.
138  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
139  */
140
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149         struct module                   *mod;
150         unsigned long                   length;
151 };
152
153 union trace_eval_map_item;
154
155 struct trace_eval_map_tail {
156         /*
157          * "end" is first and points to NULL as it must be different
158          * than "mod" or "eval_string"
159          */
160         union trace_eval_map_item       *next;
161         const char                      *end;   /* points to NULL */
162 };
163
164 static DEFINE_MUTEX(trace_eval_mutex);
165
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174         struct trace_eval_map           map;
175         struct trace_eval_map_head      head;
176         struct trace_eval_map_tail      tail;
177 };
178
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184                                    struct trace_buffer *buffer,
185                                    unsigned int trace_ctx);
186
187 #define MAX_TRACER_SIZE         100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199
200 static int __init set_cmdline_ftrace(char *str)
201 {
202         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203         default_bootup_tracer = bootup_tracer_buf;
204         /* We are using ftrace early, expand it */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212         if (*str++ != '=' || !*str || !strcmp("1", str)) {
213                 ftrace_dump_on_oops = DUMP_ALL;
214                 return 1;
215         }
216
217         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218                 ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225
226 static int __init stop_trace_on_warning(char *str)
227 {
228         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229                 __disable_trace_on_warning = 1;
230         return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233
234 static int __init boot_alloc_snapshot(char *str)
235 {
236         char *slot = boot_snapshot_info + boot_snapshot_index;
237         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238         int ret;
239
240         if (str[0] == '=') {
241                 str++;
242                 if (strlen(str) >= left)
243                         return -1;
244
245                 ret = snprintf(slot, left, "%s\t", str);
246                 boot_snapshot_index += ret;
247         } else {
248                 allocate_snapshot = true;
249                 /* We also need the main ring buffer expanded */
250                 ring_buffer_expanded = true;
251         }
252         return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255
256
257 static int __init boot_snapshot(char *str)
258 {
259         snapshot_at_boot = true;
260         boot_alloc_snapshot(str);
261         return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264
265
266 static int __init boot_instance(char *str)
267 {
268         char *slot = boot_instance_info + boot_instance_index;
269         int left = sizeof(boot_instance_info) - boot_instance_index;
270         int ret;
271
272         if (strlen(str) >= left)
273                 return -1;
274
275         ret = snprintf(slot, left, "%s\t", str);
276         boot_instance_index += ret;
277
278         return 1;
279 }
280 __setup("trace_instance=", boot_instance);
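
/*
 * For example, booting with trace_instance=foo (where "foo" is a made-up
 * instance name) stores "foo" in boot_instance_info above; the instance
 * itself is created later during tracing initialization.
 */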
281
282
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284
285 static int __init set_trace_boot_options(char *str)
286 {
287         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288         return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294
295 static int __init set_trace_boot_clock(char *str)
296 {
297         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298         trace_boot_clock = trace_boot_clock_buf;
299         return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302
303 static int __init set_tracepoint_printk(char *str)
304 {
305         /* Ignore the "tp_printk_stop_on_boot" param */
306         if (*str == '_')
307                 return 0;
308
309         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310                 tracepoint_printk = 1;
311         return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317         tracepoint_printk_stop_on_boot = true;
318         return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321
322 unsigned long long ns2usecs(u64 nsec)
323 {
324         nsec += 500;
325         do_div(nsec, 1000);
326         return nsec;
327 }
328
329 static void
330 trace_process_export(struct trace_export *export,
331                struct ring_buffer_event *event, int flag)
332 {
333         struct trace_entry *entry;
334         unsigned int size = 0;
335
336         if (export->flags & flag) {
337                 entry = ring_buffer_event_data(event);
338                 size = ring_buffer_event_length(event);
339                 export->write(export, entry, size);
340         }
341 }
342
343 static DEFINE_MUTEX(ftrace_export_lock);
344
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353         if (export->flags & TRACE_EXPORT_FUNCTION)
354                 static_branch_inc(&trace_function_exports_enabled);
355
356         if (export->flags & TRACE_EXPORT_EVENT)
357                 static_branch_inc(&trace_event_exports_enabled);
358
359         if (export->flags & TRACE_EXPORT_MARKER)
360                 static_branch_inc(&trace_marker_exports_enabled);
361 }
362
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365         if (export->flags & TRACE_EXPORT_FUNCTION)
366                 static_branch_dec(&trace_function_exports_enabled);
367
368         if (export->flags & TRACE_EXPORT_EVENT)
369                 static_branch_dec(&trace_event_exports_enabled);
370
371         if (export->flags & TRACE_EXPORT_MARKER)
372                 static_branch_dec(&trace_marker_exports_enabled);
373 }
374
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377         struct trace_export *export;
378
379         preempt_disable_notrace();
380
381         export = rcu_dereference_raw_check(ftrace_exports_list);
382         while (export) {
383                 trace_process_export(export, event, flag);
384                 export = rcu_dereference_raw_check(export->next);
385         }
386
387         preempt_enable_notrace();
388 }
389
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393         rcu_assign_pointer(export->next, *list);
394         /*
395          * We are adding export to the list, but another
396          * CPU might be walking that list. We need to make sure
397          * the export->next pointer is valid before another CPU sees
398          * the export pointer included in the list.
399          */
400         rcu_assign_pointer(*list, export);
401 }
402
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406         struct trace_export **p;
407
408         for (p = list; *p != NULL; p = &(*p)->next)
409                 if (*p == export)
410                         break;
411
412         if (*p != export)
413                 return -1;
414
415         rcu_assign_pointer(*p, (*p)->next);
416
417         return 0;
418 }
419
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423         ftrace_exports_enable(export);
424
425         add_trace_export(list, export);
426 }
427
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431         int ret;
432
433         ret = rm_trace_export(list, export);
434         ftrace_exports_disable(export);
435
436         return ret;
437 }
438
439 int register_ftrace_export(struct trace_export *export)
440 {
441         if (WARN_ON_ONCE(!export->write))
442                 return -1;
443
444         mutex_lock(&ftrace_export_lock);
445
446         add_ftrace_export(&ftrace_exports_list, export);
447
448         mutex_unlock(&ftrace_export_lock);
449
450         return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456         int ret;
457
458         mutex_lock(&ftrace_export_lock);
459
460         ret = rm_ftrace_export(&ftrace_exports_list, export);
461
462         mutex_unlock(&ftrace_export_lock);
463
464         return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
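
/*
 * Illustrative sketch (not part of the original flow): a minimal
 * trace_export user. The callback prototype is assumed to match the
 * write() member of struct trace_export in <linux/trace.h>; all
 * "example_*" names are hypothetical.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Forward the raw trace entry to some external sink. */
	pr_info("exporting %u bytes of trace data\n", size);
}

static struct trace_export example_export __maybe_unused = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

/*
 * A module would typically call register_ftrace_export(&example_export)
 * from its init path and unregister_ftrace_export(&example_export) on
 * exit.
 */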
467
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS                                             \
470         (FUNCTION_DEFAULT_FLAGS |                                       \
471          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
472          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
473          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
474          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
475          TRACE_ITER_HASH_PTR)
476
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
479                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490         .trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492
493 LIST_HEAD(ftrace_trace_arrays);
494
495 int trace_array_get(struct trace_array *this_tr)
496 {
497         struct trace_array *tr;
498         int ret = -ENODEV;
499
500         mutex_lock(&trace_types_lock);
501         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502                 if (tr == this_tr) {
503                         tr->ref++;
504                         ret = 0;
505                         break;
506                 }
507         }
508         mutex_unlock(&trace_types_lock);
509
510         return ret;
511 }
512
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515         WARN_ON(!this_tr->ref);
516         this_tr->ref--;
517 }
518
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530         if (!this_tr)
531                 return;
532
533         mutex_lock(&trace_types_lock);
534         __trace_array_put(this_tr);
535         mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541         int ret;
542
543         ret = security_locked_down(LOCKDOWN_TRACEFS);
544         if (ret)
545                 return ret;
546
547         if (tracing_disabled)
548                 return -ENODEV;
549
550         if (tr && trace_array_get(tr) < 0)
551                 return -ENODEV;
552
553         return 0;
554 }
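
/*
 * Illustrative sketch (hypothetical file operations): the usual pattern
 * for a tracefs open handler using the helper above, paired with
 * trace_array_put() in the corresponding release handler.
 */
static int __maybe_unused example_open_tr(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	filp->private_data = tr;
	return 0;
}

static int __maybe_unused example_release_tr(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}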
555
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557                               struct trace_buffer *buffer,
558                               struct ring_buffer_event *event)
559 {
560         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561             !filter_match_preds(call->filter, rec)) {
562                 __trace_event_discard_commit(buffer, event);
563                 return 1;
564         }
565
566         return 0;
567 }
568
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579         return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594                        struct trace_pid_list *filtered_no_pids,
595                        struct task_struct *task)
596 {
597         /*
598          * If filtered_no_pids is not empty, and the task's pid is listed
599          * in filtered_no_pids, then return true.
600          * Otherwise, if filtered_pids is empty, that means we can
601          * trace all tasks. If it has content, then only trace pids
602          * within filtered_pids.
603          */
604
605         return (filtered_pids &&
606                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
607                 (filtered_no_pids &&
608                  trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * When adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork, and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624                                   struct task_struct *self,
625                                   struct task_struct *task)
626 {
627         if (!pid_list)
628                 return;
629
630         /* For forks, we only add if the forking task is listed */
631         if (self) {
632                 if (!trace_find_filtered_pid(pid_list, self->pid))
633                         return;
634         }
635
636         /* "self" is set for forks, and NULL for exits */
637         if (self)
638                 trace_pid_list_set(pid_list, task->pid);
639         else
640                 trace_pid_list_clear(pid_list, task->pid);
641 }
642
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657         long pid = (unsigned long)v;
658         unsigned int next;
659
660         (*pos)++;
661
662         /* pid already is +1 of the actual previous bit */
663         if (trace_pid_list_next(pid_list, pid, &next) < 0)
664                 return NULL;
665
666         pid = next;
667
668         /* Return pid + 1 to allow zero to be represented */
669         return (void *)(pid + 1);
670 }
671
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685         unsigned long pid;
686         unsigned int first;
687         loff_t l = 0;
688
689         if (trace_pid_list_first(pid_list, &first) < 0)
690                 return NULL;
691
692         pid = first;
693
694         /* Return pid + 1 so that zero can be the exit value */
695         for (pid++; pid && l < *pos;
696              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697                 ;
698         return (void *)pid;
699 }
700
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711         unsigned long pid = (unsigned long)v - 1;
712
713         seq_printf(m, "%lu\n", pid);
714         return 0;
715 }
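
/*
 * Illustrative sketch: wiring the three pid helpers above into a
 * seq_file. "example_pid_list" and the stop callback are hypothetical;
 * real users in this file fetch the pid list from their trace_array.
 */
static struct trace_pid_list *example_pid_list;

static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
	/* Nothing to release in this sketch. */
}

static const struct seq_operations example_pids_seq_ops __maybe_unused = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};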
716
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE            127
719
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721                     struct trace_pid_list **new_pid_list,
722                     const char __user *ubuf, size_t cnt)
723 {
724         struct trace_pid_list *pid_list;
725         struct trace_parser parser;
726         unsigned long val;
727         int nr_pids = 0;
728         ssize_t read = 0;
729         ssize_t ret;
730         loff_t pos;
731         pid_t pid;
732
733         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734                 return -ENOMEM;
735
736         /*
737          * Always create a new list. The write is an all-or-nothing
738          * operation: a new list is built from the pids written by
739          * the user, and if the operation fails, the current list is
740          * not modified.
741          */
742         pid_list = trace_pid_list_alloc();
743         if (!pid_list) {
744                 trace_parser_put(&parser);
745                 return -ENOMEM;
746         }
747
748         if (filtered_pids) {
749                 /* copy the current bits to the new max */
750                 ret = trace_pid_list_first(filtered_pids, &pid);
751                 while (!ret) {
752                         trace_pid_list_set(pid_list, pid);
753                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754                         nr_pids++;
755                 }
756         }
757
758         ret = 0;
759         while (cnt > 0) {
760
761                 pos = 0;
762
763                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
764                 if (ret < 0)
765                         break;
766
767                 read += ret;
768                 ubuf += ret;
769                 cnt -= ret;
770
771                 if (!trace_parser_loaded(&parser))
772                         break;
773
774                 ret = -EINVAL;
775                 if (kstrtoul(parser.buffer, 0, &val))
776                         break;
777
778                 pid = (pid_t)val;
779
780                 if (trace_pid_list_set(pid_list, pid) < 0) {
781                         ret = -1;
782                         break;
783                 }
784                 nr_pids++;
785
786                 trace_parser_clear(&parser);
787                 ret = 0;
788         }
789         trace_parser_put(&parser);
790
791         if (ret < 0) {
792                 trace_pid_list_free(pid_list);
793                 return ret;
794         }
795
796         if (!nr_pids) {
797                 /* Cleared the list of pids */
798                 trace_pid_list_free(pid_list);
799                 pid_list = NULL;
800         }
801
802         *new_pid_list = pid_list;
803
804         return read;
805 }
806
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809         u64 ts;
810
811         /* Early boot up does not have a buffer yet */
812         if (!buf->buffer)
813                 return trace_clock_local();
814
815         ts = ring_buffer_time_stamp(buf->buffer);
816         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817
818         return ts;
819 }
820
821 u64 ftrace_now(int cpu)
822 {
823         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled", which is meant for fast paths such as
831  * the irqsoff tracer, but it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on(), which is a little
833  * slower but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837         /*
838          * For quick access (irqsoff uses this in fast path), just
839          * return the mirror variable of the state of the ring buffer.
840          * It's a little racy, but we don't really care.
841          */
842         smp_rmb();
843         return !global_trace.buffer_disabled;
844 }
845
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If a dump on oops happens, it is much appreciated not to have
853  * to wait for all that output. In any case, this is configurable
854  * at both boot time and run time.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
857
858 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer            *trace_types __read_mostly;
862
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867
868 /*
869  * Serialize access to the ring buffer.
870  *
871  * The ring buffer serializes readers, but that is only low-level protection.
872  * The validity of the events (returned by ring_buffer_peek() etc.)
873  * is not protected by the ring buffer.
874  *
875  * The content of events may become garbage if we allow other processes to
876  * consume these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not a reader page) in the ring buffer, and this page will be
879  *      rewritten by the event producer.
880  *   B) the page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to the system.
882  *
883  * These primitives allow multiple processes to access different per-CPU
884  * ring buffers concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multiple read-only accesses are also serialized.
888  */
889
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893
894 static inline void trace_access_lock(int cpu)
895 {
896         if (cpu == RING_BUFFER_ALL_CPUS) {
897                 /* gain it for accessing the whole ring buffer. */
898                 down_write(&all_cpu_access_lock);
899         } else {
900                 /* gain it for accessing a cpu ring buffer. */
901
902                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903                 down_read(&all_cpu_access_lock);
904
905                 /* Secondly block other access to this @cpu ring buffer. */
906                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
907         }
908 }
909
910 static inline void trace_access_unlock(int cpu)
911 {
912         if (cpu == RING_BUFFER_ALL_CPUS) {
913                 up_write(&all_cpu_access_lock);
914         } else {
915                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916                 up_read(&all_cpu_access_lock);
917         }
918 }
919
920 static inline void trace_access_lock_init(void)
921 {
922         int cpu;
923
924         for_each_possible_cpu(cpu)
925                 mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927
928 #else
929
930 static DEFINE_MUTEX(access_lock);
931
932 static inline void trace_access_lock(int cpu)
933 {
934         (void)cpu;
935         mutex_lock(&access_lock);
936 }
937
938 static inline void trace_access_unlock(int cpu)
939 {
940         (void)cpu;
941         mutex_unlock(&access_lock);
942 }
943
944 static inline void trace_access_lock_init(void)
945 {
946 }
947
948 #endif
949
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952                                  unsigned int trace_ctx,
953                                  int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955                                       struct trace_buffer *buffer,
956                                       unsigned int trace_ctx,
957                                       int skip, struct pt_regs *regs);
958
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961                                         unsigned int trace_ctx,
962                                         int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966                                       struct trace_buffer *buffer,
967                                       unsigned long trace_ctx,
968                                       int skip, struct pt_regs *regs)
969 {
970 }
971
972 #endif
973
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976                   int type, unsigned int trace_ctx)
977 {
978         struct trace_entry *ent = ring_buffer_event_data(event);
979
980         tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985                           int type,
986                           unsigned long len,
987                           unsigned int trace_ctx)
988 {
989         struct ring_buffer_event *event;
990
991         event = ring_buffer_lock_reserve(buffer, len);
992         if (event != NULL)
993                 trace_event_setup(event, type, trace_ctx);
994
995         return event;
996 }
997
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000         if (tr->array_buffer.buffer)
1001                 ring_buffer_record_on(tr->array_buffer.buffer);
1002         /*
1003          * This flag is looked at when buffers haven't been allocated
1004          * yet, or by some tracers (like irqsoff), that just want to
1005          * know if the ring buffer has been disabled, but it can handle
1006          * races where it gets disabled while we still do a record.
1007          * As the check is in the fast path of the tracers, it is more
1008          * important to be fast than accurate.
1009          */
1010         tr->buffer_disabled = 0;
1011         /* Make the flag seen by readers */
1012         smp_wmb();
1013 }
1014
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023         tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026
1027
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031         __this_cpu_write(trace_taskinfo_save, true);
1032
1033         /* If this is the temp buffer, we need to commit fully */
1034         if (this_cpu_read(trace_buffered_event) == event) {
1035                 /* Length is in event->array[0] */
1036                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037                 /* Release the temp buffer */
1038                 this_cpu_dec(trace_buffered_event_cnt);
1039                 /* ring_buffer_unlock_commit() enables preemption */
1040                 preempt_enable_notrace();
1041         } else
1042                 ring_buffer_unlock_commit(buffer);
1043 }
1044
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046                        const char *str, int size)
1047 {
1048         struct ring_buffer_event *event;
1049         struct trace_buffer *buffer;
1050         struct print_entry *entry;
1051         unsigned int trace_ctx;
1052         int alloc;
1053
1054         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055                 return 0;
1056
1057         if (unlikely(tracing_selftest_running || tracing_disabled))
1058                 return 0;
1059
1060         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1061
1062         trace_ctx = tracing_gen_ctx();
1063         buffer = tr->array_buffer.buffer;
1064         ring_buffer_nest_start(buffer);
1065         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1066                                             trace_ctx);
1067         if (!event) {
1068                 size = 0;
1069                 goto out;
1070         }
1071
1072         entry = ring_buffer_event_data(event);
1073         entry->ip = ip;
1074
1075         memcpy(&entry->buf, str, size);
1076
1077         /* Add a newline if necessary */
1078         if (entry->buf[size - 1] != '\n') {
1079                 entry->buf[size] = '\n';
1080                 entry->buf[size + 1] = '\0';
1081         } else
1082                 entry->buf[size] = '\0';
1083
1084         __buffer_unlock_commit(buffer, event);
1085         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1086  out:
1087         ring_buffer_nest_end(buffer);
1088         return size;
1089 }
1090 EXPORT_SYMBOL_GPL(__trace_array_puts);
1091
1092 /**
1093  * __trace_puts - write a constant string into the trace buffer.
1094  * @ip:    The address of the caller
1095  * @str:   The constant string to write
1096  * @size:  The size of the string.
1097  */
1098 int __trace_puts(unsigned long ip, const char *str, int size)
1099 {
1100         return __trace_array_puts(&global_trace, ip, str, size);
1101 }
1102 EXPORT_SYMBOL_GPL(__trace_puts);
1103
1104 /**
1105  * __trace_bputs - write the pointer to a constant string into trace buffer
1106  * @ip:    The address of the caller
1107  * @str:   The constant string whose address is written into the buffer
1108  */
1109 int __trace_bputs(unsigned long ip, const char *str)
1110 {
1111         struct ring_buffer_event *event;
1112         struct trace_buffer *buffer;
1113         struct bputs_entry *entry;
1114         unsigned int trace_ctx;
1115         int size = sizeof(struct bputs_entry);
1116         int ret = 0;
1117
1118         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1119                 return 0;
1120
1121         if (unlikely(tracing_selftest_running || tracing_disabled))
1122                 return 0;
1123
1124         trace_ctx = tracing_gen_ctx();
1125         buffer = global_trace.array_buffer.buffer;
1126
1127         ring_buffer_nest_start(buffer);
1128         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1129                                             trace_ctx);
1130         if (!event)
1131                 goto out;
1132
1133         entry = ring_buffer_event_data(event);
1134         entry->ip                       = ip;
1135         entry->str                      = str;
1136
1137         __buffer_unlock_commit(buffer, event);
1138         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1139
1140         ret = 1;
1141  out:
1142         ring_buffer_nest_end(buffer);
1143         return ret;
1144 }
1145 EXPORT_SYMBOL_GPL(__trace_bputs);
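
/*
 * Illustrative sketch: callers normally reach the two helpers above
 * through the trace_puts() macro rather than calling them directly.
 * The function below is hypothetical debug instrumentation.
 */
static void __maybe_unused example_mark_hotpath(void)
{
	/* A constant string resolves to __trace_bputs() at compile time. */
	trace_puts("entered the hot path\n");
}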
1146
1147 #ifdef CONFIG_TRACER_SNAPSHOT
1148 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1149                                            void *cond_data)
1150 {
1151         struct tracer *tracer = tr->current_trace;
1152         unsigned long flags;
1153
1154         if (in_nmi()) {
1155                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1156                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1157                 return;
1158         }
1159
1160         if (!tr->allocated_snapshot) {
1161                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1162                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1163                 tracer_tracing_off(tr);
1164                 return;
1165         }
1166
1167         /* Note, snapshot can not be used when the tracer uses it */
1168         if (tracer->use_max_tr) {
1169                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1170                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1171                 return;
1172         }
1173
1174         local_irq_save(flags);
1175         update_max_tr(tr, current, smp_processor_id(), cond_data);
1176         local_irq_restore(flags);
1177 }
1178
1179 void tracing_snapshot_instance(struct trace_array *tr)
1180 {
1181         tracing_snapshot_instance_cond(tr, NULL);
1182 }
1183
1184 /**
1185  * tracing_snapshot - take a snapshot of the current buffer.
1186  *
1187  * This causes a swap between the snapshot buffer and the current live
1188  * tracing buffer. You can use this to take snapshots of the live
1189  * trace when some condition is triggered, but continue to trace.
1190  *
1191  * Note, make sure to allocate the snapshot either with
1192  * tracing_snapshot_alloc(), or manually with:
1193  * echo 1 > /sys/kernel/tracing/snapshot
1194  *
1195  * If the snapshot buffer is not allocated, this will stop tracing,
1196  * basically making a permanent snapshot.
1197  */
1198 void tracing_snapshot(void)
1199 {
1200         struct trace_array *tr = &global_trace;
1201
1202         tracing_snapshot_instance(tr);
1203 }
1204 EXPORT_SYMBOL_GPL(tracing_snapshot);
1205
1206 /**
1207  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1208  * @tr:         The tracing instance to snapshot
1209  * @cond_data:  The data to be tested conditionally, and possibly saved
1210  *
1211  * This is the same as tracing_snapshot() except that the snapshot is
1212  * conditional - the snapshot will only happen if the
1213  * cond_snapshot.update() implementation receiving the cond_data
1214  * returns true, which means that the trace array's cond_snapshot
1215  * update() operation used the cond_data to determine whether the
1216  * snapshot should be taken, and if it was, presumably saved it along
1217  * with the snapshot.
1218  */
1219 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1220 {
1221         tracing_snapshot_instance_cond(tr, cond_data);
1222 }
1223 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1224
1225 /**
1226  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1227  * @tr:         The tracing instance
1228  *
1229  * When the user enables a conditional snapshot using
1230  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1231  * with the snapshot.  This accessor is used to retrieve it.
1232  *
1233  * Should not be called from cond_snapshot.update(), since it takes
1234  * the tr->max_lock lock, which the code calling
1235  * cond_snapshot.update() has already done.
1236  *
1237  * Returns the cond_data associated with the trace array's snapshot.
1238  */
1239 void *tracing_cond_snapshot_data(struct trace_array *tr)
1240 {
1241         void *cond_data = NULL;
1242
1243         local_irq_disable();
1244         arch_spin_lock(&tr->max_lock);
1245
1246         if (tr->cond_snapshot)
1247                 cond_data = tr->cond_snapshot->cond_data;
1248
1249         arch_spin_unlock(&tr->max_lock);
1250         local_irq_enable();
1251
1252         return cond_data;
1253 }
1254 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1255
1256 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1257                                         struct array_buffer *size_buf, int cpu_id);
1258 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1259
1260 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1261 {
1262         int ret;
1263
1264         if (!tr->allocated_snapshot) {
1265
1266                 /* allocate spare buffer */
1267                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1268                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1269                 if (ret < 0)
1270                         return ret;
1271
1272                 tr->allocated_snapshot = true;
1273         }
1274
1275         return 0;
1276 }
1277
1278 static void free_snapshot(struct trace_array *tr)
1279 {
1280         /*
1281          * We don't free the ring buffer; instead, we resize it because
1282          * the max_tr ring buffer has some state (e.g. ring->clock) and
1283          * we want to preserve it.
1284          */
1285         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1286         set_buffer_entries(&tr->max_buffer, 1);
1287         tracing_reset_online_cpus(&tr->max_buffer);
1288         tr->allocated_snapshot = false;
1289 }
1290
1291 /**
1292  * tracing_alloc_snapshot - allocate snapshot buffer.
1293  *
1294  * This only allocates the snapshot buffer if it isn't already
1295  * allocated - it doesn't also take a snapshot.
1296  *
1297  * This is meant to be used in cases where the snapshot buffer needs
1298  * to be set up for events that can't sleep but need to be able to
1299  * trigger a snapshot.
1300  */
1301 int tracing_alloc_snapshot(void)
1302 {
1303         struct trace_array *tr = &global_trace;
1304         int ret;
1305
1306         ret = tracing_alloc_snapshot_instance(tr);
1307         WARN_ON(ret < 0);
1308
1309         return ret;
1310 }
1311 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1312
1313 /**
1314  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1315  *
1316  * This is similar to tracing_snapshot(), but it will allocate the
1317  * snapshot buffer if it isn't already allocated. Use this only
1318  * where it is safe to sleep, as the allocation may sleep.
1319  *
1320  * This causes a swap between the snapshot buffer and the current live
1321  * tracing buffer. You can use this to take snapshots of the live
1322  * trace when some condition is triggered, but continue to trace.
1323  */
1324 void tracing_snapshot_alloc(void)
1325 {
1326         int ret;
1327
1328         ret = tracing_alloc_snapshot();
1329         if (ret < 0)
1330                 return;
1331
1332         tracing_snapshot();
1333 }
1334 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
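
/*
 * Illustrative sketch (hypothetical caller): allocate the snapshot
 * buffer from sleepable context at setup time, then take snapshots
 * from atomic context when an interesting error is seen.
 */
static int __maybe_unused example_snapshot_setup(void)
{
	/* May sleep: call from probe/init, not from an error path. */
	return tracing_alloc_snapshot();
}

static void __maybe_unused example_snapshot_on_error(void)
{
	/* Safe in atomic context once the buffer has been allocated. */
	tracing_snapshot();
}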
1335
1336 /**
1337  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1338  * @tr:         The tracing instance
1339  * @cond_data:  User data to associate with the snapshot
1340  * @update:     Implementation of the cond_snapshot update function
1341  *
1342  * Check whether the conditional snapshot for the given instance has
1343  * already been enabled, or if the current tracer is already using a
1344  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1345  * save the cond_data and update function inside.
1346  *
1347  * Returns 0 if successful, error otherwise.
1348  */
1349 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1350                                  cond_update_fn_t update)
1351 {
1352         struct cond_snapshot *cond_snapshot;
1353         int ret = 0;
1354
1355         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1356         if (!cond_snapshot)
1357                 return -ENOMEM;
1358
1359         cond_snapshot->cond_data = cond_data;
1360         cond_snapshot->update = update;
1361
1362         mutex_lock(&trace_types_lock);
1363
1364         ret = tracing_alloc_snapshot_instance(tr);
1365         if (ret)
1366                 goto fail_unlock;
1367
1368         if (tr->current_trace->use_max_tr) {
1369                 ret = -EBUSY;
1370                 goto fail_unlock;
1371         }
1372
1373         /*
1374          * The cond_snapshot can only change to NULL without the
1375          * trace_types_lock. We don't care if we race with it going
1376          * to NULL, but we want to make sure that it's not set to
1377          * something other than NULL when we get here, which we can
1378          * do safely with only holding the trace_types_lock and not
1379          * having to take the max_lock.
1380          */
1381         if (tr->cond_snapshot) {
1382                 ret = -EBUSY;
1383                 goto fail_unlock;
1384         }
1385
1386         local_irq_disable();
1387         arch_spin_lock(&tr->max_lock);
1388         tr->cond_snapshot = cond_snapshot;
1389         arch_spin_unlock(&tr->max_lock);
1390         local_irq_enable();
1391
1392         mutex_unlock(&trace_types_lock);
1393
1394         return ret;
1395
1396  fail_unlock:
1397         mutex_unlock(&trace_types_lock);
1398         kfree(cond_snapshot);
1399         return ret;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1402
1403 /**
1404  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1405  * @tr:         The tracing instance
1406  *
1407  * Check whether the conditional snapshot for the given instance is
1408  * enabled; if so, free the cond_snapshot associated with it,
1409  * otherwise return -EINVAL.
1410  *
1411  * Returns 0 if successful, error otherwise.
1412  */
1413 int tracing_snapshot_cond_disable(struct trace_array *tr)
1414 {
1415         int ret = 0;
1416
1417         local_irq_disable();
1418         arch_spin_lock(&tr->max_lock);
1419
1420         if (!tr->cond_snapshot)
1421                 ret = -EINVAL;
1422         else {
1423                 kfree(tr->cond_snapshot);
1424                 tr->cond_snapshot = NULL;
1425         }
1426
1427         arch_spin_unlock(&tr->max_lock);
1428         local_irq_enable();
1429
1430         return ret;
1431 }
1432 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
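
/*
 * Illustrative sketch of the conditional snapshot API (all "example_*"
 * names are hypothetical). The update callback runs under tr->max_lock
 * and decides whether a tracing_snapshot_cond() call really swaps the
 * buffers.
 */
static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *threshold = cond_data;

	/* Only snapshot when the (made-up) threshold was exceeded. */
	return *threshold > 100;
}

static void __maybe_unused example_cond_usage(struct trace_array *tr,
					      unsigned long *threshold)
{
	if (tracing_snapshot_cond_enable(tr, threshold, example_cond_update))
		return;

	/* Later, at the point of interest: */
	tracing_snapshot_cond(tr, threshold);

	tracing_snapshot_cond_disable(tr);
}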
1433 #else
1434 void tracing_snapshot(void)
1435 {
1436         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1437 }
1438 EXPORT_SYMBOL_GPL(tracing_snapshot);
1439 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1440 {
1441         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1442 }
1443 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1444 int tracing_alloc_snapshot(void)
1445 {
1446         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1447         return -ENODEV;
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1450 void tracing_snapshot_alloc(void)
1451 {
1452         /* Give warning */
1453         tracing_snapshot();
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1456 void *tracing_cond_snapshot_data(struct trace_array *tr)
1457 {
1458         return NULL;
1459 }
1460 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1461 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1462 {
1463         return -ENODEV;
1464 }
1465 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1466 int tracing_snapshot_cond_disable(struct trace_array *tr)
1467 {
1468         return false;
1469 }
1470 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1471 #define free_snapshot(tr)       do { } while (0)
1472 #endif /* CONFIG_TRACER_SNAPSHOT */
1473
1474 void tracer_tracing_off(struct trace_array *tr)
1475 {
1476         if (tr->array_buffer.buffer)
1477                 ring_buffer_record_off(tr->array_buffer.buffer);
1478         /*
1479          * This flag is looked at when buffers haven't been allocated
1480          * yet, or by some tracers (like irqsoff), that just want to
1481          * know if the ring buffer has been disabled, but it can handle
1482          * races where it gets disabled while we still do a record.
1483          * As the check is in the fast path of the tracers, it is more
1484          * important to be fast than accurate.
1485          */
1486         tr->buffer_disabled = 1;
1487         /* Make the flag seen by readers */
1488         smp_wmb();
1489 }
1490
1491 /**
1492  * tracing_off - turn off tracing buffers
1493  *
1494  * This function stops the tracing buffers from recording data.
1495  * It does not disable any overhead the tracers themselves may
1496  * be causing. This function simply causes all recording to
1497  * the ring buffers to fail.
1498  */
1499 void tracing_off(void)
1500 {
1501         tracer_tracing_off(&global_trace);
1502 }
1503 EXPORT_SYMBOL_GPL(tracing_off);
1504
1505 void disable_trace_on_warning(void)
1506 {
1507         if (__disable_trace_on_warning) {
1508                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1509                         "Disabling tracing due to warning\n");
1510                 tracing_off();
1511         }
1512 }
1513
1514 /**
1515  * tracer_tracing_is_on - show real state of ring buffer enabled
1516  * @tr : the trace array to check if its ring buffer is enabled
1517  *
1518  * Shows the real state of the ring buffer: whether it is enabled or not.
1519  */
1520 bool tracer_tracing_is_on(struct trace_array *tr)
1521 {
1522         if (tr->array_buffer.buffer)
1523                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1524         return !tr->buffer_disabled;
1525 }
1526
1527 /**
1528  * tracing_is_on - show state of ring buffers enabled
1529  */
1530 int tracing_is_on(void)
1531 {
1532         return tracer_tracing_is_on(&global_trace);
1533 }
1534 EXPORT_SYMBOL_GPL(tracing_is_on);
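
/*
 * Illustrative sketch (hypothetical debugging aid): freeze the ring
 * buffer the moment a suspicious condition is detected, so the events
 * leading up to it can be read from the trace file afterwards.
 */
static void __maybe_unused example_freeze_trace_on_problem(bool problem)
{
	if (problem && tracing_is_on()) {
		trace_puts("problem detected, freezing trace\n");
		tracing_off();
	}
}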
1535
1536 static int __init set_buf_size(char *str)
1537 {
1538         unsigned long buf_size;
1539
1540         if (!str)
1541                 return 0;
1542         buf_size = memparse(str, &str);
1543         /*
1544          * nr_entries can not be zero and the startup
1545          * tests require some buffer space. Therefore
1546          * ensure we have at least 4096 bytes of buffer.
1547          */
1548         trace_buf_size = max(4096UL, buf_size);
1549         return 1;
1550 }
1551 __setup("trace_buf_size=", set_buf_size);
1552
1553 static int __init set_tracing_thresh(char *str)
1554 {
1555         unsigned long threshold;
1556         int ret;
1557
1558         if (!str)
1559                 return 0;
1560         ret = kstrtoul(str, 0, &threshold);
1561         if (ret < 0)
1562                 return 0;
1563         tracing_thresh = threshold * 1000;
1564         return 1;
1565 }
1566 __setup("tracing_thresh=", set_tracing_thresh);
1567
1568 unsigned long nsecs_to_usecs(unsigned long nsecs)
1569 {
1570         return nsecs / 1000;
1571 }
1572
1573 /*
1574  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1575  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1576  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1577  * of strings in the order that the evals (enum) were defined.
1578  */
1579 #undef C
1580 #define C(a, b) b
1581
1582 /* These must match the bit positions in trace_iterator_flags */
1583 static const char *trace_options[] = {
1584         TRACE_FLAGS
1585         NULL
1586 };
1587
1588 static struct {
1589         u64 (*func)(void);
1590         const char *name;
1591         int in_ns;              /* is this clock in nanoseconds? */
1592 } trace_clocks[] = {
1593         { trace_clock_local,            "local",        1 },
1594         { trace_clock_global,           "global",       1 },
1595         { trace_clock_counter,          "counter",      0 },
1596         { trace_clock_jiffies,          "uptime",       0 },
1597         { trace_clock,                  "perf",         1 },
1598         { ktime_get_mono_fast_ns,       "mono",         1 },
1599         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1600         { ktime_get_boot_fast_ns,       "boot",         1 },
1601         { ktime_get_tai_fast_ns,        "tai",          1 },
1602         ARCH_TRACE_CLOCKS
1603 };
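
/*
 * The clocks above are selected at run time by writing their name to
 * the tracefs "trace_clock" file, for example:
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 */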
1604
1605 bool trace_clock_in_ns(struct trace_array *tr)
1606 {
1607         if (trace_clocks[tr->clock_id].in_ns)
1608                 return true;
1609
1610         return false;
1611 }
1612
1613 /*
1614  * trace_parser_get_init - gets the buffer for trace parser
1615  */
1616 int trace_parser_get_init(struct trace_parser *parser, int size)
1617 {
1618         memset(parser, 0, sizeof(*parser));
1619
1620         parser->buffer = kmalloc(size, GFP_KERNEL);
1621         if (!parser->buffer)
1622                 return 1;
1623
1624         parser->size = size;
1625         return 0;
1626 }
1627
1628 /*
1629  * trace_parser_put - frees the buffer for trace parser
1630  */
1631 void trace_parser_put(struct trace_parser *parser)
1632 {
1633         kfree(parser->buffer);
1634         parser->buffer = NULL;
1635 }
1636
1637 /*
1638  * trace_get_user - reads the user input string separated by space
1639  * (matched by isspace(ch))
1640  *
1641  * For each string found the 'struct trace_parser' is updated,
1642  * and the function returns.
1643  *
1644  * Returns number of bytes read.
1645  *
1646  * See kernel/trace/trace.h for 'struct trace_parser' details.
1647  */
1648 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1649         size_t cnt, loff_t *ppos)
1650 {
1651         char ch;
1652         size_t read = 0;
1653         ssize_t ret;
1654
1655         if (!*ppos)
1656                 trace_parser_clear(parser);
1657
1658         ret = get_user(ch, ubuf++);
1659         if (ret)
1660                 goto out;
1661
1662         read++;
1663         cnt--;
1664
1665         /*
1666          * If the parser did not finish with the last write, continue
1667          * reading that token without skipping spaces.
1668          */
1669         if (!parser->cont) {
1670                 /* skip white space */
1671                 while (cnt && isspace(ch)) {
1672                         ret = get_user(ch, ubuf++);
1673                         if (ret)
1674                                 goto out;
1675                         read++;
1676                         cnt--;
1677                 }
1678
1679                 parser->idx = 0;
1680
1681                 /* only spaces were written */
1682                 if (isspace(ch) || !ch) {
1683                         *ppos += read;
1684                         ret = read;
1685                         goto out;
1686                 }
1687         }
1688
1689         /* read the non-space input */
1690         while (cnt && !isspace(ch) && ch) {
1691                 if (parser->idx < parser->size - 1)
1692                         parser->buffer[parser->idx++] = ch;
1693                 else {
1694                         ret = -EINVAL;
1695                         goto out;
1696                 }
1697                 ret = get_user(ch, ubuf++);
1698                 if (ret)
1699                         goto out;
1700                 read++;
1701                 cnt--;
1702         }
1703
1704         /* We either got finished input or we have to wait for another call. */
1705         if (isspace(ch) || !ch) {
1706                 parser->buffer[parser->idx] = 0;
1707                 parser->cont = false;
1708         } else if (parser->idx < parser->size - 1) {
1709                 parser->cont = true;
1710                 parser->buffer[parser->idx++] = ch;
1711                 /* Make sure the parsed string always terminates with '\0'. */
1712                 parser->buffer[parser->idx] = 0;
1713         } else {
1714                 ret = -EINVAL;
1715                 goto out;
1716         }
1717
1718         *ppos += read;
1719         ret = read;
1720
1721 out:
1722         return ret;
1723 }
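
/*
 * A minimal usage sketch of the parser helpers above, assuming a
 * tracefs-style ->write() handler; the function name and the use of
 * TASK_COMM_LEN as the token size are illustrative only:
 */
static ssize_t __maybe_unused
example_token_write(struct file *filp, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t ret;

	if (trace_parser_get_init(&parser, TASK_COMM_LEN))
		return -ENOMEM;

	ret = trace_get_user(&parser, ubuf, cnt, ppos);
	if (ret > 0 && parser.idx)
		pr_debug("parsed token: %s\n", parser.buffer);

	trace_parser_put(&parser);
	return ret;
}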
1724
1725 /* TODO add a seq_buf_to_buffer() */
1726 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1727 {
1728         int len;
1729
1730         if (trace_seq_used(s) <= s->seq.readpos)
1731                 return -EBUSY;
1732
1733         len = trace_seq_used(s) - s->seq.readpos;
1734         if (cnt > len)
1735                 cnt = len;
1736         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1737
1738         s->seq.readpos += cnt;
1739         return cnt;
1740 }
1741
1742 unsigned long __read_mostly     tracing_thresh;
1743
1744 #ifdef CONFIG_TRACER_MAX_TRACE
1745 static const struct file_operations tracing_max_lat_fops;
1746
1747 #ifdef LATENCY_FS_NOTIFY
1748
1749 static struct workqueue_struct *fsnotify_wq;
1750
1751 static void latency_fsnotify_workfn(struct work_struct *work)
1752 {
1753         struct trace_array *tr = container_of(work, struct trace_array,
1754                                               fsnotify_work);
1755         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1756 }
1757
1758 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1759 {
1760         struct trace_array *tr = container_of(iwork, struct trace_array,
1761                                               fsnotify_irqwork);
1762         queue_work(fsnotify_wq, &tr->fsnotify_work);
1763 }
1764
1765 static void trace_create_maxlat_file(struct trace_array *tr,
1766                                      struct dentry *d_tracer)
1767 {
1768         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1769         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1770         tr->d_max_latency = trace_create_file("tracing_max_latency",
1771                                               TRACE_MODE_WRITE,
1772                                               d_tracer, &tr->max_latency,
1773                                               &tracing_max_lat_fops);
1774 }
1775
1776 __init static int latency_fsnotify_init(void)
1777 {
1778         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1779                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1780         if (!fsnotify_wq) {
1781                 pr_err("Unable to allocate tr_max_lat_wq\n");
1782                 return -ENOMEM;
1783         }
1784         return 0;
1785 }
1786
1787 late_initcall_sync(latency_fsnotify_init);
1788
1789 void latency_fsnotify(struct trace_array *tr)
1790 {
1791         if (!fsnotify_wq)
1792                 return;
1793         /*
1794          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1795          * possible that we are called from __schedule() or do_idle(), which
1796          * could cause a deadlock.
1797          */
1798         irq_work_queue(&tr->fsnotify_irqwork);
1799 }
1800
1801 #else /* !LATENCY_FS_NOTIFY */
1802
1803 #define trace_create_maxlat_file(tr, d_tracer)                          \
1804         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1805                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1806
1807 #endif
1808
1809 /*
1810  * Copy the new maximum trace into the separate maximum-trace
1811  * structure, so that the maximum trace is permanently saved for
1812  * later retrieval via /sys/kernel/tracing/tracing_max_latency.
1813  */
1814 static void
1815 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1816 {
1817         struct array_buffer *trace_buf = &tr->array_buffer;
1818         struct array_buffer *max_buf = &tr->max_buffer;
1819         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1820         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1821
1822         max_buf->cpu = cpu;
1823         max_buf->time_start = data->preempt_timestamp;
1824
1825         max_data->saved_latency = tr->max_latency;
1826         max_data->critical_start = data->critical_start;
1827         max_data->critical_end = data->critical_end;
1828
1829         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1830         max_data->pid = tsk->pid;
1831         /*
1832          * If tsk == current, then use current_uid(), as that does not use
1833          * RCU. The irq tracer can be called out of RCU scope.
1834          */
1835         if (tsk == current)
1836                 max_data->uid = current_uid();
1837         else
1838                 max_data->uid = task_uid(tsk);
1839
1840         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1841         max_data->policy = tsk->policy;
1842         max_data->rt_priority = tsk->rt_priority;
1843
1844         /* record this task's comm */
1845         tracing_record_cmdline(tsk);
1846         latency_fsnotify(tr);
1847 }
1848
1849 /**
1850  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1851  * @tr: tracer
1852  * @tsk: the task with the latency
1853  * @cpu: The cpu that initiated the trace.
1854  * @cond_data: User data associated with a conditional snapshot
1855  *
1856  * Flip the buffers between the @tr and the max_tr and record information
1857  * about which task was the cause of this latency.
1858  */
1859 void
1860 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1861               void *cond_data)
1862 {
1863         if (tr->stop_count)
1864                 return;
1865
1866         WARN_ON_ONCE(!irqs_disabled());
1867
1868         if (!tr->allocated_snapshot) {
1869                 /* Only the nop tracer should hit this when disabling */
1870                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1871                 return;
1872         }
1873
1874         arch_spin_lock(&tr->max_lock);
1875
1876         /* Inherit the recordable setting from array_buffer */
1877         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1878                 ring_buffer_record_on(tr->max_buffer.buffer);
1879         else
1880                 ring_buffer_record_off(tr->max_buffer.buffer);
1881
1882 #ifdef CONFIG_TRACER_SNAPSHOT
1883         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1884                 arch_spin_unlock(&tr->max_lock);
1885                 return;
1886         }
1887 #endif
1888         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1889
1890         __update_max_tr(tr, tsk, cpu);
1891
1892         arch_spin_unlock(&tr->max_lock);
1893 }
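
/*
 * A minimal sketch of the usual caller pattern for update_max_tr(),
 * assuming a latency tracer that has just measured "delta" nanoseconds
 * with interrupts disabled (the variable names here are illustrative):
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 *
 * After the swap, the max buffer holds the trace of the new worst case
 * and tracing_max_latency reports tr->max_latency.
 */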
1894
1895 /**
1896  * update_max_tr_single - only copy one trace over, and reset the rest
1897  * @tr: tracer
1898  * @tsk: task with the latency
1899  * @cpu: the cpu of the buffer to copy.
1900  *
1901  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1902  */
1903 void
1904 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1905 {
1906         int ret;
1907
1908         if (tr->stop_count)
1909                 return;
1910
1911         WARN_ON_ONCE(!irqs_disabled());
1912         if (!tr->allocated_snapshot) {
1913                 /* Only the nop tracer should hit this when disabling */
1914                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1915                 return;
1916         }
1917
1918         arch_spin_lock(&tr->max_lock);
1919
1920         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1921
1922         if (ret == -EBUSY) {
1923                 /*
1924                  * We failed to swap the buffer due to a commit taking
1925                  * place on this CPU. We fail to record, but we reset
1926                  * the max trace buffer (no one writes directly to it)
1927                  * and flag that it failed.
1928                  */
1929                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1930                         "Failed to swap buffers due to commit in progress\n");
1931         }
1932
1933         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1934
1935         __update_max_tr(tr, tsk, cpu);
1936         arch_spin_unlock(&tr->max_lock);
1937 }
1938
1939 #endif /* CONFIG_TRACER_MAX_TRACE */
1940
1941 static int wait_on_pipe(struct trace_iterator *iter, int full)
1942 {
1943         /* Iterators are static, they should be filled or empty */
1944         if (trace_buffer_iter(iter, iter->cpu_file))
1945                 return 0;
1946
1947         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1948                                 full);
1949 }
1950
1951 #ifdef CONFIG_FTRACE_STARTUP_TEST
1952 static bool selftests_can_run;
1953
1954 struct trace_selftests {
1955         struct list_head                list;
1956         struct tracer                   *type;
1957 };
1958
1959 static LIST_HEAD(postponed_selftests);
1960
1961 static int save_selftest(struct tracer *type)
1962 {
1963         struct trace_selftests *selftest;
1964
1965         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1966         if (!selftest)
1967                 return -ENOMEM;
1968
1969         selftest->type = type;
1970         list_add(&selftest->list, &postponed_selftests);
1971         return 0;
1972 }
1973
1974 static int run_tracer_selftest(struct tracer *type)
1975 {
1976         struct trace_array *tr = &global_trace;
1977         struct tracer *saved_tracer = tr->current_trace;
1978         int ret;
1979
1980         if (!type->selftest || tracing_selftest_disabled)
1981                 return 0;
1982
1983         /*
1984          * If a tracer registers early in boot up (before scheduling is
1985          * initialized and such), then do not run its selftests yet.
1986          * Instead, run it a little later in the boot process.
1987          */
1988         if (!selftests_can_run)
1989                 return save_selftest(type);
1990
1991         if (!tracing_is_on()) {
1992                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1993                         type->name);
1994                 return 0;
1995         }
1996
1997         /*
1998          * Run a selftest on this tracer.
1999          * Here we reset the trace buffer, and set the current
2000          * tracer to be this tracer. The tracer can then run some
2001          * internal tracing to verify that everything is in order.
2002          * If we fail, we do not register this tracer.
2003          */
2004         tracing_reset_online_cpus(&tr->array_buffer);
2005
2006         tr->current_trace = type;
2007
2008 #ifdef CONFIG_TRACER_MAX_TRACE
2009         if (type->use_max_tr) {
2010                 /* If we expanded the buffers, make sure the max is expanded too */
2011                 if (ring_buffer_expanded)
2012                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2013                                            RING_BUFFER_ALL_CPUS);
2014                 tr->allocated_snapshot = true;
2015         }
2016 #endif
2017
2018         /* the test is responsible for initializing and enabling */
2019         pr_info("Testing tracer %s: ", type->name);
2020         ret = type->selftest(type, tr);
2021         /* the test is responsible for resetting too */
2022         tr->current_trace = saved_tracer;
2023         if (ret) {
2024                 printk(KERN_CONT "FAILED!\n");
2025                 /* Add the warning after printing 'FAILED' */
2026                 WARN_ON(1);
2027                 return -1;
2028         }
2029         /* Only reset on passing, to avoid touching corrupted buffers */
2030         tracing_reset_online_cpus(&tr->array_buffer);
2031
2032 #ifdef CONFIG_TRACER_MAX_TRACE
2033         if (type->use_max_tr) {
2034                 tr->allocated_snapshot = false;
2035
2036                 /* Shrink the max buffer again */
2037                 if (ring_buffer_expanded)
2038                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2039                                            RING_BUFFER_ALL_CPUS);
2040         }
2041 #endif
2042
2043         printk(KERN_CONT "PASSED\n");
2044         return 0;
2045 }
2046
2047 static int do_run_tracer_selftest(struct tracer *type)
2048 {
2049         int ret;
2050
2051         /*
2052          * Tests can take a long time, especially if they are run one after the
2053          * other, as does happen during bootup when all the tracers are
2054          * registered. This could cause the soft lockup watchdog to trigger.
2055          */
2056         cond_resched();
2057
2058         tracing_selftest_running = true;
2059         ret = run_tracer_selftest(type);
2060         tracing_selftest_running = false;
2061
2062         return ret;
2063 }
2064
2065 static __init int init_trace_selftests(void)
2066 {
2067         struct trace_selftests *p, *n;
2068         struct tracer *t, **last;
2069         int ret;
2070
2071         selftests_can_run = true;
2072
2073         mutex_lock(&trace_types_lock);
2074
2075         if (list_empty(&postponed_selftests))
2076                 goto out;
2077
2078         pr_info("Running postponed tracer tests:\n");
2079
2080         tracing_selftest_running = true;
2081         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2082                 /* This loop can take minutes when sanitizers are enabled, so
2083                  * let's make sure we allow RCU processing.
2084                  */
2085                 cond_resched();
2086                 ret = run_tracer_selftest(p->type);
2087                 /* If the test fails, then warn and remove from available_tracers */
2088                 if (ret < 0) {
2089                         WARN(1, "tracer: %s failed selftest, disabling\n",
2090                              p->type->name);
2091                         last = &trace_types;
2092                         for (t = trace_types; t; t = t->next) {
2093                                 if (t == p->type) {
2094                                         *last = t->next;
2095                                         break;
2096                                 }
2097                                 last = &t->next;
2098                         }
2099                 }
2100                 list_del(&p->list);
2101                 kfree(p);
2102         }
2103         tracing_selftest_running = false;
2104
2105  out:
2106         mutex_unlock(&trace_types_lock);
2107
2108         return 0;
2109 }
2110 core_initcall(init_trace_selftests);
2111 #else
2112 static inline int run_tracer_selftest(struct tracer *type)
2113 {
2114         return 0;
2115 }
2116 static inline int do_run_tracer_selftest(struct tracer *type)
2117 {
2118         return 0;
2119 }
2120 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2121
2122 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2123
2124 static void __init apply_trace_boot_options(void);
2125
2126 /**
2127  * register_tracer - register a tracer with the ftrace system.
2128  * @type: the plugin for the tracer
2129  *
2130  * Register a new plugin tracer.
2131  */
2132 int __init register_tracer(struct tracer *type)
2133 {
2134         struct tracer *t;
2135         int ret = 0;
2136
2137         if (!type->name) {
2138                 pr_info("Tracer must have a name\n");
2139                 return -1;
2140         }
2141
2142         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2143                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2144                 return -1;
2145         }
2146
2147         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2148                 pr_warn("Can not register tracer %s due to lockdown\n",
2149                            type->name);
2150                 return -EPERM;
2151         }
2152
2153         mutex_lock(&trace_types_lock);
2154
2155         for (t = trace_types; t; t = t->next) {
2156                 if (strcmp(type->name, t->name) == 0) {
2157                         /* already found */
2158                         pr_info("Tracer %s already registered\n",
2159                                 type->name);
2160                         ret = -1;
2161                         goto out;
2162                 }
2163         }
2164
2165         if (!type->set_flag)
2166                 type->set_flag = &dummy_set_flag;
2167         if (!type->flags) {
2168                 /* allocate a dummy tracer_flags */
2169                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2170                 if (!type->flags) {
2171                         ret = -ENOMEM;
2172                         goto out;
2173                 }
2174                 type->flags->val = 0;
2175                 type->flags->opts = dummy_tracer_opt;
2176         } else
2177                 if (!type->flags->opts)
2178                         type->flags->opts = dummy_tracer_opt;
2179
2180         /* store the tracer for __set_tracer_option */
2181         type->flags->trace = type;
2182
2183         ret = do_run_tracer_selftest(type);
2184         if (ret < 0)
2185                 goto out;
2186
2187         type->next = trace_types;
2188         trace_types = type;
2189         add_tracer_options(&global_trace, type);
2190
2191  out:
2192         mutex_unlock(&trace_types_lock);
2193
2194         if (ret || !default_bootup_tracer)
2195                 goto out_unlock;
2196
2197         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2198                 goto out_unlock;
2199
2200         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2201         /* Do we want this tracer to start on bootup? */
2202         tracing_set_tracer(&global_trace, type->name);
2203         default_bootup_tracer = NULL;
2204
2205         apply_trace_boot_options();
2206
2207         /* Disable other selftests, since running this tracer will break them. */
2208         disable_tracing_selftest("running a tracer");
2209
2210  out_unlock:
2211         return ret;
2212 }
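
/*
 * A minimal sketch of what registration looks like from a tracer
 * plugin's side; the "example" names are hypothetical and the callbacks
 * intentionally do nothing:
 */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __maybe_unused = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

/* register_tracer(&example_tracer) would then be called from an __init function. */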
2213
2214 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2215 {
2216         struct trace_buffer *buffer = buf->buffer;
2217
2218         if (!buffer)
2219                 return;
2220
2221         ring_buffer_record_disable(buffer);
2222
2223         /* Make sure all commits have finished */
2224         synchronize_rcu();
2225         ring_buffer_reset_cpu(buffer, cpu);
2226
2227         ring_buffer_record_enable(buffer);
2228 }
2229
2230 void tracing_reset_online_cpus(struct array_buffer *buf)
2231 {
2232         struct trace_buffer *buffer = buf->buffer;
2233
2234         if (!buffer)
2235                 return;
2236
2237         ring_buffer_record_disable(buffer);
2238
2239         /* Make sure all commits have finished */
2240         synchronize_rcu();
2241
2242         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2243
2244         ring_buffer_reset_online_cpus(buffer);
2245
2246         ring_buffer_record_enable(buffer);
2247 }
2248
2249 /* Must have trace_types_lock held */
2250 void tracing_reset_all_online_cpus_unlocked(void)
2251 {
2252         struct trace_array *tr;
2253
2254         lockdep_assert_held(&trace_types_lock);
2255
2256         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2257                 if (!tr->clear_trace)
2258                         continue;
2259                 tr->clear_trace = false;
2260                 tracing_reset_online_cpus(&tr->array_buffer);
2261 #ifdef CONFIG_TRACER_MAX_TRACE
2262                 tracing_reset_online_cpus(&tr->max_buffer);
2263 #endif
2264         }
2265 }
2266
2267 void tracing_reset_all_online_cpus(void)
2268 {
2269         mutex_lock(&trace_types_lock);
2270         tracing_reset_all_online_cpus_unlocked();
2271         mutex_unlock(&trace_types_lock);
2272 }
2273
2274 /*
2275  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2276  * is the tgid last observed corresponding to pid=i.
2277  */
2278 static int *tgid_map;
2279
2280 /* The maximum valid index into tgid_map. */
2281 static size_t tgid_map_max;
2282
2283 #define SAVED_CMDLINES_DEFAULT 128
2284 #define NO_CMDLINE_MAP UINT_MAX
2285 /*
2286  * Preemption must be disabled before acquiring trace_cmdline_lock.
2287  * The various trace_arrays' max_lock must be acquired in a context
2288  * where interrupts are disabled.
2289  */
2290 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2291 struct saved_cmdlines_buffer {
2292         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2293         unsigned *map_cmdline_to_pid;
2294         unsigned cmdline_num;
2295         int cmdline_idx;
2296         char *saved_cmdlines;
2297 };
2298 static struct saved_cmdlines_buffer *savedcmd;
2299
2300 static inline char *get_saved_cmdlines(int idx)
2301 {
2302         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2303 }
2304
2305 static inline void set_cmdline(int idx, const char *cmdline)
2306 {
2307         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2308 }
2309
2310 static int allocate_cmdlines_buffer(unsigned int val,
2311                                     struct saved_cmdlines_buffer *s)
2312 {
2313         s->map_cmdline_to_pid = kmalloc_array(val,
2314                                               sizeof(*s->map_cmdline_to_pid),
2315                                               GFP_KERNEL);
2316         if (!s->map_cmdline_to_pid)
2317                 return -ENOMEM;
2318
2319         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2320         if (!s->saved_cmdlines) {
2321                 kfree(s->map_cmdline_to_pid);
2322                 return -ENOMEM;
2323         }
2324
2325         s->cmdline_idx = 0;
2326         s->cmdline_num = val;
2327         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2328                sizeof(s->map_pid_to_cmdline));
2329         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2330                val * sizeof(*s->map_cmdline_to_pid));
2331
2332         return 0;
2333 }
2334
2335 static int trace_create_savedcmd(void)
2336 {
2337         int ret;
2338
2339         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2340         if (!savedcmd)
2341                 return -ENOMEM;
2342
2343         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2344         if (ret < 0) {
2345                 kfree(savedcmd);
2346                 savedcmd = NULL;
2347                 return -ENOMEM;
2348         }
2349
2350         return 0;
2351 }
2352
2353 int is_tracing_stopped(void)
2354 {
2355         return global_trace.stop_count;
2356 }
2357
2358 /**
2359  * tracing_start - quick start of the tracer
2360  *
2361  * If tracing is enabled but was stopped by tracing_stop,
2362  * this will start the tracer back up.
2363  */
2364 void tracing_start(void)
2365 {
2366         struct trace_buffer *buffer;
2367         unsigned long flags;
2368
2369         if (tracing_disabled)
2370                 return;
2371
2372         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2373         if (--global_trace.stop_count) {
2374                 if (global_trace.stop_count < 0) {
2375                         /* Someone screwed up their debugging */
2376                         WARN_ON_ONCE(1);
2377                         global_trace.stop_count = 0;
2378                 }
2379                 goto out;
2380         }
2381
2382         /* Prevent the buffers from switching */
2383         arch_spin_lock(&global_trace.max_lock);
2384
2385         buffer = global_trace.array_buffer.buffer;
2386         if (buffer)
2387                 ring_buffer_record_enable(buffer);
2388
2389 #ifdef CONFIG_TRACER_MAX_TRACE
2390         buffer = global_trace.max_buffer.buffer;
2391         if (buffer)
2392                 ring_buffer_record_enable(buffer);
2393 #endif
2394
2395         arch_spin_unlock(&global_trace.max_lock);
2396
2397  out:
2398         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2399 }
2400
2401 static void tracing_start_tr(struct trace_array *tr)
2402 {
2403         struct trace_buffer *buffer;
2404         unsigned long flags;
2405
2406         if (tracing_disabled)
2407                 return;
2408
2409         /* If global, we need to also start the max tracer */
2410         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2411                 return tracing_start();
2412
2413         raw_spin_lock_irqsave(&tr->start_lock, flags);
2414
2415         if (--tr->stop_count) {
2416                 if (tr->stop_count < 0) {
2417                         /* Someone screwed up their debugging */
2418                         WARN_ON_ONCE(1);
2419                         tr->stop_count = 0;
2420                 }
2421                 goto out;
2422         }
2423
2424         buffer = tr->array_buffer.buffer;
2425         if (buffer)
2426                 ring_buffer_record_enable(buffer);
2427
2428  out:
2429         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2430 }
2431
2432 /**
2433  * tracing_stop - quick stop of the tracer
2434  *
2435  * Lightweight way to stop tracing. Use in conjunction with
2436  * tracing_start.
2437  */
2438 void tracing_stop(void)
2439 {
2440         struct trace_buffer *buffer;
2441         unsigned long flags;
2442
2443         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2444         if (global_trace.stop_count++)
2445                 goto out;
2446
2447         /* Prevent the buffers from switching */
2448         arch_spin_lock(&global_trace.max_lock);
2449
2450         buffer = global_trace.array_buffer.buffer;
2451         if (buffer)
2452                 ring_buffer_record_disable(buffer);
2453
2454 #ifdef CONFIG_TRACER_MAX_TRACE
2455         buffer = global_trace.max_buffer.buffer;
2456         if (buffer)
2457                 ring_buffer_record_disable(buffer);
2458 #endif
2459
2460         arch_spin_unlock(&global_trace.max_lock);
2461
2462  out:
2463         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2464 }
2465
2466 static void tracing_stop_tr(struct trace_array *tr)
2467 {
2468         struct trace_buffer *buffer;
2469         unsigned long flags;
2470
2471         /* If global, we need to also stop the max tracer */
2472         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2473                 return tracing_stop();
2474
2475         raw_spin_lock_irqsave(&tr->start_lock, flags);
2476         if (tr->stop_count++)
2477                 goto out;
2478
2479         buffer = tr->array_buffer.buffer;
2480         if (buffer)
2481                 ring_buffer_record_disable(buffer);
2482
2483  out:
2484         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2485 }
2486
2487 static int trace_save_cmdline(struct task_struct *tsk)
2488 {
2489         unsigned tpid, idx;
2490
2491         /* treat recording of idle task as a success */
2492         if (!tsk->pid)
2493                 return 1;
2494
2495         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2496
2497         /*
2498          * It's not the end of the world if we don't get
2499          * the lock, but we also don't want to spin
2500          * nor do we want to disable interrupts,
2501          * so if we miss here, then better luck next time.
2502          *
2503          * This is called from within the scheduler and wakeup paths, so
2504          * interrupts had better be disabled and the run queue lock held.
2505          */
2506         lockdep_assert_preemption_disabled();
2507         if (!arch_spin_trylock(&trace_cmdline_lock))
2508                 return 0;
2509
2510         idx = savedcmd->map_pid_to_cmdline[tpid];
2511         if (idx == NO_CMDLINE_MAP) {
2512                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2513
2514                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2515                 savedcmd->cmdline_idx = idx;
2516         }
2517
2518         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2519         set_cmdline(idx, tsk->comm);
2520
2521         arch_spin_unlock(&trace_cmdline_lock);
2522
2523         return 1;
2524 }
2525
2526 static void __trace_find_cmdline(int pid, char comm[])
2527 {
2528         unsigned map;
2529         int tpid;
2530
2531         if (!pid) {
2532                 strcpy(comm, "<idle>");
2533                 return;
2534         }
2535
2536         if (WARN_ON_ONCE(pid < 0)) {
2537                 strcpy(comm, "<XXX>");
2538                 return;
2539         }
2540
2541         tpid = pid & (PID_MAX_DEFAULT - 1);
2542         map = savedcmd->map_pid_to_cmdline[tpid];
2543         if (map != NO_CMDLINE_MAP) {
2544                 tpid = savedcmd->map_cmdline_to_pid[map];
2545                 if (tpid == pid) {
2546                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2547                         return;
2548                 }
2549         }
2550         strcpy(comm, "<...>");
2551 }
2552
2553 void trace_find_cmdline(int pid, char comm[])
2554 {
2555         preempt_disable();
2556         arch_spin_lock(&trace_cmdline_lock);
2557
2558         __trace_find_cmdline(pid, comm);
2559
2560         arch_spin_unlock(&trace_cmdline_lock);
2561         preempt_enable();
2562 }
2563
2564 static int *trace_find_tgid_ptr(int pid)
2565 {
2566         /*
2567          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2568          * if we observe a non-NULL tgid_map then we also observe the correct
2569          * tgid_map_max.
2570          */
2571         int *map = smp_load_acquire(&tgid_map);
2572
2573         if (unlikely(!map || pid > tgid_map_max))
2574                 return NULL;
2575
2576         return &map[pid];
2577 }
2578
2579 int trace_find_tgid(int pid)
2580 {
2581         int *ptr = trace_find_tgid_ptr(pid);
2582
2583         return ptr ? *ptr : 0;
2584 }
2585
2586 static int trace_save_tgid(struct task_struct *tsk)
2587 {
2588         int *ptr;
2589
2590         /* treat recording of idle task as a success */
2591         if (!tsk->pid)
2592                 return 1;
2593
2594         ptr = trace_find_tgid_ptr(tsk->pid);
2595         if (!ptr)
2596                 return 0;
2597
2598         *ptr = tsk->tgid;
2599         return 1;
2600 }
2601
2602 static bool tracing_record_taskinfo_skip(int flags)
2603 {
2604         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2605                 return true;
2606         if (!__this_cpu_read(trace_taskinfo_save))
2607                 return true;
2608         return false;
2609 }
2610
2611 /**
2612  * tracing_record_taskinfo - record the task info of a task
2613  *
2614  * @task:  task to record
2615  * @flags: TRACE_RECORD_CMDLINE for recording comm
2616  *         TRACE_RECORD_TGID for recording tgid
2617  */
2618 void tracing_record_taskinfo(struct task_struct *task, int flags)
2619 {
2620         bool done;
2621
2622         if (tracing_record_taskinfo_skip(flags))
2623                 return;
2624
2625         /*
2626          * Record as much task information as possible. If some fail, continue
2627          * to try to record the others.
2628          */
2629         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2630         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2631
2632         /* If recording any information failed, retry again soon. */
2633         if (!done)
2634                 return;
2635
2636         __this_cpu_write(trace_taskinfo_save, false);
2637 }
2638
2639 /**
2640  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2641  *
2642  * @prev: previous task during sched_switch
2643  * @next: next task during sched_switch
2644  * @flags: TRACE_RECORD_CMDLINE for recording comm
2645  *         TRACE_RECORD_TGID for recording tgid
2646  */
2647 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2648                                           struct task_struct *next, int flags)
2649 {
2650         bool done;
2651
2652         if (tracing_record_taskinfo_skip(flags))
2653                 return;
2654
2655         /*
2656          * Record as much task information as possible. If some fail, continue
2657          * to try to record the others.
2658          */
2659         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2660         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2661         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2662         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2663
2664         /* If recording any information failed, retry again soon. */
2665         if (!done)
2666                 return;
2667
2668         __this_cpu_write(trace_taskinfo_save, false);
2669 }
2670
2671 /* Helpers to record a specific task information */
2672 void tracing_record_cmdline(struct task_struct *task)
2673 {
2674         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2675 }
2676
2677 void tracing_record_tgid(struct task_struct *task)
2678 {
2679         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2680 }
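
/*
 * A minimal sketch of the record/lookup pairing above (hypothetical
 * helper name): events record only the pid, and the comm and tgid
 * caches are consulted later, when the trace is printed.
 */
static void __maybe_unused example_resolve_task(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);	/* fills "<...>" if not cached */
	pr_debug("pid %d comm %s tgid %d\n", pid, comm, trace_find_tgid(pid));
}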
2681
2682 /*
2683  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2684  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2685  * simplifies those functions and keeps them in sync.
2686  */
2687 enum print_line_t trace_handle_return(struct trace_seq *s)
2688 {
2689         return trace_seq_has_overflowed(s) ?
2690                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2691 }
2692 EXPORT_SYMBOL_GPL(trace_handle_return);
2693
2694 static unsigned short migration_disable_value(void)
2695 {
2696 #if defined(CONFIG_SMP)
2697         return current->migration_disabled;
2698 #else
2699         return 0;
2700 #endif
2701 }
2702
2703 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2704 {
2705         unsigned int trace_flags = irqs_status;
2706         unsigned int pc;
2707
2708         pc = preempt_count();
2709
2710         if (pc & NMI_MASK)
2711                 trace_flags |= TRACE_FLAG_NMI;
2712         if (pc & HARDIRQ_MASK)
2713                 trace_flags |= TRACE_FLAG_HARDIRQ;
2714         if (in_serving_softirq())
2715                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2716         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2717                 trace_flags |= TRACE_FLAG_BH_OFF;
2718
2719         if (tif_need_resched())
2720                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2721         if (test_preempt_need_resched())
2722                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2723         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2724                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2725 }
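
/*
 * A minimal sketch decoding the packed word built above (hypothetical
 * helper name): bits 0-3 carry the clamped preempt count, bits 4-7 the
 * clamped migration-disable count, and bits 16 and up the TRACE_FLAG_*
 * bits.
 */
static void __maybe_unused example_decode_trace_ctx(unsigned int trace_ctx)
{
	unsigned int preempt_cnt = trace_ctx & 0xf;
	unsigned int migrate_cnt = (trace_ctx >> 4) & 0xf;
	unsigned int flags = trace_ctx >> 16;

	pr_debug("preempt=%u migrate=%u flags=0x%x\n",
		 preempt_cnt, migrate_cnt, flags);
}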
2726
2727 struct ring_buffer_event *
2728 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2729                           int type,
2730                           unsigned long len,
2731                           unsigned int trace_ctx)
2732 {
2733         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2734 }
2735
2736 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2737 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2738 static int trace_buffered_event_ref;
2739
2740 /**
2741  * trace_buffered_event_enable - enable buffering events
2742  *
2743  * When events are being filtered, it is quicker to use a temporary
2744  * buffer to write the event data into if there's a likely chance
2745  * that it will not be committed. Discarding an event from the ring
2746  * buffer is not as fast as committing, and is much slower than
2747  * copying the data and then committing it.
2748  *
2749  * When an event is to be filtered, allocate per-CPU buffers to
2750  * write the event data into; if the event is filtered and discarded
2751  * it is simply dropped, otherwise the entire event is committed in
2752  * one shot.
2753  */
2754 void trace_buffered_event_enable(void)
2755 {
2756         struct ring_buffer_event *event;
2757         struct page *page;
2758         int cpu;
2759
2760         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2761
2762         if (trace_buffered_event_ref++)
2763                 return;
2764
2765         for_each_tracing_cpu(cpu) {
2766                 page = alloc_pages_node(cpu_to_node(cpu),
2767                                         GFP_KERNEL | __GFP_NORETRY, 0);
2768                 if (!page)
2769                         goto failed;
2770
2771                 event = page_address(page);
2772                 memset(event, 0, sizeof(*event));
2773
2774                 per_cpu(trace_buffered_event, cpu) = event;
2775
2776                 preempt_disable();
2777                 if (cpu == smp_processor_id() &&
2778                     __this_cpu_read(trace_buffered_event) !=
2779                     per_cpu(trace_buffered_event, cpu))
2780                         WARN_ON_ONCE(1);
2781                 preempt_enable();
2782         }
2783
2784         return;
2785  failed:
2786         trace_buffered_event_disable();
2787 }
2788
2789 static void enable_trace_buffered_event(void *data)
2790 {
2791         /* Probably not needed, but do it anyway */
2792         smp_rmb();
2793         this_cpu_dec(trace_buffered_event_cnt);
2794 }
2795
2796 static void disable_trace_buffered_event(void *data)
2797 {
2798         this_cpu_inc(trace_buffered_event_cnt);
2799 }
2800
2801 /**
2802  * trace_buffered_event_disable - disable buffering events
2803  *
2804  * When a filter is removed, it is faster to not use the buffered
2805  * events, and to commit directly into the ring buffer. Free up
2806  * the temp buffers when there are no more users. This requires
2807  * special synchronization with current events.
2808  */
2809 void trace_buffered_event_disable(void)
2810 {
2811         int cpu;
2812
2813         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2814
2815         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2816                 return;
2817
2818         if (--trace_buffered_event_ref)
2819                 return;
2820
2821         preempt_disable();
2822         /* For each CPU, set the buffer as used. */
2823         smp_call_function_many(tracing_buffer_mask,
2824                                disable_trace_buffered_event, NULL, 1);
2825         preempt_enable();
2826
2827         /* Wait for all current users to finish */
2828         synchronize_rcu();
2829
2830         for_each_tracing_cpu(cpu) {
2831                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2832                 per_cpu(trace_buffered_event, cpu) = NULL;
2833         }
2834         /*
2835          * Make sure trace_buffered_event is NULL before clearing
2836          * trace_buffered_event_cnt.
2837          */
2838         smp_wmb();
2839
2840         preempt_disable();
2841         /* Do the work on each cpu */
2842         smp_call_function_many(tracing_buffer_mask,
2843                                enable_trace_buffered_event, NULL, 1);
2844         preempt_enable();
2845 }
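
/*
 * A minimal sketch of how the two helpers above are meant to be paired,
 * assuming a caller that already holds event_mutex (as both functions
 * assert); the filter setup and teardown themselves are elided:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		<- filter attached
 *	...
 *	trace_buffered_event_disable();		<- filter removed
 *	mutex_unlock(&event_mutex);
 *
 * The reference count keeps the per-CPU event pages alive for as long
 * as at least one filter still needs them.
 */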
2846
2847 static struct trace_buffer *temp_buffer;
2848
2849 struct ring_buffer_event *
2850 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2851                           struct trace_event_file *trace_file,
2852                           int type, unsigned long len,
2853                           unsigned int trace_ctx)
2854 {
2855         struct ring_buffer_event *entry;
2856         struct trace_array *tr = trace_file->tr;
2857         int val;
2858
2859         *current_rb = tr->array_buffer.buffer;
2860
2861         if (!tr->no_filter_buffering_ref &&
2862             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2863                 preempt_disable_notrace();
2864                 /*
2865                  * Filtering is on, so try to use the per cpu buffer first.
2866                  * This buffer will simulate a ring_buffer_event,
2867                  * where the type_len is zero and the array[0] will
2868                  * hold the full length.
2869                  * (see include/linux/ring_buffer.h for details on
2870                  *  how the ring_buffer_event is structured).
2871                  *
2872                  * Using a temp buffer during filtering and copying it
2873                  * on a matched filter is quicker than writing directly
2874                  * into the ring buffer and then discarding it when
2875                  * it doesn't match. That is because the discard
2876                  * requires several atomic operations to get right.
2877                  * Copying on a match and doing nothing on a failed match
2878                  * is still quicker than writing directly with no copy but
2879                  * then having to discard from the ring buffer on a failed match.
2880                  */
2881                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2882                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2883
2884                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2885
2886                         /*
2887                          * Preemption is disabled, but interrupts and NMIs
2888                          * can still come in now. If that happens after
2889                          * the above increment, then it will have to go
2890                          * back to the old method of allocating the event
2891                          * on the ring buffer, and if the filter fails, it
2892                          * will have to call ring_buffer_discard_commit()
2893                          * to remove it.
2894                          *
2895                          * Need to also check the unlikely case that the
2896                          * length is bigger than the temp buffer size.
2897                          * If that happens, then the reserve is pretty much
2898                          * guaranteed to fail, as the ring buffer currently
2899                          * only allows events less than a page. But that may
2900                          * change in the future, so let the ring buffer reserve
2901                          * handle the failure in that case.
2902                          */
2903                         if (val == 1 && likely(len <= max_len)) {
2904                                 trace_event_setup(entry, type, trace_ctx);
2905                                 entry->array[0] = len;
2906                                 /* Return with preemption disabled */
2907                                 return entry;
2908                         }
2909                         this_cpu_dec(trace_buffered_event_cnt);
2910                 }
2911                 /* __trace_buffer_lock_reserve() disables preemption */
2912                 preempt_enable_notrace();
2913         }
2914
2915         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2916                                             trace_ctx);
2917         /*
2918          * If tracing is off, but we have triggers enabled,
2919          * we still need to look at the event data. Use the temp_buffer
2920          * to store the trace event for the trigger to use. It's recursion
2921          * safe and will not be recorded anywhere.
2922          */
2923         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2924                 *current_rb = temp_buffer;
2925                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2926                                                     trace_ctx);
2927         }
2928         return entry;
2929 }
2930 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2931
2932 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2933 static DEFINE_MUTEX(tracepoint_printk_mutex);
2934
2935 static void output_printk(struct trace_event_buffer *fbuffer)
2936 {
2937         struct trace_event_call *event_call;
2938         struct trace_event_file *file;
2939         struct trace_event *event;
2940         unsigned long flags;
2941         struct trace_iterator *iter = tracepoint_print_iter;
2942
2943         /* We should never get here if iter is NULL */
2944         if (WARN_ON_ONCE(!iter))
2945                 return;
2946
2947         event_call = fbuffer->trace_file->event_call;
2948         if (!event_call || !event_call->event.funcs ||
2949             !event_call->event.funcs->trace)
2950                 return;
2951
2952         file = fbuffer->trace_file;
2953         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2954             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2955              !filter_match_preds(file->filter, fbuffer->entry)))
2956                 return;
2957
2958         event = &fbuffer->trace_file->event_call->event;
2959
2960         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2961         trace_seq_init(&iter->seq);
2962         iter->ent = fbuffer->entry;
2963         event_call->event.funcs->trace(iter, 0, event);
2964         trace_seq_putc(&iter->seq, 0);
2965         printk("%s", iter->seq.buffer);
2966
2967         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2968 }
2969
2970 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2971                              void *buffer, size_t *lenp,
2972                              loff_t *ppos)
2973 {
2974         int save_tracepoint_printk;
2975         int ret;
2976
2977         mutex_lock(&tracepoint_printk_mutex);
2978         save_tracepoint_printk = tracepoint_printk;
2979
2980         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2981
2982         /*
2983          * This will force exiting early, as tracepoint_printk
2984          * is always zero when tracepoint_print_iter is not allocated.
2985          */
2986         if (!tracepoint_print_iter)
2987                 tracepoint_printk = 0;
2988
2989         if (save_tracepoint_printk == tracepoint_printk)
2990                 goto out;
2991
2992         if (tracepoint_printk)
2993                 static_key_enable(&tracepoint_printk_key.key);
2994         else
2995                 static_key_disable(&tracepoint_printk_key.key);
2996
2997  out:
2998         mutex_unlock(&tracepoint_printk_mutex);
2999
3000         return ret;
3001 }
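
/*
 * The knob handled above is the "kernel.tracepoint_printk" sysctl; when
 * the tp_printk boot parameter has allocated tracepoint_print_iter, it
 * can be flipped at run time with, for example:
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 */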
3002
3003 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3004 {
3005         enum event_trigger_type tt = ETT_NONE;
3006         struct trace_event_file *file = fbuffer->trace_file;
3007
3008         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3009                         fbuffer->entry, &tt))
3010                 goto discard;
3011
3012         if (static_key_false(&tracepoint_printk_key.key))
3013                 output_printk(fbuffer);
3014
3015         if (static_branch_unlikely(&trace_event_exports_enabled))
3016                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3017
3018         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3019                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3020
3021 discard:
3022         if (tt)
3023                 event_triggers_post_call(file, tt);
3024
3025 }
3026 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3027
3028 /*
3029  * Skip 3:
3030  *
3031  *   trace_buffer_unlock_commit_regs()
3032  *   trace_event_buffer_commit()
3033  *   trace_event_raw_event_xxx()
3034  */
3035 # define STACK_SKIP 3
3036
3037 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3038                                      struct trace_buffer *buffer,
3039                                      struct ring_buffer_event *event,
3040                                      unsigned int trace_ctx,
3041                                      struct pt_regs *regs)
3042 {
3043         __buffer_unlock_commit(buffer, event);
3044
3045         /*
3046          * If regs is not set, then skip the necessary functions.
3047          * Note, we can still get here via blktrace, wakeup tracer
3048          * and mmiotrace, but that's ok if they lose a function or
3049          * two. They are not that meaningful.
3050          */
3051         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3052         ftrace_trace_userstack(tr, buffer, trace_ctx);
3053 }
3054
3055 /*
3056  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3057  */
3058 void
3059 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3060                                    struct ring_buffer_event *event)
3061 {
3062         __buffer_unlock_commit(buffer, event);
3063 }
3064
3065 void
3066 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3067                parent_ip, unsigned int trace_ctx)
3068 {
3069         struct trace_event_call *call = &event_function;
3070         struct trace_buffer *buffer = tr->array_buffer.buffer;
3071         struct ring_buffer_event *event;
3072         struct ftrace_entry *entry;
3073
3074         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3075                                             trace_ctx);
3076         if (!event)
3077                 return;
3078         entry   = ring_buffer_event_data(event);
3079         entry->ip                       = ip;
3080         entry->parent_ip                = parent_ip;
3081
3082         if (!call_filter_check_discard(call, entry, buffer, event)) {
3083                 if (static_branch_unlikely(&trace_function_exports_enabled))
3084                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3085                 __buffer_unlock_commit(buffer, event);
3086         }
3087 }
3088
3089 #ifdef CONFIG_STACKTRACE
3090
3091 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3092 #define FTRACE_KSTACK_NESTING   4
3093
3094 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3095
3096 struct ftrace_stack {
3097         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3098 };
3099
3100
3101 struct ftrace_stacks {
3102         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3103 };
3104
3105 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3106 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3107
3108 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3109                                  unsigned int trace_ctx,
3110                                  int skip, struct pt_regs *regs)
3111 {
3112         struct trace_event_call *call = &event_kernel_stack;
3113         struct ring_buffer_event *event;
3114         unsigned int size, nr_entries;
3115         struct ftrace_stack *fstack;
3116         struct stack_entry *entry;
3117         int stackidx;
3118
3119         /*
3120          * Add one, for this function and the call to stack_trace_save().
3121          * If regs is set, then these functions will not be in the way.
3122          */
3123 #ifndef CONFIG_UNWINDER_ORC
3124         if (!regs)
3125                 skip++;
3126 #endif
3127
3128         preempt_disable_notrace();
3129
3130         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3131
3132         /* This should never happen. If it does, yell once and skip */
3133         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3134                 goto out;
3135
3136         /*
3137          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3138          * interrupt will either see the value pre-increment or
3139          * post-increment. If the interrupt happens pre-increment, it will
3140          * have restored the counter when it returns. We just need a
3141          * barrier to keep gcc from moving things around.
3142          */
3143         barrier();
3144
3145         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3146         size = ARRAY_SIZE(fstack->calls);
3147
3148         if (regs) {
3149                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3150                                                    size, skip);
3151         } else {
3152                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3153         }
3154
3155         size = nr_entries * sizeof(unsigned long);
3156         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3157                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3158                                     trace_ctx);
3159         if (!event)
3160                 goto out;
3161         entry = ring_buffer_event_data(event);
3162
3163         memcpy(&entry->caller, fstack->calls, size);
3164         entry->size = nr_entries;
3165
3166         if (!call_filter_check_discard(call, entry, buffer, event))
3167                 __buffer_unlock_commit(buffer, event);
3168
3169  out:
3170         /* Again, don't let gcc optimize things here */
3171         barrier();
3172         __this_cpu_dec(ftrace_stack_reserve);
3173         preempt_enable_notrace();
3174
3175 }
3176
3177 static inline void ftrace_trace_stack(struct trace_array *tr,
3178                                       struct trace_buffer *buffer,
3179                                       unsigned int trace_ctx,
3180                                       int skip, struct pt_regs *regs)
3181 {
3182         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3183                 return;
3184
3185         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3186 }
3187
3188 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3189                    int skip)
3190 {
3191         struct trace_buffer *buffer = tr->array_buffer.buffer;
3192
3193         if (rcu_is_watching()) {
3194                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3195                 return;
3196         }
3197
3198         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3199                 return;
3200
3201         /*
3202          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3203          * but if the above rcu_is_watching() failed, then the NMI
3204          * triggered someplace critical, and ct_irq_enter() should
3205          * not be called from NMI.
3206          */
3207         if (unlikely(in_nmi()))
3208                 return;
3209
3210         ct_irq_enter_irqson();
3211         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3212         ct_irq_exit_irqson();
3213 }
3214
3215 /**
3216  * trace_dump_stack - record a stack back trace in the trace buffer
3217  * @skip: Number of functions to skip (helper handlers)
3218  */
3219 void trace_dump_stack(int skip)
3220 {
3221         if (tracing_disabled || tracing_selftest_running)
3222                 return;
3223
3224 #ifndef CONFIG_UNWINDER_ORC
3225         /* Skip 1 to skip this function. */
3226         skip++;
3227 #endif
3228         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3229                              tracing_gen_ctx(), skip, NULL);
3230 }
3231 EXPORT_SYMBOL_GPL(trace_dump_stack);
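
/*
 * A minimal usage sketch of trace_dump_stack() (the surrounding function
 * is hypothetical): calling it with skip == 0 records the caller's
 * backtrace into the global trace buffer, where it shows up as a
 * stacktrace entry in the "trace" file.
 */
static void __maybe_unused example_mark_path(void)
{
	trace_dump_stack(0);
}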
3232
3233 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3234 static DEFINE_PER_CPU(int, user_stack_count);
3235
3236 static void
3237 ftrace_trace_userstack(struct trace_array *tr,
3238                        struct trace_buffer *buffer, unsigned int trace_ctx)
3239 {
3240         struct trace_event_call *call = &event_user_stack;
3241         struct ring_buffer_event *event;
3242         struct userstack_entry *entry;
3243
3244         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3245                 return;
3246
3247         /*
3248          * NMIs can not handle page faults, even with fix ups.
	 * Saving the user stack can (and often does) fault.
3250          */
3251         if (unlikely(in_nmi()))
3252                 return;
3253
3254         /*
3255          * prevent recursion, since the user stack tracing may
3256          * trigger other kernel events.
3257          */
3258         preempt_disable();
3259         if (__this_cpu_read(user_stack_count))
3260                 goto out;
3261
3262         __this_cpu_inc(user_stack_count);
3263
3264         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3265                                             sizeof(*entry), trace_ctx);
3266         if (!event)
3267                 goto out_drop_count;
3268         entry   = ring_buffer_event_data(event);
3269
3270         entry->tgid             = current->tgid;
3271         memset(&entry->caller, 0, sizeof(entry->caller));
3272
3273         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3274         if (!call_filter_check_discard(call, entry, buffer, event))
3275                 __buffer_unlock_commit(buffer, event);
3276
3277  out_drop_count:
3278         __this_cpu_dec(user_stack_count);
3279  out:
3280         preempt_enable();
3281 }
3282 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3283 static void ftrace_trace_userstack(struct trace_array *tr,
3284                                    struct trace_buffer *buffer,
3285                                    unsigned int trace_ctx)
3286 {
3287 }
3288 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3289
3290 #endif /* CONFIG_STACKTRACE */
3291
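/*
 * A func_repeats_entry stores the 64-bit timestamp delta of the repeated
 * calls in two 32-bit halves; split the delta accordingly.
 */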
3292 static inline void
3293 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3294                           unsigned long long delta)
3295 {
3296         entry->bottom_delta_ts = delta & U32_MAX;
3297         entry->top_delta_ts = (delta >> 32);
3298 }
3299
3300 void trace_last_func_repeats(struct trace_array *tr,
3301                              struct trace_func_repeats *last_info,
3302                              unsigned int trace_ctx)
3303 {
3304         struct trace_buffer *buffer = tr->array_buffer.buffer;
3305         struct func_repeats_entry *entry;
3306         struct ring_buffer_event *event;
3307         u64 delta;
3308
3309         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3310                                             sizeof(*entry), trace_ctx);
3311         if (!event)
3312                 return;
3313
3314         delta = ring_buffer_event_time_stamp(buffer, event) -
3315                 last_info->ts_last_call;
3316
3317         entry = ring_buffer_event_data(event);
3318         entry->ip = last_info->ip;
3319         entry->parent_ip = last_info->parent_ip;
3320         entry->count = last_info->count;
3321         func_repeats_set_delta_ts(entry, delta);
3322
3323         __buffer_unlock_commit(buffer, event);
3324 }
3325
3326 /* created for use with alloc_percpu */
3327 struct trace_buffer_struct {
3328         int nesting;
3329         char buffer[4][TRACE_BUF_SIZE];
3330 };
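
/*
 * Four buffers per CPU: a trace_printk() in one context can be interrupted
 * by one in softirq, hardirq and then NMI context, so up to four levels of
 * nesting can be live on a CPU at the same time (see get_trace_buf()).
 */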
3331
3332 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3333
3334 /*
3335  * This allows for lockless recording.  If we're nested too deeply, then
3336  * this returns NULL.
3337  */
3338 static char *get_trace_buf(void)
3339 {
3340         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3341
3342         if (!trace_percpu_buffer || buffer->nesting >= 4)
3343                 return NULL;
3344
3345         buffer->nesting++;
3346
3347         /* Interrupts must see nesting incremented before we use the buffer */
3348         barrier();
3349         return &buffer->buffer[buffer->nesting - 1][0];
3350 }
3351
3352 static void put_trace_buf(void)
3353 {
3354         /* Don't let the decrement of nesting leak before this */
3355         barrier();
3356         this_cpu_dec(trace_percpu_buffer->nesting);
3357 }
3358
3359 static int alloc_percpu_trace_buffer(void)
3360 {
3361         struct trace_buffer_struct __percpu *buffers;
3362
3363         if (trace_percpu_buffer)
3364                 return 0;
3365
3366         buffers = alloc_percpu(struct trace_buffer_struct);
3367         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3368                 return -ENOMEM;
3369
3370         trace_percpu_buffer = buffers;
3371         return 0;
3372 }
3373
3374 static int buffers_allocated;
3375
3376 void trace_printk_init_buffers(void)
3377 {
3378         if (buffers_allocated)
3379                 return;
3380
3381         if (alloc_percpu_trace_buffer())
3382                 return;
3383
3384         /* trace_printk() is for debug use only. Don't use it in production. */
3385
3386         pr_warn("\n");
3387         pr_warn("**********************************************************\n");
3388         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3389         pr_warn("**                                                      **\n");
3390         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3391         pr_warn("**                                                      **\n");
3392         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3393         pr_warn("** unsafe for production use.                           **\n");
3394         pr_warn("**                                                      **\n");
3395         pr_warn("** If you see this message and you are not debugging    **\n");
3396         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3397         pr_warn("**                                                      **\n");
3398         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3399         pr_warn("**********************************************************\n");
3400
	/* Expand the buffers to the set size */
3402         tracing_update_buffers();
3403
3404         buffers_allocated = 1;
3405
3406         /*
	 * trace_printk_init_buffers() can be called by modules.
	 * If that happens, then we need to start cmdline recording
	 * directly here. If the global_trace.array_buffer.buffer is
	 * already allocated, then this was called by module code.
3411          */
3412         if (global_trace.array_buffer.buffer)
3413                 tracing_start_cmdline_record();
3414 }
3415 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3416
3417 void trace_printk_start_comm(void)
3418 {
3419         /* Start tracing comms if trace printk is set */
3420         if (!buffers_allocated)
3421                 return;
3422         tracing_start_cmdline_record();
3423 }
3424
3425 static void trace_printk_start_stop_comm(int enabled)
3426 {
3427         if (!buffers_allocated)
3428                 return;
3429
3430         if (enabled)
3431                 tracing_start_cmdline_record();
3432         else
3433                 tracing_stop_cmdline_record();
3434 }
3435
3436 /**
3437  * trace_vbprintk - write binary msg to tracing buffer
3438  * @ip:    The address of the caller
3439  * @fmt:   The string format to write to the buffer
3440  * @args:  Arguments for @fmt
3441  */
3442 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3443 {
3444         struct trace_event_call *call = &event_bprint;
3445         struct ring_buffer_event *event;
3446         struct trace_buffer *buffer;
3447         struct trace_array *tr = &global_trace;
3448         struct bprint_entry *entry;
3449         unsigned int trace_ctx;
3450         char *tbuffer;
3451         int len = 0, size;
3452
3453         if (unlikely(tracing_selftest_running || tracing_disabled))
3454                 return 0;
3455
3456         /* Don't pollute graph traces with trace_vprintk internals */
3457         pause_graph_tracing();
3458
3459         trace_ctx = tracing_gen_ctx();
3460         preempt_disable_notrace();
3461
3462         tbuffer = get_trace_buf();
3463         if (!tbuffer) {
3464                 len = 0;
3465                 goto out_nobuffer;
3466         }
3467
3468         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3469
3470         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3471                 goto out_put;
3472
3473         size = sizeof(*entry) + sizeof(u32) * len;
3474         buffer = tr->array_buffer.buffer;
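	/* A trace_printk() can nest inside another write to this buffer */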
3475         ring_buffer_nest_start(buffer);
3476         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3477                                             trace_ctx);
3478         if (!event)
3479                 goto out;
3480         entry = ring_buffer_event_data(event);
3481         entry->ip                       = ip;
3482         entry->fmt                      = fmt;
3483
3484         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3485         if (!call_filter_check_discard(call, entry, buffer, event)) {
3486                 __buffer_unlock_commit(buffer, event);
3487                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3488         }
3489
3490 out:
3491         ring_buffer_nest_end(buffer);
3492 out_put:
3493         put_trace_buf();
3494
3495 out_nobuffer:
3496         preempt_enable_notrace();
3497         unpause_graph_tracing();
3498
3499         return len;
3500 }
3501 EXPORT_SYMBOL_GPL(trace_vbprintk);
3502
3503 __printf(3, 0)
3504 static int
3505 __trace_array_vprintk(struct trace_buffer *buffer,
3506                       unsigned long ip, const char *fmt, va_list args)
3507 {
3508         struct trace_event_call *call = &event_print;
3509         struct ring_buffer_event *event;
3510         int len = 0, size;
3511         struct print_entry *entry;
3512         unsigned int trace_ctx;
3513         char *tbuffer;
3514
3515         if (tracing_disabled || tracing_selftest_running)
3516                 return 0;
3517
3518         /* Don't pollute graph traces with trace_vprintk internals */
3519         pause_graph_tracing();
3520
3521         trace_ctx = tracing_gen_ctx();
3522         preempt_disable_notrace();
3523
	tbuffer = get_trace_buf();
3526         if (!tbuffer) {
3527                 len = 0;
3528                 goto out_nobuffer;
3529         }
3530
3531         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3532
3533         size = sizeof(*entry) + len + 1;
3534         ring_buffer_nest_start(buffer);
3535         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3536                                             trace_ctx);
3537         if (!event)
3538                 goto out;
3539         entry = ring_buffer_event_data(event);
3540         entry->ip = ip;
3541
3542         memcpy(&entry->buf, tbuffer, len + 1);
3543         if (!call_filter_check_discard(call, entry, buffer, event)) {
3544                 __buffer_unlock_commit(buffer, event);
3545                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3546         }
3547
3548 out:
3549         ring_buffer_nest_end(buffer);
3550         put_trace_buf();
3551
3552 out_nobuffer:
3553         preempt_enable_notrace();
3554         unpause_graph_tracing();
3555
3556         return len;
3557 }
3558
3559 __printf(3, 0)
3560 int trace_array_vprintk(struct trace_array *tr,
3561                         unsigned long ip, const char *fmt, va_list args)
3562 {
3563         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3564 }
3565
3566 /**
3567  * trace_array_printk - Print a message to a specific instance
3568  * @tr: The instance trace_array descriptor
3569  * @ip: The instruction pointer that this is called from.
3570  * @fmt: The format to print (printf format)
3571  *
 * If a subsystem sets up its own instance, it may printk strings into
 * its tracing instance buffer using this function. Note, this function
 * will not write into the top level buffer (use trace_printk() for that),
 * as the top level buffer should only contain events that can be
 * individually disabled. trace_printk() is only for debugging a kernel,
 * and should never be used in normal code.
3579  *
3580  * trace_array_printk() can be used, as it will not add noise to the
3581  * top level tracing buffer.
3582  *
3583  * Note, trace_array_init_printk() must be called on @tr before this
3584  * can be used.
3585  */
3586 __printf(3, 0)
3587 int trace_array_printk(struct trace_array *tr,
3588                        unsigned long ip, const char *fmt, ...)
3589 {
3590         int ret;
3591         va_list ap;
3592
3593         if (!tr)
3594                 return -ENOENT;
3595
3596         /* This is only allowed for created instances */
3597         if (tr == &global_trace)
3598                 return 0;
3599
3600         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3601                 return 0;
3602
3603         va_start(ap, fmt);
3604         ret = trace_array_vprintk(tr, ip, fmt, ap);
3605         va_end(ap);
3606         return ret;
3607 }
3608 EXPORT_SYMBOL_GPL(trace_array_printk);
3609
3610 /**
3611  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3612  * @tr: The trace array to initialize the buffers for
3613  *
3614  * As trace_array_printk() only writes into instances, they are OK to
3615  * have in the kernel (unlike trace_printk()). This needs to be called
3616  * before trace_array_printk() can be used on a trace_array.
3617  */
3618 int trace_array_init_printk(struct trace_array *tr)
3619 {
3620         if (!tr)
3621                 return -ENOENT;
3622
3623         /* This is only allowed for created instances */
3624         if (tr == &global_trace)
3625                 return -EINVAL;
3626
3627         return alloc_percpu_trace_buffer();
3628 }
3629 EXPORT_SYMBOL_GPL(trace_array_init_printk);
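
/*
 * Illustrative sketch only (not an in-tree caller) of printing into a
 * private instance; the instance name "my_inst" is made up, and this
 * assumes the single-argument form of trace_array_get_by_name():
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_inst");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 *
 * Output only appears if the instance's "printk" trace option is set.
 */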
3630
3631 __printf(3, 4)
3632 int trace_array_printk_buf(struct trace_buffer *buffer,
3633                            unsigned long ip, const char *fmt, ...)
3634 {
3635         int ret;
3636         va_list ap;
3637
3638         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3639                 return 0;
3640
3641         va_start(ap, fmt);
3642         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3643         va_end(ap);
3644         return ret;
3645 }
3646
3647 __printf(2, 0)
3648 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3649 {
3650         return trace_array_vprintk(&global_trace, ip, fmt, args);
3651 }
3652 EXPORT_SYMBOL_GPL(trace_vprintk);
3653
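/* Advance the iterator one entry on its current CPU's buffer iterator. */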
3654 static void trace_iterator_increment(struct trace_iterator *iter)
3655 {
3656         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3657
3658         iter->idx++;
3659         if (buf_iter)
3660                 ring_buffer_iter_advance(buf_iter);
3661 }
3662
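/*
 * Peek at the next entry on @cpu without consuming it. The entry's
 * timestamp is returned in @ts and, if @lost_events is given, whether
 * events were dropped before it.
 */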
3663 static struct trace_entry *
3664 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3665                 unsigned long *lost_events)
3666 {
3667         struct ring_buffer_event *event;
3668         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3669
3670         if (buf_iter) {
3671                 event = ring_buffer_iter_peek(buf_iter, ts);
3672                 if (lost_events)
3673                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3674                                 (unsigned long)-1 : 0;
3675         } else {
3676                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3677                                          lost_events);
3678         }
3679
3680         if (event) {
3681                 iter->ent_size = ring_buffer_event_length(event);
3682                 return ring_buffer_event_data(event);
3683         }
3684         iter->ent_size = 0;
3685         return NULL;
3686 }
3687
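/*
 * Peek at every (or the selected) CPU buffer and return the entry with
 * the oldest timestamp, along with its CPU, timestamp and lost-event
 * count.
 */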
3688 static struct trace_entry *
3689 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3690                   unsigned long *missing_events, u64 *ent_ts)
3691 {
3692         struct trace_buffer *buffer = iter->array_buffer->buffer;
3693         struct trace_entry *ent, *next = NULL;
3694         unsigned long lost_events = 0, next_lost = 0;
3695         int cpu_file = iter->cpu_file;
3696         u64 next_ts = 0, ts;
3697         int next_cpu = -1;
3698         int next_size = 0;
3699         int cpu;
3700
3701         /*
	 * If we are in a per_cpu trace file, don't bother iterating over
	 * all the CPUs; peek at that one CPU directly.
3704          */
3705         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3706                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3707                         return NULL;
3708                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3709                 if (ent_cpu)
3710                         *ent_cpu = cpu_file;
3711
3712                 return ent;
3713         }
3714
3715         for_each_tracing_cpu(cpu) {
3716
3717                 if (ring_buffer_empty_cpu(buffer, cpu))
3718                         continue;
3719
3720                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3721
3722                 /*
3723                  * Pick the entry with the smallest timestamp:
3724                  */
3725                 if (ent && (!next || ts < next_ts)) {
3726                         next = ent;
3727                         next_cpu = cpu;
3728                         next_ts = ts;
3729                         next_lost = lost_events;
3730                         next_size = iter->ent_size;
3731                 }
3732         }
3733
3734         iter->ent_size = next_size;
3735
3736         if (ent_cpu)
3737                 *ent_cpu = next_cpu;
3738
3739         if (ent_ts)
3740                 *ent_ts = next_ts;
3741
3742         if (missing_events)
3743                 *missing_events = next_lost;
3744
3745         return next;
3746 }
3747
3748 #define STATIC_FMT_BUF_SIZE     128
3749 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3750
3751 char *trace_iter_expand_format(struct trace_iterator *iter)
3752 {
3753         char *tmp;
3754
3755         /*
	 * iter->tr is NULL when used with tp_printk, which means this
	 * can get called where it is not safe to call krealloc().
3758          */
3759         if (!iter->tr || iter->fmt == static_fmt_buf)
3760                 return NULL;
3761
3762         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3763                        GFP_KERNEL);
3764         if (tmp) {
3765                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3766                 iter->fmt = tmp;
3767         }
3768
3769         return tmp;
3770 }
3771
3772 /* Returns true if the string is safe to dereference from an event */
3773 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3774                            bool star, int len)
3775 {
3776         unsigned long addr = (unsigned long)str;
3777         struct trace_event *trace_event;
3778         struct trace_event_call *event;
3779
3780         /* Ignore strings with no length */
3781         if (star && !len)
3782                 return true;
3783
3784         /* OK if part of the event data */
3785         if ((addr >= (unsigned long)iter->ent) &&
3786             (addr < (unsigned long)iter->ent + iter->ent_size))
3787                 return true;
3788
3789         /* OK if part of the temp seq buffer */
3790         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3791             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3792                 return true;
3793
3794         /* Core rodata can not be freed */
3795         if (is_kernel_rodata(addr))
3796                 return true;
3797
3798         if (trace_is_tracepoint_string(str))
3799                 return true;
3800
3801         /*
3802          * Now this could be a module event, referencing core module
3803          * data, which is OK.
3804          */
3805         if (!iter->ent)
3806                 return false;
3807
3808         trace_event = ftrace_find_event(iter->ent->type);
3809         if (!trace_event)
3810                 return false;
3811
3812         event = container_of(trace_event, struct trace_event_call, event);
3813         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3814                 return false;
3815
3816         /* Would rather have rodata, but this will suffice */
3817         if (within_module_core(addr, event->module))
3818                 return true;
3819
3820         return false;
3821 }
3822
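/* Terminate the seq buffer and return its contents for diagnostics. */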
3823 static const char *show_buffer(struct trace_seq *s)
3824 {
3825         struct seq_buf *seq = &s->seq;
3826
3827         seq_buf_terminate(seq);
3828
3829         return seq->buffer;
3830 }
3831
3832 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3833
3834 static int test_can_verify_check(const char *fmt, ...)
3835 {
3836         char buf[16];
3837         va_list ap;
3838         int ret;
3839
3840         /*
	 * The verifier depends on vsnprintf() modifying the va_list that is
	 * passed to it, i.e. on the va_list being passed by reference. Some
	 * architectures (like x86_32) pass it by value, which means that
	 * vsnprintf() does not modify the caller's va_list, and the verifier
	 * would then need to understand every value that vsnprintf() can
	 * consume. If the va_list is passed by value, the verifier is
	 * disabled.
3848          */
3849         va_start(ap, fmt);
3850         vsnprintf(buf, 16, "%d", ap);
3851         ret = va_arg(ap, int);
3852         va_end(ap);
3853
3854         return ret;
3855 }
3856
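/*
 * Check whether vsnprintf() consumed an argument from the va_list above.
 * If it did not, this architecture passes va_list by value and the string
 * verifier in trace_check_vprintf() is turned off.
 */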
3857 static void test_can_verify(void)
3858 {
3859         if (!test_can_verify_check("%d %d", 0, 1)) {
3860                 pr_info("trace event string verifier disabled\n");
3861                 static_branch_inc(&trace_no_verify);
3862         }
3863 }
3864
3865 /**
3866  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3867  * @iter: The iterator that holds the seq buffer and the event being printed
3868  * @fmt: The format used to print the event
3869  * @ap: The va_list holding the data to print from @fmt.
3870  *
3871  * This writes the data into the @iter->seq buffer using the data from
3872  * @fmt and @ap. If the format has a %s, then the source of the string
3873  * is examined to make sure it is safe to print, otherwise it will
3874  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3875  * pointer.
3876  */
3877 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3878                          va_list ap)
3879 {
3880         const char *p = fmt;
3881         const char *str;
3882         int i, j;
3883
3884         if (WARN_ON_ONCE(!fmt))
3885                 return;
3886
3887         if (static_branch_unlikely(&trace_no_verify))
3888                 goto print;
3889
3890         /* Don't bother checking when doing a ftrace_dump() */
3891         if (iter->fmt == static_fmt_buf)
3892                 goto print;
3893
3894         while (*p) {
3895                 bool star = false;
3896                 int len = 0;
3897
3898                 j = 0;
3899
3900                 /* We only care about %s and variants */
3901                 for (i = 0; p[i]; i++) {
3902                         if (i + 1 >= iter->fmt_size) {
3903                                 /*
3904                                  * If we can't expand the copy buffer,
3905                                  * just print it.
3906                                  */
3907                                 if (!trace_iter_expand_format(iter))
3908                                         goto print;
3909                         }
3910
3911                         if (p[i] == '\\' && p[i+1]) {
3912                                 i++;
3913                                 continue;
3914                         }
3915                         if (p[i] == '%') {
3916                                 /* Need to test cases like %08.*s */
3917                                 for (j = 1; p[i+j]; j++) {
3918                                         if (isdigit(p[i+j]) ||
3919                                             p[i+j] == '.')
3920                                                 continue;
3921                                         if (p[i+j] == '*') {
3922                                                 star = true;
3923                                                 continue;
3924                                         }
3925                                         break;
3926                                 }
3927                                 if (p[i+j] == 's')
3928                                         break;
3929                                 star = false;
3930                         }
3931                         j = 0;
3932                 }
3933                 /* If no %s found then just print normally */
3934                 if (!p[i])
3935                         break;
3936
3937                 /* Copy up to the %s, and print that */
3938                 strncpy(iter->fmt, p, i);
3939                 iter->fmt[i] = '\0';
3940                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3941
3942                 /*
3943                  * If iter->seq is full, the above call no longer guarantees
3944                  * that ap is in sync with fmt processing, and further calls
3945                  * to va_arg() can return wrong positional arguments.
3946                  *
3947                  * Ensure that ap is no longer used in this case.
3948                  */
3949                 if (iter->seq.full) {
3950                         p = "";
3951                         break;
3952                 }
3953
3954                 if (star)
3955                         len = va_arg(ap, int);
3956
3957                 /* The ap now points to the string data of the %s */
3958                 str = va_arg(ap, const char *);
3959
3960                 /*
3961                  * If you hit this warning, it is likely that the
3962                  * trace event in question used %s on a string that
3963                  * was saved at the time of the event, but may not be
3964                  * around when the trace is read. Use __string(),
3965                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3966                  * instead. See samples/trace_events/trace-events-sample.h
3967                  * for reference.
3968                  */
3969                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3970                               "fmt: '%s' current_buffer: '%s'",
3971                               fmt, show_buffer(&iter->seq))) {
3972                         int ret;
3973
3974                         /* Try to safely read the string */
3975                         if (star) {
3976                                 if (len + 1 > iter->fmt_size)
3977                                         len = iter->fmt_size - 1;
3978                                 if (len < 0)
3979                                         len = 0;
3980                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3981                                 iter->fmt[len] = 0;
3982                                 star = false;
3983                         } else {
3984                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3985                                                                   iter->fmt_size);
3986                         }
3987                         if (ret < 0)
3988                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3989                         else
3990                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3991                                                  str, iter->fmt);
3992                         str = "[UNSAFE-MEMORY]";
3993                         strcpy(iter->fmt, "%s");
3994                 } else {
3995                         strncpy(iter->fmt, p + i, j + 1);
3996                         iter->fmt[j+1] = '\0';
3997                 }
3998                 if (star)
3999                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
4000                 else
4001                         trace_seq_printf(&iter->seq, iter->fmt, str);
4002
4003                 p += i + j + 1;
4004         }
4005  print:
4006         if (*p)
4007                 trace_seq_vprintf(&iter->seq, p, ap);
4008 }
4009
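/*
 * When pointer hashing is turned off (the hash-ptr trace option is
 * cleared), rewrite the event format so that each "%p" becomes "%px"
 * and raw pointer values are printed.
 */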
4010 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4011 {
4012         const char *p, *new_fmt;
4013         char *q;
4014
4015         if (WARN_ON_ONCE(!fmt))
4016                 return fmt;
4017
4018         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4019                 return fmt;
4020
4021         p = fmt;
4022         new_fmt = q = iter->fmt;
4023         while (*p) {
4024                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4025                         if (!trace_iter_expand_format(iter))
4026                                 return fmt;
4027
4028                         q += iter->fmt - new_fmt;
4029                         new_fmt = iter->fmt;
4030                 }
4031
4032                 *q++ = *p++;
4033
4034                 /* Replace %p with %px */
4035                 if (p[-1] == '%') {
4036                         if (p[0] == '%') {
4037                                 *q++ = *p++;
4038                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4039                                 *q++ = *p++;
4040                                 *q++ = 'x';
4041                         }
4042                 }
4043         }
4044         *q = '\0';
4045
4046         return new_fmt;
4047 }
4048
4049 #define STATIC_TEMP_BUF_SIZE    128
4050 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4051
4052 /* Find the next real entry, without updating the iterator itself */
4053 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4054                                           int *ent_cpu, u64 *ent_ts)
4055 {
4056         /* __find_next_entry will reset ent_size */
4057         int ent_size = iter->ent_size;
4058         struct trace_entry *entry;
4059
4060         /*
4061          * If called from ftrace_dump(), then the iter->temp buffer
4062          * will be the static_temp_buf and not created from kmalloc.
4063          * If the entry size is greater than the buffer, we can
4064          * not save it. Just return NULL in that case. This is only
4065          * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context().
4067          */
4068         if (iter->temp == static_temp_buf &&
4069             STATIC_TEMP_BUF_SIZE < ent_size)
4070                 return NULL;
4071
4072         /*
	 * __find_next_entry() may call peek_next_entry(), which may call
	 * ring_buffer_peek(), and that can make the contents of iter->ent
	 * undefined. Copy iter->ent now.
4076          */
4077         if (iter->ent && iter->ent != iter->temp) {
4078                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4079                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4080                         void *temp;
4081                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4082                         if (!temp)
4083                                 return NULL;
4084                         kfree(iter->temp);
4085                         iter->temp = temp;
4086                         iter->temp_size = iter->ent_size;
4087                 }
4088                 memcpy(iter->temp, iter->ent, iter->ent_size);
4089                 iter->ent = iter->temp;
4090         }
4091         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4092         /* Put back the original ent_size */
4093         iter->ent_size = ent_size;
4094
4095         return entry;
4096 }
4097
4098 /* Find the next real entry, and increment the iterator to the next entry */
4099 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4100 {
4101         iter->ent = __find_next_entry(iter, &iter->cpu,
4102                                       &iter->lost_events, &iter->ts);
4103
4104         if (iter->ent)
4105                 trace_iterator_increment(iter);
4106
4107         return iter->ent ? iter : NULL;
4108 }
4109
4110 static void trace_consume(struct trace_iterator *iter)
4111 {
4112         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4113                             &iter->lost_events);
4114 }
4115
4116 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4117 {
4118         struct trace_iterator *iter = m->private;
4119         int i = (int)*pos;
4120         void *ent;
4121
4122         WARN_ON_ONCE(iter->leftover);
4123
4124         (*pos)++;
4125
4126         /* can't go backwards */
4127         if (iter->idx > i)
4128                 return NULL;
4129
4130         if (iter->idx < 0)
4131                 ent = trace_find_next_entry_inc(iter);
4132         else
4133                 ent = iter;
4134
4135         while (ent && iter->idx < i)
4136                 ent = trace_find_next_entry_inc(iter);
4137
4138         iter->pos = *pos;
4139
4140         return ent;
4141 }
4142
4143 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4144 {
4145         struct ring_buffer_iter *buf_iter;
4146         unsigned long entries = 0;
4147         u64 ts;
4148
4149         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4150
4151         buf_iter = trace_buffer_iter(iter, cpu);
4152         if (!buf_iter)
4153                 return;
4154
4155         ring_buffer_iter_reset(buf_iter);
4156
4157         /*
	 * With the max latency tracers it can happen that a reset never
	 * took place on a CPU. This is evident from the timestamp being
	 * before the start of the buffer.
4161          */
4162         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4163                 if (ts >= iter->array_buffer->time_start)
4164                         break;
4165                 entries++;
4166                 ring_buffer_iter_advance(buf_iter);
4167         }
4168
4169         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4170 }
4171
4172 /*
 * The current tracer is copied to avoid taking a global lock
 * all around.
4175  */
4176 static void *s_start(struct seq_file *m, loff_t *pos)
4177 {
4178         struct trace_iterator *iter = m->private;
4179         struct trace_array *tr = iter->tr;
4180         int cpu_file = iter->cpu_file;
4181         void *p = NULL;
4182         loff_t l = 0;
4183         int cpu;
4184
4185         /*
	 * Copy the tracer to avoid using a global lock all around.
	 * iter->trace is a copy of current_trace, so the name pointer
	 * may be compared instead of doing a strcmp(), as iter->trace->name
	 * will point to the same string as current_trace->name.
4190          */
4191         mutex_lock(&trace_types_lock);
4192         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4193                 *iter->trace = *tr->current_trace;
4194         mutex_unlock(&trace_types_lock);
4195
4196 #ifdef CONFIG_TRACER_MAX_TRACE
4197         if (iter->snapshot && iter->trace->use_max_tr)
4198                 return ERR_PTR(-EBUSY);
4199 #endif
4200
4201         if (*pos != iter->pos) {
4202                 iter->ent = NULL;
4203                 iter->cpu = 0;
4204                 iter->idx = -1;
4205
4206                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4207                         for_each_tracing_cpu(cpu)
4208                                 tracing_iter_reset(iter, cpu);
4209                 } else
4210                         tracing_iter_reset(iter, cpu_file);
4211
4212                 iter->leftover = 0;
4213                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4214                         ;
4215
4216         } else {
4217                 /*
4218                  * If we overflowed the seq_file before, then we want
4219                  * to just reuse the trace_seq buffer again.
4220                  */
4221                 if (iter->leftover)
4222                         p = iter;
4223                 else {
4224                         l = *pos - 1;
4225                         p = s_next(m, p, &l);
4226                 }
4227         }
4228
4229         trace_event_read_lock();
4230         trace_access_lock(cpu_file);
4231         return p;
4232 }
4233
4234 static void s_stop(struct seq_file *m, void *p)
4235 {
4236         struct trace_iterator *iter = m->private;
4237
4238 #ifdef CONFIG_TRACER_MAX_TRACE
4239         if (iter->snapshot && iter->trace->use_max_tr)
4240                 return;
4241 #endif
4242
4243         trace_access_unlock(iter->cpu_file);
4244         trace_event_read_unlock();
4245 }
4246
4247 static void
4248 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4249                       unsigned long *entries, int cpu)
4250 {
4251         unsigned long count;
4252
4253         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4254         /*
4255          * If this buffer has skipped entries, then we hold all
4256          * entries for the trace and we need to ignore the
4257          * ones before the time stamp.
4258          */
4259         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4260                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4261                 /* total is the same as the entries */
4262                 *total = count;
4263         } else
4264                 *total = count +
4265                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4266         *entries = count;
4267 }
4268
4269 static void
4270 get_total_entries(struct array_buffer *buf,
4271                   unsigned long *total, unsigned long *entries)
4272 {
4273         unsigned long t, e;
4274         int cpu;
4275
4276         *total = 0;
4277         *entries = 0;
4278
4279         for_each_tracing_cpu(cpu) {
4280                 get_total_entries_cpu(buf, &t, &e, cpu);
4281                 *total += t;
4282                 *entries += e;
4283         }
4284 }
4285
4286 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4287 {
4288         unsigned long total, entries;
4289
4290         if (!tr)
4291                 tr = &global_trace;
4292
4293         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4294
4295         return entries;
4296 }
4297
4298 unsigned long trace_total_entries(struct trace_array *tr)
4299 {
4300         unsigned long total, entries;
4301
4302         if (!tr)
4303                 tr = &global_trace;
4304
4305         get_total_entries(&tr->array_buffer, &total, &entries);
4306
4307         return entries;
4308 }
4309
4310 static void print_lat_help_header(struct seq_file *m)
4311 {
4312         seq_puts(m, "#                    _------=> CPU#            \n"
4313                     "#                   / _-----=> irqs-off/BH-disabled\n"
4314                     "#                  | / _----=> need-resched    \n"
4315                     "#                  || / _---=> hardirq/softirq \n"
4316                     "#                  ||| / _--=> preempt-depth   \n"
4317                     "#                  |||| / _-=> migrate-disable \n"
4318                     "#                  ||||| /     delay           \n"
4319                     "#  cmd     pid     |||||| time  |   caller     \n"
4320                     "#     \\   /        ||||||  \\    |    /       \n");
4321 }
4322
4323 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4324 {
4325         unsigned long total;
4326         unsigned long entries;
4327
4328         get_total_entries(buf, &total, &entries);
4329         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4330                    entries, total, num_online_cpus());
4331         seq_puts(m, "#\n");
4332 }
4333
4334 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4335                                    unsigned int flags)
4336 {
4337         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4338
4339         print_event_info(buf, m);
4340
4341         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4342         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4343 }
4344
4345 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4346                                        unsigned int flags)
4347 {
4348         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4349         static const char space[] = "            ";
4350         int prec = tgid ? 12 : 2;
4351
4352         print_event_info(buf, m);
4353
4354         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4355         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4356         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4357         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4358         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4359         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4360         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4361         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4362 }
4363
4364 void
4365 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4366 {
4367         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4368         struct array_buffer *buf = iter->array_buffer;
4369         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4370         struct tracer *type = iter->trace;
4371         unsigned long entries;
4372         unsigned long total;
4373         const char *name = type->name;
4374
4375         get_total_entries(buf, &total, &entries);
4376
4377         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4378                    name, UTS_RELEASE);
4379         seq_puts(m, "# -----------------------------------"
4380                  "---------------------------------\n");
4381         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4382                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4383                    nsecs_to_usecs(data->saved_latency),
4384                    entries,
4385                    total,
4386                    buf->cpu,
4387                    preempt_model_none()      ? "server" :
4388                    preempt_model_voluntary() ? "desktop" :
4389                    preempt_model_full()      ? "preempt" :
4390                    preempt_model_rt()        ? "preempt_rt" :
4391                    "unknown",
4392                    /* These are reserved for later use */
4393                    0, 0, 0, 0);
4394 #ifdef CONFIG_SMP
4395         seq_printf(m, " #P:%d)\n", num_online_cpus());
4396 #else
4397         seq_puts(m, ")\n");
4398 #endif
4399         seq_puts(m, "#    -----------------\n");
4400         seq_printf(m, "#    | task: %.16s-%d "
4401                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4402                    data->comm, data->pid,
4403                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4404                    data->policy, data->rt_priority);
4405         seq_puts(m, "#    -----------------\n");
4406
4407         if (data->critical_start) {
4408                 seq_puts(m, "#  => started at: ");
4409                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4410                 trace_print_seq(m, &iter->seq);
4411                 seq_puts(m, "\n#  => ended at:   ");
4412                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4413                 trace_print_seq(m, &iter->seq);
4414                 seq_puts(m, "\n#\n");
4415         }
4416
4417         seq_puts(m, "#\n");
4418 }
4419
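/*
 * With the "annotate" option set, print a marker the first time output
 * switches to a CPU buffer that has not been seen yet in this read.
 */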
4420 static void test_cpu_buff_start(struct trace_iterator *iter)
4421 {
4422         struct trace_seq *s = &iter->seq;
4423         struct trace_array *tr = iter->tr;
4424
4425         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4426                 return;
4427
4428         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4429                 return;
4430
4431         if (cpumask_available(iter->started) &&
4432             cpumask_test_cpu(iter->cpu, iter->started))
4433                 return;
4434
4435         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4436                 return;
4437
4438         if (cpumask_available(iter->started))
4439                 cpumask_set_cpu(iter->cpu, iter->started);
4440
4441         /* Don't print started cpu buffer for the first entry of the trace */
4442         if (iter->idx > 1)
4443                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4444                                 iter->cpu);
4445 }
4446
4447 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4448 {
4449         struct trace_array *tr = iter->tr;
4450         struct trace_seq *s = &iter->seq;
4451         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4452         struct trace_entry *entry;
4453         struct trace_event *event;
4454
4455         entry = iter->ent;
4456
4457         test_cpu_buff_start(iter);
4458
4459         event = ftrace_find_event(entry->type);
4460
4461         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4462                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4463                         trace_print_lat_context(iter);
4464                 else
4465                         trace_print_context(iter);
4466         }
4467
4468         if (trace_seq_has_overflowed(s))
4469                 return TRACE_TYPE_PARTIAL_LINE;
4470
4471         if (event) {
4472                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4473                         return print_event_fields(iter, event);
4474                 return event->funcs->trace(iter, sym_flags, event);
4475         }
4476
4477         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4478
4479         return trace_handle_return(s);
4480 }
4481
4482 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4483 {
4484         struct trace_array *tr = iter->tr;
4485         struct trace_seq *s = &iter->seq;
4486         struct trace_entry *entry;
4487         struct trace_event *event;
4488
4489         entry = iter->ent;
4490
4491         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4492                 trace_seq_printf(s, "%d %d %llu ",
4493                                  entry->pid, iter->cpu, iter->ts);
4494
4495         if (trace_seq_has_overflowed(s))
4496                 return TRACE_TYPE_PARTIAL_LINE;
4497
4498         event = ftrace_find_event(entry->type);
4499         if (event)
4500                 return event->funcs->raw(iter, 0, event);
4501
4502         trace_seq_printf(s, "%d ?\n", entry->type);
4503
4504         return trace_handle_return(s);
4505 }
4506
4507 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4508 {
4509         struct trace_array *tr = iter->tr;
4510         struct trace_seq *s = &iter->seq;
4511         unsigned char newline = '\n';
4512         struct trace_entry *entry;
4513         struct trace_event *event;
4514
4515         entry = iter->ent;
4516
4517         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4518                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4519                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4520                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4521                 if (trace_seq_has_overflowed(s))
4522                         return TRACE_TYPE_PARTIAL_LINE;
4523         }
4524
4525         event = ftrace_find_event(entry->type);
4526         if (event) {
4527                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4528                 if (ret != TRACE_TYPE_HANDLED)
4529                         return ret;
4530         }
4531
4532         SEQ_PUT_FIELD(s, newline);
4533
4534         return trace_handle_return(s);
4535 }
4536
4537 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4538 {
4539         struct trace_array *tr = iter->tr;
4540         struct trace_seq *s = &iter->seq;
4541         struct trace_entry *entry;
4542         struct trace_event *event;
4543
4544         entry = iter->ent;
4545
4546         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4547                 SEQ_PUT_FIELD(s, entry->pid);
4548                 SEQ_PUT_FIELD(s, iter->cpu);
4549                 SEQ_PUT_FIELD(s, iter->ts);
4550                 if (trace_seq_has_overflowed(s))
4551                         return TRACE_TYPE_PARTIAL_LINE;
4552         }
4553
4554         event = ftrace_find_event(entry->type);
4555         return event ? event->funcs->binary(iter, 0, event) :
4556                 TRACE_TYPE_HANDLED;
4557 }
4558
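/* Return 1 if there is nothing left to read in the iterator's buffers. */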
4559 int trace_empty(struct trace_iterator *iter)
4560 {
4561         struct ring_buffer_iter *buf_iter;
4562         int cpu;
4563
4564         /* If we are looking at one CPU buffer, only check that one */
4565         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4566                 cpu = iter->cpu_file;
4567                 buf_iter = trace_buffer_iter(iter, cpu);
4568                 if (buf_iter) {
4569                         if (!ring_buffer_iter_empty(buf_iter))
4570                                 return 0;
4571                 } else {
4572                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4573                                 return 0;
4574                 }
4575                 return 1;
4576         }
4577
4578         for_each_tracing_cpu(cpu) {
4579                 buf_iter = trace_buffer_iter(iter, cpu);
4580                 if (buf_iter) {
4581                         if (!ring_buffer_iter_empty(buf_iter))
4582                                 return 0;
4583                 } else {
4584                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4585                                 return 0;
4586                 }
4587         }
4588
4589         return 1;
4590 }
4591
4592 /*  Called with trace_event_read_lock() held. */
4593 enum print_line_t print_trace_line(struct trace_iterator *iter)
4594 {
4595         struct trace_array *tr = iter->tr;
4596         unsigned long trace_flags = tr->trace_flags;
4597         enum print_line_t ret;
4598
4599         if (iter->lost_events) {
4600                 if (iter->lost_events == (unsigned long)-1)
4601                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4602                                          iter->cpu);
4603                 else
4604                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4605                                          iter->cpu, iter->lost_events);
4606                 if (trace_seq_has_overflowed(&iter->seq))
4607                         return TRACE_TYPE_PARTIAL_LINE;
4608         }
4609
4610         if (iter->trace && iter->trace->print_line) {
4611                 ret = iter->trace->print_line(iter);
4612                 if (ret != TRACE_TYPE_UNHANDLED)
4613                         return ret;
4614         }
4615
4616         if (iter->ent->type == TRACE_BPUTS &&
4617                         trace_flags & TRACE_ITER_PRINTK &&
4618                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4619                 return trace_print_bputs_msg_only(iter);
4620
4621         if (iter->ent->type == TRACE_BPRINT &&
4622                         trace_flags & TRACE_ITER_PRINTK &&
4623                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4624                 return trace_print_bprintk_msg_only(iter);
4625
4626         if (iter->ent->type == TRACE_PRINT &&
4627                         trace_flags & TRACE_ITER_PRINTK &&
4628                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4629                 return trace_print_printk_msg_only(iter);
4630
4631         if (trace_flags & TRACE_ITER_BIN)
4632                 return print_bin_fmt(iter);
4633
4634         if (trace_flags & TRACE_ITER_HEX)
4635                 return print_hex_fmt(iter);
4636
4637         if (trace_flags & TRACE_ITER_RAW)
4638                 return print_raw_fmt(iter);
4639
4640         return print_trace_fmt(iter);
4641 }
4642
4643 void trace_latency_header(struct seq_file *m)
4644 {
4645         struct trace_iterator *iter = m->private;
4646         struct trace_array *tr = iter->tr;
4647
4648         /* print nothing if the buffers are empty */
4649         if (trace_empty(iter))
4650                 return;
4651
4652         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4653                 print_trace_header(m, iter);
4654
4655         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4656                 print_lat_help_header(m);
4657 }
4658
4659 void trace_default_header(struct seq_file *m)
4660 {
4661         struct trace_iterator *iter = m->private;
4662         struct trace_array *tr = iter->tr;
4663         unsigned long trace_flags = tr->trace_flags;
4664
4665         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4666                 return;
4667
4668         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4669                 /* print nothing if the buffers are empty */
4670                 if (trace_empty(iter))
4671                         return;
4672                 print_trace_header(m, iter);
4673                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4674                         print_lat_help_header(m);
4675         } else {
4676                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4677                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4678                                 print_func_help_header_irq(iter->array_buffer,
4679                                                            m, trace_flags);
4680                         else
4681                                 print_func_help_header(iter->array_buffer, m,
4682                                                        trace_flags);
4683                 }
4684         }
4685 }
4686
4687 static void test_ftrace_alive(struct seq_file *m)
4688 {
4689         if (!ftrace_is_dead())
4690                 return;
4691         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4692                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4693 }
4694
4695 #ifdef CONFIG_TRACER_MAX_TRACE
4696 static void show_snapshot_main_help(struct seq_file *m)
4697 {
4698         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4699                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4700                     "#                      Takes a snapshot of the main buffer.\n"
4701                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "#                      (Doesn't have to be '2'; works with any number that\n"
4703                     "#                       is not a '0' or '1')\n");
4704 }
4705
4706 static void show_snapshot_percpu_help(struct seq_file *m)
4707 {
4708         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4709 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4710         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4711                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4712 #else
4713         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4714                     "#                     Must use main snapshot file to allocate.\n");
4715 #endif
4716         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4717                     "#                      (Doesn't have to be '2'; works with any number that\n"
4718                     "#                       is not a '0' or '1')\n");
4719 }
4720
4721 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4722 {
4723         if (iter->tr->allocated_snapshot)
4724                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4725         else
4726                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4727
4728         seq_puts(m, "# Snapshot commands:\n");
4729         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4730                 show_snapshot_main_help(m);
4731         else
4732                 show_snapshot_percpu_help(m);
4733 }
4734 #else
4735 /* Should never be called */
4736 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4737 #endif
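
/*
 * For reference, a minimal snapshot workflow matching the help text
 * above; this is only a usage sketch, and the tracefs mount point
 * /sys/kernel/tracing is assumed:
 *
 *	# cd /sys/kernel/tracing
 *	# echo 1 > snapshot	# allocate and take a snapshot of the main buffer
 *	# cat snapshot		# read the snapshotted events
 *	# echo 2 > snapshot	# clear the snapshot buffer, keep it allocated
 *	# echo 0 > snapshot	# free the snapshot buffer
 */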
4738
4739 static int s_show(struct seq_file *m, void *v)
4740 {
4741         struct trace_iterator *iter = v;
4742         int ret;
4743
4744         if (iter->ent == NULL) {
4745                 if (iter->tr) {
4746                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4747                         seq_puts(m, "#\n");
4748                         test_ftrace_alive(m);
4749                 }
4750                 if (iter->snapshot && trace_empty(iter))
4751                         print_snapshot_help(m, iter);
4752                 else if (iter->trace && iter->trace->print_header)
4753                         iter->trace->print_header(m);
4754                 else
4755                         trace_default_header(m);
4756
4757         } else if (iter->leftover) {
4758                 /*
4759                  * If we filled the seq_file buffer earlier, we
4760                  * want to just show it now.
4761                  */
4762                 ret = trace_print_seq(m, &iter->seq);
4763
4764                 /* ret should this time be zero, but you never know */
4765                 iter->leftover = ret;
4766
4767         } else {
4768                 print_trace_line(iter);
4769                 ret = trace_print_seq(m, &iter->seq);
4770                 /*
4771                  * If we overflow the seq_file buffer, then it will
4772                  * ask us for this data again at start up.
4773                  * Use that instead.
4774                  *  ret is 0 if seq_file write succeeded.
4775                  *        -1 otherwise.
4776                  */
4777                 iter->leftover = ret;
4778         }
4779
4780         return 0;
4781 }
4782
4783 /*
4784  * Should be used after trace_array_get(), trace_types_lock
4785  * ensures that i_cdev was already initialized.
4786  */
4787 static inline int tracing_get_cpu(struct inode *inode)
4788 {
4789         if (inode->i_cdev) /* See trace_create_cpu_file() */
4790                 return (long)inode->i_cdev - 1;
4791         return RING_BUFFER_ALL_CPUS;
4792 }
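
/*
 * A sketch of the encoding this decode relies on: the per-CPU file
 * creation side (trace_create_cpu_file(), later in this file) stores
 * cpu + 1 in i_cdev, so a NULL i_cdev (the top-level files) can be
 * told apart from a per-CPU file for cpu 0:
 *
 *	i_cdev == NULL			-> RING_BUFFER_ALL_CPUS
 *	i_cdev == (void *)(cpu + 1)	-> cpu
 */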
4793
4794 static const struct seq_operations tracer_seq_ops = {
4795         .start          = s_start,
4796         .next           = s_next,
4797         .stop           = s_stop,
4798         .show           = s_show,
4799 };
4800
4801 static struct trace_iterator *
4802 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4803 {
4804         struct trace_array *tr = inode->i_private;
4805         struct trace_iterator *iter;
4806         int cpu;
4807
4808         if (tracing_disabled)
4809                 return ERR_PTR(-ENODEV);
4810
4811         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4812         if (!iter)
4813                 return ERR_PTR(-ENOMEM);
4814
4815         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4816                                     GFP_KERNEL);
4817         if (!iter->buffer_iter)
4818                 goto release;
4819
4820         /*
4821          * trace_find_next_entry() may need to save off iter->ent.
4822          * It will place it into the iter->temp buffer. As most
4823          * events are less than 128 bytes, allocate a buffer of that size.
4824          * If one is greater, then trace_find_next_entry() will
4825          * allocate a new buffer to adjust for the bigger iter->ent.
4826          * It's not critical if it fails to get allocated here.
4827          */
4828         iter->temp = kmalloc(128, GFP_KERNEL);
4829         if (iter->temp)
4830                 iter->temp_size = 128;
4831
4832         /*
4833          * trace_event_printf() may need to modify given format
4834          * string to replace %p with %px so that it shows real address
4835          * instead of a hash value. However, that is only needed for event
4836          * tracing; other tracers may not need it. Defer the allocation
4837          * until it is needed.
4838          */
4839         iter->fmt = NULL;
4840         iter->fmt_size = 0;
4841
4842         /*
4843          * We make a copy of the current tracer to avoid concurrent
4844          * changes on it while we are reading.
4845          */
4846         mutex_lock(&trace_types_lock);
4847         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4848         if (!iter->trace)
4849                 goto fail;
4850
4851         *iter->trace = *tr->current_trace;
4852
4853         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4854                 goto fail;
4855
4856         iter->tr = tr;
4857
4858 #ifdef CONFIG_TRACER_MAX_TRACE
4859         /* Currently only the top directory has a snapshot */
4860         if (tr->current_trace->print_max || snapshot)
4861                 iter->array_buffer = &tr->max_buffer;
4862         else
4863 #endif
4864                 iter->array_buffer = &tr->array_buffer;
4865         iter->snapshot = snapshot;
4866         iter->pos = -1;
4867         iter->cpu_file = tracing_get_cpu(inode);
4868         mutex_init(&iter->mutex);
4869
4870         /* Notify the tracer early; before we stop tracing. */
4871         if (iter->trace->open)
4872                 iter->trace->open(iter);
4873
4874         /* Annotate start of buffers if we had overruns */
4875         if (ring_buffer_overruns(iter->array_buffer->buffer))
4876                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4877
4878         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4879         if (trace_clocks[tr->clock_id].in_ns)
4880                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4881
4882         /*
4883          * If pause-on-trace is enabled, then stop the trace while
4884          * dumping, unless this is the "snapshot" file
4885          */
4886         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4887                 tracing_stop_tr(tr);
4888
4889         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4890                 for_each_tracing_cpu(cpu) {
4891                         iter->buffer_iter[cpu] =
4892                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4893                                                          cpu, GFP_KERNEL);
4894                 }
4895                 ring_buffer_read_prepare_sync();
4896                 for_each_tracing_cpu(cpu) {
4897                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4898                         tracing_iter_reset(iter, cpu);
4899                 }
4900         } else {
4901                 cpu = iter->cpu_file;
4902                 iter->buffer_iter[cpu] =
4903                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4904                                                  cpu, GFP_KERNEL);
4905                 ring_buffer_read_prepare_sync();
4906                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4907                 tracing_iter_reset(iter, cpu);
4908         }
4909
4910         mutex_unlock(&trace_types_lock);
4911
4912         return iter;
4913
4914  fail:
4915         mutex_unlock(&trace_types_lock);
4916         kfree(iter->trace);
4917         kfree(iter->temp);
4918         kfree(iter->buffer_iter);
4919 release:
4920         seq_release_private(inode, file);
4921         return ERR_PTR(-ENOMEM);
4922 }
4923
4924 int tracing_open_generic(struct inode *inode, struct file *filp)
4925 {
4926         int ret;
4927
4928         ret = tracing_check_open_get_tr(NULL);
4929         if (ret)
4930                 return ret;
4931
4932         filp->private_data = inode->i_private;
4933         return 0;
4934 }
4935
4936 bool tracing_is_disabled(void)
4937 {
4938         return tracing_disabled ? true : false;
4939 }
4940
4941 /*
4942  * Open and update trace_array ref count.
4943  * Must have the current trace_array passed to it.
4944  */
4945 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4946 {
4947         struct trace_array *tr = inode->i_private;
4948         int ret;
4949
4950         ret = tracing_check_open_get_tr(tr);
4951         if (ret)
4952                 return ret;
4953
4954         filp->private_data = inode->i_private;
4955
4956         return 0;
4957 }
4958
4959 static int tracing_mark_open(struct inode *inode, struct file *filp)
4960 {
4961         stream_open(inode, filp);
4962         return tracing_open_generic_tr(inode, filp);
4963 }
4964
4965 static int tracing_release(struct inode *inode, struct file *file)
4966 {
4967         struct trace_array *tr = inode->i_private;
4968         struct seq_file *m = file->private_data;
4969         struct trace_iterator *iter;
4970         int cpu;
4971
4972         if (!(file->f_mode & FMODE_READ)) {
4973                 trace_array_put(tr);
4974                 return 0;
4975         }
4976
4977         /* Writes do not use seq_file */
4978         iter = m->private;
4979         mutex_lock(&trace_types_lock);
4980
4981         for_each_tracing_cpu(cpu) {
4982                 if (iter->buffer_iter[cpu])
4983                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4984         }
4985
4986         if (iter->trace && iter->trace->close)
4987                 iter->trace->close(iter);
4988
4989         if (!iter->snapshot && tr->stop_count)
4990                 /* reenable tracing if it was previously enabled */
4991                 tracing_start_tr(tr);
4992
4993         __trace_array_put(tr);
4994
4995         mutex_unlock(&trace_types_lock);
4996
4997         mutex_destroy(&iter->mutex);
4998         free_cpumask_var(iter->started);
4999         kfree(iter->fmt);
5000         kfree(iter->temp);
5001         kfree(iter->trace);
5002         kfree(iter->buffer_iter);
5003         seq_release_private(inode, file);
5004
5005         return 0;
5006 }
5007
5008 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5009 {
5010         struct trace_array *tr = inode->i_private;
5011
5012         trace_array_put(tr);
5013         return 0;
5014 }
5015
5016 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5017 {
5018         struct trace_array *tr = inode->i_private;
5019
5020         trace_array_put(tr);
5021
5022         return single_release(inode, file);
5023 }
5024
5025 static int tracing_open(struct inode *inode, struct file *file)
5026 {
5027         struct trace_array *tr = inode->i_private;
5028         struct trace_iterator *iter;
5029         int ret;
5030
5031         ret = tracing_check_open_get_tr(tr);
5032         if (ret)
5033                 return ret;
5034
5035         /* If this file was open for write, then erase contents */
5036         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5037                 int cpu = tracing_get_cpu(inode);
5038                 struct array_buffer *trace_buf = &tr->array_buffer;
5039
5040 #ifdef CONFIG_TRACER_MAX_TRACE
5041                 if (tr->current_trace->print_max)
5042                         trace_buf = &tr->max_buffer;
5043 #endif
5044
5045                 if (cpu == RING_BUFFER_ALL_CPUS)
5046                         tracing_reset_online_cpus(trace_buf);
5047                 else
5048                         tracing_reset_cpu(trace_buf, cpu);
5049         }
5050
5051         if (file->f_mode & FMODE_READ) {
5052                 iter = __tracing_open(inode, file, false);
5053                 if (IS_ERR(iter))
5054                         ret = PTR_ERR(iter);
5055                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5056                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5057         }
5058
5059         if (ret < 0)
5060                 trace_array_put(tr);
5061
5062         return ret;
5063 }
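
/*
 * The O_TRUNC handling above is what backs the documented idiom for
 * clearing the buffer; a usage sketch, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *	# cat /sys/kernel/tracing/trace	# read without consuming
 *	# echo > /sys/kernel/tracing/trace	# truncating open resets the buffer
 */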
5064
5065 /*
5066  * Some tracers are not suitable for instance buffers.
5067  * A tracer is always available for the global array (toplevel)
5068  * or if it explicitly states that it is.
5069  */
5070 static bool
5071 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5072 {
5073         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5074 }
5075
5076 /* Find the next tracer that this trace array may use */
5077 static struct tracer *
5078 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5079 {
5080         while (t && !trace_ok_for_array(t, tr))
5081                 t = t->next;
5082
5083         return t;
5084 }
5085
5086 static void *
5087 t_next(struct seq_file *m, void *v, loff_t *pos)
5088 {
5089         struct trace_array *tr = m->private;
5090         struct tracer *t = v;
5091
5092         (*pos)++;
5093
5094         if (t)
5095                 t = get_tracer_for_array(tr, t->next);
5096
5097         return t;
5098 }
5099
5100 static void *t_start(struct seq_file *m, loff_t *pos)
5101 {
5102         struct trace_array *tr = m->private;
5103         struct tracer *t;
5104         loff_t l = 0;
5105
5106         mutex_lock(&trace_types_lock);
5107
5108         t = get_tracer_for_array(tr, trace_types);
5109         for (; t && l < *pos; t = t_next(m, t, &l))
5110                         ;
5111
5112         return t;
5113 }
5114
5115 static void t_stop(struct seq_file *m, void *p)
5116 {
5117         mutex_unlock(&trace_types_lock);
5118 }
5119
5120 static int t_show(struct seq_file *m, void *v)
5121 {
5122         struct tracer *t = v;
5123
5124         if (!t)
5125                 return 0;
5126
5127         seq_puts(m, t->name);
5128         if (t->next)
5129                 seq_putc(m, ' ');
5130         else
5131                 seq_putc(m, '\n');
5132
5133         return 0;
5134 }
5135
5136 static const struct seq_operations show_traces_seq_ops = {
5137         .start          = t_start,
5138         .next           = t_next,
5139         .stop           = t_stop,
5140         .show           = t_show,
5141 };
5142
5143 static int show_traces_open(struct inode *inode, struct file *file)
5144 {
5145         struct trace_array *tr = inode->i_private;
5146         struct seq_file *m;
5147         int ret;
5148
5149         ret = tracing_check_open_get_tr(tr);
5150         if (ret)
5151                 return ret;
5152
5153         ret = seq_open(file, &show_traces_seq_ops);
5154         if (ret) {
5155                 trace_array_put(tr);
5156                 return ret;
5157         }
5158
5159         m = file->private_data;
5160         m->private = tr;
5161
5162         return 0;
5163 }
5164
5165 static int show_traces_release(struct inode *inode, struct file *file)
5166 {
5167         struct trace_array *tr = inode->i_private;
5168
5169         trace_array_put(tr);
5170         return seq_release(inode, file);
5171 }
5172
5173 static ssize_t
5174 tracing_write_stub(struct file *filp, const char __user *ubuf,
5175                    size_t count, loff_t *ppos)
5176 {
5177         return count;
5178 }
5179
5180 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5181 {
5182         int ret;
5183
5184         if (file->f_mode & FMODE_READ)
5185                 ret = seq_lseek(file, offset, whence);
5186         else
5187                 file->f_pos = ret = 0;
5188
5189         return ret;
5190 }
5191
5192 static const struct file_operations tracing_fops = {
5193         .open           = tracing_open,
5194         .read           = seq_read,
5195         .read_iter      = seq_read_iter,
5196         .splice_read    = generic_file_splice_read,
5197         .write          = tracing_write_stub,
5198         .llseek         = tracing_lseek,
5199         .release        = tracing_release,
5200 };
5201
5202 static const struct file_operations show_traces_fops = {
5203         .open           = show_traces_open,
5204         .read           = seq_read,
5205         .llseek         = seq_lseek,
5206         .release        = show_traces_release,
5207 };
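
/*
 * show_traces_fops backs the "available_tracers" file; t_show() above
 * prints the names as a single space-separated, newline-terminated
 * list. The exact set depends on the kernel configuration, so the
 * output below is only an assumed example:
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	function_graph function nop
 */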
5208
5209 static ssize_t
5210 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5211                      size_t count, loff_t *ppos)
5212 {
5213         struct trace_array *tr = file_inode(filp)->i_private;
5214         char *mask_str;
5215         int len;
5216
5217         len = snprintf(NULL, 0, "%*pb\n",
5218                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5219         mask_str = kmalloc(len, GFP_KERNEL);
5220         if (!mask_str)
5221                 return -ENOMEM;
5222
5223         len = snprintf(mask_str, len, "%*pb\n",
5224                        cpumask_pr_args(tr->tracing_cpumask));
5225         if (len >= count) {
5226                 count = -EINVAL;
5227                 goto out_err;
5228         }
5229         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5230
5231 out_err:
5232         kfree(mask_str);
5233
5234         return count;
5235 }
5236
5237 int tracing_set_cpumask(struct trace_array *tr,
5238                         cpumask_var_t tracing_cpumask_new)
5239 {
5240         int cpu;
5241
5242         if (!tr)
5243                 return -EINVAL;
5244
5245         local_irq_disable();
5246         arch_spin_lock(&tr->max_lock);
5247         for_each_tracing_cpu(cpu) {
5248                 /*
5249                  * Increase/decrease the disabled counter if we are
5250                  * about to flip a bit in the cpumask:
5251                  */
5252                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5253                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5254                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5255                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5256                 }
5257                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5258                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5259                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5260                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5261                 }
5262         }
5263         arch_spin_unlock(&tr->max_lock);
5264         local_irq_enable();
5265
5266         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5267
5268         return 0;
5269 }
5270
5271 static ssize_t
5272 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5273                       size_t count, loff_t *ppos)
5274 {
5275         struct trace_array *tr = file_inode(filp)->i_private;
5276         cpumask_var_t tracing_cpumask_new;
5277         int err;
5278
5279         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5280                 return -ENOMEM;
5281
5282         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5283         if (err)
5284                 goto err_free;
5285
5286         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5287         if (err)
5288                 goto err_free;
5289
5290         free_cpumask_var(tracing_cpumask_new);
5291
5292         return count;
5293
5294 err_free:
5295         free_cpumask_var(tracing_cpumask_new);
5296
5297         return err;
5298 }
5299
5300 static const struct file_operations tracing_cpumask_fops = {
5301         .open           = tracing_open_generic_tr,
5302         .read           = tracing_cpumask_read,
5303         .write          = tracing_cpumask_write,
5304         .release        = tracing_release_generic_tr,
5305         .llseek         = generic_file_llseek,
5306 };
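
/*
 * Example use of the tracing_cpumask file implemented above. The read
 * side prints the mask with "%*pb" and the write side parses it with
 * cpumask_parse_user(), i.e. the usual hex cpumask format; a 4-CPU
 * system is assumed below:
 *
 *	# cat /sys/kernel/tracing/tracing_cpumask
 *	f
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask	# trace only CPUs 0 and 1
 */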
5307
5308 static int tracing_trace_options_show(struct seq_file *m, void *v)
5309 {
5310         struct tracer_opt *trace_opts;
5311         struct trace_array *tr = m->private;
5312         u32 tracer_flags;
5313         int i;
5314
5315         mutex_lock(&trace_types_lock);
5316         tracer_flags = tr->current_trace->flags->val;
5317         trace_opts = tr->current_trace->flags->opts;
5318
5319         for (i = 0; trace_options[i]; i++) {
5320                 if (tr->trace_flags & (1 << i))
5321                         seq_printf(m, "%s\n", trace_options[i]);
5322                 else
5323                         seq_printf(m, "no%s\n", trace_options[i]);
5324         }
5325
5326         for (i = 0; trace_opts[i].name; i++) {
5327                 if (tracer_flags & trace_opts[i].bit)
5328                         seq_printf(m, "%s\n", trace_opts[i].name);
5329                 else
5330                         seq_printf(m, "no%s\n", trace_opts[i].name);
5331         }
5332         mutex_unlock(&trace_types_lock);
5333
5334         return 0;
5335 }
5336
5337 static int __set_tracer_option(struct trace_array *tr,
5338                                struct tracer_flags *tracer_flags,
5339                                struct tracer_opt *opts, int neg)
5340 {
5341         struct tracer *trace = tracer_flags->trace;
5342         int ret;
5343
5344         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5345         if (ret)
5346                 return ret;
5347
5348         if (neg)
5349                 tracer_flags->val &= ~opts->bit;
5350         else
5351                 tracer_flags->val |= opts->bit;
5352         return 0;
5353 }
5354
5355 /* Try to assign a tracer specific option */
5356 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5357 {
5358         struct tracer *trace = tr->current_trace;
5359         struct tracer_flags *tracer_flags = trace->flags;
5360         struct tracer_opt *opts = NULL;
5361         int i;
5362
5363         for (i = 0; tracer_flags->opts[i].name; i++) {
5364                 opts = &tracer_flags->opts[i];
5365
5366                 if (strcmp(cmp, opts->name) == 0)
5367                         return __set_tracer_option(tr, trace->flags, opts, neg);
5368         }
5369
5370         return -EINVAL;
5371 }
5372
5373 /* Some tracers require overwrite to stay enabled */
5374 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5375 {
5376         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5377                 return -1;
5378
5379         return 0;
5380 }
5381
5382 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5383 {
5384         int *map;
5385
5386         if ((mask == TRACE_ITER_RECORD_TGID) ||
5387             (mask == TRACE_ITER_RECORD_CMD))
5388                 lockdep_assert_held(&event_mutex);
5389
5390         /* do nothing if flag is already set */
5391         if (!!(tr->trace_flags & mask) == !!enabled)
5392                 return 0;
5393
5394         /* Give the tracer a chance to approve the change */
5395         if (tr->current_trace->flag_changed)
5396                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5397                         return -EINVAL;
5398
5399         if (enabled)
5400                 tr->trace_flags |= mask;
5401         else
5402                 tr->trace_flags &= ~mask;
5403
5404         if (mask == TRACE_ITER_RECORD_CMD)
5405                 trace_event_enable_cmd_record(enabled);
5406
5407         if (mask == TRACE_ITER_RECORD_TGID) {
5408                 if (!tgid_map) {
5409                         tgid_map_max = pid_max;
5410                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5411                                        GFP_KERNEL);
5412
5413                         /*
5414                          * Pairs with smp_load_acquire() in
5415                          * trace_find_tgid_ptr() to ensure that if it observes
5416                          * the tgid_map we just allocated then it also observes
5417                          * the corresponding tgid_map_max value.
5418                          */
5419                         smp_store_release(&tgid_map, map);
5420                 }
5421                 if (!tgid_map) {
5422                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5423                         return -ENOMEM;
5424                 }
5425
5426                 trace_event_enable_tgid_record(enabled);
5427         }
5428
5429         if (mask == TRACE_ITER_EVENT_FORK)
5430                 trace_event_follow_fork(tr, enabled);
5431
5432         if (mask == TRACE_ITER_FUNC_FORK)
5433                 ftrace_pid_follow_fork(tr, enabled);
5434
5435         if (mask == TRACE_ITER_OVERWRITE) {
5436                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5437 #ifdef CONFIG_TRACER_MAX_TRACE
5438                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5439 #endif
5440         }
5441
5442         if (mask == TRACE_ITER_PRINTK) {
5443                 trace_printk_start_stop_comm(enabled);
5444                 trace_printk_control(enabled);
5445         }
5446
5447         return 0;
5448 }
5449
5450 int trace_set_options(struct trace_array *tr, char *option)
5451 {
5452         char *cmp;
5453         int neg = 0;
5454         int ret;
5455         size_t orig_len = strlen(option);
5456         int len;
5457
5458         cmp = strstrip(option);
5459
5460         len = str_has_prefix(cmp, "no");
5461         if (len)
5462                 neg = 1;
5463
5464         cmp += len;
5465
5466         mutex_lock(&event_mutex);
5467         mutex_lock(&trace_types_lock);
5468
5469         ret = match_string(trace_options, -1, cmp);
5470         /* If no option could be set, test the specific tracer options */
5471         if (ret < 0)
5472                 ret = set_tracer_option(tr, cmp, neg);
5473         else
5474                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5475
5476         mutex_unlock(&trace_types_lock);
5477         mutex_unlock(&event_mutex);
5478
5479         /*
5480          * If the first trailing whitespace is replaced with '\0' by strstrip,
5481          * turn it back into a space.
5482          */
5483         if (orig_len > strlen(option))
5484                 option[strlen(option)] = ' ';
5485
5486         return ret;
5487 }
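
/*
 * A usage sketch of the "no" prefix handling above, as seen through the
 * trace_options file; "overwrite" is used here only as an example of a
 * generic option name from the trace_options[] table:
 *
 *	# echo overwrite > /sys/kernel/tracing/trace_options	# set the flag
 *	# echo nooverwrite > /sys/kernel/tracing/trace_options	# clear it again
 */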
5488
5489 static void __init apply_trace_boot_options(void)
5490 {
5491         char *buf = trace_boot_options_buf;
5492         char *option;
5493
5494         while (true) {
5495                 option = strsep(&buf, ",");
5496
5497                 if (!option)
5498                         break;
5499
5500                 if (*option)
5501                         trace_set_options(&global_trace, option);
5502
5503                 /* Put back the comma to allow this to be called again */
5504                 if (buf)
5505                         *(buf - 1) = ',';
5506         }
5507 }
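
/*
 * A sketch of the boot-time usage this parser supports: the buffer is
 * filled from the trace_options= kernel command-line parameter (handled
 * earlier in this file), so a command line containing, for example,
 *
 *	trace_options=sym-addr,nooverwrite
 *
 * results in trace_set_options() being called once per comma-separated
 * entry (the option names here are just examples), with the commas
 * restored so the list can be parsed again later.
 */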
5508
5509 static ssize_t
5510 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5511                         size_t cnt, loff_t *ppos)
5512 {
5513         struct seq_file *m = filp->private_data;
5514         struct trace_array *tr = m->private;
5515         char buf[64];
5516         int ret;
5517
5518         if (cnt >= sizeof(buf))
5519                 return -EINVAL;
5520
5521         if (copy_from_user(buf, ubuf, cnt))
5522                 return -EFAULT;
5523
5524         buf[cnt] = 0;
5525
5526         ret = trace_set_options(tr, buf);
5527         if (ret < 0)
5528                 return ret;
5529
5530         *ppos += cnt;
5531
5532         return cnt;
5533 }
5534
5535 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5536 {
5537         struct trace_array *tr = inode->i_private;
5538         int ret;
5539
5540         ret = tracing_check_open_get_tr(tr);
5541         if (ret)
5542                 return ret;
5543
5544         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5545         if (ret < 0)
5546                 trace_array_put(tr);
5547
5548         return ret;
5549 }
5550
5551 static const struct file_operations tracing_iter_fops = {
5552         .open           = tracing_trace_options_open,
5553         .read           = seq_read,
5554         .llseek         = seq_lseek,
5555         .release        = tracing_single_release_tr,
5556         .write          = tracing_trace_options_write,
5557 };
5558
5559 static const char readme_msg[] =
5560         "tracing mini-HOWTO:\n\n"
5561         "# echo 0 > tracing_on : quick way to disable tracing\n"
5562         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5563         " Important files:\n"
5564         "  trace\t\t\t- The static contents of the buffer\n"
5565         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5566         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5567         "  current_tracer\t- function and latency tracers\n"
5568         "  available_tracers\t- list of configured tracers for current_tracer\n"
5569         "  error_log\t- error log for failed commands (that support it)\n"
5570         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5571         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5572         "  trace_clock\t\t- change the clock used to order events\n"
5573         "       local:   Per cpu clock but may not be synced across CPUs\n"
5574         "      global:   Synced across CPUs but slows tracing down.\n"
5575         "     counter:   Not a clock, but just an increment\n"
5576         "      uptime:   Jiffy counter from time of boot\n"
5577         "        perf:   Same clock that perf events use\n"
5578 #ifdef CONFIG_X86_64
5579         "     x86-tsc:   TSC cycle counter\n"
5580 #endif
5581         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5582         "       delta:   Delta difference against a buffer-wide timestamp\n"
5583         "    absolute:   Absolute (standalone) timestamp\n"
5584         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5585         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5586         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5587         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5588         "\t\t\t  Remove sub-buffer with rmdir\n"
5589         "  trace_options\t\t- Set format or modify how tracing happens\n"
5590         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5591         "\t\t\t  option name\n"
5592         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5593 #ifdef CONFIG_DYNAMIC_FTRACE
5594         "\n  available_filter_functions - list of functions that can be filtered on\n"
5595         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5596         "\t\t\t  functions\n"
5597         "\t     accepts: func_full_name or glob-matching-pattern\n"
5598         "\t     modules: Can select a group via module\n"
5599         "\t      Format: :mod:<module-name>\n"
5600         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5601         "\t    triggers: a command to perform when function is hit\n"
5602         "\t      Format: <function>:<trigger>[:count]\n"
5603         "\t     trigger: traceon, traceoff\n"
5604         "\t\t      enable_event:<system>:<event>\n"
5605         "\t\t      disable_event:<system>:<event>\n"
5606 #ifdef CONFIG_STACKTRACE
5607         "\t\t      stacktrace\n"
5608 #endif
5609 #ifdef CONFIG_TRACER_SNAPSHOT
5610         "\t\t      snapshot\n"
5611 #endif
5612         "\t\t      dump\n"
5613         "\t\t      cpudump\n"
5614         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5615         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5616         "\t     The first one will disable tracing every time do_fault is hit\n"
5617         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5618         "\t       The first time do_trap is hit and it disables tracing, the\n"
5619         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5620         "\t       the counter will not decrement. It only decrements when the\n"
5621         "\t       trigger did work\n"
5622         "\t     To remove trigger without count:\n"
5623         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5624         "\t     To remove trigger with a count:\n"
5625         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5626         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5627         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5628         "\t    modules: Can select a group via module command :mod:\n"
5629         "\t    Does not accept triggers\n"
5630 #endif /* CONFIG_DYNAMIC_FTRACE */
5631 #ifdef CONFIG_FUNCTION_TRACER
5632         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5633         "\t\t    (function)\n"
5634         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5635         "\t\t    (function)\n"
5636 #endif
5637 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5638         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5639         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5640         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5641 #endif
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5643         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5644         "\t\t\t  snapshot buffer. Read the contents for more\n"
5645         "\t\t\t  information\n"
5646 #endif
5647 #ifdef CONFIG_STACK_TRACER
5648         "  stack_trace\t\t- Shows the max stack trace when active\n"
5649         "  stack_max_size\t- Shows current max stack size that was traced\n"
5650         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5651         "\t\t\t  new trace)\n"
5652 #ifdef CONFIG_DYNAMIC_FTRACE
5653         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5654         "\t\t\t  traces\n"
5655 #endif
5656 #endif /* CONFIG_STACK_TRACER */
5657 #ifdef CONFIG_DYNAMIC_EVENTS
5658         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5659         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5660 #endif
5661 #ifdef CONFIG_KPROBE_EVENTS
5662         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5663         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5664 #endif
5665 #ifdef CONFIG_UPROBE_EVENTS
5666         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5667         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5668 #endif
5669 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5670         "\t  accepts: event-definitions (one definition per line)\n"
5671         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5672         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5673 #ifdef CONFIG_HIST_TRIGGERS
5674         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5675 #endif
5676         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5677         "\t           -:[<group>/][<event>]\n"
5678 #ifdef CONFIG_KPROBE_EVENTS
5679         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5680   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5681 #endif
5682 #ifdef CONFIG_UPROBE_EVENTS
5683   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5684 #endif
5685         "\t     args: <name>=fetcharg[:type]\n"
5686         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5687 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5688         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5689 #else
5690         "\t           $stack<index>, $stack, $retval, $comm,\n"
5691 #endif
5692         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5693         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5694         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5695         "\t           symstr, <type>\\[<array-size>\\]\n"
5696 #ifdef CONFIG_HIST_TRIGGERS
5697         "\t    field: <stype> <name>;\n"
5698         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5699         "\t           [unsigned] char/int/long\n"
5700 #endif
5701         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5702         "\t            of the <attached-group>/<attached-event>.\n"
5703 #endif
5704         "  events/\t\t- Directory containing all trace event subsystems:\n"
5705         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5706         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5707         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5708         "\t\t\t  events\n"
5709         "      filter\t\t- If set, only events passing filter are traced\n"
5710         "  events/<system>/<event>/\t- Directory containing control files for\n"
5711         "\t\t\t  <event>:\n"
5712         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5713         "      filter\t\t- If set, only events passing filter are traced\n"
5714         "      trigger\t\t- If set, a command to perform when event is hit\n"
5715         "\t    Format: <trigger>[:count][if <filter>]\n"
5716         "\t   trigger: traceon, traceoff\n"
5717         "\t            enable_event:<system>:<event>\n"
5718         "\t            disable_event:<system>:<event>\n"
5719 #ifdef CONFIG_HIST_TRIGGERS
5720         "\t            enable_hist:<system>:<event>\n"
5721         "\t            disable_hist:<system>:<event>\n"
5722 #endif
5723 #ifdef CONFIG_STACKTRACE
5724         "\t\t    stacktrace\n"
5725 #endif
5726 #ifdef CONFIG_TRACER_SNAPSHOT
5727         "\t\t    snapshot\n"
5728 #endif
5729 #ifdef CONFIG_HIST_TRIGGERS
5730         "\t\t    hist (see below)\n"
5731 #endif
5732         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5733         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5734         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5735         "\t                  events/block/block_unplug/trigger\n"
5736         "\t   The first disables tracing every time block_unplug is hit.\n"
5737         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5738         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5739         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5740         "\t   Like function triggers, the counter is only decremented if it\n"
5741         "\t    enabled or disabled tracing.\n"
5742         "\t   To remove a trigger without a count:\n"
5743         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5744         "\t   To remove a trigger with a count:\n"
5745         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5746         "\t   Filters can be ignored when removing a trigger.\n"
5747 #ifdef CONFIG_HIST_TRIGGERS
5748         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5749         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5750         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5751         "\t            [:values=<field1[,field2,...]>]\n"
5752         "\t            [:sort=<field1[,field2,...]>]\n"
5753         "\t            [:size=#entries]\n"
5754         "\t            [:pause][:continue][:clear]\n"
5755         "\t            [:name=histname1]\n"
5756         "\t            [:nohitcount]\n"
5757         "\t            [:<handler>.<action>]\n"
5758         "\t            [if <filter>]\n\n"
5759         "\t    Note, special fields can be used as well:\n"
5760         "\t            common_timestamp - to record current timestamp\n"
5761         "\t            common_cpu - to record the CPU the event happened on\n"
5762         "\n"
5763         "\t    A hist trigger variable can be:\n"
5764         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5765         "\t        - a reference to another variable e.g. y=$x,\n"
5766         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5767         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5768         "\n"
5769         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5770         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5771         "\t    variable reference, field or numeric literal.\n"
5772         "\n"
5773         "\t    When a matching event is hit, an entry is added to a hash\n"
5774         "\t    table using the key(s) and value(s) named, and the value of a\n"
5775         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5776         "\t    correspond to fields in the event's format description.  Keys\n"
5777         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5778         "\t    Compound keys consisting of up to two fields can be specified\n"
5779         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5780         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5781         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5782         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5783         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5784         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5785         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5786         "\t    its histogram data will be shared with other triggers of the\n"
5787         "\t    same name, and trigger hits will update this common data.\n\n"
5788         "\t    Reading the 'hist' file for the event will dump the hash\n"
5789         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5790         "\t    triggers attached to an event, there will be a table for each\n"
5791         "\t    trigger in the output.  The table displayed for a named\n"
5792         "\t    trigger will be the same as any other instance having the\n"
5793         "\t    same name.  The default format used to display a given field\n"
5794         "\t    can be modified by appending any of the following modifiers\n"
5795         "\t    to the field name, as applicable:\n\n"
5796         "\t            .hex        display a number as a hex value\n"
5797         "\t            .sym        display an address as a symbol\n"
5798         "\t            .sym-offset display an address as a symbol and offset\n"
5799         "\t            .execname   display a common_pid as a program name\n"
5800         "\t            .syscall    display a syscall id as a syscall name\n"
5801         "\t            .log2       display log2 value rather than raw number\n"
5802         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5803         "\t            .usecs      display a common_timestamp in microseconds\n"
5804         "\t            .percent    display a number as a percentage value\n"
5805         "\t            .graph      display a bar-graph of a value\n\n"
5806         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5807         "\t    trigger or to start a hist trigger but not log any events\n"
5808         "\t    until told to do so.  'continue' can be used to start or\n"
5809         "\t    restart a paused hist trigger.\n\n"
5810         "\t    The 'clear' parameter will clear the contents of a running\n"
5811         "\t    hist trigger and leave its current paused/active state\n"
5812         "\t    unchanged.\n\n"
5813         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5814         "\t    raw hitcount in the histogram.\n\n"
5815         "\t    The enable_hist and disable_hist triggers can be used to\n"
5816         "\t    have one event conditionally start and stop another event's\n"
5817         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5818         "\t    the enable_event and disable_event triggers.\n\n"
5819         "\t    Hist trigger handlers and actions are executed whenever a\n"
5820         "\t    histogram entry is added or updated.  They take the form:\n\n"
5821         "\t        <handler>.<action>\n\n"
5822         "\t    The available handlers are:\n\n"
5823         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5824         "\t        onmax(var)               - invoke if var exceeds current max\n"
5825         "\t        onchange(var)            - invoke action if var changes\n\n"
5826         "\t    The available actions are:\n\n"
5827         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5828         "\t        save(field,...)                      - save current event fields\n"
5829 #ifdef CONFIG_TRACER_SNAPSHOT
5830         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5831 #endif
5832 #ifdef CONFIG_SYNTH_EVENTS
5833         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5834         "\t  Write into this file to define/undefine new synthetic events.\n"
5835         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5836 #endif
5837 #endif
5838 ;
5839
5840 static ssize_t
5841 tracing_readme_read(struct file *filp, char __user *ubuf,
5842                        size_t cnt, loff_t *ppos)
5843 {
5844         return simple_read_from_buffer(ubuf, cnt, ppos,
5845                                         readme_msg, strlen(readme_msg));
5846 }
5847
5848 static const struct file_operations tracing_readme_fops = {
5849         .open           = tracing_open_generic,
5850         .read           = tracing_readme_read,
5851         .llseek         = generic_file_llseek,
5852 };
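
/*
 * readme_msg above is exported verbatim through tracing_readme_fops as
 * the "README" tracefs file (created later in this file), so the whole
 * mini-HOWTO can be read from userspace; assuming the usual mount
 * point:
 *
 *	# head -1 /sys/kernel/tracing/README
 *	tracing mini-HOWTO:
 */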
5853
5854 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5855 {
5856         int pid = ++(*pos);
5857
5858         return trace_find_tgid_ptr(pid);
5859 }
5860
5861 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5862 {
5863         int pid = *pos;
5864
5865         return trace_find_tgid_ptr(pid);
5866 }
5867
5868 static void saved_tgids_stop(struct seq_file *m, void *v)
5869 {
5870 }
5871
5872 static int saved_tgids_show(struct seq_file *m, void *v)
5873 {
5874         int *entry = (int *)v;
5875         int pid = entry - tgid_map;
5876         int tgid = *entry;
5877
5878         if (tgid == 0)
5879                 return SEQ_SKIP;
5880
5881         seq_printf(m, "%d %d\n", pid, tgid);
5882         return 0;
5883 }
5884
5885 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5886         .start          = saved_tgids_start,
5887         .stop           = saved_tgids_stop,
5888         .next           = saved_tgids_next,
5889         .show           = saved_tgids_show,
5890 };
5891
5892 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5893 {
5894         int ret;
5895
5896         ret = tracing_check_open_get_tr(NULL);
5897         if (ret)
5898                 return ret;
5899
5900         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5901 }
5902
5903
5904 static const struct file_operations tracing_saved_tgids_fops = {
5905         .open           = tracing_saved_tgids_open,
5906         .read           = seq_read,
5907         .llseek         = seq_lseek,
5908         .release        = seq_release,
5909 };
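
/*
 * The seq_file above backs the "saved_tgids" file. Entries only exist
 * once the record-tgid option has allocated and populated tgid_map (see
 * set_tracer_flag()), and each line is a "<pid> <tgid>" pair; the
 * numbers below are purely illustrative:
 *
 *	# echo 1 > /sys/kernel/tracing/options/record-tgid
 *	# cat /sys/kernel/tracing/saved_tgids
 *	1127 1127
 *	1132 1127
 */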
5910
5911 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5912 {
5913         unsigned int *ptr = v;
5914
5915         if (*pos || m->count)
5916                 ptr++;
5917
5918         (*pos)++;
5919
5920         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5921              ptr++) {
5922                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5923                         continue;
5924
5925                 return ptr;
5926         }
5927
5928         return NULL;
5929 }
5930
5931 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5932 {
5933         void *v;
5934         loff_t l = 0;
5935
5936         preempt_disable();
5937         arch_spin_lock(&trace_cmdline_lock);
5938
5939         v = &savedcmd->map_cmdline_to_pid[0];
5940         while (l <= *pos) {
5941                 v = saved_cmdlines_next(m, v, &l);
5942                 if (!v)
5943                         return NULL;
5944         }
5945
5946         return v;
5947 }
5948
5949 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5950 {
5951         arch_spin_unlock(&trace_cmdline_lock);
5952         preempt_enable();
5953 }
5954
5955 static int saved_cmdlines_show(struct seq_file *m, void *v)
5956 {
5957         char buf[TASK_COMM_LEN];
5958         unsigned int *pid = v;
5959
5960         __trace_find_cmdline(*pid, buf);
5961         seq_printf(m, "%d %s\n", *pid, buf);
5962         return 0;
5963 }
5964
5965 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5966         .start          = saved_cmdlines_start,
5967         .next           = saved_cmdlines_next,
5968         .stop           = saved_cmdlines_stop,
5969         .show           = saved_cmdlines_show,
5970 };
5971
5972 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5973 {
5974         int ret;
5975
5976         ret = tracing_check_open_get_tr(NULL);
5977         if (ret)
5978                 return ret;
5979
5980         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5981 }
5982
5983 static const struct file_operations tracing_saved_cmdlines_fops = {
5984         .open           = tracing_saved_cmdlines_open,
5985         .read           = seq_read,
5986         .llseek         = seq_lseek,
5987         .release        = seq_release,
5988 };
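
/*
 * saved_cmdlines_show() above emits one "<pid> <comm>" pair per line
 * from the comm cache that the tracer uses to resolve pids in its
 * output; the entries below are purely illustrative:
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines
 *	1024 sshd
 *	1057 bash
 */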
5989
5990 static ssize_t
5991 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5992                                  size_t cnt, loff_t *ppos)
5993 {
5994         char buf[64];
5995         int r;
5996
5997         preempt_disable();
5998         arch_spin_lock(&trace_cmdline_lock);
5999         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6000         arch_spin_unlock(&trace_cmdline_lock);
6001         preempt_enable();
6002
6003         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6004 }
6005
6006 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6007 {
6008         kfree(s->saved_cmdlines);
6009         kfree(s->map_cmdline_to_pid);
6010         kfree(s);
6011 }
6012
6013 static int tracing_resize_saved_cmdlines(unsigned int val)
6014 {
6015         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6016
6017         s = kmalloc(sizeof(*s), GFP_KERNEL);
6018         if (!s)
6019                 return -ENOMEM;
6020
6021         if (allocate_cmdlines_buffer(val, s) < 0) {
6022                 kfree(s);
6023                 return -ENOMEM;
6024         }
6025
6026         preempt_disable();
6027         arch_spin_lock(&trace_cmdline_lock);
6028         savedcmd_temp = savedcmd;
6029         savedcmd = s;
6030         arch_spin_unlock(&trace_cmdline_lock);
6031         preempt_enable();
6032         free_saved_cmdlines_buffer(savedcmd_temp);
6033
6034         return 0;
6035 }
6036
6037 static ssize_t
6038 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6039                                   size_t cnt, loff_t *ppos)
6040 {
6041         unsigned long val;
6042         int ret;
6043
6044         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6045         if (ret)
6046                 return ret;
6047
6048         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6049         if (!val || val > PID_MAX_DEFAULT)
6050                 return -EINVAL;
6051
6052         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6053         if (ret < 0)
6054                 return ret;
6055
6056         *ppos += cnt;
6057
6058         return cnt;
6059 }
6060
6061 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6062         .open           = tracing_open_generic,
6063         .read           = tracing_saved_cmdlines_size_read,
6064         .write          = tracing_saved_cmdlines_size_write,
6065 };
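
/*
 * tracing_resize_saved_cmdlines() swaps in a freshly allocated buffer
 * under trace_cmdline_lock, so resizing discards the currently cached
 * comms. The write handler accepts any value from 1 up to
 * PID_MAX_DEFAULT; the current size printed below (128 here) is only an
 * assumed example:
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines_size
 *	128
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */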
6066
6067 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6068 static union trace_eval_map_item *
6069 update_eval_map(union trace_eval_map_item *ptr)
6070 {
6071         if (!ptr->map.eval_string) {
6072                 if (ptr->tail.next) {
6073                         ptr = ptr->tail.next;
6074                         /* Set ptr to the next real item (skip head) */
6075                         ptr++;
6076                 } else
6077                         return NULL;
6078         }
6079         return ptr;
6080 }
6081
6082 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6083 {
6084         union trace_eval_map_item *ptr = v;
6085
6086         /*
6087          * Paranoid! If ptr points to end, we don't want to increment past it.
6088          * This really should never happen.
6089          */
6090         (*pos)++;
6091         ptr = update_eval_map(ptr);
6092         if (WARN_ON_ONCE(!ptr))
6093                 return NULL;
6094
6095         ptr++;
6096         ptr = update_eval_map(ptr);
6097
6098         return ptr;
6099 }
6100
6101 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6102 {
6103         union trace_eval_map_item *v;
6104         loff_t l = 0;
6105
6106         mutex_lock(&trace_eval_mutex);
6107
6108         v = trace_eval_maps;
6109         if (v)
6110                 v++;
6111
6112         while (v && l < *pos) {
6113                 v = eval_map_next(m, v, &l);
6114         }
6115
6116         return v;
6117 }
6118
6119 static void eval_map_stop(struct seq_file *m, void *v)
6120 {
6121         mutex_unlock(&trace_eval_mutex);
6122 }
6123
6124 static int eval_map_show(struct seq_file *m, void *v)
6125 {
6126         union trace_eval_map_item *ptr = v;
6127
6128         seq_printf(m, "%s %ld (%s)\n",
6129                    ptr->map.eval_string, ptr->map.eval_value,
6130                    ptr->map.system);
6131
6132         return 0;
6133 }
6134
6135 static const struct seq_operations tracing_eval_map_seq_ops = {
6136         .start          = eval_map_start,
6137         .next           = eval_map_next,
6138         .stop           = eval_map_stop,
6139         .show           = eval_map_show,
6140 };
6141
6142 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6143 {
6144         int ret;
6145
6146         ret = tracing_check_open_get_tr(NULL);
6147         if (ret)
6148                 return ret;
6149
6150         return seq_open(filp, &tracing_eval_map_seq_ops);
6151 }
6152
6153 static const struct file_operations tracing_eval_map_fops = {
6154         .open           = tracing_eval_map_open,
6155         .read           = seq_read,
6156         .llseek         = seq_lseek,
6157         .release        = seq_release,
6158 };
6159
6160 static inline union trace_eval_map_item *
6161 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6162 {
6163         /* Return tail of array given the head */
6164         return ptr + ptr->head.length + 1;
6165 }
6166
6167 static void
6168 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6169                            int len)
6170 {
6171         struct trace_eval_map **stop;
6172         struct trace_eval_map **map;
6173         union trace_eval_map_item *map_array;
6174         union trace_eval_map_item *ptr;
6175
6176         stop = start + len;
6177
6178         /*
6179          * The allocated array contains the maps plus a head and a tail item:
6180          * the head holds the module and the length of the array, and the
6181          * tail holds a pointer to the next list.
6182          */
6183         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6184         if (!map_array) {
6185                 pr_warn("Unable to allocate trace eval mapping\n");
6186                 return;
6187         }
6188
6189         mutex_lock(&trace_eval_mutex);
6190
6191         if (!trace_eval_maps)
6192                 trace_eval_maps = map_array;
6193         else {
6194                 ptr = trace_eval_maps;
6195                 for (;;) {
6196                         ptr = trace_eval_jmp_to_tail(ptr);
6197                         if (!ptr->tail.next)
6198                                 break;
6199                         ptr = ptr->tail.next;
6201                 }
6202                 ptr->tail.next = map_array;
6203         }
6204         map_array->head.mod = mod;
6205         map_array->head.length = len;
6206         map_array++;
6207
6208         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6209                 map_array->map = **map;
6210                 map_array++;
6211         }
6212         memset(map_array, 0, sizeof(*map_array));
6213
6214         mutex_unlock(&trace_eval_mutex);
6215 }
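
/*
 * Editorial sketch (not part of the original source): the array built by
 * trace_insert_eval_map_file() above for a module exporting len == 2 eval
 * maps.  The strings and values are hypothetical.
 *
 *	map_array[0].head = { .mod = mod, .length = 2 }
 *	map_array[1].map  = { .eval_string = "STATE_A", .eval_value = 0,
 *			      .system = "my_subsys" }
 *	map_array[2].map  = { .eval_string = "STATE_B", .eval_value = 1,
 *			      .system = "my_subsys" }
 *	map_array[3].tail = zeroed by the final memset(); tail.next is set
 *			    later, when the next module's block is chained on
 *
 *	trace_eval_jmp_to_tail(&map_array[0])
 *		== &map_array[0] + map_array[0].head.length + 1
 *		== &map_array[3]
 *
 * The seq_file iterator above relies on this layout: update_eval_map()
 * treats an item with a NULL eval_string as a tail, follows tail.next to
 * the next block, and then skips that block's head.
 */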
6216
6217 static void trace_create_eval_file(struct dentry *d_tracer)
6218 {
6219         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6220                           NULL, &tracing_eval_map_fops);
6221 }
6222
6223 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6224 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6225 static inline void trace_insert_eval_map_file(struct module *mod,
6226                               struct trace_eval_map **start, int len) { }
6227 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6228
6229 static void trace_insert_eval_map(struct module *mod,
6230                                   struct trace_eval_map **start, int len)
6231 {
6232         struct trace_eval_map **map;
6233
6234         if (len <= 0)
6235                 return;
6236
6237         map = start;
6238
6239         trace_event_eval_update(map, len);
6240
6241         trace_insert_eval_map_file(mod, start, len);
6242 }
6243
6244 static ssize_t
6245 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6246                        size_t cnt, loff_t *ppos)
6247 {
6248         struct trace_array *tr = filp->private_data;
6249         char buf[MAX_TRACER_SIZE+2];
6250         int r;
6251
6252         mutex_lock(&trace_types_lock);
6253         r = sprintf(buf, "%s\n", tr->current_trace->name);
6254         mutex_unlock(&trace_types_lock);
6255
6256         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6257 }
6258
6259 int tracer_init(struct tracer *t, struct trace_array *tr)
6260 {
6261         tracing_reset_online_cpus(&tr->array_buffer);
6262         return t->init(tr);
6263 }
6264
6265 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6266 {
6267         int cpu;
6268
6269         for_each_tracing_cpu(cpu)
6270                 per_cpu_ptr(buf->data, cpu)->entries = val;
6271 }
6272
6273 #ifdef CONFIG_TRACER_MAX_TRACE
6274 /* resize @trace_buf's entries to match the size of @size_buf's entries */
6275 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6276                                         struct array_buffer *size_buf, int cpu_id)
6277 {
6278         int cpu, ret = 0;
6279
6280         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6281                 for_each_tracing_cpu(cpu) {
6282                         ret = ring_buffer_resize(trace_buf->buffer,
6283                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6284                         if (ret < 0)
6285                                 break;
6286                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6287                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6288                 }
6289         } else {
6290                 ret = ring_buffer_resize(trace_buf->buffer,
6291                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6292                 if (ret == 0)
6293                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6294                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6295         }
6296
6297         return ret;
6298 }
6299 #endif /* CONFIG_TRACER_MAX_TRACE */
6300
6301 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6302                                         unsigned long size, int cpu)
6303 {
6304         int ret;
6305
6306         /*
6307          * If kernel or user changes the size of the ring buffer
6308          * we use the size that was given, and we can forget about
6309          * expanding it later.
6310          */
6311         ring_buffer_expanded = true;
6312
6313         /* May be called before buffers are initialized */
6314         if (!tr->array_buffer.buffer)
6315                 return 0;
6316
6317         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6318         if (ret < 0)
6319                 return ret;
6320
6321 #ifdef CONFIG_TRACER_MAX_TRACE
6322         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6323             !tr->current_trace->use_max_tr)
6324                 goto out;
6325
6326         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6327         if (ret < 0) {
6328                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6329                                                      &tr->array_buffer, cpu);
6330                 if (r < 0) {
6331                         /*
6332                          * We are left with a max buffer of a different
6333                          * size than the main buffer!
6334                          * The max buffer is our "snapshot" buffer.
6335                          * When a tracer needs a snapshot (one of the
6336                          * latency tracers), it swaps the max buffer
6337                          * with the saved snapshot. We succeeded in
6338                          * updating the size of the main buffer, but failed
6339                          * to update the size of the max buffer. Then, when
6340                          * we tried to reset the main buffer to its original
6341                          * size, we failed there too. This is very unlikely
6342                          * to happen, but if it does, warn and kill all
6343                          * tracing.
6344                          */
6345                         WARN_ON(1);
6346                         tracing_disabled = 1;
6347                 }
6348                 return ret;
6349         }
6350
6351         if (cpu == RING_BUFFER_ALL_CPUS)
6352                 set_buffer_entries(&tr->max_buffer, size);
6353         else
6354                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6355
6356  out:
6357 #endif /* CONFIG_TRACER_MAX_TRACE */
6358
6359         if (cpu == RING_BUFFER_ALL_CPUS)
6360                 set_buffer_entries(&tr->array_buffer, size);
6361         else
6362                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6363
6364         return ret;
6365 }
6366
6367 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6368                                   unsigned long size, int cpu_id)
6369 {
6370         int ret;
6371
6372         mutex_lock(&trace_types_lock);
6373
6374         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6375                 /* make sure, this cpu is enabled in the mask */
6376                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6377                         ret = -EINVAL;
6378                         goto out;
6379                 }
6380         }
6381
6382         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6383         if (ret < 0)
6384                 ret = -ENOMEM;
6385
6386 out:
6387         mutex_unlock(&trace_types_lock);
6388
6389         return ret;
6390 }
6391
6392
6393 /**
6394  * tracing_update_buffers - used by tracing facility to expand ring buffers
6395  *
6396  * To save memory when tracing is configured in but never used, the
6397  * ring buffers are initially set to a minimum size. Once a user starts
6398  * to use the tracing facility, the buffers need to grow to their
6399  * default size.
6400  *
6401  * This function is to be called when a tracer is about to be used.
6402  */
6403 int tracing_update_buffers(void)
6404 {
6405         int ret = 0;
6406
6407         mutex_lock(&trace_types_lock);
6408         if (!ring_buffer_expanded)
6409                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6410                                                 RING_BUFFER_ALL_CPUS);
6411         mutex_unlock(&trace_types_lock);
6412
6413         return ret;
6414 }
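
/*
 * Editorial sketch (not part of the original source): the intended call
 * pattern for tracing_update_buffers().  A caller that is about to start
 * using the ring buffer expands it first and propagates any failure; on
 * error the buffers simply stay at their boot-time minimum size.
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 *	... enable the tracer or event; the buffers are now at full size ...
 */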
6415
6416 struct trace_option_dentry;
6417
6418 static void
6419 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6420
6421 /*
6422  * Used to clear out the tracer before deletion of an instance.
6423  * Must have trace_types_lock held.
6424  */
6425 static void tracing_set_nop(struct trace_array *tr)
6426 {
6427         if (tr->current_trace == &nop_trace)
6428                 return;
6429
6430         tr->current_trace->enabled--;
6431
6432         if (tr->current_trace->reset)
6433                 tr->current_trace->reset(tr);
6434
6435         tr->current_trace = &nop_trace;
6436 }
6437
6438 static bool tracer_options_updated;
6439
6440 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6441 {
6442         /* Only enable if the directory has been created already. */
6443         if (!tr->dir)
6444                 return;
6445
6446         /* Only create trace option files after update_tracer_options has finished */
6447         if (!tracer_options_updated)
6448                 return;
6449
6450         create_trace_option_files(tr, t);
6451 }
6452
6453 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6454 {
6455         struct tracer *t;
6456 #ifdef CONFIG_TRACER_MAX_TRACE
6457         bool had_max_tr;
6458 #endif
6459         int ret = 0;
6460
6461         mutex_lock(&trace_types_lock);
6462
6463         if (!ring_buffer_expanded) {
6464                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6465                                                 RING_BUFFER_ALL_CPUS);
6466                 if (ret < 0)
6467                         goto out;
6468                 ret = 0;
6469         }
6470
6471         for (t = trace_types; t; t = t->next) {
6472                 if (strcmp(t->name, buf) == 0)
6473                         break;
6474         }
6475         if (!t) {
6476                 ret = -EINVAL;
6477                 goto out;
6478         }
6479         if (t == tr->current_trace)
6480                 goto out;
6481
6482 #ifdef CONFIG_TRACER_SNAPSHOT
6483         if (t->use_max_tr) {
6484                 local_irq_disable();
6485                 arch_spin_lock(&tr->max_lock);
6486                 if (tr->cond_snapshot)
6487                         ret = -EBUSY;
6488                 arch_spin_unlock(&tr->max_lock);
6489                 local_irq_enable();
6490                 if (ret)
6491                         goto out;
6492         }
6493 #endif
6494         /* Some tracers cannot be enabled from the kernel command line (early boot) */
6495         if (system_state < SYSTEM_RUNNING && t->noboot) {
6496                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6497                         t->name);
6498                 goto out;
6499         }
6500
6501         /* Some tracers are only allowed for the top level buffer */
6502         if (!trace_ok_for_array(t, tr)) {
6503                 ret = -EINVAL;
6504                 goto out;
6505         }
6506
6507         /* If trace pipe files are being read, we can't change the tracer */
6508         if (tr->trace_ref) {
6509                 ret = -EBUSY;
6510                 goto out;
6511         }
6512
6513         trace_branch_disable();
6514
6515         tr->current_trace->enabled--;
6516
6517         if (tr->current_trace->reset)
6518                 tr->current_trace->reset(tr);
6519
6520 #ifdef CONFIG_TRACER_MAX_TRACE
6521         had_max_tr = tr->current_trace->use_max_tr;
6522
6523         /* Current trace needs to be nop_trace before synchronize_rcu */
6524         tr->current_trace = &nop_trace;
6525
6526         if (had_max_tr && !t->use_max_tr) {
6527                 /*
6528                  * We need to make sure that update_max_tr() sees that
6529                  * current_trace changed to nop_trace, to keep it from
6530                  * swapping the buffers after we resize them.
6531                  * update_max_tr() is called with interrupts disabled,
6532                  * so a synchronize_rcu() is sufficient.
6533                  */
6534                 synchronize_rcu();
6535                 free_snapshot(tr);
6536         }
6537
6538         if (t->use_max_tr && !tr->allocated_snapshot) {
6539                 ret = tracing_alloc_snapshot_instance(tr);
6540                 if (ret < 0)
6541                         goto out;
6542         }
6543 #else
6544         tr->current_trace = &nop_trace;
6545 #endif
6546
6547         if (t->init) {
6548                 ret = tracer_init(t, tr);
6549                 if (ret)
6550                         goto out;
6551         }
6552
6553         tr->current_trace = t;
6554         tr->current_trace->enabled++;
6555         trace_branch_enable(tr);
6556  out:
6557         mutex_unlock(&trace_types_lock);
6558
6559         return ret;
6560 }
6561
6562 static ssize_t
6563 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6564                         size_t cnt, loff_t *ppos)
6565 {
6566         struct trace_array *tr = filp->private_data;
6567         char buf[MAX_TRACER_SIZE+1];
6568         char *name;
6569         size_t ret;
6570         int err;
6571
6572         ret = cnt;
6573
6574         if (cnt > MAX_TRACER_SIZE)
6575                 cnt = MAX_TRACER_SIZE;
6576
6577         if (copy_from_user(buf, ubuf, cnt))
6578                 return -EFAULT;
6579
6580         buf[cnt] = 0;
6581
6582         name = strim(buf);
6583
6584         err = tracing_set_tracer(tr, name);
6585         if (err)
6586                 return err;
6587
6588         *ppos += ret;
6589
6590         return ret;
6591 }
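
/*
 * Editorial example (user space, not part of the original source): selecting
 * a tracer by writing its name to the current_tracer file, which lands in
 * tracing_set_tracer() above.  The tracefs mount point and the availability
 * of the "function" tracer are assumptions; the trailing newline is harmless
 * because the input is trimmed with strim() before the lookup.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "function\n", 9);
 *		close(fd);
 *		return 0;
 *	}
 */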
6592
6593 static ssize_t
6594 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6595                    size_t cnt, loff_t *ppos)
6596 {
6597         char buf[64];
6598         int r;
6599
6600         r = snprintf(buf, sizeof(buf), "%ld\n",
6601                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6602         if (r > sizeof(buf))
6603                 r = sizeof(buf);
6604         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6605 }
6606
6607 static ssize_t
6608 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6609                     size_t cnt, loff_t *ppos)
6610 {
6611         unsigned long val;
6612         int ret;
6613
6614         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6615         if (ret)
6616                 return ret;
6617
6618         *ptr = val * 1000;
6619
6620         return cnt;
6621 }
6622
6623 static ssize_t
6624 tracing_thresh_read(struct file *filp, char __user *ubuf,
6625                     size_t cnt, loff_t *ppos)
6626 {
6627         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6628 }
6629
6630 static ssize_t
6631 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6632                      size_t cnt, loff_t *ppos)
6633 {
6634         struct trace_array *tr = filp->private_data;
6635         int ret;
6636
6637         mutex_lock(&trace_types_lock);
6638         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6639         if (ret < 0)
6640                 goto out;
6641
6642         if (tr->current_trace->update_thresh) {
6643                 ret = tr->current_trace->update_thresh(tr);
6644                 if (ret < 0)
6645                         goto out;
6646         }
6647
6648         ret = cnt;
6649 out:
6650         mutex_unlock(&trace_types_lock);
6651
6652         return ret;
6653 }
6654
6655 #ifdef CONFIG_TRACER_MAX_TRACE
6656
6657 static ssize_t
6658 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6659                      size_t cnt, loff_t *ppos)
6660 {
6661         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6662 }
6663
6664 static ssize_t
6665 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6666                       size_t cnt, loff_t *ppos)
6667 {
6668         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6669 }
6670
6671 #endif
6672
6673 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6674 {
6675         struct trace_array *tr = inode->i_private;
6676         struct trace_iterator *iter;
6677         int ret;
6678
6679         ret = tracing_check_open_get_tr(tr);
6680         if (ret)
6681                 return ret;
6682
6683         mutex_lock(&trace_types_lock);
6684
6685         /* create a buffer to store the information to pass to userspace */
6686         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6687         if (!iter) {
6688                 ret = -ENOMEM;
6689                 __trace_array_put(tr);
6690                 goto out;
6691         }
6692
6693         trace_seq_init(&iter->seq);
6694         iter->trace = tr->current_trace;
6695
6696         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6697                 ret = -ENOMEM;
6698                 goto fail;
6699         }
6700
6701         /* trace pipe does not show start of buffer */
6702         cpumask_setall(iter->started);
6703
6704         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6705                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6706
6707         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6708         if (trace_clocks[tr->clock_id].in_ns)
6709                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6710
6711         iter->tr = tr;
6712         iter->array_buffer = &tr->array_buffer;
6713         iter->cpu_file = tracing_get_cpu(inode);
6714         mutex_init(&iter->mutex);
6715         filp->private_data = iter;
6716
6717         if (iter->trace->pipe_open)
6718                 iter->trace->pipe_open(iter);
6719
6720         nonseekable_open(inode, filp);
6721
6722         tr->trace_ref++;
6723 out:
6724         mutex_unlock(&trace_types_lock);
6725         return ret;
6726
6727 fail:
6728         kfree(iter);
6729         __trace_array_put(tr);
6730         mutex_unlock(&trace_types_lock);
6731         return ret;
6732 }
6733
6734 static int tracing_release_pipe(struct inode *inode, struct file *file)
6735 {
6736         struct trace_iterator *iter = file->private_data;
6737         struct trace_array *tr = inode->i_private;
6738
6739         mutex_lock(&trace_types_lock);
6740
6741         tr->trace_ref--;
6742
6743         if (iter->trace->pipe_close)
6744                 iter->trace->pipe_close(iter);
6745
6746         mutex_unlock(&trace_types_lock);
6747
6748         free_cpumask_var(iter->started);
6749         kfree(iter->fmt);
6750         mutex_destroy(&iter->mutex);
6751         kfree(iter);
6752
6753         trace_array_put(tr);
6754
6755         return 0;
6756 }
6757
6758 static __poll_t
6759 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6760 {
6761         struct trace_array *tr = iter->tr;
6762
6763         /* Iterators are static; they should be either filled or empty */
6764         if (trace_buffer_iter(iter, iter->cpu_file))
6765                 return EPOLLIN | EPOLLRDNORM;
6766
6767         if (tr->trace_flags & TRACE_ITER_BLOCK)
6768                 /*
6769                  * Always select as readable when in blocking mode
6770                  */
6771                 return EPOLLIN | EPOLLRDNORM;
6772         else
6773                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6774                                              filp, poll_table, iter->tr->buffer_percent);
6775 }
6776
6777 static __poll_t
6778 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6779 {
6780         struct trace_iterator *iter = filp->private_data;
6781
6782         return trace_poll(iter, filp, poll_table);
6783 }
6784
6785 /* Must be called with iter->mutex held. */
6786 static int tracing_wait_pipe(struct file *filp)
6787 {
6788         struct trace_iterator *iter = filp->private_data;
6789         int ret;
6790
6791         while (trace_empty(iter)) {
6792
6793                 if (filp->f_flags & O_NONBLOCK)
6794                         return -EAGAIN;
6796
6797                 /*
6798                  * We block until we have read something and tracing has
6799                  * been disabled. We keep blocking if tracing is disabled
6800                  * but nothing has been read yet: this lets a user cat this
6801                  * file and then enable tracing. But once something has
6802                  * been read, we return EOF when tracing is disabled again.
6803                  *
6804                  * iter->pos will be 0 if we haven't read anything.
6805                  */
6806                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6807                         break;
6808
6809                 mutex_unlock(&iter->mutex);
6810
6811                 ret = wait_on_pipe(iter, 0);
6812
6813                 mutex_lock(&iter->mutex);
6814
6815                 if (ret)
6816                         return ret;
6817         }
6818
6819         return 1;
6820 }
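
/*
 * Editorial example (user space, not part of the original source): a minimal
 * consumer of trace_pipe that relies on the blocking behaviour implemented by
 * tracing_wait_pipe() above.  With a default (blocking) open, read() sleeps
 * until trace data is available; had the file been opened with O_NONBLOCK,
 * read() would fail with EAGAIN instead.  The tracefs path is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */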
6821
6822 /*
6823  * Consumer reader.
6824  */
6825 static ssize_t
6826 tracing_read_pipe(struct file *filp, char __user *ubuf,
6827                   size_t cnt, loff_t *ppos)
6828 {
6829         struct trace_iterator *iter = filp->private_data;
6830         ssize_t sret;
6831
6832         /*
6833          * Avoid more than one consumer on a single file descriptor.
6834          * This is just a matter of trace coherency; the ring buffer itself
6835          * is protected.
6836          */
6837         mutex_lock(&iter->mutex);
6838
6839         /* return any leftover data */
6840         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6841         if (sret != -EBUSY)
6842                 goto out;
6843
6844         trace_seq_init(&iter->seq);
6845
6846         if (iter->trace->read) {
6847                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6848                 if (sret)
6849                         goto out;
6850         }
6851
6852 waitagain:
6853         sret = tracing_wait_pipe(filp);
6854         if (sret <= 0)
6855                 goto out;
6856
6857         /* stop when tracing is finished */
6858         if (trace_empty(iter)) {
6859                 sret = 0;
6860                 goto out;
6861         }
6862
6863         if (cnt >= PAGE_SIZE)
6864                 cnt = PAGE_SIZE - 1;
6865
6866         /* reset all but tr, trace, and overruns */
6867         trace_iterator_reset(iter);
6868         cpumask_clear(iter->started);
6869         trace_seq_init(&iter->seq);
6870
6871         trace_event_read_lock();
6872         trace_access_lock(iter->cpu_file);
6873         while (trace_find_next_entry_inc(iter) != NULL) {
6874                 enum print_line_t ret;
6875                 int save_len = iter->seq.seq.len;
6876
6877                 ret = print_trace_line(iter);
6878                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6879                         /*
6880                          * If one print_trace_line() fills the entire trace_seq in one shot,
6881                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6882                          * In this case we need to consume the event; otherwise the loop
6883                          * will peek at this event again next time, resulting in an infinite loop.
6884                          */
6885                         if (save_len == 0) {
6886                                 iter->seq.full = 0;
6887                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6888                                 trace_consume(iter);
6889                                 break;
6890                         }
6891
6892                         /* In other cases, don't print partial lines */
6893                         iter->seq.seq.len = save_len;
6894                         break;
6895                 }
6896                 if (ret != TRACE_TYPE_NO_CONSUME)
6897                         trace_consume(iter);
6898
6899                 if (trace_seq_used(&iter->seq) >= cnt)
6900                         break;
6901
6902                 /*
6903                  * The full flag being set means we reached the trace_seq buffer
6904                  * size and should have left via the partial-output condition above.
6905                  * One of the trace_seq_* functions is not being used properly.
6906                  */
6907                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6908                           iter->ent->type);
6909         }
6910         trace_access_unlock(iter->cpu_file);
6911         trace_event_read_unlock();
6912
6913         /* Now copy what we have to the user */
6914         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6915         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6916                 trace_seq_init(&iter->seq);
6917
6918         /*
6919          * If there was nothing to send to user, in spite of consuming trace
6920          * entries, go back to wait for more entries.
6921          */
6922         if (sret == -EBUSY)
6923                 goto waitagain;
6924
6925 out:
6926         mutex_unlock(&iter->mutex);
6927
6928         return sret;
6929 }
6930
6931 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6932                                      unsigned int idx)
6933 {
6934         __free_page(spd->pages[idx]);
6935 }
6936
6937 static size_t
6938 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6939 {
6940         size_t count;
6941         int save_len;
6942         int ret;
6943
6944         /* Seq buffer is page-sized, exactly what we need. */
6945         for (;;) {
6946                 save_len = iter->seq.seq.len;
6947                 ret = print_trace_line(iter);
6948
6949                 if (trace_seq_has_overflowed(&iter->seq)) {
6950                         iter->seq.seq.len = save_len;
6951                         break;
6952                 }
6953
6954                 /*
6955                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6956                  * should only be returned if iter->seq overflowed. But
6957                  * check it anyway to be safe.
6958                  */
6959                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6960                         iter->seq.seq.len = save_len;
6961                         break;
6962                 }
6963
6964                 count = trace_seq_used(&iter->seq) - save_len;
6965                 if (rem < count) {
6966                         rem = 0;
6967                         iter->seq.seq.len = save_len;
6968                         break;
6969                 }
6970
6971                 if (ret != TRACE_TYPE_NO_CONSUME)
6972                         trace_consume(iter);
6973                 rem -= count;
6974                 if (!trace_find_next_entry_inc(iter)) {
6975                         rem = 0;
6976                         iter->ent = NULL;
6977                         break;
6978                 }
6979         }
6980
6981         return rem;
6982 }
6983
6984 static ssize_t tracing_splice_read_pipe(struct file *filp,
6985                                         loff_t *ppos,
6986                                         struct pipe_inode_info *pipe,
6987                                         size_t len,
6988                                         unsigned int flags)
6989 {
6990         struct page *pages_def[PIPE_DEF_BUFFERS];
6991         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6992         struct trace_iterator *iter = filp->private_data;
6993         struct splice_pipe_desc spd = {
6994                 .pages          = pages_def,
6995                 .partial        = partial_def,
6996                 .nr_pages       = 0, /* This gets updated below. */
6997                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6998                 .ops            = &default_pipe_buf_ops,
6999                 .spd_release    = tracing_spd_release_pipe,
7000         };
7001         ssize_t ret;
7002         size_t rem;
7003         unsigned int i;
7004
7005         if (splice_grow_spd(pipe, &spd))
7006                 return -ENOMEM;
7007
7008         mutex_lock(&iter->mutex);
7009
7010         if (iter->trace->splice_read) {
7011                 ret = iter->trace->splice_read(iter, filp,
7012                                                ppos, pipe, len, flags);
7013                 if (ret)
7014                         goto out_err;
7015         }
7016
7017         ret = tracing_wait_pipe(filp);
7018         if (ret <= 0)
7019                 goto out_err;
7020
7021         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7022                 ret = -EFAULT;
7023                 goto out_err;
7024         }
7025
7026         trace_event_read_lock();
7027         trace_access_lock(iter->cpu_file);
7028
7029         /* Fill as many pages as possible. */
7030         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7031                 spd.pages[i] = alloc_page(GFP_KERNEL);
7032                 if (!spd.pages[i])
7033                         break;
7034
7035                 rem = tracing_fill_pipe_page(rem, iter);
7036
7037                 /* Copy the data into the page, so we can start over. */
7038                 ret = trace_seq_to_buffer(&iter->seq,
7039                                           page_address(spd.pages[i]),
7040                                           trace_seq_used(&iter->seq));
7041                 if (ret < 0) {
7042                         __free_page(spd.pages[i]);
7043                         break;
7044                 }
7045                 spd.partial[i].offset = 0;
7046                 spd.partial[i].len = trace_seq_used(&iter->seq);
7047
7048                 trace_seq_init(&iter->seq);
7049         }
7050
7051         trace_access_unlock(iter->cpu_file);
7052         trace_event_read_unlock();
7053         mutex_unlock(&iter->mutex);
7054
7055         spd.nr_pages = i;
7056
7057         if (i)
7058                 ret = splice_to_pipe(pipe, &spd);
7059         else
7060                 ret = 0;
7061 out:
7062         splice_shrink_spd(&spd);
7063         return ret;
7064
7065 out_err:
7066         mutex_unlock(&iter->mutex);
7067         goto out;
7068 }
7069
7070 static ssize_t
7071 tracing_entries_read(struct file *filp, char __user *ubuf,
7072                      size_t cnt, loff_t *ppos)
7073 {
7074         struct inode *inode = file_inode(filp);
7075         struct trace_array *tr = inode->i_private;
7076         int cpu = tracing_get_cpu(inode);
7077         char buf[64];
7078         int r = 0;
7079         ssize_t ret;
7080
7081         mutex_lock(&trace_types_lock);
7082
7083         if (cpu == RING_BUFFER_ALL_CPUS) {
7084                 int cpu, buf_size_same;
7085                 unsigned long size;
7086
7087                 size = 0;
7088                 buf_size_same = 1;
7089                 /* check if all cpu sizes are same */
7090                 for_each_tracing_cpu(cpu) {
7091                         /* fill in the size from first enabled cpu */
7092                         if (size == 0)
7093                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7094                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7095                                 buf_size_same = 0;
7096                                 break;
7097                         }
7098                 }
7099
7100                 if (buf_size_same) {
7101                         if (!ring_buffer_expanded)
7102                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7103                                             size >> 10,
7104                                             trace_buf_size >> 10);
7105                         else
7106                                 r = sprintf(buf, "%lu\n", size >> 10);
7107                 } else
7108                         r = sprintf(buf, "X\n");
7109         } else
7110                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7111
7112         mutex_unlock(&trace_types_lock);
7113
7114         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7115         return ret;
7116 }
7117
7118 static ssize_t
7119 tracing_entries_write(struct file *filp, const char __user *ubuf,
7120                       size_t cnt, loff_t *ppos)
7121 {
7122         struct inode *inode = file_inode(filp);
7123         struct trace_array *tr = inode->i_private;
7124         unsigned long val;
7125         int ret;
7126
7127         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7128         if (ret)
7129                 return ret;
7130
7131         /* must have at least 1 entry */
7132         if (!val)
7133                 return -EINVAL;
7134
7135         /* value is in KB */
7136         val <<= 10;
7137         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7138         if (ret < 0)
7139                 return ret;
7140
7141         *ppos += cnt;
7142
7143         return cnt;
7144 }
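
/*
 * Editorial example (user space, not part of the original source): resizing
 * the ring buffer of a single CPU through its per-CPU buffer_size_kb file,
 * served by the handlers above (tracing_get_cpu() picks the CPU from the
 * inode).  The value written is interpreted in KiB: it is shifted left by 10
 * before tracing_resize_ring_buffer() is called.  The path and the size are
 * illustrative.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb",
 *			      O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "4096", 4);
 *		close(fd);
 *		return 0;
 *	}
 */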
7145
7146 static ssize_t
7147 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7148                                 size_t cnt, loff_t *ppos)
7149 {
7150         struct trace_array *tr = filp->private_data;
7151         char buf[64];
7152         int r, cpu;
7153         unsigned long size = 0, expanded_size = 0;
7154
7155         mutex_lock(&trace_types_lock);
7156         for_each_tracing_cpu(cpu) {
7157                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7158                 if (!ring_buffer_expanded)
7159                         expanded_size += trace_buf_size >> 10;
7160         }
7161         if (ring_buffer_expanded)
7162                 r = sprintf(buf, "%lu\n", size);
7163         else
7164                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7165         mutex_unlock(&trace_types_lock);
7166
7167         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7168 }
7169
7170 static ssize_t
7171 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7172                           size_t cnt, loff_t *ppos)
7173 {
7174         /*
7175          * There is no need to read what the user has written; this function
7176          * exists just to make sure that there is no error when "echo" is used.
7177          */
7178
7179         *ppos += cnt;
7180
7181         return cnt;
7182 }
7183
7184 static int
7185 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7186 {
7187         struct trace_array *tr = inode->i_private;
7188
7189         /* disable tracing? */
7190         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7191                 tracer_tracing_off(tr);
7192         /* resize the ring buffer to 0 */
7193         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7194
7195         trace_array_put(tr);
7196
7197         return 0;
7198 }
7199
7200 static ssize_t
7201 tracing_mark_write(struct file *filp, const char __user *ubuf,
7202                                         size_t cnt, loff_t *fpos)
7203 {
7204         struct trace_array *tr = filp->private_data;
7205         struct ring_buffer_event *event;
7206         enum event_trigger_type tt = ETT_NONE;
7207         struct trace_buffer *buffer;
7208         struct print_entry *entry;
7209         ssize_t written;
7210         int size;
7211         int len;
7212
7213 /* Used in tracing_mark_raw_write() as well */
7214 #define FAULTED_STR "<faulted>"
7215 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7216
7217         if (tracing_disabled)
7218                 return -EINVAL;
7219
7220         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7221                 return -EINVAL;
7222
7223         if (cnt > TRACE_BUF_SIZE)
7224                 cnt = TRACE_BUF_SIZE;
7225
7226         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7227
7228         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7229
7230         /* If less than "<faulted>", then make sure we can still add that */
7231         if (cnt < FAULTED_SIZE)
7232                 size += FAULTED_SIZE - cnt;
7233
7234         buffer = tr->array_buffer.buffer;
7235         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7236                                             tracing_gen_ctx());
7237         if (unlikely(!event))
7238                 /* Ring buffer disabled, return as if not open for write */
7239                 return -EBADF;
7240
7241         entry = ring_buffer_event_data(event);
7242         entry->ip = _THIS_IP_;
7243
7244         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7245         if (len) {
7246                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7247                 cnt = FAULTED_SIZE;
7248                 written = -EFAULT;
7249         } else
7250                 written = cnt;
7251
7252         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7253                 /* do not add \n before testing triggers, but add \0 */
7254                 entry->buf[cnt] = '\0';
7255                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7256         }
7257
7258         if (entry->buf[cnt - 1] != '\n') {
7259                 entry->buf[cnt] = '\n';
7260                 entry->buf[cnt + 1] = '\0';
7261         } else
7262                 entry->buf[cnt] = '\0';
7263
7264         if (static_branch_unlikely(&trace_marker_exports_enabled))
7265                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7266         __buffer_unlock_commit(buffer, event);
7267
7268         if (tt)
7269                 event_triggers_post_call(tr->trace_marker_file, tt);
7270
7271         return written;
7272 }
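
/*
 * Editorial example (user space, not part of the original source): writing an
 * annotation into the ring buffer through trace_marker, which is handled by
 * tracing_mark_write() above.  Writes longer than TRACE_BUF_SIZE are
 * truncated, and a newline is appended if the payload does not already end
 * with one.  The path and the message are illustrative.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		dprintf(fd, "frame_start pid=%d\n", getpid());
 *		close(fd);
 *		return 0;
 *	}
 */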
7273
7274 /* Limit it for now to 3K (including tag) */
7275 #define RAW_DATA_MAX_SIZE (1024*3)
7276
7277 static ssize_t
7278 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7279                                         size_t cnt, loff_t *fpos)
7280 {
7281         struct trace_array *tr = filp->private_data;
7282         struct ring_buffer_event *event;
7283         struct trace_buffer *buffer;
7284         struct raw_data_entry *entry;
7285         ssize_t written;
7286         int size;
7287         int len;
7288
7289 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7290
7291         if (tracing_disabled)
7292                 return -EINVAL;
7293
7294         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7295                 return -EINVAL;
7296
7297         /* The marker must at least have a tag id */
7298         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7299                 return -EINVAL;
7300
7301         if (cnt > TRACE_BUF_SIZE)
7302                 cnt = TRACE_BUF_SIZE;
7303
7304         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7305
7306         size = sizeof(*entry) + cnt;
7307         if (cnt < FAULT_SIZE_ID)
7308                 size += FAULT_SIZE_ID - cnt;
7309
7310         buffer = tr->array_buffer.buffer;
7311         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7312                                             tracing_gen_ctx());
7313         if (!event)
7314                 /* Ring buffer disabled, return as if not open for write */
7315                 return -EBADF;
7316
7317         entry = ring_buffer_event_data(event);
7318
7319         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7320         if (len) {
7321                 entry->id = -1;
7322                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7323                 written = -EFAULT;
7324         } else
7325                 written = cnt;
7326
7327         __buffer_unlock_commit(buffer, event);
7328
7329         return written;
7330 }
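
/*
 * Editorial example (user space, not part of the original source): the
 * payload format expected by trace_marker_raw, as implemented above.  The
 * write must start with an unsigned int tag id, may be followed by raw
 * bytes, and must not exceed RAW_DATA_MAX_SIZE.  The tag value and the
 * payload are hypothetical.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	struct raw_marker {
 *		unsigned int	id;
 *		char		data[8];
 *	};
 *
 *	int main(void)
 *	{
 *		struct raw_marker m = { .id = 0x1234, .data = "payload" };
 *		int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, &m, sizeof(m));
 *		close(fd);
 *		return 0;
 *	}
 */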
7331
7332 static int tracing_clock_show(struct seq_file *m, void *v)
7333 {
7334         struct trace_array *tr = m->private;
7335         int i;
7336
7337         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7338                 seq_printf(m,
7339                         "%s%s%s%s", i ? " " : "",
7340                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7341                         i == tr->clock_id ? "]" : "");
7342         seq_putc(m, '\n');
7343
7344         return 0;
7345 }
7346
7347 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7348 {
7349         int i;
7350
7351         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7352                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7353                         break;
7354         }
7355         if (i == ARRAY_SIZE(trace_clocks))
7356                 return -EINVAL;
7357
7358         mutex_lock(&trace_types_lock);
7359
7360         tr->clock_id = i;
7361
7362         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7363
7364         /*
7365          * The new clock may not be consistent with the previous clock.
7366          * Reset the buffer so that it does not contain incomparable timestamps.
7367          */
7368         tracing_reset_online_cpus(&tr->array_buffer);
7369
7370 #ifdef CONFIG_TRACER_MAX_TRACE
7371         if (tr->max_buffer.buffer)
7372                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7373         tracing_reset_online_cpus(&tr->max_buffer);
7374 #endif
7375
7376         mutex_unlock(&trace_types_lock);
7377
7378         return 0;
7379 }
7380
7381 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7382                                    size_t cnt, loff_t *fpos)
7383 {
7384         struct seq_file *m = filp->private_data;
7385         struct trace_array *tr = m->private;
7386         char buf[64];
7387         const char *clockstr;
7388         int ret;
7389
7390         if (cnt >= sizeof(buf))
7391                 return -EINVAL;
7392
7393         if (copy_from_user(buf, ubuf, cnt))
7394                 return -EFAULT;
7395
7396         buf[cnt] = 0;
7397
7398         clockstr = strstrip(buf);
7399
7400         ret = tracing_set_clock(tr, clockstr);
7401         if (ret)
7402                 return ret;
7403
7404         *fpos += cnt;
7405
7406         return cnt;
7407 }
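
/*
 * Editorial example (user space, not part of the original source): listing
 * the available clocks and then switching to one of them through the
 * trace_clock file, which ends up in tracing_set_clock() above.  Changing
 * the clock resets the ring buffers, so existing trace data is discarded.
 * The path is an assumption and "mono" is one of the standard clock names.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[256];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		n = read(fd, buf, sizeof(buf) - 1);
 *		if (n > 0) {
 *			buf[n] = '\0';
 *			printf("available clocks: %s", buf);
 *		}
 *		close(fd);
 *
 *		fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "mono", 4);
 *		close(fd);
 *		return 0;
 *	}
 */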
7408
7409 static int tracing_clock_open(struct inode *inode, struct file *file)
7410 {
7411         struct trace_array *tr = inode->i_private;
7412         int ret;
7413
7414         ret = tracing_check_open_get_tr(tr);
7415         if (ret)
7416                 return ret;
7417
7418         ret = single_open(file, tracing_clock_show, inode->i_private);
7419         if (ret < 0)
7420                 trace_array_put(tr);
7421
7422         return ret;
7423 }
7424
7425 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7426 {
7427         struct trace_array *tr = m->private;
7428
7429         mutex_lock(&trace_types_lock);
7430
7431         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7432                 seq_puts(m, "delta [absolute]\n");
7433         else
7434                 seq_puts(m, "[delta] absolute\n");
7435
7436         mutex_unlock(&trace_types_lock);
7437
7438         return 0;
7439 }
7440
7441 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7442 {
7443         struct trace_array *tr = inode->i_private;
7444         int ret;
7445
7446         ret = tracing_check_open_get_tr(tr);
7447         if (ret)
7448                 return ret;
7449
7450         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7451         if (ret < 0)
7452                 trace_array_put(tr);
7453
7454         return ret;
7455 }
7456
7457 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7458 {
7459         if (rbe == this_cpu_read(trace_buffered_event))
7460                 return ring_buffer_time_stamp(buffer);
7461
7462         return ring_buffer_event_time_stamp(buffer, rbe);
7463 }
7464
7465 /*
7466  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7467  */
7468 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7469 {
7470         int ret = 0;
7471
7472         mutex_lock(&trace_types_lock);
7473
7474         if (set && tr->no_filter_buffering_ref++)
7475                 goto out;
7476
7477         if (!set) {
7478                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7479                         ret = -EINVAL;
7480                         goto out;
7481                 }
7482
7483                 --tr->no_filter_buffering_ref;
7484         }
7485  out:
7486         mutex_unlock(&trace_types_lock);
7487
7488         return ret;
7489 }
7490
7491 struct ftrace_buffer_info {
7492         struct trace_iterator   iter;
7493         void                    *spare;
7494         unsigned int            spare_cpu;
7495         unsigned int            read;
7496 };
7497
7498 #ifdef CONFIG_TRACER_SNAPSHOT
7499 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7500 {
7501         struct trace_array *tr = inode->i_private;
7502         struct trace_iterator *iter;
7503         struct seq_file *m;
7504         int ret;
7505
7506         ret = tracing_check_open_get_tr(tr);
7507         if (ret)
7508                 return ret;
7509
7510         if (file->f_mode & FMODE_READ) {
7511                 iter = __tracing_open(inode, file, true);
7512                 if (IS_ERR(iter))
7513                         ret = PTR_ERR(iter);
7514         } else {
7515                 /* Writes still need the seq_file to hold the private data */
7516                 ret = -ENOMEM;
7517                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7518                 if (!m)
7519                         goto out;
7520                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7521                 if (!iter) {
7522                         kfree(m);
7523                         goto out;
7524                 }
7525                 ret = 0;
7526
7527                 iter->tr = tr;
7528                 iter->array_buffer = &tr->max_buffer;
7529                 iter->cpu_file = tracing_get_cpu(inode);
7530                 m->private = iter;
7531                 file->private_data = m;
7532         }
7533 out:
7534         if (ret < 0)
7535                 trace_array_put(tr);
7536
7537         return ret;
7538 }
7539
7540 static ssize_t
7541 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7542                        loff_t *ppos)
7543 {
7544         struct seq_file *m = filp->private_data;
7545         struct trace_iterator *iter = m->private;
7546         struct trace_array *tr = iter->tr;
7547         unsigned long val;
7548         int ret;
7549
7550         ret = tracing_update_buffers();
7551         if (ret < 0)
7552                 return ret;
7553
7554         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7555         if (ret)
7556                 return ret;
7557
7558         mutex_lock(&trace_types_lock);
7559
7560         if (tr->current_trace->use_max_tr) {
7561                 ret = -EBUSY;
7562                 goto out;
7563         }
7564
7565         local_irq_disable();
7566         arch_spin_lock(&tr->max_lock);
7567         if (tr->cond_snapshot)
7568                 ret = -EBUSY;
7569         arch_spin_unlock(&tr->max_lock);
7570         local_irq_enable();
7571         if (ret)
7572                 goto out;
7573
7574         switch (val) {
7575         case 0:
7576                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7577                         ret = -EINVAL;
7578                         break;
7579                 }
7580                 if (tr->allocated_snapshot)
7581                         free_snapshot(tr);
7582                 break;
7583         case 1:
7584 /* Only allow per-cpu swap if the ring buffer supports it */
7585 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7586                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7587                         ret = -EINVAL;
7588                         break;
7589                 }
7590 #endif
7591                 if (tr->allocated_snapshot)
7592                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7593                                         &tr->array_buffer, iter->cpu_file);
7594                 else
7595                         ret = tracing_alloc_snapshot_instance(tr);
7596                 if (ret < 0)
7597                         break;
7598                 local_irq_disable();
7599                 /* Now, we're going to swap */
7600                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7601                         update_max_tr(tr, current, smp_processor_id(), NULL);
7602                 else
7603                         update_max_tr_single(tr, current, iter->cpu_file);
7604                 local_irq_enable();
7605                 break;
7606         default:
7607                 if (tr->allocated_snapshot) {
7608                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7609                                 tracing_reset_online_cpus(&tr->max_buffer);
7610                         else
7611                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7612                 }
7613                 break;
7614         }
7615
7616         if (ret >= 0) {
7617                 *ppos += cnt;
7618                 ret = cnt;
7619         }
7620 out:
7621         mutex_unlock(&trace_types_lock);
7622         return ret;
7623 }
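
/*
 * Editorial summary (not part of the original source) of the values accepted
 * by the snapshot file, as implemented by the switch statement above, with a
 * user-space sketch.  The tracefs path is an assumption.
 *
 *	0   free the snapshot buffer (only on the all-CPUs file)
 *	1   allocate the snapshot buffer if necessary and swap it with the
 *	    live buffer (per-CPU swap needs CONFIG_RING_BUFFER_ALLOW_SWAP)
 *	>1  clear the snapshot buffer without freeing it
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);
 *		close(fd);
 *		return 0;
 *	}
 */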
7624
7625 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7626 {
7627         struct seq_file *m = file->private_data;
7628         int ret;
7629
7630         ret = tracing_release(inode, file);
7631
7632         if (file->f_mode & FMODE_READ)
7633                 return ret;
7634
7635         /* If write only, the seq_file is just a stub */
7636         if (m)
7637                 kfree(m->private);
7638         kfree(m);
7639
7640         return 0;
7641 }
7642
7643 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7644 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7645                                     size_t count, loff_t *ppos);
7646 static int tracing_buffers_release(struct inode *inode, struct file *file);
7647 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7648                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7649
7650 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7651 {
7652         struct ftrace_buffer_info *info;
7653         int ret;
7654
7655         /* The following checks for tracefs lockdown */
7656         ret = tracing_buffers_open(inode, filp);
7657         if (ret < 0)
7658                 return ret;
7659
7660         info = filp->private_data;
7661
7662         if (info->iter.trace->use_max_tr) {
7663                 tracing_buffers_release(inode, filp);
7664                 return -EBUSY;
7665         }
7666
7667         info->iter.snapshot = true;
7668         info->iter.array_buffer = &info->iter.tr->max_buffer;
7669
7670         return ret;
7671 }
7672
7673 #endif /* CONFIG_TRACER_SNAPSHOT */
7674
7675
7676 static const struct file_operations tracing_thresh_fops = {
7677         .open           = tracing_open_generic,
7678         .read           = tracing_thresh_read,
7679         .write          = tracing_thresh_write,
7680         .llseek         = generic_file_llseek,
7681 };
7682
7683 #ifdef CONFIG_TRACER_MAX_TRACE
7684 static const struct file_operations tracing_max_lat_fops = {
7685         .open           = tracing_open_generic,
7686         .read           = tracing_max_lat_read,
7687         .write          = tracing_max_lat_write,
7688         .llseek         = generic_file_llseek,
7689 };
7690 #endif
7691
7692 static const struct file_operations set_tracer_fops = {
7693         .open           = tracing_open_generic,
7694         .read           = tracing_set_trace_read,
7695         .write          = tracing_set_trace_write,
7696         .llseek         = generic_file_llseek,
7697 };
7698
7699 static const struct file_operations tracing_pipe_fops = {
7700         .open           = tracing_open_pipe,
7701         .poll           = tracing_poll_pipe,
7702         .read           = tracing_read_pipe,
7703         .splice_read    = tracing_splice_read_pipe,
7704         .release        = tracing_release_pipe,
7705         .llseek         = no_llseek,
7706 };
7707
7708 static const struct file_operations tracing_entries_fops = {
7709         .open           = tracing_open_generic_tr,
7710         .read           = tracing_entries_read,
7711         .write          = tracing_entries_write,
7712         .llseek         = generic_file_llseek,
7713         .release        = tracing_release_generic_tr,
7714 };
7715
7716 static const struct file_operations tracing_total_entries_fops = {
7717         .open           = tracing_open_generic_tr,
7718         .read           = tracing_total_entries_read,
7719         .llseek         = generic_file_llseek,
7720         .release        = tracing_release_generic_tr,
7721 };
7722
7723 static const struct file_operations tracing_free_buffer_fops = {
7724         .open           = tracing_open_generic_tr,
7725         .write          = tracing_free_buffer_write,
7726         .release        = tracing_free_buffer_release,
7727 };
7728
7729 static const struct file_operations tracing_mark_fops = {
7730         .open           = tracing_mark_open,
7731         .write          = tracing_mark_write,
7732         .release        = tracing_release_generic_tr,
7733 };
7734
7735 static const struct file_operations tracing_mark_raw_fops = {
7736         .open           = tracing_mark_open,
7737         .write          = tracing_mark_raw_write,
7738         .release        = tracing_release_generic_tr,
7739 };
7740
7741 static const struct file_operations trace_clock_fops = {
7742         .open           = tracing_clock_open,
7743         .read           = seq_read,
7744         .llseek         = seq_lseek,
7745         .release        = tracing_single_release_tr,
7746         .write          = tracing_clock_write,
7747 };
7748
7749 static const struct file_operations trace_time_stamp_mode_fops = {
7750         .open           = tracing_time_stamp_mode_open,
7751         .read           = seq_read,
7752         .llseek         = seq_lseek,
7753         .release        = tracing_single_release_tr,
7754 };
7755
7756 #ifdef CONFIG_TRACER_SNAPSHOT
7757 static const struct file_operations snapshot_fops = {
7758         .open           = tracing_snapshot_open,
7759         .read           = seq_read,
7760         .write          = tracing_snapshot_write,
7761         .llseek         = tracing_lseek,
7762         .release        = tracing_snapshot_release,
7763 };
7764
7765 static const struct file_operations snapshot_raw_fops = {
7766         .open           = snapshot_raw_open,
7767         .read           = tracing_buffers_read,
7768         .release        = tracing_buffers_release,
7769         .splice_read    = tracing_buffers_splice_read,
7770         .llseek         = no_llseek,
7771 };
7772
7773 #endif /* CONFIG_TRACER_SNAPSHOT */
7774
7775 /*
7776  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7777  * @filp: The active open file structure
7778  * @ubuf: The userspace provided buffer containing the value to write
7779  * @cnt: The maximum number of bytes to write
7780  * @ppos: The current "file" position
7781  *
7782  * This function implements the write interface for a struct trace_min_max_param.
7783  * The filp->private_data must point to a trace_min_max_param structure that
7784  * defines where to write the value, the min and the max acceptable values,
7785  * and a lock to protect the write.
7786  */
7787 static ssize_t
7788 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7789 {
7790         struct trace_min_max_param *param = filp->private_data;
7791         u64 val;
7792         int err;
7793
7794         if (!param)
7795                 return -EFAULT;
7796
7797         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7798         if (err)
7799                 return err;
7800
7801         if (param->lock)
7802                 mutex_lock(param->lock);
7803
7804         if (param->min && val < *param->min)
7805                 err = -EINVAL;
7806
7807         if (param->max && val > *param->max)
7808                 err = -EINVAL;
7809
7810         if (!err)
7811                 *param->val = val;
7812
7813         if (param->lock)
7814                 mutex_unlock(param->lock);
7815
7816         if (err)
7817                 return err;
7818
7819         return cnt;
7820 }
7821
7822 /*
7823  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7824  * @filp: The active open file structure
7825  * @ubuf: The userspace provided buffer to read value into
7826  * @cnt: The maximum number of bytes to read
7827  * @ppos: The current "file" position
7828  *
7829  * This function implements the read interface for a struct trace_min_max_param.
7830  * The filp->private_data must point to a trace_min_max_param struct with valid
7831  * data.
7832  */
7833 static ssize_t
7834 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7835 {
7836         struct trace_min_max_param *param = filp->private_data;
7837         char buf[U64_STR_SIZE];
7838         int len;
7839         u64 val;
7840
7841         if (!param)
7842                 return -EFAULT;
7843
7844         val = *param->val;
7845
7846         if (cnt > sizeof(buf))
7847                 cnt = sizeof(buf);
7848
7849         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7850
7851         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7852 }
7853
7854 const struct file_operations trace_min_max_fops = {
7855         .open           = tracing_open_generic,
7856         .read           = trace_min_max_read,
7857         .write          = trace_min_max_write,
7858 };
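/*
 * Minimal usage sketch for the min/max fops above.  The knob name and the
 * my_* identifiers are hypothetical, and "parent" stands for whatever
 * tracefs directory the file should live in; the struct fields and helpers
 * match those used in this file:
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 *
 * Reads then report the current value of my_val; writes are parsed as a
 * base-10 u64 and rejected with -EINVAL when outside [my_min, my_max].
 */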
7859
7860 #define TRACING_LOG_ERRS_MAX    8
7861 #define TRACING_LOG_LOC_MAX     128
7862
7863 #define CMD_PREFIX "  Command: "
7864
7865 struct err_info {
7866         const char      **errs; /* ptr to loc-specific array of err strings */
7867         u8              type;   /* index into errs -> specific err string */
7868         u16             pos;    /* caret position */
7869         u64             ts;
7870 };
7871
7872 struct tracing_log_err {
7873         struct list_head        list;
7874         struct err_info         info;
7875         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7876         char                    *cmd;                     /* what caused err */
7877 };
7878
7879 static DEFINE_MUTEX(tracing_err_log_lock);
7880
7881 static struct tracing_log_err *alloc_tracing_log_err(int len)
7882 {
7883         struct tracing_log_err *err;
7884
7885         err = kzalloc(sizeof(*err), GFP_KERNEL);
7886         if (!err)
7887                 return ERR_PTR(-ENOMEM);
7888
7889         err->cmd = kzalloc(len, GFP_KERNEL);
7890         if (!err->cmd) {
7891                 kfree(err);
7892                 return ERR_PTR(-ENOMEM);
7893         }
7894
7895         return err;
7896 }
7897
7898 static void free_tracing_log_err(struct tracing_log_err *err)
7899 {
7900         kfree(err->cmd);
7901         kfree(err);
7902 }
7903
7904 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7905                                                    int len)
7906 {
7907         struct tracing_log_err *err;
7908         char *cmd;
7909
7910         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7911                 err = alloc_tracing_log_err(len);
7912                 if (PTR_ERR(err) != -ENOMEM)
7913                         tr->n_err_log_entries++;
7914
7915                 return err;
7916         }
7917         cmd = kzalloc(len, GFP_KERNEL);
7918         if (!cmd)
7919                 return ERR_PTR(-ENOMEM);
7920         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7921         kfree(err->cmd);
7922         err->cmd = cmd;
7923         list_del(&err->list);
7924
7925         return err;
7926 }
7927
7928 /**
7929  * err_pos - find the position of a string within a command for error careting
7930  * @cmd: The tracing command that caused the error
7931  * @str: The string to position the caret at within @cmd
7932  *
7933  * Finds the position of the first occurrence of @str within @cmd.  The
7934  * return value can be passed to tracing_log_err() for caret placement
7935  * within @cmd.
7936  *
7937  * Returns the index within @cmd of the first occurrence of @str or 0
7938  * if @str was not found.
7939  */
7940 unsigned int err_pos(char *cmd, const char *str)
7941 {
7942         char *found;
7943
7944         if (WARN_ON(!strlen(cmd)))
7945                 return 0;
7946
7947         found = strstr(cmd, str);
7948         if (found)
7949                 return found - cmd;
7950
7951         return 0;
7952 }
7953
7954 /**
7955  * tracing_log_err - write an error to the tracing error log
7956  * @tr: The associated trace array for the error (NULL for top level array)
7957  * @loc: A string describing where the error occurred
7958  * @cmd: The tracing command that caused the error
7959  * @errs: The array of loc-specific static error strings
7960  * @type: The index into errs[], which produces the specific static err string
7961  * @pos: The position the caret should be placed in the cmd
7962  *
7963  * Writes an error into tracing/error_log of the form:
7964  *
7965  * <loc>: error: <text>
7966  *   Command: <cmd>
7967  *              ^
7968  *
7969  * tracing/error_log is a small log file containing the last
7970  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7971  * unless there has been a tracing error; the log can be cleared and
7972  * its memory freed by writing the empty string to it in truncation
7973  * mode, i.e. echo > tracing/error_log.
7974  *
7975  * NOTE: the @errs array along with the @type param are used to
7976  * produce a static error string - this string is not copied and saved
7977  * when the error is logged - only a pointer to it is saved.  See
7978  * existing callers for examples of how static strings are typically
7979  * defined for use with tracing_log_err().
7980  */
7981 void tracing_log_err(struct trace_array *tr,
7982                      const char *loc, const char *cmd,
7983                      const char **errs, u8 type, u16 pos)
7984 {
7985         struct tracing_log_err *err;
7986         int len = 0;
7987
7988         if (!tr)
7989                 tr = &global_trace;
7990
7991         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7992
7993         mutex_lock(&tracing_err_log_lock);
7994         err = get_tracing_log_err(tr, len);
7995         if (PTR_ERR(err) == -ENOMEM) {
7996                 mutex_unlock(&tracing_err_log_lock);
7997                 return;
7998         }
7999
8000         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8001         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8002
8003         err->info.errs = errs;
8004         err->info.type = type;
8005         err->info.pos = pos;
8006         err->info.ts = local_clock();
8007
8008         list_add_tail(&err->list, &tr->err_log);
8009         mutex_unlock(&tracing_err_log_lock);
8010 }
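/*
 * Illustrative call pattern, using a static error-string array as
 * described in the NOTE above (the names here are hypothetical, and tr
 * may be NULL for the top-level array):
 *
 *	static const char *my_errs[] = { "Bad keyword", "Missing value" };
 *
 *	tracing_log_err(tr, "my_subsystem", cmd, my_errs,
 *			0, err_pos(cmd, "keyword"));
 *
 * This logs "my_subsystem: error: Bad keyword", followed by the offending
 * command with the caret under the first occurrence of "keyword" in it.
 */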
8011
8012 static void clear_tracing_err_log(struct trace_array *tr)
8013 {
8014         struct tracing_log_err *err, *next;
8015
8016         mutex_lock(&tracing_err_log_lock);
8017         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8018                 list_del(&err->list);
8019                 free_tracing_log_err(err);
8020         }
8021
8022         tr->n_err_log_entries = 0;
8023         mutex_unlock(&tracing_err_log_lock);
8024 }
8025
8026 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8027 {
8028         struct trace_array *tr = m->private;
8029
8030         mutex_lock(&tracing_err_log_lock);
8031
8032         return seq_list_start(&tr->err_log, *pos);
8033 }
8034
8035 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8036 {
8037         struct trace_array *tr = m->private;
8038
8039         return seq_list_next(v, &tr->err_log, pos);
8040 }
8041
8042 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8043 {
8044         mutex_unlock(&tracing_err_log_lock);
8045 }
8046
8047 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8048 {
8049         u16 i;
8050
8051         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8052                 seq_putc(m, ' ');
8053         for (i = 0; i < pos; i++)
8054                 seq_putc(m, ' ');
8055         seq_puts(m, "^\n");
8056 }
8057
8058 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8059 {
8060         struct tracing_log_err *err = v;
8061
8062         if (err) {
8063                 const char *err_text = err->info.errs[err->info.type];
8064                 u64 sec = err->info.ts;
8065                 u32 nsec;
8066
8067                 nsec = do_div(sec, NSEC_PER_SEC);
8068                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8069                            err->loc, err_text);
8070                 seq_printf(m, "%s", err->cmd);
8071                 tracing_err_log_show_pos(m, err->info.pos);
8072         }
8073
8074         return 0;
8075 }
8076
8077 static const struct seq_operations tracing_err_log_seq_ops = {
8078         .start  = tracing_err_log_seq_start,
8079         .next   = tracing_err_log_seq_next,
8080         .stop   = tracing_err_log_seq_stop,
8081         .show   = tracing_err_log_seq_show
8082 };
8083
8084 static int tracing_err_log_open(struct inode *inode, struct file *file)
8085 {
8086         struct trace_array *tr = inode->i_private;
8087         int ret = 0;
8088
8089         ret = tracing_check_open_get_tr(tr);
8090         if (ret)
8091                 return ret;
8092
8093         /* If this file was opened for write, then erase contents */
8094         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8095                 clear_tracing_err_log(tr);
8096
8097         if (file->f_mode & FMODE_READ) {
8098                 ret = seq_open(file, &tracing_err_log_seq_ops);
8099                 if (!ret) {
8100                         struct seq_file *m = file->private_data;
8101                         m->private = tr;
8102                 } else {
8103                         trace_array_put(tr);
8104                 }
8105         }
8106         return ret;
8107 }
8108
8109 static ssize_t tracing_err_log_write(struct file *file,
8110                                      const char __user *buffer,
8111                                      size_t count, loff_t *ppos)
8112 {
8113         return count;
8114 }
8115
8116 static int tracing_err_log_release(struct inode *inode, struct file *file)
8117 {
8118         struct trace_array *tr = inode->i_private;
8119
8120         trace_array_put(tr);
8121
8122         if (file->f_mode & FMODE_READ)
8123                 seq_release(inode, file);
8124
8125         return 0;
8126 }
8127
8128 static const struct file_operations tracing_err_log_fops = {
8129         .open           = tracing_err_log_open,
8130         .write          = tracing_err_log_write,
8131         .read           = seq_read,
8132         .llseek         = seq_lseek,
8133         .release        = tracing_err_log_release,
8134 };
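/*
 * Seen from user space, the file backed by these fops is plain text in the
 * format documented above tracing_log_err().  An illustrative session (the
 * logged error is made up):
 *
 *	# cat /sys/kernel/tracing/error_log
 *	[  123.456789] my_subsystem: error: Bad keyword
 *	  Command: hist:keys=bogus_field
 *	                     ^
 *	# echo > /sys/kernel/tracing/error_log
 *
 * The echo in truncation mode clears the log and frees its memory.
 */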
8135
8136 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8137 {
8138         struct trace_array *tr = inode->i_private;
8139         struct ftrace_buffer_info *info;
8140         int ret;
8141
8142         ret = tracing_check_open_get_tr(tr);
8143         if (ret)
8144                 return ret;
8145
8146         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8147         if (!info) {
8148                 trace_array_put(tr);
8149                 return -ENOMEM;
8150         }
8151
8152         mutex_lock(&trace_types_lock);
8153
8154         info->iter.tr           = tr;
8155         info->iter.cpu_file     = tracing_get_cpu(inode);
8156         info->iter.trace        = tr->current_trace;
8157         info->iter.array_buffer = &tr->array_buffer;
8158         info->spare             = NULL;
8159         /* Force reading ring buffer for first read */
8160         info->read              = (unsigned int)-1;
8161
8162         filp->private_data = info;
8163
8164         tr->trace_ref++;
8165
8166         mutex_unlock(&trace_types_lock);
8167
8168         ret = nonseekable_open(inode, filp);
8169         if (ret < 0)
8170                 trace_array_put(tr);
8171
8172         return ret;
8173 }
8174
8175 static __poll_t
8176 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8177 {
8178         struct ftrace_buffer_info *info = filp->private_data;
8179         struct trace_iterator *iter = &info->iter;
8180
8181         return trace_poll(iter, filp, poll_table);
8182 }
8183
8184 static ssize_t
8185 tracing_buffers_read(struct file *filp, char __user *ubuf,
8186                      size_t count, loff_t *ppos)
8187 {
8188         struct ftrace_buffer_info *info = filp->private_data;
8189         struct trace_iterator *iter = &info->iter;
8190         ssize_t ret = 0;
8191         ssize_t size;
8192
8193         if (!count)
8194                 return 0;
8195
8196 #ifdef CONFIG_TRACER_MAX_TRACE
8197         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8198                 return -EBUSY;
8199 #endif
8200
8201         if (!info->spare) {
8202                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8203                                                           iter->cpu_file);
8204                 if (IS_ERR(info->spare)) {
8205                         ret = PTR_ERR(info->spare);
8206                         info->spare = NULL;
8207                 } else {
8208                         info->spare_cpu = iter->cpu_file;
8209                 }
8210         }
8211         if (!info->spare)
8212                 return ret;
8213
8214         /* Do we have previous read data to read? */
8215         if (info->read < PAGE_SIZE)
8216                 goto read;
8217
8218  again:
8219         trace_access_lock(iter->cpu_file);
8220         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8221                                     &info->spare,
8222                                     count,
8223                                     iter->cpu_file, 0);
8224         trace_access_unlock(iter->cpu_file);
8225
8226         if (ret < 0) {
8227                 if (trace_empty(iter)) {
8228                         if ((filp->f_flags & O_NONBLOCK))
8229                                 return -EAGAIN;
8230
8231                         ret = wait_on_pipe(iter, 0);
8232                         if (ret)
8233                                 return ret;
8234
8235                         goto again;
8236                 }
8237                 return 0;
8238         }
8239
8240         info->read = 0;
8241  read:
8242         size = PAGE_SIZE - info->read;
8243         if (size > count)
8244                 size = count;
8245
8246         ret = copy_to_user(ubuf, info->spare + info->read, size);
8247         if (ret == size)
8248                 return -EFAULT;
8249
8250         size -= ret;
8251
8252         *ppos += size;
8253         info->read += size;
8254
8255         return size;
8256 }
8257
8258 static int tracing_buffers_release(struct inode *inode, struct file *file)
8259 {
8260         struct ftrace_buffer_info *info = file->private_data;
8261         struct trace_iterator *iter = &info->iter;
8262
8263         mutex_lock(&trace_types_lock);
8264
8265         iter->tr->trace_ref--;
8266
8267         __trace_array_put(iter->tr);
8268
8269         iter->wait_index++;
8270         /* Make sure the waiters see the new wait_index */
8271         smp_wmb();
8272
8273         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8274
8275         if (info->spare)
8276                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8277                                            info->spare_cpu, info->spare);
8278         kvfree(info);
8279
8280         mutex_unlock(&trace_types_lock);
8281
8282         return 0;
8283 }
8284
8285 struct buffer_ref {
8286         struct trace_buffer     *buffer;
8287         void                    *page;
8288         int                     cpu;
8289         refcount_t              refcount;
8290 };
8291
8292 static void buffer_ref_release(struct buffer_ref *ref)
8293 {
8294         if (!refcount_dec_and_test(&ref->refcount))
8295                 return;
8296         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8297         kfree(ref);
8298 }
8299
8300 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8301                                     struct pipe_buffer *buf)
8302 {
8303         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8304
8305         buffer_ref_release(ref);
8306         buf->private = 0;
8307 }
8308
8309 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8310                                 struct pipe_buffer *buf)
8311 {
8312         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8313
8314         if (refcount_read(&ref->refcount) > INT_MAX/2)
8315                 return false;
8316
8317         refcount_inc(&ref->refcount);
8318         return true;
8319 }
8320
8321 /* Pipe buffer operations for a buffer. */
8322 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8323         .release                = buffer_pipe_buf_release,
8324         .get                    = buffer_pipe_buf_get,
8325 };
8326
8327 /*
8328  * Callback from splice_to_pipe(); releases any pages left in the spd
8329  * if we errored out while filling the pipe.
8330  */
8331 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8332 {
8333         struct buffer_ref *ref =
8334                 (struct buffer_ref *)spd->partial[i].private;
8335
8336         buffer_ref_release(ref);
8337         spd->partial[i].private = 0;
8338 }
8339
8340 static ssize_t
8341 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8342                             struct pipe_inode_info *pipe, size_t len,
8343                             unsigned int flags)
8344 {
8345         struct ftrace_buffer_info *info = file->private_data;
8346         struct trace_iterator *iter = &info->iter;
8347         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8348         struct page *pages_def[PIPE_DEF_BUFFERS];
8349         struct splice_pipe_desc spd = {
8350                 .pages          = pages_def,
8351                 .partial        = partial_def,
8352                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8353                 .ops            = &buffer_pipe_buf_ops,
8354                 .spd_release    = buffer_spd_release,
8355         };
8356         struct buffer_ref *ref;
8357         int entries, i;
8358         ssize_t ret = 0;
8359
8360 #ifdef CONFIG_TRACER_MAX_TRACE
8361         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8362                 return -EBUSY;
8363 #endif
8364
8365         if (*ppos & (PAGE_SIZE - 1))
8366                 return -EINVAL;
8367
8368         if (len & (PAGE_SIZE - 1)) {
8369                 if (len < PAGE_SIZE)
8370                         return -EINVAL;
8371                 len &= PAGE_MASK;
8372         }
8373
8374         if (splice_grow_spd(pipe, &spd))
8375                 return -ENOMEM;
8376
8377  again:
8378         trace_access_lock(iter->cpu_file);
8379         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8380
8381         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8382                 struct page *page;
8383                 int r;
8384
8385                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8386                 if (!ref) {
8387                         ret = -ENOMEM;
8388                         break;
8389                 }
8390
8391                 refcount_set(&ref->refcount, 1);
8392                 ref->buffer = iter->array_buffer->buffer;
8393                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8394                 if (IS_ERR(ref->page)) {
8395                         ret = PTR_ERR(ref->page);
8396                         ref->page = NULL;
8397                         kfree(ref);
8398                         break;
8399                 }
8400                 ref->cpu = iter->cpu_file;
8401
8402                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8403                                           len, iter->cpu_file, 1);
8404                 if (r < 0) {
8405                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8406                                                    ref->page);
8407                         kfree(ref);
8408                         break;
8409                 }
8410
8411                 page = virt_to_page(ref->page);
8412
8413                 spd.pages[i] = page;
8414                 spd.partial[i].len = PAGE_SIZE;
8415                 spd.partial[i].offset = 0;
8416                 spd.partial[i].private = (unsigned long)ref;
8417                 spd.nr_pages++;
8418                 *ppos += PAGE_SIZE;
8419
8420                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8421         }
8422
8423         trace_access_unlock(iter->cpu_file);
8424         spd.nr_pages = i;
8425
8426         /* did we read anything? */
8427         if (!spd.nr_pages) {
8428                 long wait_index;
8429
8430                 if (ret)
8431                         goto out;
8432
8433                 ret = -EAGAIN;
8434                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8435                         goto out;
8436
8437                 wait_index = READ_ONCE(iter->wait_index);
8438
8439                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8440                 if (ret)
8441                         goto out;
8442
8443                 /* No need to wait after waking up when tracing is off */
8444                 if (!tracer_tracing_is_on(iter->tr))
8445                         goto out;
8446
8447                 /* Make sure we see the new wait_index */
8448                 smp_rmb();
8449                 if (wait_index != iter->wait_index)
8450                         goto out;
8451
8452                 goto again;
8453         }
8454
8455         ret = splice_to_pipe(pipe, &spd);
8456 out:
8457         splice_shrink_spd(&spd);
8458
8459         return ret;
8460 }
8461
8462 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8463 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8464 {
8465         struct ftrace_buffer_info *info = file->private_data;
8466         struct trace_iterator *iter = &info->iter;
8467
8468         if (cmd)
8469                 return -ENOIOCTLCMD;
8470
8471         mutex_lock(&trace_types_lock);
8472
8473         iter->wait_index++;
8474         /* Make sure the waiters see the new wait_index */
8475         smp_wmb();
8476
8477         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8478
8479         mutex_unlock(&trace_types_lock);
8480         return 0;
8481 }
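/*
 * For example, a thread blocked in read() or splice() on a
 * per_cpu/cpuN/trace_pipe_raw file can be woken from another thread with
 * (illustrative):
 *
 *	ioctl(fd, 0);
 *
 * where fd is an open descriptor on the same file.
 */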
8482
8483 static const struct file_operations tracing_buffers_fops = {
8484         .open           = tracing_buffers_open,
8485         .read           = tracing_buffers_read,
8486         .poll           = tracing_buffers_poll,
8487         .release        = tracing_buffers_release,
8488         .splice_read    = tracing_buffers_splice_read,
8489         .unlocked_ioctl = tracing_buffers_ioctl,
8490         .llseek         = no_llseek,
8491 };
8492
8493 static ssize_t
8494 tracing_stats_read(struct file *filp, char __user *ubuf,
8495                    size_t count, loff_t *ppos)
8496 {
8497         struct inode *inode = file_inode(filp);
8498         struct trace_array *tr = inode->i_private;
8499         struct array_buffer *trace_buf = &tr->array_buffer;
8500         int cpu = tracing_get_cpu(inode);
8501         struct trace_seq *s;
8502         unsigned long cnt;
8503         unsigned long long t;
8504         unsigned long usec_rem;
8505
8506         s = kmalloc(sizeof(*s), GFP_KERNEL);
8507         if (!s)
8508                 return -ENOMEM;
8509
8510         trace_seq_init(s);
8511
8512         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8513         trace_seq_printf(s, "entries: %ld\n", cnt);
8514
8515         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8516         trace_seq_printf(s, "overrun: %ld\n", cnt);
8517
8518         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8519         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8520
8521         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8522         trace_seq_printf(s, "bytes: %ld\n", cnt);
8523
8524         if (trace_clocks[tr->clock_id].in_ns) {
8525                 /* local or global for trace_clock */
8526                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8527                 usec_rem = do_div(t, USEC_PER_SEC);
8528                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8529                                                                 t, usec_rem);
8530
8531                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8532                 usec_rem = do_div(t, USEC_PER_SEC);
8533                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8534         } else {
8535                 /* counter or tsc mode for trace_clock */
8536                 trace_seq_printf(s, "oldest event ts: %llu\n",
8537                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8538
8539                 trace_seq_printf(s, "now ts: %llu\n",
8540                                 ring_buffer_time_stamp(trace_buf->buffer));
8541         }
8542
8543         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8544         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8545
8546         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8547         trace_seq_printf(s, "read events: %ld\n", cnt);
8548
8549         count = simple_read_from_buffer(ubuf, count, ppos,
8550                                         s->buffer, trace_seq_used(s));
8551
8552         kfree(s);
8553
8554         return count;
8555 }
8556
8557 static const struct file_operations tracing_stats_fops = {
8558         .open           = tracing_open_generic_tr,
8559         .read           = tracing_stats_read,
8560         .llseek         = generic_file_llseek,
8561         .release        = tracing_release_generic_tr,
8562 };
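/*
 * Example output of a per_cpu/cpuN/stats file backed by these fops, with
 * an in-ns trace clock (the values shown are made up):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 53280
 *	oldest event ts: 12345.678901
 *	now ts: 12350.000000
 *	dropped events: 0
 *	read events: 256
 */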
8563
8564 #ifdef CONFIG_DYNAMIC_FTRACE
8565
8566 static ssize_t
8567 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8568                   size_t cnt, loff_t *ppos)
8569 {
8570         ssize_t ret;
8571         char *buf;
8572         int r;
8573
8574         /* 256 should be plenty to hold the amount needed */
8575         buf = kmalloc(256, GFP_KERNEL);
8576         if (!buf)
8577                 return -ENOMEM;
8578
8579         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8580                       ftrace_update_tot_cnt,
8581                       ftrace_number_of_pages,
8582                       ftrace_number_of_groups);
8583
8584         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8585         kfree(buf);
8586         return ret;
8587 }
8588
8589 static const struct file_operations tracing_dyn_info_fops = {
8590         .open           = tracing_open_generic,
8591         .read           = tracing_read_dyn_info,
8592         .llseek         = generic_file_llseek,
8593 };
8594 #endif /* CONFIG_DYNAMIC_FTRACE */
8595
8596 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8597 static void
8598 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8599                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8600                 void *data)
8601 {
8602         tracing_snapshot_instance(tr);
8603 }
8604
8605 static void
8606 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8607                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8608                       void *data)
8609 {
8610         struct ftrace_func_mapper *mapper = data;
8611         long *count = NULL;
8612
8613         if (mapper)
8614                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8615
8616         if (count) {
8617
8618                 if (*count <= 0)
8619                         return;
8620
8621                 (*count)--;
8622         }
8623
8624         tracing_snapshot_instance(tr);
8625 }
8626
8627 static int
8628 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8629                       struct ftrace_probe_ops *ops, void *data)
8630 {
8631         struct ftrace_func_mapper *mapper = data;
8632         long *count = NULL;
8633
8634         seq_printf(m, "%ps:", (void *)ip);
8635
8636         seq_puts(m, "snapshot");
8637
8638         if (mapper)
8639                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8640
8641         if (count)
8642                 seq_printf(m, ":count=%ld\n", *count);
8643         else
8644                 seq_puts(m, ":unlimited\n");
8645
8646         return 0;
8647 }
8648
8649 static int
8650 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8651                      unsigned long ip, void *init_data, void **data)
8652 {
8653         struct ftrace_func_mapper *mapper = *data;
8654
8655         if (!mapper) {
8656                 mapper = allocate_ftrace_func_mapper();
8657                 if (!mapper)
8658                         return -ENOMEM;
8659                 *data = mapper;
8660         }
8661
8662         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8663 }
8664
8665 static void
8666 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8667                      unsigned long ip, void *data)
8668 {
8669         struct ftrace_func_mapper *mapper = data;
8670
8671         if (!ip) {
8672                 if (!mapper)
8673                         return;
8674                 free_ftrace_func_mapper(mapper, NULL);
8675                 return;
8676         }
8677
8678         ftrace_func_mapper_remove_ip(mapper, ip);
8679 }
8680
8681 static struct ftrace_probe_ops snapshot_probe_ops = {
8682         .func                   = ftrace_snapshot,
8683         .print                  = ftrace_snapshot_print,
8684 };
8685
8686 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8687         .func                   = ftrace_count_snapshot,
8688         .print                  = ftrace_snapshot_print,
8689         .init                   = ftrace_snapshot_init,
8690         .free                   = ftrace_snapshot_free,
8691 };
8692
8693 static int
8694 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8695                                char *glob, char *cmd, char *param, int enable)
8696 {
8697         struct ftrace_probe_ops *ops;
8698         void *count = (void *)-1;
8699         char *number;
8700         int ret;
8701
8702         if (!tr)
8703                 return -ENODEV;
8704
8705         /* hash funcs only work with set_ftrace_filter */
8706         if (!enable)
8707                 return -EINVAL;
8708
8709         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8710
8711         if (glob[0] == '!')
8712                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8713
8714         if (!param)
8715                 goto out_reg;
8716
8717         number = strsep(&param, ":");
8718
8719         if (!strlen(number))
8720                 goto out_reg;
8721
8722         /*
8723          * We use the callback data field (which is a pointer)
8724          * as our counter.
8725          */
8726         ret = kstrtoul(number, 0, (unsigned long *)&count);
8727         if (ret)
8728                 return ret;
8729
8730  out_reg:
8731         ret = tracing_alloc_snapshot_instance(tr);
8732         if (ret < 0)
8733                 goto out;
8734
8735         ret = register_ftrace_function_probe(glob, tr, ops, count);
8736
8737  out:
8738         return ret < 0 ? ret : 0;
8739 }
8740
8741 static struct ftrace_func_command ftrace_snapshot_cmd = {
8742         .name                   = "snapshot",
8743         .func                   = ftrace_trace_snapshot_callback,
8744 };
8745
8746 static __init int register_snapshot_cmd(void)
8747 {
8748         return register_ftrace_command(&ftrace_snapshot_cmd);
8749 }
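/*
 * The "snapshot" command registered above is driven through
 * set_ftrace_filter, e.g. (illustrative):
 *
 *	# echo 'schedule:snapshot' > set_ftrace_filter
 *	# echo 'schedule:snapshot:5' > set_ftrace_filter
 *	# echo '!schedule:snapshot' > set_ftrace_filter
 *
 * The first form snapshots on every hit of schedule(), the second only on
 * the first 5 hits (the trailing number becomes the probe's count), and
 * the '!' form removes the probe again.
 */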
8750 #else
8751 static inline __init int register_snapshot_cmd(void) { return 0; }
8752 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8753
8754 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8755 {
8756         if (WARN_ON(!tr->dir))
8757                 return ERR_PTR(-ENODEV);
8758
8759         /* Top directory uses NULL as the parent */
8760         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8761                 return NULL;
8762
8763         /* All sub buffers have a descriptor */
8764         return tr->dir;
8765 }
8766
8767 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8768 {
8769         struct dentry *d_tracer;
8770
8771         if (tr->percpu_dir)
8772                 return tr->percpu_dir;
8773
8774         d_tracer = tracing_get_dentry(tr);
8775         if (IS_ERR(d_tracer))
8776                 return NULL;
8777
8778         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8779
8780         MEM_FAIL(!tr->percpu_dir,
8781                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8782
8783         return tr->percpu_dir;
8784 }
8785
8786 static struct dentry *
8787 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8788                       void *data, long cpu, const struct file_operations *fops)
8789 {
8790         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8791
8792         if (ret) /* See tracing_get_cpu() */
8793                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8794         return ret;
8795 }
8796
8797 static void
8798 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8799 {
8800         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8801         struct dentry *d_cpu;
8802         char cpu_dir[30]; /* 30 characters should be more than enough */
8803
8804         if (!d_percpu)
8805                 return;
8806
8807         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8808         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8809         if (!d_cpu) {
8810                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8811                 return;
8812         }
8813
8814         /* per cpu trace_pipe */
8815         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8816                                 tr, cpu, &tracing_pipe_fops);
8817
8818         /* per cpu trace */
8819         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8820                                 tr, cpu, &tracing_fops);
8821
8822         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8823                                 tr, cpu, &tracing_buffers_fops);
8824
8825         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8826                                 tr, cpu, &tracing_stats_fops);
8827
8828         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8829                                 tr, cpu, &tracing_entries_fops);
8830
8831 #ifdef CONFIG_TRACER_SNAPSHOT
8832         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8833                                 tr, cpu, &snapshot_fops);
8834
8835         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8836                                 tr, cpu, &snapshot_raw_fops);
8837 #endif
8838 }
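/*
 * For cpu 0, the files created above end up as (snapshot and snapshot_raw
 * only with CONFIG_TRACER_SNAPSHOT):
 *
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot
 *	per_cpu/cpu0/snapshot_raw
 */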
8839
8840 #ifdef CONFIG_FTRACE_SELFTEST
8841 /* Let selftest have access to static functions in this file */
8842 #include "trace_selftest.c"
8843 #endif
8844
8845 static ssize_t
8846 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8847                         loff_t *ppos)
8848 {
8849         struct trace_option_dentry *topt = filp->private_data;
8850         char *buf;
8851
8852         if (topt->flags->val & topt->opt->bit)
8853                 buf = "1\n";
8854         else
8855                 buf = "0\n";
8856
8857         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8858 }
8859
8860 static ssize_t
8861 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8862                          loff_t *ppos)
8863 {
8864         struct trace_option_dentry *topt = filp->private_data;
8865         unsigned long val;
8866         int ret;
8867
8868         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8869         if (ret)
8870                 return ret;
8871
8872         if (val != 0 && val != 1)
8873                 return -EINVAL;
8874
8875         if (!!(topt->flags->val & topt->opt->bit) != val) {
8876                 mutex_lock(&trace_types_lock);
8877                 ret = __set_tracer_option(topt->tr, topt->flags,
8878                                           topt->opt, !val);
8879                 mutex_unlock(&trace_types_lock);
8880                 if (ret)
8881                         return ret;
8882         }
8883
8884         *ppos += cnt;
8885
8886         return cnt;
8887 }
8888
8889
8890 static const struct file_operations trace_options_fops = {
8891         .open = tracing_open_generic,
8892         .read = trace_options_read,
8893         .write = trace_options_write,
8894         .llseek = generic_file_llseek,
8895 };
8896
8897 /*
8898  * In order to pass in both the trace_array descriptor as well as the index
8899  * to the flag that the trace option file represents, the trace_array
8900  * has a character array of trace_flags_index[], which holds the index
8901  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8902  * The address of this character array is passed to the flag option file
8903  * read/write callbacks.
8904  *
8905  * In order to extract both the index and the trace_array descriptor,
8906  * get_tr_index() uses the following algorithm.
8907  *
8908  *   idx = *ptr;
8909  *
8910  * The pointer holds the address of an entry in the index array, and
8911  * each entry's value is its own position (remember index[1] == 1).
8912  *
8913  * Then, to get the trace_array descriptor, subtracting that index
8914  * from the pointer gives the start of the index array itself:
8915  *
8916  *   ptr - idx == &index[0]
8917  *
8918  * Then a simple container_of() from that pointer gets us to the
8919  * trace_array descriptor.
8920  */
8921 static void get_tr_index(void *data, struct trace_array **ptr,
8922                          unsigned int *pindex)
8923 {
8924         *pindex = *(unsigned char *)data;
8925
8926         *ptr = container_of(data - *pindex, struct trace_array,
8927                             trace_flags_index);
8928 }
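/*
 * Worked example of the above: if data points at &tr->trace_flags_index[3],
 * then *pindex becomes 3 and (data - 3) == &tr->trace_flags_index[0], from
 * which container_of() recovers tr.
 */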
8929
8930 static ssize_t
8931 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8932                         loff_t *ppos)
8933 {
8934         void *tr_index = filp->private_data;
8935         struct trace_array *tr;
8936         unsigned int index;
8937         char *buf;
8938
8939         get_tr_index(tr_index, &tr, &index);
8940
8941         if (tr->trace_flags & (1 << index))
8942                 buf = "1\n";
8943         else
8944                 buf = "0\n";
8945
8946         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8947 }
8948
8949 static ssize_t
8950 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8951                          loff_t *ppos)
8952 {
8953         void *tr_index = filp->private_data;
8954         struct trace_array *tr;
8955         unsigned int index;
8956         unsigned long val;
8957         int ret;
8958
8959         get_tr_index(tr_index, &tr, &index);
8960
8961         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8962         if (ret)
8963                 return ret;
8964
8965         if (val != 0 && val != 1)
8966                 return -EINVAL;
8967
8968         mutex_lock(&event_mutex);
8969         mutex_lock(&trace_types_lock);
8970         ret = set_tracer_flag(tr, 1 << index, val);
8971         mutex_unlock(&trace_types_lock);
8972         mutex_unlock(&event_mutex);
8973
8974         if (ret < 0)
8975                 return ret;
8976
8977         *ppos += cnt;
8978
8979         return cnt;
8980 }
8981
8982 static const struct file_operations trace_options_core_fops = {
8983         .open = tracing_open_generic,
8984         .read = trace_options_core_read,
8985         .write = trace_options_core_write,
8986         .llseek = generic_file_llseek,
8987 };
8988
8989 struct dentry *trace_create_file(const char *name,
8990                                  umode_t mode,
8991                                  struct dentry *parent,
8992                                  void *data,
8993                                  const struct file_operations *fops)
8994 {
8995         struct dentry *ret;
8996
8997         ret = tracefs_create_file(name, mode, parent, data, fops);
8998         if (!ret)
8999                 pr_warn("Could not create tracefs '%s' entry\n", name);
9000
9001         return ret;
9002 }
9003
9004
9005 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9006 {
9007         struct dentry *d_tracer;
9008
9009         if (tr->options)
9010                 return tr->options;
9011
9012         d_tracer = tracing_get_dentry(tr);
9013         if (IS_ERR(d_tracer))
9014                 return NULL;
9015
9016         tr->options = tracefs_create_dir("options", d_tracer);
9017         if (!tr->options) {
9018                 pr_warn("Could not create tracefs directory 'options'\n");
9019                 return NULL;
9020         }
9021
9022         return tr->options;
9023 }
9024
9025 static void
9026 create_trace_option_file(struct trace_array *tr,
9027                          struct trace_option_dentry *topt,
9028                          struct tracer_flags *flags,
9029                          struct tracer_opt *opt)
9030 {
9031         struct dentry *t_options;
9032
9033         t_options = trace_options_init_dentry(tr);
9034         if (!t_options)
9035                 return;
9036
9037         topt->flags = flags;
9038         topt->opt = opt;
9039         topt->tr = tr;
9040
9041         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9042                                         t_options, topt, &trace_options_fops);
9043
9044 }
9045
9046 static void
9047 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9048 {
9049         struct trace_option_dentry *topts;
9050         struct trace_options *tr_topts;
9051         struct tracer_flags *flags;
9052         struct tracer_opt *opts;
9053         int cnt;
9054         int i;
9055
9056         if (!tracer)
9057                 return;
9058
9059         flags = tracer->flags;
9060
9061         if (!flags || !flags->opts)
9062                 return;
9063
9064         /*
9065          * If this is an instance, only create flags for tracers
9066          * the instance may have.
9067          */
9068         if (!trace_ok_for_array(tracer, tr))
9069                 return;
9070
9071         for (i = 0; i < tr->nr_topts; i++) {
9072                 /* Make sure there are no duplicate flags. */
9073                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9074                         return;
9075         }
9076
9077         opts = flags->opts;
9078
9079         for (cnt = 0; opts[cnt].name; cnt++)
9080                 ;
9081
9082         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9083         if (!topts)
9084                 return;
9085
9086         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9087                             GFP_KERNEL);
9088         if (!tr_topts) {
9089                 kfree(topts);
9090                 return;
9091         }
9092
9093         tr->topts = tr_topts;
9094         tr->topts[tr->nr_topts].tracer = tracer;
9095         tr->topts[tr->nr_topts].topts = topts;
9096         tr->nr_topts++;
9097
9098         for (cnt = 0; opts[cnt].name; cnt++) {
9099                 create_trace_option_file(tr, &topts[cnt], flags,
9100                                          &opts[cnt]);
9101                 MEM_FAIL(topts[cnt].entry == NULL,
9102                           "Failed to create trace option: %s",
9103                           opts[cnt].name);
9104         }
9105 }
9106
9107 static struct dentry *
9108 create_trace_option_core_file(struct trace_array *tr,
9109                               const char *option, long index)
9110 {
9111         struct dentry *t_options;
9112
9113         t_options = trace_options_init_dentry(tr);
9114         if (!t_options)
9115                 return NULL;
9116
9117         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9118                                  (void *)&tr->trace_flags_index[index],
9119                                  &trace_options_core_fops);
9120 }
9121
9122 static void create_trace_options_dir(struct trace_array *tr)
9123 {
9124         struct dentry *t_options;
9125         bool top_level = tr == &global_trace;
9126         int i;
9127
9128         t_options = trace_options_init_dentry(tr);
9129         if (!t_options)
9130                 return;
9131
9132         for (i = 0; trace_options[i]; i++) {
9133                 if (top_level ||
9134                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9135                         create_trace_option_core_file(tr, trace_options[i], i);
9136         }
9137 }
9138
9139 static ssize_t
9140 rb_simple_read(struct file *filp, char __user *ubuf,
9141                size_t cnt, loff_t *ppos)
9142 {
9143         struct trace_array *tr = filp->private_data;
9144         char buf[64];
9145         int r;
9146
9147         r = tracer_tracing_is_on(tr);
9148         r = sprintf(buf, "%d\n", r);
9149
9150         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9151 }
9152
9153 static ssize_t
9154 rb_simple_write(struct file *filp, const char __user *ubuf,
9155                 size_t cnt, loff_t *ppos)
9156 {
9157         struct trace_array *tr = filp->private_data;
9158         struct trace_buffer *buffer = tr->array_buffer.buffer;
9159         unsigned long val;
9160         int ret;
9161
9162         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9163         if (ret)
9164                 return ret;
9165
9166         if (buffer) {
9167                 mutex_lock(&trace_types_lock);
9168                 if (!!val == tracer_tracing_is_on(tr)) {
9169                         val = 0; /* do nothing */
9170                 } else if (val) {
9171                         tracer_tracing_on(tr);
9172                         if (tr->current_trace->start)
9173                                 tr->current_trace->start(tr);
9174                 } else {
9175                         tracer_tracing_off(tr);
9176                         if (tr->current_trace->stop)
9177                                 tr->current_trace->stop(tr);
9178                         /* Wake up any waiters */
9179                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9180                 }
9181                 mutex_unlock(&trace_types_lock);
9182         }
9183
9184         (*ppos)++;
9185
9186         return cnt;
9187 }
9188
9189 static const struct file_operations rb_simple_fops = {
9190         .open           = tracing_open_generic_tr,
9191         .read           = rb_simple_read,
9192         .write          = rb_simple_write,
9193         .release        = tracing_release_generic_tr,
9194         .llseek         = default_llseek,
9195 };
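/*
 * In the stock tracefs layout these fops back the tracing_on switch, so
 * the ring buffer can be toggled from user space with (illustrative):
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 *	# cat /sys/kernel/tracing/tracing_on
 */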
9196
9197 static ssize_t
9198 buffer_percent_read(struct file *filp, char __user *ubuf,
9199                     size_t cnt, loff_t *ppos)
9200 {
9201         struct trace_array *tr = filp->private_data;
9202         char buf[64];
9203         int r;
9204
9205         r = tr->buffer_percent;
9206         r = sprintf(buf, "%d\n", r);
9207
9208         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9209 }
9210
9211 static ssize_t
9212 buffer_percent_write(struct file *filp, const char __user *ubuf,
9213                      size_t cnt, loff_t *ppos)
9214 {
9215         struct trace_array *tr = filp->private_data;
9216         unsigned long val;
9217         int ret;
9218
9219         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9220         if (ret)
9221                 return ret;
9222
9223         if (val > 100)
9224                 return -EINVAL;
9225
9226         tr->buffer_percent = val;
9227
9228         (*ppos)++;
9229
9230         return cnt;
9231 }
9232
9233 static const struct file_operations buffer_percent_fops = {
9234         .open           = tracing_open_generic_tr,
9235         .read           = buffer_percent_read,
9236         .write          = buffer_percent_write,
9237         .release        = tracing_release_generic_tr,
9238         .llseek         = default_llseek,
9239 };
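/*
 * The value written here is handed to wait_on_pipe() by the splice reader
 * above as its wakeup watermark.  For example (illustrative), to let
 * blocked readers of trace_pipe_raw wake up once the buffer is half full:
 *
 *	# echo 50 > /sys/kernel/tracing/buffer_percent
 *
 * Values above 100 are rejected with -EINVAL.
 */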
9240
9241 static struct dentry *trace_instance_dir;
9242
9243 static void
9244 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9245
9246 static int
9247 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9248 {
9249         enum ring_buffer_flags rb_flags;
9250
9251         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9252
9253         buf->tr = tr;
9254
9255         buf->buffer = ring_buffer_alloc(size, rb_flags);
9256         if (!buf->buffer)
9257                 return -ENOMEM;
9258
9259         buf->data = alloc_percpu(struct trace_array_cpu);
9260         if (!buf->data) {
9261                 ring_buffer_free(buf->buffer);
9262                 buf->buffer = NULL;
9263                 return -ENOMEM;
9264         }
9265
9266         /* Allocate the first page for all buffers */
9267         set_buffer_entries(&tr->array_buffer,
9268                            ring_buffer_size(tr->array_buffer.buffer, 0));
9269
9270         return 0;
9271 }
9272
9273 static void free_trace_buffer(struct array_buffer *buf)
9274 {
9275         if (buf->buffer) {
9276                 ring_buffer_free(buf->buffer);
9277                 buf->buffer = NULL;
9278                 free_percpu(buf->data);
9279                 buf->data = NULL;
9280         }
9281 }
9282
9283 static int allocate_trace_buffers(struct trace_array *tr, int size)
9284 {
9285         int ret;
9286
9287         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9288         if (ret)
9289                 return ret;
9290
9291 #ifdef CONFIG_TRACER_MAX_TRACE
9292         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9293                                     allocate_snapshot ? size : 1);
9294         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9295                 free_trace_buffer(&tr->array_buffer);
9296                 return -ENOMEM;
9297         }
9298         tr->allocated_snapshot = allocate_snapshot;
9299
9300         allocate_snapshot = false;
9301 #endif
9302
9303         return 0;
9304 }
9305
9306 static void free_trace_buffers(struct trace_array *tr)
9307 {
9308         if (!tr)
9309                 return;
9310
9311         free_trace_buffer(&tr->array_buffer);
9312
9313 #ifdef CONFIG_TRACER_MAX_TRACE
9314         free_trace_buffer(&tr->max_buffer);
9315 #endif
9316 }
9317
9318 static void init_trace_flags_index(struct trace_array *tr)
9319 {
9320         int i;
9321
9322         /* Used by the trace options files */
9323         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9324                 tr->trace_flags_index[i] = i;
9325 }
9326
9327 static void __update_tracer_options(struct trace_array *tr)
9328 {
9329         struct tracer *t;
9330
9331         for (t = trace_types; t; t = t->next)
9332                 add_tracer_options(tr, t);
9333 }
9334
9335 static void update_tracer_options(struct trace_array *tr)
9336 {
9337         mutex_lock(&trace_types_lock);
9338         tracer_options_updated = true;
9339         __update_tracer_options(tr);
9340         mutex_unlock(&trace_types_lock);
9341 }
9342
9343 /* Must have trace_types_lock held */
9344 struct trace_array *trace_array_find(const char *instance)
9345 {
9346         struct trace_array *tr, *found = NULL;
9347
9348         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9349                 if (tr->name && strcmp(tr->name, instance) == 0) {
9350                         found = tr;
9351                         break;
9352                 }
9353         }
9354
9355         return found;
9356 }
9357
9358 struct trace_array *trace_array_find_get(const char *instance)
9359 {
9360         struct trace_array *tr;
9361
9362         mutex_lock(&trace_types_lock);
9363         tr = trace_array_find(instance);
9364         if (tr)
9365                 tr->ref++;
9366         mutex_unlock(&trace_types_lock);
9367
9368         return tr;
9369 }
9370
9371 static int trace_array_create_dir(struct trace_array *tr)
9372 {
9373         int ret;
9374
9375         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9376         if (!tr->dir)
9377                 return -EINVAL;
9378
9379         ret = event_trace_add_tracer(tr->dir, tr);
9380         if (ret) {
9381                 tracefs_remove(tr->dir);
9382                 return ret;
9383         }
9384
9385         init_tracer_tracefs(tr, tr->dir);
9386         __update_tracer_options(tr);
9387
9388         return ret;
9389 }
9390
9391 static struct trace_array *trace_array_create(const char *name)
9392 {
9393         struct trace_array *tr;
9394         int ret;
9395
9396         ret = -ENOMEM;
9397         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9398         if (!tr)
9399                 return ERR_PTR(ret);
9400
9401         tr->name = kstrdup(name, GFP_KERNEL);
9402         if (!tr->name)
9403                 goto out_free_tr;
9404
9405         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9406                 goto out_free_tr;
9407
9408         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9409
9410         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9411
9412         raw_spin_lock_init(&tr->start_lock);
9413
9414         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9415
9416         tr->current_trace = &nop_trace;
9417
9418         INIT_LIST_HEAD(&tr->systems);
9419         INIT_LIST_HEAD(&tr->events);
9420         INIT_LIST_HEAD(&tr->hist_vars);
9421         INIT_LIST_HEAD(&tr->err_log);
9422
9423         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9424                 goto out_free_tr;
9425
9426         if (ftrace_allocate_ftrace_ops(tr) < 0)
9427                 goto out_free_tr;
9428
9429         ftrace_init_trace_array(tr);
9430
9431         init_trace_flags_index(tr);
9432
9433         if (trace_instance_dir) {
9434                 ret = trace_array_create_dir(tr);
9435                 if (ret)
9436                         goto out_free_tr;
9437         } else
9438                 __trace_early_add_events(tr);
9439
9440         list_add(&tr->list, &ftrace_trace_arrays);
9441
9442         tr->ref++;
9443
9444         return tr;
9445
9446  out_free_tr:
9447         ftrace_free_ftrace_ops(tr);
9448         free_trace_buffers(tr);
9449         free_cpumask_var(tr->tracing_cpumask);
9450         kfree(tr->name);
9451         kfree(tr);
9452
9453         return ERR_PTR(ret);
9454 }
9455
9456 static int instance_mkdir(const char *name)
9457 {
9458         struct trace_array *tr;
9459         int ret;
9460
9461         mutex_lock(&event_mutex);
9462         mutex_lock(&trace_types_lock);
9463
9464         ret = -EEXIST;
9465         if (trace_array_find(name))
9466                 goto out_unlock;
9467
9468         tr = trace_array_create(name);
9469
9470         ret = PTR_ERR_OR_ZERO(tr);
9471
9472 out_unlock:
9473         mutex_unlock(&trace_types_lock);
9474         mutex_unlock(&event_mutex);
9475         return ret;
9476 }
9477
9478 /**
9479  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9480  * @name: The name of the trace array to be looked up/created.
9481  *
9482  * Returns a pointer to the trace array with the given name, or NULL if it
9483  * cannot be found or created.
9484  *
9485  * NOTE: This function increments the reference counter associated with the
9486  * trace array returned. This makes sure it cannot be freed while in use.
9487  * Use trace_array_put() once the trace array is no longer needed.
9488  * If the trace_array is to be freed, trace_array_destroy() needs to
9489  * be called after the trace_array_put(), or simply let user space delete
9490  * it from the tracefs instances directory. But until the
9491  * trace_array_put() is called, user space cannot delete it.
9492  *
9493  */
9494 struct trace_array *trace_array_get_by_name(const char *name)
9495 {
9496         struct trace_array *tr;
9497
9498         mutex_lock(&event_mutex);
9499         mutex_lock(&trace_types_lock);
9500
9501         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9502                 if (tr->name && strcmp(tr->name, name) == 0)
9503                         goto out_unlock;
9504         }
9505
9506         tr = trace_array_create(name);
9507
9508         if (IS_ERR(tr))
9509                 tr = NULL;
9510 out_unlock:
9511         if (tr)
9512                 tr->ref++;
9513
9514         mutex_unlock(&trace_types_lock);
9515         mutex_unlock(&event_mutex);
9516         return tr;
9517 }
9518 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9519
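/*
 * Tear down a trace instance. The caller must hold event_mutex and
 * trace_types_lock. Returns -EBUSY if the instance is still referenced.
 */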
9520 static int __remove_instance(struct trace_array *tr)
9521 {
9522         int i;
9523
9524         /* Reference counter for a newly created trace array = 1. */
9525         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9526                 return -EBUSY;
9527
9528         list_del(&tr->list);
9529
9530         /* Disable all the flags that were enabled coming in */
9531         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9532                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9533                         set_tracer_flag(tr, 1 << i, 0);
9534         }
9535
9536         tracing_set_nop(tr);
9537         clear_ftrace_function_probes(tr);
9538         event_trace_del_tracer(tr);
9539         ftrace_clear_pids(tr);
9540         ftrace_destroy_function_files(tr);
9541         tracefs_remove(tr->dir);
9542         free_percpu(tr->last_func_repeats);
9543         free_trace_buffers(tr);
9544         clear_tracing_err_log(tr);
9545
9546         for (i = 0; i < tr->nr_topts; i++) {
9547                 kfree(tr->topts[i].topts);
9548         }
9549         kfree(tr->topts);
9550
9551         free_cpumask_var(tr->tracing_cpumask);
9552         kfree(tr->name);
9553         kfree(tr);
9554
9555         return 0;
9556 }
9557
9558 int trace_array_destroy(struct trace_array *this_tr)
9559 {
9560         struct trace_array *tr;
9561         int ret;
9562
9563         if (!this_tr)
9564                 return -EINVAL;
9565
9566         mutex_lock(&event_mutex);
9567         mutex_lock(&trace_types_lock);
9568
9569         ret = -ENODEV;
9570
9571         /* Make sure the trace array exists before destroying it. */
9572         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9573                 if (tr == this_tr) {
9574                         ret = __remove_instance(tr);
9575                         break;
9576                 }
9577         }
9578
9579         mutex_unlock(&trace_types_lock);
9580         mutex_unlock(&event_mutex);
9581
9582         return ret;
9583 }
9584 EXPORT_SYMBOL_GPL(trace_array_destroy);
9585
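/* tracefs "rmdir" callback for the instances directory. */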
9586 static int instance_rmdir(const char *name)
9587 {
9588         struct trace_array *tr;
9589         int ret;
9590
9591         mutex_lock(&event_mutex);
9592         mutex_lock(&trace_types_lock);
9593
9594         ret = -ENODEV;
9595         tr = trace_array_find(name);
9596         if (tr)
9597                 ret = __remove_instance(tr);
9598
9599         mutex_unlock(&trace_types_lock);
9600         mutex_unlock(&event_mutex);
9601
9602         return ret;
9603 }
9604
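/*
 * Create the "instances" directory and add tracefs directories for any
 * named instances that were created before tracefs was ready (e.g. from
 * the boot command line).
 */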
9605 static __init void create_trace_instances(struct dentry *d_tracer)
9606 {
9607         struct trace_array *tr;
9608
9609         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9610                                                          instance_mkdir,
9611                                                          instance_rmdir);
9612         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9613                 return;
9614
9615         mutex_lock(&event_mutex);
9616         mutex_lock(&trace_types_lock);
9617
9618         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9619                 if (!tr->name)
9620                         continue;
9621                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9622                              "Failed to create instance directory\n"))
9623                         break;
9624         }
9625
9626         mutex_unlock(&trace_types_lock);
9627         mutex_unlock(&event_mutex);
9628 }
9629
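/*
 * Populate a trace instance's tracefs directory with the standard control
 * and output files (current_tracer, trace, trace_pipe, options, etc.).
 */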
9630 static void
9631 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9632 {
9633         struct trace_event_file *file;
9634         int cpu;
9635
9636         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9637                         tr, &show_traces_fops);
9638
9639         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9640                         tr, &set_tracer_fops);
9641
9642         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9643                           tr, &tracing_cpumask_fops);
9644
9645         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9646                           tr, &tracing_iter_fops);
9647
9648         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9649                           tr, &tracing_fops);
9650
9651         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9652                           tr, &tracing_pipe_fops);
9653
9654         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9655                           tr, &tracing_entries_fops);
9656
9657         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9658                           tr, &tracing_total_entries_fops);
9659
9660         trace_create_file("free_buffer", 0200, d_tracer,
9661                           tr, &tracing_free_buffer_fops);
9662
9663         trace_create_file("trace_marker", 0220, d_tracer,
9664                           tr, &tracing_mark_fops);
9665
9666         file = __find_event_file(tr, "ftrace", "print");
9667         if (file && file->dir)
9668                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9669                                   file, &event_trigger_fops);
9670         tr->trace_marker_file = file;
9671
9672         trace_create_file("trace_marker_raw", 0220, d_tracer,
9673                           tr, &tracing_mark_raw_fops);
9674
9675         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9676                           &trace_clock_fops);
9677
9678         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9679                           tr, &rb_simple_fops);
9680
9681         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9682                           &trace_time_stamp_mode_fops);
9683
9684         tr->buffer_percent = 50;
9685
9686         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9687                         tr, &buffer_percent_fops);
9688
9689         create_trace_options_dir(tr);
9690
9691 #ifdef CONFIG_TRACER_MAX_TRACE
9692         trace_create_maxlat_file(tr, d_tracer);
9693 #endif
9694
9695         if (ftrace_create_function_files(tr, d_tracer))
9696                 MEM_FAIL(1, "Could not allocate function filter files");
9697
9698 #ifdef CONFIG_TRACER_SNAPSHOT
9699         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9700                           tr, &snapshot_fops);
9701 #endif
9702
9703         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9704                           tr, &tracing_err_log_fops);
9705
9706         for_each_tracing_cpu(cpu)
9707                 tracing_init_tracefs_percpu(tr, cpu);
9708
9709         ftrace_init_tracefs(tr, d_tracer);
9710 }
9711
9712 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9713 {
9714         struct vfsmount *mnt;
9715         struct file_system_type *type;
9716
9717         /*
9718          * To maintain backward compatibility for tools that mount
9719          * debugfs to get to the tracing facility, tracefs is automatically
9720          * mounted to the debugfs/tracing directory.
9721          */
9722         type = get_fs_type("tracefs");
9723         if (!type)
9724                 return NULL;
9725         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9726         put_filesystem(type);
9727         if (IS_ERR(mnt))
9728                 return NULL;
9729         mntget(mnt);
9730
9731         return mnt;
9732 }
9733
9734 /**
9735  * tracing_init_dentry - initialize top level trace array
9736  *
9737  * This is called when creating files or directories in the tracing
9738  * directory. It is called via fs_initcall() by the boot up code, and
9739  * returns 0 on success or a negative error if tracing is unavailable.
9740  */
9741 int tracing_init_dentry(void)
9742 {
9743         struct trace_array *tr = &global_trace;
9744
9745         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9746                 pr_warn("Tracing disabled due to lockdown\n");
9747                 return -EPERM;
9748         }
9749
9750         /* The top level trace array uses NULL as its parent */
9751         if (tr->dir)
9752                 return 0;
9753
9754         if (WARN_ON(!tracefs_initialized()))
9755                 return -ENODEV;
9756
9757         /*
9758          * As there may still be users that expect the tracing
9759          * files to exist in debugfs/tracing, we must automount
9760          * the tracefs file system there, so older tools still
9761          * work with the newer kernel.
9762          */
9763         tr->dir = debugfs_create_automount("tracing", NULL,
9764                                            trace_automount, NULL);
9765
9766         return 0;
9767 }
9768
9769 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9770 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9771
9772 static struct workqueue_struct *eval_map_wq __initdata;
9773 static struct work_struct eval_map_work __initdata;
9774 static struct work_struct tracerfs_init_work __initdata;
9775
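/*
 * Insert the kernel's built-in eval (enum) maps. trace_eval_init() queues
 * this on eval_map_wq when the workqueue can be allocated, and falls back
 * to running it synchronously otherwise.
 */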
9776 static void __init eval_map_work_func(struct work_struct *work)
9777 {
9778         int len;
9779
9780         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9781         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9782 }
9783
9784 static int __init trace_eval_init(void)
9785 {
9786         INIT_WORK(&eval_map_work, eval_map_work_func);
9787
9788         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9789         if (!eval_map_wq) {
9790                 pr_err("Unable to allocate eval_map_wq\n");
9791                 /* Fall back to doing the work synchronously */
9792                 eval_map_work_func(&eval_map_work);
9793                 return -ENOMEM;
9794         }
9795
9796         queue_work(eval_map_wq, &eval_map_work);
9797         return 0;
9798 }
9799
9800 subsys_initcall(trace_eval_init);
9801
9802 static int __init trace_eval_sync(void)
9803 {
9804         /* Make sure the eval map updates are finished */
9805         if (eval_map_wq)
9806                 destroy_workqueue(eval_map_wq);
9807         return 0;
9808 }
9809
9810 late_initcall_sync(trace_eval_sync);
9811
9812
9813 #ifdef CONFIG_MODULES
9814 static void trace_module_add_evals(struct module *mod)
9815 {
9816         if (!mod->num_trace_evals)
9817                 return;
9818
9819         /*
9820          * Modules with a bad taint do not have events created;
9821          * do not bother with their eval maps (enums) either.
9822          */
9823         if (trace_module_has_bad_taint(mod))
9824                 return;
9825
9826         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9827 }
9828
9829 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9830 static void trace_module_remove_evals(struct module *mod)
9831 {
9832         union trace_eval_map_item *map;
9833         union trace_eval_map_item **last = &trace_eval_maps;
9834
9835         if (!mod->num_trace_evals)
9836                 return;
9837
9838         mutex_lock(&trace_eval_mutex);
9839
9840         map = trace_eval_maps;
9841
9842         while (map) {
9843                 if (map->head.mod == mod)
9844                         break;
9845                 map = trace_eval_jmp_to_tail(map);
9846                 last = &map->tail.next;
9847                 map = map->tail.next;
9848         }
9849         if (!map)
9850                 goto out;
9851
9852         *last = trace_eval_jmp_to_tail(map)->tail.next;
9853         kfree(map);
9854  out:
9855         mutex_unlock(&trace_eval_mutex);
9856 }
9857 #else
9858 static inline void trace_module_remove_evals(struct module *mod) { }
9859 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9860
9861 static int trace_module_notify(struct notifier_block *self,
9862                                unsigned long val, void *data)
9863 {
9864         struct module *mod = data;
9865
9866         switch (val) {
9867         case MODULE_STATE_COMING:
9868                 trace_module_add_evals(mod);
9869                 break;
9870         case MODULE_STATE_GOING:
9871                 trace_module_remove_evals(mod);
9872                 break;
9873         }
9874
9875         return NOTIFY_OK;
9876 }
9877
9878 static struct notifier_block trace_module_nb = {
9879         .notifier_call = trace_module_notify,
9880         .priority = 0,
9881 };
9882 #endif /* CONFIG_MODULES */
9883
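/*
 * Create the top-level tracefs files and the "instances" directory for the
 * global trace array. tracer_init_tracefs() runs this from eval_map_wq when
 * that workqueue is available, otherwise it calls it directly.
 */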
9884 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9885 {
9886
9887         event_trace_init();
9888
9889         init_tracer_tracefs(&global_trace, NULL);
9890         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9891
9892         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9893                         &global_trace, &tracing_thresh_fops);
9894
9895         trace_create_file("README", TRACE_MODE_READ, NULL,
9896                         NULL, &tracing_readme_fops);
9897
9898         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9899                         NULL, &tracing_saved_cmdlines_fops);
9900
9901         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9902                           NULL, &tracing_saved_cmdlines_size_fops);
9903
9904         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9905                         NULL, &tracing_saved_tgids_fops);
9906
9907         trace_create_eval_file(NULL);
9908
9909 #ifdef CONFIG_MODULES
9910         register_module_notifier(&trace_module_nb);
9911 #endif
9912
9913 #ifdef CONFIG_DYNAMIC_FTRACE
9914         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9915                         NULL, &tracing_dyn_info_fops);
9916 #endif
9917
9918         create_trace_instances(NULL);
9919
9920         update_tracer_options(&global_trace);
9921 }
9922
9923 static __init int tracer_init_tracefs(void)
9924 {
9925         int ret;
9926
9927         trace_access_lock_init();
9928
9929         ret = tracing_init_dentry();
9930         if (ret)
9931                 return 0;
9932
9933         if (eval_map_wq) {
9934                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9935                 queue_work(eval_map_wq, &tracerfs_init_work);
9936         } else {
9937                 tracer_init_tracefs_work_func(NULL);
9938         }
9939
9940         rv_init_interface();
9941
9942         return 0;
9943 }
9944
9945 fs_initcall(tracer_init_tracefs);
9946
9947 static int trace_die_panic_handler(struct notifier_block *self,
9948                                 unsigned long ev, void *unused);
9949
9950 static struct notifier_block trace_panic_notifier = {
9951         .notifier_call = trace_die_panic_handler,
9952         .priority = INT_MAX - 1,
9953 };
9954
9955 static struct notifier_block trace_die_notifier = {
9956         .notifier_call = trace_die_panic_handler,
9957         .priority = INT_MAX - 1,
9958 };
9959
9960 /*
9961  * The idea is to execute the following die/panic callback early, in order
9962  * to avoid showing irrelevant information in the trace (like other panic
9963  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9964  * warnings get disabled (to prevent potential log flooding).
9965  */
9966 static int trace_die_panic_handler(struct notifier_block *self,
9967                                 unsigned long ev, void *unused)
9968 {
9969         if (!ftrace_dump_on_oops)
9970                 return NOTIFY_DONE;
9971
9972         /* The die notifier requires DIE_OOPS to trigger */
9973         if (self == &trace_die_notifier && ev != DIE_OOPS)
9974                 return NOTIFY_DONE;
9975
9976         ftrace_dump(ftrace_dump_on_oops);
9977
9978         return NOTIFY_DONE;
9979 }
9980
9981 /*
9982  * printk is limited to 1024 bytes; we really don't need it that big.
9983  * Nothing should be printing 1000 characters anyway.
9984  */
9985 #define TRACE_MAX_PRINT         1000
9986
9987 /*
9988  * Define here KERN_TRACE so that we have one place to modify
9989  * it if we decide to change what log level the ftrace dump
9990  * should be at.
9991  */
9992 #define KERN_TRACE              KERN_EMERG
9993
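/*
 * Print the contents of a trace_seq to the console at KERN_TRACE level
 * and reset it for reuse.
 */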
9994 void
9995 trace_printk_seq(struct trace_seq *s)
9996 {
9997         /* Probably should print a warning here. */
9998         if (s->seq.len >= TRACE_MAX_PRINT)
9999                 s->seq.len = TRACE_MAX_PRINT;
10000
10001         /*
10002          * More paranoid code. Although the buffer size is set to
10003          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10004          * an extra layer of protection.
10005          */
10006         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10007                 s->seq.len = s->seq.size - 1;
10008
10009         /* Should already be NUL-terminated, but we are paranoid. */
10010         s->buffer[s->seq.len] = 0;
10011
10012         printk(KERN_TRACE "%s", s->buffer);
10013
10014         trace_seq_init(s);
10015 }
10016
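/*
 * Set up a trace_iterator over the global trace buffer, using the static
 * temp and format buffers so it can be used where allocation is unsafe
 * (e.g. from ftrace_dump() in a panic).
 */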
10017 void trace_init_global_iter(struct trace_iterator *iter)
10018 {
10019         iter->tr = &global_trace;
10020         iter->trace = iter->tr->current_trace;
10021         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10022         iter->array_buffer = &global_trace.array_buffer;
10023
10024         if (iter->trace && iter->trace->open)
10025                 iter->trace->open(iter);
10026
10027         /* Annotate start of buffers if we had overruns */
10028         if (ring_buffer_overruns(iter->array_buffer->buffer))
10029                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10030
10031         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10032         if (trace_clocks[iter->tr->clock_id].in_ns)
10033                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10034
10035         /* Cannot use kmalloc for iter.temp and iter.fmt */
10036         iter->temp = static_temp_buf;
10037         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10038         iter->fmt = static_fmt_buf;
10039         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10040 }
10041
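/*
 * Dump the global ring buffer to the console. Called from the die/panic
 * notifiers (and sysrq-z); tracing is turned off before dumping.
 */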
10042 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10043 {
10044         /* use static because iter can be a bit big for the stack */
10045         static struct trace_iterator iter;
10046         static atomic_t dump_running;
10047         struct trace_array *tr = &global_trace;
10048         unsigned int old_userobj;
10049         unsigned long flags;
10050         int cnt = 0, cpu;
10051
10052         /* Only allow one dump user at a time. */
10053         if (atomic_inc_return(&dump_running) != 1) {
10054                 atomic_dec(&dump_running);
10055                 return;
10056         }
10057
10058         /*
10059          * Always turn off tracing when we dump.
10060          * We don't need to show trace output of what happens
10061          * between multiple crashes.
10062          *
10063          * If the user does a sysrq-z, then they can re-enable
10064          * tracing with echo 1 > tracing_on.
10065          */
10066         tracing_off();
10067
10068         local_irq_save(flags);
10069
10070         /* Simulate the iterator */
10071         trace_init_global_iter(&iter);
10072
10073         for_each_tracing_cpu(cpu) {
10074                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10075         }
10076
10077         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10078
10079         /* don't look at user memory in panic mode */
10080         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10081
10082         switch (oops_dump_mode) {
10083         case DUMP_ALL:
10084                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10085                 break;
10086         case DUMP_ORIG:
10087                 iter.cpu_file = raw_smp_processor_id();
10088                 break;
10089         case DUMP_NONE:
10090                 goto out_enable;
10091         default:
10092                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10093                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10094         }
10095
10096         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10097
10098         /* Did function tracer already get disabled? */
10099         if (ftrace_is_dead()) {
10100                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10101                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10102         }
10103
10104         /*
10105          * We need to stop all tracing on all CPUs to read
10106          * the next buffer. This is a bit expensive, but is
10107          * not done often. We print everything we can read,
10108          * and then release the locks again.
10109          */
10110
10111         while (!trace_empty(&iter)) {
10112
10113                 if (!cnt)
10114                         printk(KERN_TRACE "---------------------------------\n");
10115
10116                 cnt++;
10117
10118                 trace_iterator_reset(&iter);
10119                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10120
10121                 if (trace_find_next_entry_inc(&iter) != NULL) {
10122                         int ret;
10123
10124                         ret = print_trace_line(&iter);
10125                         if (ret != TRACE_TYPE_NO_CONSUME)
10126                                 trace_consume(&iter);
10127                 }
10128                 touch_nmi_watchdog();
10129
10130                 trace_printk_seq(&iter.seq);
10131         }
10132
10133         if (!cnt)
10134                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10135         else
10136                 printk(KERN_TRACE "---------------------------------\n");
10137
10138  out_enable:
10139         tr->trace_flags |= old_userobj;
10140
10141         for_each_tracing_cpu(cpu) {
10142                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10143         }
10144         atomic_dec(&dump_running);
10145         local_irq_restore(flags);
10146 }
10147 EXPORT_SYMBOL_GPL(ftrace_dump);
10148
10149 #define WRITE_BUFSIZE  4096
10150
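/*
 * Copy a command string from user space in WRITE_BUFSIZE chunks, strip
 * '#' comments, and call createfn() for each newline-terminated line.
 * Returns the number of bytes consumed, or an error.
 */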
10151 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10152                                 size_t count, loff_t *ppos,
10153                                 int (*createfn)(const char *))
10154 {
10155         char *kbuf, *buf, *tmp;
10156         int ret = 0;
10157         size_t done = 0;
10158         size_t size;
10159
10160         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10161         if (!kbuf)
10162                 return -ENOMEM;
10163
10164         while (done < count) {
10165                 size = count - done;
10166
10167                 if (size >= WRITE_BUFSIZE)
10168                         size = WRITE_BUFSIZE - 1;
10169
10170                 if (copy_from_user(kbuf, buffer + done, size)) {
10171                         ret = -EFAULT;
10172                         goto out;
10173                 }
10174                 kbuf[size] = '\0';
10175                 buf = kbuf;
10176                 do {
10177                         tmp = strchr(buf, '\n');
10178                         if (tmp) {
10179                                 *tmp = '\0';
10180                                 size = tmp - buf + 1;
10181                         } else {
10182                                 size = strlen(buf);
10183                                 if (done + size < count) {
10184                                         if (buf != kbuf)
10185                                                 break;
10186                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10187                                         pr_warn("Line length is too long: Should be less than %d\n",
10188                                                 WRITE_BUFSIZE - 2);
10189                                         ret = -EINVAL;
10190                                         goto out;
10191                                 }
10192                         }
10193                         done += size;
10194
10195                         /* Remove comments */
10196                         tmp = strchr(buf, '#');
10197
10198                         if (tmp)
10199                                 *tmp = '\0';
10200
10201                         ret = createfn(buf);
10202                         if (ret)
10203                                 goto out;
10204                         buf += size;
10205
10206                 } while (done < count);
10207         }
10208         ret = done;
10209
10210 out:
10211         kfree(kbuf);
10212
10213         return ret;
10214 }
10215
10216 #ifdef CONFIG_TRACER_MAX_TRACE
10217 __init static bool tr_needs_alloc_snapshot(const char *name)
10218 {
10219         char *test;
10220         int len = strlen(name);
10221         bool ret;
10222
10223         if (!boot_snapshot_index)
10224                 return false;
10225
10226         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10227             boot_snapshot_info[len] == '\t')
10228                 return true;
10229
10230         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10231         if (!test)
10232                 return false;
10233
10234         sprintf(test, "\t%s\t", name);
10235         ret = strstr(boot_snapshot_info, test) == NULL;
10236         kfree(test);
10237         return ret;
10238 }
10239
10240 __init static void do_allocate_snapshot(const char *name)
10241 {
10242         if (!tr_needs_alloc_snapshot(name))
10243                 return;
10244
10245         /*
10246          * When allocate_snapshot is set, the next call to
10247          * allocate_trace_buffers() (called by trace_array_get_by_name())
10248          * will allocate the snapshot buffer. That will also clear
10249          * this flag.
10250          */
10251         allocate_snapshot = true;
10252 }
10253 #else
10254 static inline void do_allocate_snapshot(const char *name) { }
10255 #endif
10256
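/*
 * Create the trace instances listed in boot_instance_info (from the kernel
 * command line) and enable any events given after each instance name.
 */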
10257 __init static void enable_instances(void)
10258 {
10259         struct trace_array *tr;
10260         char *curr_str;
10261         char *str;
10262         char *tok;
10263
10264         /* A tab is always appended */
10265         boot_instance_info[boot_instance_index - 1] = '\0';
10266         str = boot_instance_info;
10267
10268         while ((curr_str = strsep(&str, "\t"))) {
10269
10270                 tok = strsep(&curr_str, ",");
10271
10272                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10273                         do_allocate_snapshot(tok);
10274
10275                 tr = trace_array_get_by_name(tok);
10276                 if (!tr) {
10277                         pr_warn("Failed to create instance buffer %s\n", tok);
10278                         continue;
10279                 }
10280                 /* Allow user space to delete it */
10281                 trace_array_put(tr);
10282
10283                 while ((tok = strsep(&curr_str, ","))) {
10284                         early_enable_events(tr, tok, true);
10285                 }
10286         }
10287 }
10288
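/*
 * Boot-time setup of the global trace array: allocate the CPU masks and
 * ring buffers, register the nop tracer, and hook up the die and panic
 * notifiers.
 */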
10289 __init static int tracer_alloc_buffers(void)
10290 {
10291         int ring_buf_size;
10292         int ret = -ENOMEM;
10293
10294
10295         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10296                 pr_warn("Tracing disabled due to lockdown\n");
10297                 return -EPERM;
10298         }
10299
10300         /*
10301          * Make sure we don't accidentally add more trace options
10302          * than we have bits for.
10303          */
10304         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10305
10306         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10307                 goto out;
10308
10309         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10310                 goto out_free_buffer_mask;
10311
10312         /* Only allocate trace_printk buffers if a trace_printk exists */
10313         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10314                 /* Must be called before global_trace.buffer is allocated */
10315                 trace_printk_init_buffers();
10316
10317         /* To save memory, keep the ring buffer size at its minimum */
10318         if (ring_buffer_expanded)
10319                 ring_buf_size = trace_buf_size;
10320         else
10321                 ring_buf_size = 1;
10322
10323         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10324         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10325
10326         raw_spin_lock_init(&global_trace.start_lock);
10327
10328         /*
10329          * The prepare callback allocates some memory for the ring buffer. We
10330          * don't free the buffer if the CPU goes down. If we were to free
10331          * the buffer, then the user would lose any trace that was in the
10332          * buffer. The memory will be removed once the "instance" is removed.
10333          */
10334         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10335                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10336                                       NULL);
10337         if (ret < 0)
10338                 goto out_free_cpumask;
10339         /* Used for event triggers */
10340         ret = -ENOMEM;
10341         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10342         if (!temp_buffer)
10343                 goto out_rm_hp_state;
10344
10345         if (trace_create_savedcmd() < 0)
10346                 goto out_free_temp_buffer;
10347
10348         /* TODO: make the number of buffers hot pluggable with CPUs */
10349         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10350                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10351                 goto out_free_savedcmd;
10352         }
10353
10354         if (global_trace.buffer_disabled)
10355                 tracing_off();
10356
10357         if (trace_boot_clock) {
10358                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10359                 if (ret < 0)
10360                         pr_warn("Trace clock %s not defined, going back to default\n",
10361                                 trace_boot_clock);
10362         }
10363
10364         /*
10365          * register_tracer() might reference current_trace, so it
10366          * needs to be set before we register anything. This is
10367          * just a bootstrap of current_trace anyway.
10368          */
10369         global_trace.current_trace = &nop_trace;
10370
10371         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10372
10373         ftrace_init_global_array_ops(&global_trace);
10374
10375         init_trace_flags_index(&global_trace);
10376
10377         register_tracer(&nop_trace);
10378
10379         /* Function tracing may start here (via kernel command line) */
10380         init_function_trace();
10381
10382         /* All seems OK, enable tracing */
10383         tracing_disabled = 0;
10384
10385         atomic_notifier_chain_register(&panic_notifier_list,
10386                                        &trace_panic_notifier);
10387
10388         register_die_notifier(&trace_die_notifier);
10389
10390         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10391
10392         INIT_LIST_HEAD(&global_trace.systems);
10393         INIT_LIST_HEAD(&global_trace.events);
10394         INIT_LIST_HEAD(&global_trace.hist_vars);
10395         INIT_LIST_HEAD(&global_trace.err_log);
10396         list_add(&global_trace.list, &ftrace_trace_arrays);
10397
10398         apply_trace_boot_options();
10399
10400         register_snapshot_cmd();
10401
10402         test_can_verify();
10403
10404         return 0;
10405
10406 out_free_savedcmd:
10407         free_saved_cmdlines_buffer(savedcmd);
10408 out_free_temp_buffer:
10409         ring_buffer_free(temp_buffer);
10410 out_rm_hp_state:
10411         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10412 out_free_cpumask:
10413         free_cpumask_var(global_trace.tracing_cpumask);
10414 out_free_buffer_mask:
10415         free_cpumask_var(tracing_buffer_mask);
10416 out:
10417         return ret;
10418 }
10419
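/*
 * When a boot snapshot was requested, snapshot every instance that has a
 * snapshot buffer allocated.
 */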
10420 void __init ftrace_boot_snapshot(void)
10421 {
10422 #ifdef CONFIG_TRACER_MAX_TRACE
10423         struct trace_array *tr;
10424
10425         if (!snapshot_at_boot)
10426                 return;
10427
10428         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10429                 if (!tr->allocated_snapshot)
10430                         continue;
10431
10432                 tracing_snapshot_instance(tr);
10433                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10434         }
10435 #endif
10436 }
10437
10438 void __init early_trace_init(void)
10439 {
10440         if (tracepoint_printk) {
10441                 tracepoint_print_iter =
10442                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10443                 if (MEM_FAIL(!tracepoint_print_iter,
10444                              "Failed to allocate trace iterator\n"))
10445                         tracepoint_printk = 0;
10446                 else
10447                         static_key_enable(&tracepoint_printk_key.key);
10448         }
10449         tracer_alloc_buffers();
10450
10451         init_events();
10452 }
10453
10454 void __init trace_init(void)
10455 {
10456         trace_event_init();
10457
10458         if (boot_instance_index)
10459                 enable_instances();
10460 }
10461
10462 __init static void clear_boot_tracer(void)
10463 {
10464         /*
10465          * The default bootup tracer name is stored in an init section.
10466          * This function is called at late_initcall time. If we did not
10467          * find the boot tracer, then clear it out, to prevent
10468          * later registration from accessing the buffer that is
10469          * about to be freed.
10470          */
10471         if (!default_bootup_tracer)
10472                 return;
10473
10474         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10475                default_bootup_tracer);
10476         default_bootup_tracer = NULL;
10477 }
10478
10479 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10480 __init static void tracing_set_default_clock(void)
10481 {
10482         /* sched_clock_stable() is determined in late_initcall */
10483         if (!trace_boot_clock && !sched_clock_stable()) {
10484                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10485                         pr_warn("Can not set tracing clock due to lockdown\n");
10486                         return;
10487                 }
10488
10489                 printk(KERN_WARNING
10490                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10491                        "If you want to keep using the local clock, then add:\n"
10492                        "  \"trace_clock=local\"\n"
10493                        "on the kernel command line\n");
10494                 tracing_set_clock(&global_trace, "global");
10495         }
10496 }
10497 #else
10498 static inline void tracing_set_default_clock(void) { }
10499 #endif
10500
10501 __init static int late_trace_init(void)
10502 {
10503         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10504                 static_key_disable(&tracepoint_printk_key.key);
10505                 tracepoint_printk = 0;
10506         }
10507
10508         tracing_set_default_clock();
10509         clear_boot_tracer();
10510         return 0;
10511 }
10512
10513 late_initcall_sync(late_trace_init);