kernel/trace/trace.c (tomoyo/tomoyo-test1.git, commit 02be4ddd4ad5d2dff07ad52d6f1232ab49be691c)

1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurs.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 and will be set to zero only if the
101  * initialization of the tracer is successful; that is the only
102  * place that clears it.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs.
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * from "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
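/*
 * A minimal example of how the boot parameters handled above might be
 * combined on a kernel command line (illustrative only; the exact tracer,
 * clock and option names depend on what is built into the kernel):
 *
 *	ftrace=function_graph trace_buf_size=10M trace_clock=global \
 *	trace_options=sym-addr tp_printk ftrace_dump_on_oops=orig_cpu \
 *	traceoff_on_warning alloc_snapshot
 */
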
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
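/*
 * For example, ns2usecs(1500) returns (1500 + 500) / 1000 = 2: the value
 * is rounded to the nearest microsecond rather than truncated.
 */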
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  * @this_tr: the trace array to drop a reference on
307  *
308  * NOTE: Use this when we no longer need the trace array returned by
309  * trace_array_get_by_name(). This ensures the trace array can later be
310  * destroyed.
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
322
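/*
 * Sketch of the expected get/put pairing for code outside this file
 * (hypothetical caller; "my_instance" is just an example name):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *	trace_array_printk(tr, _THIS_IP_, "hello\n");
 *	trace_array_put(tr);
 *
 * trace_array_get_by_name() takes the reference, and trace_array_put()
 * must be called once the caller is done with the instance.
 */
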
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
390 {
391         /*
392          * Return false, because if filtered_pids does not exist,
393          * all pids are good to trace.
394          */
395         if (!filtered_pids)
396                 return false;
397
398         return !trace_find_filtered_pid(filtered_pids, task->pid);
399 }
400
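/*
 * Illustrative sketch of how an event probe might consult the helper
 * above before recording (hypothetical function; real callers typically
 * fetch the pid list from their trace_array under RCU, e.g. with
 * rcu_dereference_sched()):
 *
 *	static void my_probe(struct trace_pid_list *pid_list,
 *			     struct task_struct *task)
 *	{
 *		if (trace_ignore_this_task(pid_list, task))
 *			return;
 *		... record the event for task ...
 *	}
 */
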
401 /**
402  * trace_filter_add_remove_task - Add or remove a task from a pid_list
403  * @pid_list: The list to modify
404  * @self: The current task for fork or NULL for exit
405  * @task: The task to add or remove
406  *
407  * If adding a task, if @self is defined, the task is only added if @self
408  * is also included in @pid_list. This happens on fork and tasks should
409  * only be added when the parent is listed. If @self is NULL, then the
410  * @task pid will be removed from the list, which would happen on exit
411  * of a task.
412  */
413 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
414                                   struct task_struct *self,
415                                   struct task_struct *task)
416 {
417         if (!pid_list)
418                 return;
419
420         /* For forks, we only add if the forking task is listed */
421         if (self) {
422                 if (!trace_find_filtered_pid(pid_list, self->pid))
423                         return;
424         }
425
426         /* Sorry, but we don't support pid_max changing after setting */
427         if (task->pid >= pid_list->pid_max)
428                 return;
429
430         /* "self" is set for forks, and NULL for exits */
431         if (self)
432                 set_bit(task->pid, pid_list->pids);
433         else
434                 clear_bit(task->pid, pid_list->pids);
435 }
436
437 /**
438  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
439  * @pid_list: The pid list to show
440  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
441  * @pos: The position of the file
442  *
443  * This is used by the seq_file "next" operation to iterate the pids
444  * listed in a trace_pid_list structure.
445  *
446  * Returns the pid+1 as we want to display pid of zero, but NULL would
447  * stop the iteration.
448  */
449 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
450 {
451         unsigned long pid = (unsigned long)v;
452
453         (*pos)++;
454
455         /* pid already is +1 of the actual previous bit */
456         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
457
458         /* Return pid + 1 to allow zero to be represented */
459         if (pid < pid_list->pid_max)
460                 return (void *)(pid + 1);
461
462         return NULL;
463 }
464
465 /**
466  * trace_pid_start - Used for seq_file to start reading pid lists
467  * @pid_list: The pid list to show
468  * @pos: The position of the file
469  *
470  * This is used by seq_file "start" operation to start the iteration
471  * of listing pids.
472  *
473  * Returns the pid+1 as we want to display pid of zero, but NULL would
474  * stop the iteration.
475  */
476 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
477 {
478         unsigned long pid;
479         loff_t l = 0;
480
481         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
482         if (pid >= pid_list->pid_max)
483                 return NULL;
484
485         /* Return pid + 1 so that zero can be the exit value */
486         for (pid++; pid && l < *pos;
487              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
488                 ;
489         return (void *)pid;
490 }
491
492 /**
493  * trace_pid_show - show the current pid in seq_file processing
494  * @m: The seq_file structure to write into
495  * @v: A void pointer of the pid (+1) value to display
496  *
497  * Can be directly used by seq_file operations to display the current
498  * pid value.
499  */
500 int trace_pid_show(struct seq_file *m, void *v)
501 {
502         unsigned long pid = (unsigned long)v - 1;
503
504         seq_printf(m, "%lu\n", pid);
505         return 0;
506 }
507
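/*
 * The three helpers above are meant to be wired into a seq_file. A
 * minimal sketch (hypothetical; "my_pid_list" stands in for a pid list
 * that real users look up under RCU from their trace_array):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_sops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */
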
508 /* 128 should be much more than enough */
509 #define PID_BUF_SIZE            127
510
511 int trace_pid_write(struct trace_pid_list *filtered_pids,
512                     struct trace_pid_list **new_pid_list,
513                     const char __user *ubuf, size_t cnt)
514 {
515         struct trace_pid_list *pid_list;
516         struct trace_parser parser;
517         unsigned long val;
518         int nr_pids = 0;
519         ssize_t read = 0;
520         ssize_t ret = 0;
521         loff_t pos;
522         pid_t pid;
523
524         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
525                 return -ENOMEM;
526
527         /*
528          * Always create a new array when the user adds new pids. The
529          * write is an all-or-nothing operation: if it fails, the
530          * current list is not modified; if it succeeds, the new list
531          * replaces the old one.
532          */
533         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
534         if (!pid_list) {
535                 trace_parser_put(&parser);
536                 return -ENOMEM;
537         }
538
539         pid_list->pid_max = READ_ONCE(pid_max);
540
541         /* Only truncating will shrink pid_max */
542         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
543                 pid_list->pid_max = filtered_pids->pid_max;
544
545         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
546         if (!pid_list->pids) {
547                 trace_parser_put(&parser);
548                 kfree(pid_list);
549                 return -ENOMEM;
550         }
551
552         if (filtered_pids) {
553                 /* copy the current bits to the new max */
554                 for_each_set_bit(pid, filtered_pids->pids,
555                                  filtered_pids->pid_max) {
556                         set_bit(pid, pid_list->pids);
557                         nr_pids++;
558                 }
559         }
560
561         while (cnt > 0) {
562
563                 pos = 0;
564
565                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
566                 if (ret < 0 || !trace_parser_loaded(&parser))
567                         break;
568
569                 read += ret;
570                 ubuf += ret;
571                 cnt -= ret;
572
573                 ret = -EINVAL;
574                 if (kstrtoul(parser.buffer, 0, &val))
575                         break;
576                 if (val >= pid_list->pid_max)
577                         break;
578
579                 pid = (pid_t)val;
580
581                 set_bit(pid, pid_list->pids);
582                 nr_pids++;
583
584                 trace_parser_clear(&parser);
585                 ret = 0;
586         }
587         trace_parser_put(&parser);
588
589         if (ret < 0) {
590                 trace_free_pid_list(pid_list);
591                 return ret;
592         }
593
594         if (!nr_pids) {
595                 /* Cleared the list of pids */
596                 trace_free_pid_list(pid_list);
597                 read = ret;
598                 pid_list = NULL;
599         }
600
601         *new_pid_list = pid_list;
602
603         return read;
604 }
605
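/*
 * A rough sketch of how a write() handler might use trace_pid_write()
 * and publish the result (hypothetical names; real users such as the
 * event pid filter pair this with RCU to switch lists safely):
 *
 *	struct trace_pid_list *filtered, *new_list = NULL;
 *	ssize_t ret;
 *
 *	filtered = rcu_dereference_protected(tr->filtered_pids,
 *					     lockdep_is_held(&event_mutex));
 *	ret = trace_pid_write(filtered, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *	rcu_assign_pointer(tr->filtered_pids, new_list);
 *	synchronize_rcu();
 *	if (filtered)
 *		trace_free_pid_list(filtered);
 */
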
606 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
607 {
608         u64 ts;
609
610         /* Early boot up does not have a buffer yet */
611         if (!buf->buffer)
612                 return trace_clock_local();
613
614         ts = ring_buffer_time_stamp(buf->buffer, cpu);
615         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
616
617         return ts;
618 }
619
620 u64 ftrace_now(int cpu)
621 {
622         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
623 }
624
625 /**
626  * tracing_is_enabled - Show if global_trace has been disabled
627  *
628  * Shows if the global trace has been enabled or not. It uses the
629  * mirror flag "buffer_disabled" to be used in fast paths such as for
630  * the irqsoff tracer. But it may be inaccurate due to races. If you
631  * need to know the accurate state, use tracing_is_on() which is a little
632  * slower, but accurate.
633  */
634 int tracing_is_enabled(void)
635 {
636         /*
637          * For quick access (irqsoff uses this in fast path), just
638          * return the mirror variable of the state of the ring buffer.
639          * It's a little racy, but we don't really care.
640          */
641         smp_rmb();
642         return !global_trace.buffer_disabled;
643 }
644
645 /*
646  * trace_buf_size is the size in bytes that is allocated
647  * for a buffer. Note, the number of bytes is always rounded
648  * to page size.
649  *
650  * This number is purposely set to a low value of 16384 so that,
651  * if a dump on oops happens, we do not have to wait for an
652  * excessive amount of output. It is configurable at both boot
653  * time and run time anyway.
654  */
655 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
656
657 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
658
659 /* trace_types holds a link list of available tracers. */
660 static struct tracer            *trace_types __read_mostly;
661
662 /*
663  * trace_types_lock is used to protect the trace_types list.
664  */
665 DEFINE_MUTEX(trace_types_lock);
666
667 /*
668  * serialize the access of the ring buffer
669  *
670  * The ring buffer serializes readers, but that is only low-level
671  * protection. The validity of the events (returned by ring_buffer_peek()
672  * etc.) is not protected by the ring buffer.
673  *
674  * The content of events may become garbage if we allow another process to
675  * consume these events concurrently:
676  *   A) the page of the consumed events may become a normal page
677  *      (not a reader page) in the ring buffer, and this page will be
678  *      rewritten by the event producer.
679  *   B) the page of the consumed events may become a page for splice_read,
680  *      and this page will be returned to the system.
681  *
682  * These primitives allow multiple processes to access different per-cpu
683  * ring buffers concurrently.
684  *
685  * These primitives don't distinguish read-only and read-consume access.
686  * Multiple read-only accesses are also serialized.
687  */
688
689 #ifdef CONFIG_SMP
690 static DECLARE_RWSEM(all_cpu_access_lock);
691 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         if (cpu == RING_BUFFER_ALL_CPUS) {
696                 /* gain it for accessing the whole ring buffer. */
697                 down_write(&all_cpu_access_lock);
698         } else {
699                 /* gain it for accessing a cpu ring buffer. */
700
701                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
702                 down_read(&all_cpu_access_lock);
703
704                 /* Secondly block other access to this @cpu ring buffer. */
705                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
706         }
707 }
708
709 static inline void trace_access_unlock(int cpu)
710 {
711         if (cpu == RING_BUFFER_ALL_CPUS) {
712                 up_write(&all_cpu_access_lock);
713         } else {
714                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
715                 up_read(&all_cpu_access_lock);
716         }
717 }
718
719 static inline void trace_access_lock_init(void)
720 {
721         int cpu;
722
723         for_each_possible_cpu(cpu)
724                 mutex_init(&per_cpu(cpu_access_lock, cpu));
725 }
726
727 #else
728
729 static DEFINE_MUTEX(access_lock);
730
731 static inline void trace_access_lock(int cpu)
732 {
733         (void)cpu;
734         mutex_lock(&access_lock);
735 }
736
737 static inline void trace_access_unlock(int cpu)
738 {
739         (void)cpu;
740         mutex_unlock(&access_lock);
741 }
742
743 static inline void trace_access_lock_init(void)
744 {
745 }
746
747 #endif
748
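/*
 * A consumer path is expected to bracket its reads with these helpers,
 * roughly like the sketch below (illustrative only):
 *
 *	trace_access_lock(iter->cpu_file);
 *	... consume or peek at events for iter->cpu_file ...
 *	trace_access_unlock(iter->cpu_file);
 *
 * With cpu_file == RING_BUFFER_ALL_CPUS this takes the write side of
 * all_cpu_access_lock and therefore excludes all per-cpu readers.
 */
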
749 #ifdef CONFIG_STACKTRACE
750 static void __ftrace_trace_stack(struct trace_buffer *buffer,
751                                  unsigned long flags,
752                                  int skip, int pc, struct pt_regs *regs);
753 static inline void ftrace_trace_stack(struct trace_array *tr,
754                                       struct trace_buffer *buffer,
755                                       unsigned long flags,
756                                       int skip, int pc, struct pt_regs *regs);
757
758 #else
759 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
760                                         unsigned long flags,
761                                         int skip, int pc, struct pt_regs *regs)
762 {
763 }
764 static inline void ftrace_trace_stack(struct trace_array *tr,
765                                       struct trace_buffer *buffer,
766                                       unsigned long flags,
767                                       int skip, int pc, struct pt_regs *regs)
768 {
769 }
770
771 #endif
772
773 static __always_inline void
774 trace_event_setup(struct ring_buffer_event *event,
775                   int type, unsigned long flags, int pc)
776 {
777         struct trace_entry *ent = ring_buffer_event_data(event);
778
779         tracing_generic_entry_update(ent, type, flags, pc);
780 }
781
782 static __always_inline struct ring_buffer_event *
783 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
784                           int type,
785                           unsigned long len,
786                           unsigned long flags, int pc)
787 {
788         struct ring_buffer_event *event;
789
790         event = ring_buffer_lock_reserve(buffer, len);
791         if (event != NULL)
792                 trace_event_setup(event, type, flags, pc);
793
794         return event;
795 }
796
797 void tracer_tracing_on(struct trace_array *tr)
798 {
799         if (tr->array_buffer.buffer)
800                 ring_buffer_record_on(tr->array_buffer.buffer);
801         /*
802          * This flag is looked at when buffers haven't been allocated
803          * yet, or by some tracers (like irqsoff), that just want to
804          * know if the ring buffer has been disabled, but it can handle
805          * races of where it gets disabled but we still do a record.
806          * As the check is in the fast path of the tracers, it is more
807          * important to be fast than accurate.
808          */
809         tr->buffer_disabled = 0;
810         /* Make the flag seen by readers */
811         smp_wmb();
812 }
813
814 /**
815  * tracing_on - enable tracing buffers
816  *
817  * This function enables tracing buffers that may have been
818  * disabled with tracing_off.
819  */
820 void tracing_on(void)
821 {
822         tracer_tracing_on(&global_trace);
823 }
824 EXPORT_SYMBOL_GPL(tracing_on);
825
826
827 static __always_inline void
828 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
829 {
830         __this_cpu_write(trace_taskinfo_save, true);
831
832         /* If this is the temp buffer, we need to commit fully */
833         if (this_cpu_read(trace_buffered_event) == event) {
834                 /* Length is in event->array[0] */
835                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
836                 /* Release the temp buffer */
837                 this_cpu_dec(trace_buffered_event_cnt);
838         } else
839                 ring_buffer_unlock_commit(buffer, event);
840 }
841
842 /**
843  * __trace_puts - write a constant string into the trace buffer.
844  * @ip:    The address of the caller
845  * @str:   The constant string to write
846  * @size:  The size of the string.
847  */
848 int __trace_puts(unsigned long ip, const char *str, int size)
849 {
850         struct ring_buffer_event *event;
851         struct trace_buffer *buffer;
852         struct print_entry *entry;
853         unsigned long irq_flags;
854         int alloc;
855         int pc;
856
857         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
858                 return 0;
859
860         pc = preempt_count();
861
862         if (unlikely(tracing_selftest_running || tracing_disabled))
863                 return 0;
864
865         alloc = sizeof(*entry) + size + 2; /* possible \n added */
866
867         local_save_flags(irq_flags);
868         buffer = global_trace.array_buffer.buffer;
869         ring_buffer_nest_start(buffer);
870         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
871                                             irq_flags, pc);
872         if (!event) {
873                 size = 0;
874                 goto out;
875         }
876
877         entry = ring_buffer_event_data(event);
878         entry->ip = ip;
879
880         memcpy(&entry->buf, str, size);
881
882         /* Add a newline if necessary */
883         if (entry->buf[size - 1] != '\n') {
884                 entry->buf[size] = '\n';
885                 entry->buf[size + 1] = '\0';
886         } else
887                 entry->buf[size] = '\0';
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891  out:
892         ring_buffer_nest_end(buffer);
893         return size;
894 }
895 EXPORT_SYMBOL_GPL(__trace_puts);
896
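/*
 * Callers normally do not use __trace_puts() directly but go through the
 * trace_puts() macro (defined in include/linux/kernel.h), which picks
 * __trace_bputs() for string literals and falls back to __trace_puts()
 * otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */
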
897 /**
898  * __trace_bputs - write the pointer to a constant string into trace buffer
899  * @ip:    The address of the caller
900  * @str:   The constant string whose pointer is written to the buffer
901  */
902 int __trace_bputs(unsigned long ip, const char *str)
903 {
904         struct ring_buffer_event *event;
905         struct trace_buffer *buffer;
906         struct bputs_entry *entry;
907         unsigned long irq_flags;
908         int size = sizeof(struct bputs_entry);
909         int ret = 0;
910         int pc;
911
912         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
913                 return 0;
914
915         pc = preempt_count();
916
917         if (unlikely(tracing_selftest_running || tracing_disabled))
918                 return 0;
919
920         local_save_flags(irq_flags);
921         buffer = global_trace.array_buffer.buffer;
922
923         ring_buffer_nest_start(buffer);
924         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
925                                             irq_flags, pc);
926         if (!event)
927                 goto out;
928
929         entry = ring_buffer_event_data(event);
930         entry->ip                       = ip;
931         entry->str                      = str;
932
933         __buffer_unlock_commit(buffer, event);
934         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
935
936         ret = 1;
937  out:
938         ring_buffer_nest_end(buffer);
939         return ret;
940 }
941 EXPORT_SYMBOL_GPL(__trace_bputs);
942
943 #ifdef CONFIG_TRACER_SNAPSHOT
944 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
945 {
946         struct tracer *tracer = tr->current_trace;
947         unsigned long flags;
948
949         if (in_nmi()) {
950                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
951                 internal_trace_puts("*** snapshot is being ignored        ***\n");
952                 return;
953         }
954
955         if (!tr->allocated_snapshot) {
956                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
957                 internal_trace_puts("*** stopping trace here!   ***\n");
958                 tracing_off();
959                 return;
960         }
961
962         /* Note, snapshot can not be used when the tracer uses it */
963         if (tracer->use_max_tr) {
964                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
965                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
966                 return;
967         }
968
969         local_irq_save(flags);
970         update_max_tr(tr, current, smp_processor_id(), cond_data);
971         local_irq_restore(flags);
972 }
973
974 void tracing_snapshot_instance(struct trace_array *tr)
975 {
976         tracing_snapshot_instance_cond(tr, NULL);
977 }
978
979 /**
980  * tracing_snapshot - take a snapshot of the current buffer.
981  *
982  * This causes a swap between the snapshot buffer and the current live
983  * tracing buffer. You can use this to take snapshots of the live
984  * trace when some condition is triggered, but continue to trace.
985  *
986  * Note, make sure to allocate the snapshot with either
987  * a tracing_snapshot_alloc(), or by doing it manually
988  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
989  *
990  * If the snapshot buffer is not allocated, it will stop tracing.
991  * Basically making a permanent snapshot.
992  */
993 void tracing_snapshot(void)
994 {
995         struct trace_array *tr = &global_trace;
996
997         tracing_snapshot_instance(tr);
998 }
999 EXPORT_SYMBOL_GPL(tracing_snapshot);
1000
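/*
 * A minimal sketch of in-kernel usage, assuming the snapshot buffer is
 * allocated from a context that may sleep (the caller's function names
 * below are made up for illustration):
 *
 *	static int __init my_debug_init(void)
 *	{
 *		return tracing_alloc_snapshot();
 *	}
 *
 *	static void my_interesting_event(void)
 *	{
 *		tracing_snapshot();
 *	}
 *
 * Allocating up front keeps tracing_snapshot() safe to call from contexts
 * that cannot sleep.
 */
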
1001 /**
1002  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1003  * @tr:         The tracing instance to snapshot
1004  * @cond_data:  The data to be tested conditionally, and possibly saved
1005  *
1006  * This is the same as tracing_snapshot() except that the snapshot is
1007  * conditional - the snapshot will only happen if the
1008  * cond_snapshot.update() implementation receiving the cond_data
1009  * returns true, which means that the trace array's cond_snapshot
1010  * update() operation used the cond_data to determine whether the
1011  * snapshot should be taken, and if it was, presumably saved it along
1012  * with the snapshot.
1013  */
1014 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1015 {
1016         tracing_snapshot_instance_cond(tr, cond_data);
1017 }
1018 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1019
1020 /**
1021  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1022  * @tr:         The tracing instance
1023  *
1024  * When the user enables a conditional snapshot using
1025  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1026  * with the snapshot.  This accessor is used to retrieve it.
1027  *
1028  * Should not be called from cond_snapshot.update(), since it takes
1029  * the tr->max_lock lock, which the code calling
1030  * cond_snapshot.update() already holds.
1031  *
1032  * Returns the cond_data associated with the trace array's snapshot.
1033  */
1034 void *tracing_cond_snapshot_data(struct trace_array *tr)
1035 {
1036         void *cond_data = NULL;
1037
1038         arch_spin_lock(&tr->max_lock);
1039
1040         if (tr->cond_snapshot)
1041                 cond_data = tr->cond_snapshot->cond_data;
1042
1043         arch_spin_unlock(&tr->max_lock);
1044
1045         return cond_data;
1046 }
1047 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1048
1049 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1050                                         struct array_buffer *size_buf, int cpu_id);
1051 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1052
1053 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1054 {
1055         int ret;
1056
1057         if (!tr->allocated_snapshot) {
1058
1059                 /* allocate spare buffer */
1060                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1061                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1062                 if (ret < 0)
1063                         return ret;
1064
1065                 tr->allocated_snapshot = true;
1066         }
1067
1068         return 0;
1069 }
1070
1071 static void free_snapshot(struct trace_array *tr)
1072 {
1073         /*
1074          * We don't free the ring buffer; instead, we resize it, because
1075          * the max_tr ring buffer has some state (e.g. ring->clock) that
1076          * we want to preserve.
1077          */
1078         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1079         set_buffer_entries(&tr->max_buffer, 1);
1080         tracing_reset_online_cpus(&tr->max_buffer);
1081         tr->allocated_snapshot = false;
1082 }
1083
1084 /**
1085  * tracing_alloc_snapshot - allocate snapshot buffer.
1086  *
1087  * This only allocates the snapshot buffer if it isn't already
1088  * allocated - it doesn't also take a snapshot.
1089  *
1090  * This is meant to be used in cases where the snapshot buffer needs
1091  * to be set up for events that can't sleep but need to be able to
1092  * trigger a snapshot.
1093  */
1094 int tracing_alloc_snapshot(void)
1095 {
1096         struct trace_array *tr = &global_trace;
1097         int ret;
1098
1099         ret = tracing_alloc_snapshot_instance(tr);
1100         WARN_ON(ret < 0);
1101
1102         return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1105
1106 /**
1107  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1108  *
1109  * This is similar to tracing_snapshot(), but it will allocate the
1110  * snapshot buffer if it isn't already allocated. Use this only
1111  * where it is safe to sleep, as the allocation may sleep.
1112  *
1113  * This causes a swap between the snapshot buffer and the current live
1114  * tracing buffer. You can use this to take snapshots of the live
1115  * trace when some condition is triggered, but continue to trace.
1116  */
1117 void tracing_snapshot_alloc(void)
1118 {
1119         int ret;
1120
1121         ret = tracing_alloc_snapshot();
1122         if (ret < 0)
1123                 return;
1124
1125         tracing_snapshot();
1126 }
1127 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1128
1129 /**
1130  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1131  * @tr:         The tracing instance
1132  * @cond_data:  User data to associate with the snapshot
1133  * @update:     Implementation of the cond_snapshot update function
1134  *
1135  * Check whether the conditional snapshot for the given instance has
1136  * already been enabled, or if the current tracer is already using a
1137  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1138  * save the cond_data and update function inside.
1139  *
1140  * Returns 0 if successful, error otherwise.
1141  */
1142 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1143                                  cond_update_fn_t update)
1144 {
1145         struct cond_snapshot *cond_snapshot;
1146         int ret = 0;
1147
1148         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1149         if (!cond_snapshot)
1150                 return -ENOMEM;
1151
1152         cond_snapshot->cond_data = cond_data;
1153         cond_snapshot->update = update;
1154
1155         mutex_lock(&trace_types_lock);
1156
1157         ret = tracing_alloc_snapshot_instance(tr);
1158         if (ret)
1159                 goto fail_unlock;
1160
1161         if (tr->current_trace->use_max_tr) {
1162                 ret = -EBUSY;
1163                 goto fail_unlock;
1164         }
1165
1166         /*
1167          * The cond_snapshot can only change to NULL without the
1168          * trace_types_lock. We don't care if we race with it going
1169          * to NULL, but we want to make sure that it's not set to
1170          * something other than NULL when we get here, which we can
1171          * do safely with only holding the trace_types_lock and not
1172          * having to take the max_lock.
1173          */
1174         if (tr->cond_snapshot) {
1175                 ret = -EBUSY;
1176                 goto fail_unlock;
1177         }
1178
1179         arch_spin_lock(&tr->max_lock);
1180         tr->cond_snapshot = cond_snapshot;
1181         arch_spin_unlock(&tr->max_lock);
1182
1183         mutex_unlock(&trace_types_lock);
1184
1185         return ret;
1186
1187  fail_unlock:
1188         mutex_unlock(&trace_types_lock);
1189         kfree(cond_snapshot);
1190         return ret;
1191 }
1192 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1193
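/*
 * A rough sketch of how a user of the conditional snapshot API might
 * look (the callback, structure and variable names are made up):
 *
 *	static struct my_cond my_data;
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_cond *c = cond_data;
 *
 *		return c->value > c->threshold;
 *	}
 *	...
 *	err = tracing_snapshot_cond_enable(tr, &my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 *
 * tracing_snapshot_cond() only swaps the buffers if my_update() returns
 * true for the cond_data passed to it.
 */
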
1194 /**
1195  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1196  * @tr:         The tracing instance
1197  *
1198  * Check whether the conditional snapshot for the given instance is
1199  * enabled; if so, free the cond_snapshot associated with it,
1200  * otherwise return -EINVAL.
1201  *
1202  * Returns 0 if successful, error otherwise.
1203  */
1204 int tracing_snapshot_cond_disable(struct trace_array *tr)
1205 {
1206         int ret = 0;
1207
1208         arch_spin_lock(&tr->max_lock);
1209
1210         if (!tr->cond_snapshot)
1211                 ret = -EINVAL;
1212         else {
1213                 kfree(tr->cond_snapshot);
1214                 tr->cond_snapshot = NULL;
1215         }
1216
1217         arch_spin_unlock(&tr->max_lock);
1218
1219         return ret;
1220 }
1221 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1222 #else
1223 void tracing_snapshot(void)
1224 {
1225         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot);
1228 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1229 {
1230         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1231 }
1232 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1233 int tracing_alloc_snapshot(void)
1234 {
1235         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1236         return -ENODEV;
1237 }
1238 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1239 void tracing_snapshot_alloc(void)
1240 {
1241         /* Give warning */
1242         tracing_snapshot();
1243 }
1244 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1245 void *tracing_cond_snapshot_data(struct trace_array *tr)
1246 {
1247         return NULL;
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1250 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1251 {
1252         return -ENODEV;
1253 }
1254 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1255 int tracing_snapshot_cond_disable(struct trace_array *tr)
1256 {
1257         return false;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1260 #endif /* CONFIG_TRACER_SNAPSHOT */
1261
1262 void tracer_tracing_off(struct trace_array *tr)
1263 {
1264         if (tr->array_buffer.buffer)
1265                 ring_buffer_record_off(tr->array_buffer.buffer);
1266         /*
1267          * This flag is looked at when buffers haven't been allocated
1268          * yet, or by some tracers (like irqsoff), that just want to
1269          * know if the ring buffer has been disabled, but it can handle
1270          * races of where it gets disabled but we still do a record.
1271          * As the check is in the fast path of the tracers, it is more
1272          * important to be fast than accurate.
1273          */
1274         tr->buffer_disabled = 1;
1275         /* Make the flag seen by readers */
1276         smp_wmb();
1277 }
1278
1279 /**
1280  * tracing_off - turn off tracing buffers
1281  *
1282  * This function stops the tracing buffers from recording data.
1283  * It does not disable any overhead the tracers themselves may
1284  * be causing. This function simply causes all recording to
1285  * the ring buffers to fail.
1286  */
1287 void tracing_off(void)
1288 {
1289         tracer_tracing_off(&global_trace);
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_off);
1292
1293 void disable_trace_on_warning(void)
1294 {
1295         if (__disable_trace_on_warning)
1296                 tracing_off();
1297 }
1298
1299 /**
1300  * tracer_tracing_is_on - show the real state of the ring buffer
1301  * @tr: the trace array whose ring buffer state is queried
1302  *
1303  * Shows the real state of the ring buffer: whether it is enabled or not.
1304  */
1305 bool tracer_tracing_is_on(struct trace_array *tr)
1306 {
1307         if (tr->array_buffer.buffer)
1308                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1309         return !tr->buffer_disabled;
1310 }
1311
1312 /**
1313  * tracing_is_on - show state of ring buffers enabled
1314  */
1315 int tracing_is_on(void)
1316 {
1317         return tracer_tracing_is_on(&global_trace);
1318 }
1319 EXPORT_SYMBOL_GPL(tracing_is_on);
1320
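/*
 * Typical in-kernel use is to freeze the trace as soon as a suspicious
 * condition is seen so that the events leading up to it are preserved,
 * for example (the check below is hypothetical):
 *
 *	if (data_looks_corrupted(obj)) {
 *		trace_printk("corruption detected\n");
 *		tracing_off();
 *	}
 */
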
1321 static int __init set_buf_size(char *str)
1322 {
1323         unsigned long buf_size;
1324
1325         if (!str)
1326                 return 0;
1327         buf_size = memparse(str, &str);
1328         /* nr_entries can not be zero */
1329         if (buf_size == 0)
1330                 return 0;
1331         trace_buf_size = buf_size;
1332         return 1;
1333 }
1334 __setup("trace_buf_size=", set_buf_size);
1335
1336 static int __init set_tracing_thresh(char *str)
1337 {
1338         unsigned long threshold;
1339         int ret;
1340
1341         if (!str)
1342                 return 0;
1343         ret = kstrtoul(str, 0, &threshold);
1344         if (ret < 0)
1345                 return 0;
1346         tracing_thresh = threshold * 1000;
1347         return 1;
1348 }
1349 __setup("tracing_thresh=", set_tracing_thresh);
1350
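/*
 * Example boot usage for the two parameters above (illustrative):
 *
 *	trace_buf_size=10M tracing_thresh=100
 *
 * trace_buf_size= accepts the usual memparse() suffixes (K, M, G) and
 * sets the per-cpu buffer size in bytes; tracing_thresh= is given in
 * microseconds and converted to nanoseconds above.
 */
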
1351 unsigned long nsecs_to_usecs(unsigned long nsecs)
1352 {
1353         return nsecs / 1000;
1354 }
1355
1356 /*
1357  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1358  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1359  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1360  * of strings in the order that the evals (enum) were defined.
1361  */
1362 #undef C
1363 #define C(a, b) b
1364
1365 /* These must match the bit positions in trace_iterator_flags */
1366 static const char *trace_options[] = {
1367         TRACE_FLAGS
1368         NULL
1369 };
1370
1371 static struct {
1372         u64 (*func)(void);
1373         const char *name;
1374         int in_ns;              /* is this clock in nanoseconds? */
1375 } trace_clocks[] = {
1376         { trace_clock_local,            "local",        1 },
1377         { trace_clock_global,           "global",       1 },
1378         { trace_clock_counter,          "counter",      0 },
1379         { trace_clock_jiffies,          "uptime",       0 },
1380         { trace_clock,                  "perf",         1 },
1381         { ktime_get_mono_fast_ns,       "mono",         1 },
1382         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1383         { ktime_get_boot_fast_ns,       "boot",         1 },
1384         ARCH_TRACE_CLOCKS
1385 };
1386
1387 bool trace_clock_in_ns(struct trace_array *tr)
1388 {
1389         if (trace_clocks[tr->clock_id].in_ns)
1390                 return true;
1391
1392         return false;
1393 }
1394
1395 /*
1396  * trace_parser_get_init - gets the buffer for trace parser
1397  */
1398 int trace_parser_get_init(struct trace_parser *parser, int size)
1399 {
1400         memset(parser, 0, sizeof(*parser));
1401
1402         parser->buffer = kmalloc(size, GFP_KERNEL);
1403         if (!parser->buffer)
1404                 return 1;
1405
1406         parser->size = size;
1407         return 0;
1408 }
1409
1410 /*
1411  * trace_parser_put - frees the buffer for trace parser
1412  */
1413 void trace_parser_put(struct trace_parser *parser)
1414 {
1415         kfree(parser->buffer);
1416         parser->buffer = NULL;
1417 }
1418
1419 /*
1420  * trace_get_user - reads the user input string separated by  space
1421  * (matched by isspace(ch))
1422  *
1423  * For each string found the 'struct trace_parser' is updated,
1424  * and the function returns.
1425  *
1426  * Returns number of bytes read.
1427  *
1428  * See kernel/trace/trace.h for 'struct trace_parser' details.
1429  */
1430 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1431         size_t cnt, loff_t *ppos)
1432 {
1433         char ch;
1434         size_t read = 0;
1435         ssize_t ret;
1436
1437         if (!*ppos)
1438                 trace_parser_clear(parser);
1439
1440         ret = get_user(ch, ubuf++);
1441         if (ret)
1442                 goto out;
1443
1444         read++;
1445         cnt--;
1446
1447         /*
1448          * If the parser is not finished with the last write,
1449          * continue reading the user input without skipping spaces.
1450          */
1451         if (!parser->cont) {
1452                 /* skip white space */
1453                 while (cnt && isspace(ch)) {
1454                         ret = get_user(ch, ubuf++);
1455                         if (ret)
1456                                 goto out;
1457                         read++;
1458                         cnt--;
1459                 }
1460
1461                 parser->idx = 0;
1462
1463                 /* only spaces were written */
1464                 if (isspace(ch) || !ch) {
1465                         *ppos += read;
1466                         ret = read;
1467                         goto out;
1468                 }
1469         }
1470
1471         /* read the non-space input */
1472         while (cnt && !isspace(ch) && ch) {
1473                 if (parser->idx < parser->size - 1)
1474                         parser->buffer[parser->idx++] = ch;
1475                 else {
1476                         ret = -EINVAL;
1477                         goto out;
1478                 }
1479                 ret = get_user(ch, ubuf++);
1480                 if (ret)
1481                         goto out;
1482                 read++;
1483                 cnt--;
1484         }
1485
1486         /* We either got finished input or we have to wait for another call. */
1487         if (isspace(ch) || !ch) {
1488                 parser->buffer[parser->idx] = 0;
1489                 parser->cont = false;
1490         } else if (parser->idx < parser->size - 1) {
1491                 parser->cont = true;
1492                 parser->buffer[parser->idx++] = ch;
1493                 /* Make sure the parsed string always terminates with '\0'. */
1494                 parser->buffer[parser->idx] = 0;
1495         } else {
1496                 ret = -EINVAL;
1497                 goto out;
1498         }
1499
1500         *ppos += read;
1501         ret = read;
1502
1503 out:
1504         return ret;
1505 }
1506
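/*
 * A condensed sketch of the usual calling pattern (see trace_pid_write()
 * above for a complete, real example; declarations of ubuf, cnt, read,
 * ret and pos are omitted, and MY_BUF_SIZE is a placeholder for whatever
 * token length the caller needs):
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, MY_BUF_SIZE))
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		read += ret;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... act on parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */
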
1507 /* TODO add a seq_buf_to_buffer() */
1508 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1509 {
1510         int len;
1511
1512         if (trace_seq_used(s) <= s->seq.readpos)
1513                 return -EBUSY;
1514
1515         len = trace_seq_used(s) - s->seq.readpos;
1516         if (cnt > len)
1517                 cnt = len;
1518         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1519
1520         s->seq.readpos += cnt;
1521         return cnt;
1522 }
1523
1524 unsigned long __read_mostly     tracing_thresh;
1525 static const struct file_operations tracing_max_lat_fops;
1526
1527 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1528         defined(CONFIG_FSNOTIFY)
1529
1530 static struct workqueue_struct *fsnotify_wq;
1531
1532 static void latency_fsnotify_workfn(struct work_struct *work)
1533 {
1534         struct trace_array *tr = container_of(work, struct trace_array,
1535                                               fsnotify_work);
1536         fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1537                  tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1538 }
1539
1540 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1541 {
1542         struct trace_array *tr = container_of(iwork, struct trace_array,
1543                                               fsnotify_irqwork);
1544         queue_work(fsnotify_wq, &tr->fsnotify_work);
1545 }
1546
1547 static void trace_create_maxlat_file(struct trace_array *tr,
1548                                      struct dentry *d_tracer)
1549 {
1550         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1551         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1552         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1553                                               d_tracer, &tr->max_latency,
1554                                               &tracing_max_lat_fops);
1555 }
1556
1557 __init static int latency_fsnotify_init(void)
1558 {
1559         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1560                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1561         if (!fsnotify_wq) {
1562                 pr_err("Unable to allocate tr_max_lat_wq\n");
1563                 return -ENOMEM;
1564         }
1565         return 0;
1566 }
1567
1568 late_initcall_sync(latency_fsnotify_init);
1569
1570 void latency_fsnotify(struct trace_array *tr)
1571 {
1572         if (!fsnotify_wq)
1573                 return;
1574         /*
1575          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1576          * possible that we are called from __schedule() or do_idle(), which
1577          * could cause a deadlock.
1578          */
1579         irq_work_queue(&tr->fsnotify_irqwork);
1580 }
1581
1582 /*
1583  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1584  *  defined(CONFIG_FSNOTIFY)
1585  */
1586 #else
1587
1588 #define trace_create_maxlat_file(tr, d_tracer)                          \
1589         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1590                           &tr->max_latency, &tracing_max_lat_fops)
1591
1592 #endif
1593
1594 #ifdef CONFIG_TRACER_MAX_TRACE
1595 /*
1596  * Copy the new maximum trace into the separate maximum-trace
1597  * structure. (this way the maximum trace is permanently saved,
1598  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1599  */
1600 static void
1601 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1602 {
1603         struct array_buffer *trace_buf = &tr->array_buffer;
1604         struct array_buffer *max_buf = &tr->max_buffer;
1605         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1606         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1607
1608         max_buf->cpu = cpu;
1609         max_buf->time_start = data->preempt_timestamp;
1610
1611         max_data->saved_latency = tr->max_latency;
1612         max_data->critical_start = data->critical_start;
1613         max_data->critical_end = data->critical_end;
1614
1615         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1616         max_data->pid = tsk->pid;
1617         /*
1618          * If tsk == current, then use current_uid(), as that does not use
1619          * RCU. The irq tracer can be called out of RCU scope.
1620          */
1621         if (tsk == current)
1622                 max_data->uid = current_uid();
1623         else
1624                 max_data->uid = task_uid(tsk);
1625
1626         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1627         max_data->policy = tsk->policy;
1628         max_data->rt_priority = tsk->rt_priority;
1629
1630         /* record this task's comm */
1631         tracing_record_cmdline(tsk);
1632         latency_fsnotify(tr);
1633 }
1634
1635 /**
1636  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1637  * @tr: tracer
1638  * @tsk: the task with the latency
1639  * @cpu: The cpu that initiated the trace.
1640  * @cond_data: User data associated with a conditional snapshot
1641  *
1642  * Flip the buffers between the @tr and the max_tr and record information
1643  * about which task was the cause of this latency.
1644  */
1645 void
1646 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1647               void *cond_data)
1648 {
1649         if (tr->stop_count)
1650                 return;
1651
1652         WARN_ON_ONCE(!irqs_disabled());
1653
1654         if (!tr->allocated_snapshot) {
1655                 /* Only the nop tracer should hit this when disabling */
1656                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1657                 return;
1658         }
1659
1660         arch_spin_lock(&tr->max_lock);
1661
1662         /* Inherit the recordable setting from array_buffer */
1663         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1664                 ring_buffer_record_on(tr->max_buffer.buffer);
1665         else
1666                 ring_buffer_record_off(tr->max_buffer.buffer);
1667
1668 #ifdef CONFIG_TRACER_SNAPSHOT
1669         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1670                 goto out_unlock;
1671 #endif
1672         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1673
1674         __update_max_tr(tr, tsk, cpu);
1675
1676  out_unlock:
1677         arch_spin_unlock(&tr->max_lock);
1678 }
1679
1680 /**
1681  * update_max_tr_single - only copy one trace over, and reset the rest
1682  * @tr: the trace array to snapshot
1683  * @tsk: task with the latency
1684  * @cpu: the cpu of the buffer to copy.
1685  *
1686  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1687  */
1688 void
1689 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1690 {
1691         int ret;
1692
1693         if (tr->stop_count)
1694                 return;
1695
1696         WARN_ON_ONCE(!irqs_disabled());
1697         if (!tr->allocated_snapshot) {
1698                 /* Only the nop tracer should hit this when disabling */
1699                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1700                 return;
1701         }
1702
1703         arch_spin_lock(&tr->max_lock);
1704
1705         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1706
1707         if (ret == -EBUSY) {
1708                 /*
1709                  * We failed to swap the buffer due to a commit taking
1710                  * place on this CPU. We fail to record, but we reset
1711                  * the max trace buffer (no one writes directly to it)
1712                  * and flag that it failed.
1713                  */
1714                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1715                         "Failed to swap buffers due to commit in progress\n");
1716         }
1717
1718         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1719
1720         __update_max_tr(tr, tsk, cpu);
1721         arch_spin_unlock(&tr->max_lock);
1722 }
1723 #endif /* CONFIG_TRACER_MAX_TRACE */
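/*
 * Illustrative userspace sketch (not built): reading the saved maximum
 * latency that the code above exposes through tracefs. Assumes tracefs is
 * mounted at /sys/kernel/tracing; the value is reported in microseconds.
 */
#if 0
#include <stdio.h>

int main(void)
{
        char buf[64];
        FILE *f = fopen("/sys/kernel/tracing/tracing_max_latency", "r");

        if (!f)
                return 1;
        if (fgets(buf, sizeof(buf), f))
                printf("max latency (usec): %s", buf);
        fclose(f);
        return 0;
}
#endif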
1724
1725 static int wait_on_pipe(struct trace_iterator *iter, int full)
1726 {
1727         /* Iterators are static, they should be filled or empty */
1728         if (trace_buffer_iter(iter, iter->cpu_file))
1729                 return 0;
1730
1731         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1732                                 full);
1733 }
1734
1735 #ifdef CONFIG_FTRACE_STARTUP_TEST
1736 static bool selftests_can_run;
1737
1738 struct trace_selftests {
1739         struct list_head                list;
1740         struct tracer                   *type;
1741 };
1742
1743 static LIST_HEAD(postponed_selftests);
1744
1745 static int save_selftest(struct tracer *type)
1746 {
1747         struct trace_selftests *selftest;
1748
1749         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1750         if (!selftest)
1751                 return -ENOMEM;
1752
1753         selftest->type = type;
1754         list_add(&selftest->list, &postponed_selftests);
1755         return 0;
1756 }
1757
1758 static int run_tracer_selftest(struct tracer *type)
1759 {
1760         struct trace_array *tr = &global_trace;
1761         struct tracer *saved_tracer = tr->current_trace;
1762         int ret;
1763
1764         if (!type->selftest || tracing_selftest_disabled)
1765                 return 0;
1766
1767         /*
1768          * If a tracer registers early in boot up (before scheduling is
1769          * initialized and such), then do not run its selftests yet.
1770          * Instead, run it a little later in the boot process.
1771          */
1772         if (!selftests_can_run)
1773                 return save_selftest(type);
1774
1775         /*
1776          * Run a selftest on this tracer.
1777          * Here we reset the trace buffer, and set the current
1778          * tracer to be this tracer. The tracer can then run some
1779          * internal tracing to verify that everything is in order.
1780          * If we fail, we do not register this tracer.
1781          */
1782         tracing_reset_online_cpus(&tr->array_buffer);
1783
1784         tr->current_trace = type;
1785
1786 #ifdef CONFIG_TRACER_MAX_TRACE
1787         if (type->use_max_tr) {
1788                 /* If we expanded the buffers, make sure the max is expanded too */
1789                 if (ring_buffer_expanded)
1790                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1791                                            RING_BUFFER_ALL_CPUS);
1792                 tr->allocated_snapshot = true;
1793         }
1794 #endif
1795
1796         /* the test is responsible for initializing and enabling */
1797         pr_info("Testing tracer %s: ", type->name);
1798         ret = type->selftest(type, tr);
1799         /* the test is responsible for resetting too */
1800         tr->current_trace = saved_tracer;
1801         if (ret) {
1802                 printk(KERN_CONT "FAILED!\n");
1803                 /* Add the warning after printing 'FAILED' */
1804                 WARN_ON(1);
1805                 return -1;
1806         }
1807         /* Only reset on passing, to avoid touching corrupted buffers */
1808         tracing_reset_online_cpus(&tr->array_buffer);
1809
1810 #ifdef CONFIG_TRACER_MAX_TRACE
1811         if (type->use_max_tr) {
1812                 tr->allocated_snapshot = false;
1813
1814                 /* Shrink the max buffer again */
1815                 if (ring_buffer_expanded)
1816                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1817                                            RING_BUFFER_ALL_CPUS);
1818         }
1819 #endif
1820
1821         printk(KERN_CONT "PASSED\n");
1822         return 0;
1823 }
1824
1825 static __init int init_trace_selftests(void)
1826 {
1827         struct trace_selftests *p, *n;
1828         struct tracer *t, **last;
1829         int ret;
1830
1831         selftests_can_run = true;
1832
1833         mutex_lock(&trace_types_lock);
1834
1835         if (list_empty(&postponed_selftests))
1836                 goto out;
1837
1838         pr_info("Running postponed tracer tests:\n");
1839
1840         tracing_selftest_running = true;
1841         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1842                 /* This loop can take minutes when sanitizers are enabled, so
1843                  * let's make sure we allow RCU processing.
1844                  */
1845                 cond_resched();
1846                 ret = run_tracer_selftest(p->type);
1847                 /* If the test fails, then warn and remove from available_tracers */
1848                 if (ret < 0) {
1849                         WARN(1, "tracer: %s failed selftest, disabling\n",
1850                              p->type->name);
1851                         last = &trace_types;
1852                         for (t = trace_types; t; t = t->next) {
1853                                 if (t == p->type) {
1854                                         *last = t->next;
1855                                         break;
1856                                 }
1857                                 last = &t->next;
1858                         }
1859                 }
1860                 list_del(&p->list);
1861                 kfree(p);
1862         }
1863         tracing_selftest_running = false;
1864
1865  out:
1866         mutex_unlock(&trace_types_lock);
1867
1868         return 0;
1869 }
1870 core_initcall(init_trace_selftests);
1871 #else
1872 static inline int run_tracer_selftest(struct tracer *type)
1873 {
1874         return 0;
1875 }
1876 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1877
1878 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1879
1880 static void __init apply_trace_boot_options(void);
1881
1882 /**
1883  * register_tracer - register a tracer with the ftrace system.
1884  * @type: the plugin for the tracer
1885  *
1886  * Register a new plugin tracer.
1887  */
1888 int __init register_tracer(struct tracer *type)
1889 {
1890         struct tracer *t;
1891         int ret = 0;
1892
1893         if (!type->name) {
1894                 pr_info("Tracer must have a name\n");
1895                 return -1;
1896         }
1897
1898         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1899                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1900                 return -1;
1901         }
1902
1903         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1904                 pr_warn("Can not register tracer %s due to lockdown\n",
1905                            type->name);
1906                 return -EPERM;
1907         }
1908
1909         mutex_lock(&trace_types_lock);
1910
1911         tracing_selftest_running = true;
1912
1913         for (t = trace_types; t; t = t->next) {
1914                 if (strcmp(type->name, t->name) == 0) {
1915                         /* already found */
1916                         pr_info("Tracer %s already registered\n",
1917                                 type->name);
1918                         ret = -1;
1919                         goto out;
1920                 }
1921         }
1922
1923         if (!type->set_flag)
1924                 type->set_flag = &dummy_set_flag;
1925         if (!type->flags) {
1926                 /* allocate a dummy tracer_flags */
1927                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1928                 if (!type->flags) {
1929                         ret = -ENOMEM;
1930                         goto out;
1931                 }
1932                 type->flags->val = 0;
1933                 type->flags->opts = dummy_tracer_opt;
1934         } else
1935                 if (!type->flags->opts)
1936                         type->flags->opts = dummy_tracer_opt;
1937
1938         /* store the tracer for __set_tracer_option */
1939         type->flags->trace = type;
1940
1941         ret = run_tracer_selftest(type);
1942         if (ret < 0)
1943                 goto out;
1944
1945         type->next = trace_types;
1946         trace_types = type;
1947         add_tracer_options(&global_trace, type);
1948
1949  out:
1950         tracing_selftest_running = false;
1951         mutex_unlock(&trace_types_lock);
1952
1953         if (ret || !default_bootup_tracer)
1954                 goto out_unlock;
1955
1956         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1957                 goto out_unlock;
1958
1959         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1960         /* Do we want this tracer to start on bootup? */
1961         tracing_set_tracer(&global_trace, type->name);
1962         default_bootup_tracer = NULL;
1963
1964         apply_trace_boot_options();
1965
1966         /* Disable other selftests, since this boot-up tracer will break them. */
1967         tracing_selftest_disabled = true;
1968 #ifdef CONFIG_FTRACE_STARTUP_TEST
1969         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1970                type->name);
1971 #endif
1972
1973  out_unlock:
1974         return ret;
1975 }
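/*
 * Illustrative sketch (not built): the minimal shape of an in-tree tracer
 * plugin registered through register_tracer(). The tracer name and
 * callbacks are hypothetical; real tracers also provide things like
 * ->start, ->stop and a selftest.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
        /* Arm whatever hooks this tracer needs. */
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
        /* Undo example_tracer_init(). */
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static __init int init_example_tracer(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif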
1976
1977 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1978 {
1979         struct trace_buffer *buffer = buf->buffer;
1980
1981         if (!buffer)
1982                 return;
1983
1984         ring_buffer_record_disable(buffer);
1985
1986         /* Make sure all commits have finished */
1987         synchronize_rcu();
1988         ring_buffer_reset_cpu(buffer, cpu);
1989
1990         ring_buffer_record_enable(buffer);
1991 }
1992
1993 void tracing_reset_online_cpus(struct array_buffer *buf)
1994 {
1995         struct trace_buffer *buffer = buf->buffer;
1996         int cpu;
1997
1998         if (!buffer)
1999                 return;
2000
2001         ring_buffer_record_disable(buffer);
2002
2003         /* Make sure all commits have finished */
2004         synchronize_rcu();
2005
2006         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2007
2008         for_each_online_cpu(cpu)
2009                 ring_buffer_reset_cpu(buffer, cpu);
2010
2011         ring_buffer_record_enable(buffer);
2012 }
2013
2014 /* Must have trace_types_lock held */
2015 void tracing_reset_all_online_cpus(void)
2016 {
2017         struct trace_array *tr;
2018
2019         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2020                 if (!tr->clear_trace)
2021                         continue;
2022                 tr->clear_trace = false;
2023                 tracing_reset_online_cpus(&tr->array_buffer);
2024 #ifdef CONFIG_TRACER_MAX_TRACE
2025                 tracing_reset_online_cpus(&tr->max_buffer);
2026 #endif
2027         }
2028 }
2029
2030 static int *tgid_map;
2031
2032 #define SAVED_CMDLINES_DEFAULT 128
2033 #define NO_CMDLINE_MAP UINT_MAX
2034 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2035 struct saved_cmdlines_buffer {
2036         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2037         unsigned *map_cmdline_to_pid;
2038         unsigned cmdline_num;
2039         int cmdline_idx;
2040         char *saved_cmdlines;
2041 };
2042 static struct saved_cmdlines_buffer *savedcmd;
2043
2044 /* temporary disable recording */
2045 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2046
2047 static inline char *get_saved_cmdlines(int idx)
2048 {
2049         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2050 }
2051
2052 static inline void set_cmdline(int idx, const char *cmdline)
2053 {
2054         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2055 }
2056
2057 static int allocate_cmdlines_buffer(unsigned int val,
2058                                     struct saved_cmdlines_buffer *s)
2059 {
2060         s->map_cmdline_to_pid = kmalloc_array(val,
2061                                               sizeof(*s->map_cmdline_to_pid),
2062                                               GFP_KERNEL);
2063         if (!s->map_cmdline_to_pid)
2064                 return -ENOMEM;
2065
2066         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2067         if (!s->saved_cmdlines) {
2068                 kfree(s->map_cmdline_to_pid);
2069                 return -ENOMEM;
2070         }
2071
2072         s->cmdline_idx = 0;
2073         s->cmdline_num = val;
2074         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2075                sizeof(s->map_pid_to_cmdline));
2076         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2077                val * sizeof(*s->map_cmdline_to_pid));
2078
2079         return 0;
2080 }
2081
2082 static int trace_create_savedcmd(void)
2083 {
2084         int ret;
2085
2086         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2087         if (!savedcmd)
2088                 return -ENOMEM;
2089
2090         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2091         if (ret < 0) {
2092                 kfree(savedcmd);
2093                 savedcmd = NULL;
2094                 return -ENOMEM;
2095         }
2096
2097         return 0;
2098 }
2099
2100 int is_tracing_stopped(void)
2101 {
2102         return global_trace.stop_count;
2103 }
2104
2105 /**
2106  * tracing_start - quick start of the tracer
2107  *
2108  * If tracing is enabled but was stopped by tracing_stop,
2109  * this will start the tracer back up.
2110  */
2111 void tracing_start(void)
2112 {
2113         struct trace_buffer *buffer;
2114         unsigned long flags;
2115
2116         if (tracing_disabled)
2117                 return;
2118
2119         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2120         if (--global_trace.stop_count) {
2121                 if (global_trace.stop_count < 0) {
2122                         /* Someone screwed up their debugging */
2123                         WARN_ON_ONCE(1);
2124                         global_trace.stop_count = 0;
2125                 }
2126                 goto out;
2127         }
2128
2129         /* Prevent the buffers from switching */
2130         arch_spin_lock(&global_trace.max_lock);
2131
2132         buffer = global_trace.array_buffer.buffer;
2133         if (buffer)
2134                 ring_buffer_record_enable(buffer);
2135
2136 #ifdef CONFIG_TRACER_MAX_TRACE
2137         buffer = global_trace.max_buffer.buffer;
2138         if (buffer)
2139                 ring_buffer_record_enable(buffer);
2140 #endif
2141
2142         arch_spin_unlock(&global_trace.max_lock);
2143
2144  out:
2145         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2146 }
2147
2148 static void tracing_start_tr(struct trace_array *tr)
2149 {
2150         struct trace_buffer *buffer;
2151         unsigned long flags;
2152
2153         if (tracing_disabled)
2154                 return;
2155
2156         /* If global, we need to also start the max tracer */
2157         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2158                 return tracing_start();
2159
2160         raw_spin_lock_irqsave(&tr->start_lock, flags);
2161
2162         if (--tr->stop_count) {
2163                 if (tr->stop_count < 0) {
2164                         /* Someone screwed up their debugging */
2165                         WARN_ON_ONCE(1);
2166                         tr->stop_count = 0;
2167                 }
2168                 goto out;
2169         }
2170
2171         buffer = tr->array_buffer.buffer;
2172         if (buffer)
2173                 ring_buffer_record_enable(buffer);
2174
2175  out:
2176         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2177 }
2178
2179 /**
2180  * tracing_stop - quick stop of the tracer
2181  *
2182  * Lightweight way to stop tracing. Use in conjunction with
2183  * tracing_start.
2184  */
2185 void tracing_stop(void)
2186 {
2187         struct trace_buffer *buffer;
2188         unsigned long flags;
2189
2190         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2191         if (global_trace.stop_count++)
2192                 goto out;
2193
2194         /* Prevent the buffers from switching */
2195         arch_spin_lock(&global_trace.max_lock);
2196
2197         buffer = global_trace.array_buffer.buffer;
2198         if (buffer)
2199                 ring_buffer_record_disable(buffer);
2200
2201 #ifdef CONFIG_TRACER_MAX_TRACE
2202         buffer = global_trace.max_buffer.buffer;
2203         if (buffer)
2204                 ring_buffer_record_disable(buffer);
2205 #endif
2206
2207         arch_spin_unlock(&global_trace.max_lock);
2208
2209  out:
2210         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2211 }
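/*
 * Illustrative sketch (not built): built-in debug code freezing the trace
 * buffers once an anomaly is detected, so the events leading up to it are
 * preserved for inspection.
 */
#if 0
static void example_on_anomaly(void)
{
        tracing_stop();
        pr_info("tracing stopped; inspect /sys/kernel/tracing/trace\n");
}
#endif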
2212
2213 static void tracing_stop_tr(struct trace_array *tr)
2214 {
2215         struct trace_buffer *buffer;
2216         unsigned long flags;
2217
2218         /* If global, we need to also stop the max tracer */
2219         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2220                 return tracing_stop();
2221
2222         raw_spin_lock_irqsave(&tr->start_lock, flags);
2223         if (tr->stop_count++)
2224                 goto out;
2225
2226         buffer = tr->array_buffer.buffer;
2227         if (buffer)
2228                 ring_buffer_record_disable(buffer);
2229
2230  out:
2231         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2232 }
2233
2234 static int trace_save_cmdline(struct task_struct *tsk)
2235 {
2236         unsigned pid, idx;
2237
2238         /* treat recording of idle task as a success */
2239         if (!tsk->pid)
2240                 return 1;
2241
2242         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2243                 return 0;
2244
2245         /*
2246          * It's not the end of the world if we don't get
2247          * the lock, but we also don't want to spin
2248          * nor do we want to disable interrupts,
2249          * so if we miss here, then better luck next time.
2250          */
2251         if (!arch_spin_trylock(&trace_cmdline_lock))
2252                 return 0;
2253
2254         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2255         if (idx == NO_CMDLINE_MAP) {
2256                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2257
2258                 /*
2259                  * Check whether the cmdline buffer at idx has a pid
2260                  * mapped. We are going to overwrite that entry so we
2261                  * need to clear the map_pid_to_cmdline. Otherwise we
2262                  * would read the new comm for the old pid.
2263                  */
2264                 pid = savedcmd->map_cmdline_to_pid[idx];
2265                 if (pid != NO_CMDLINE_MAP)
2266                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2267
2268                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2269                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2270
2271                 savedcmd->cmdline_idx = idx;
2272         }
2273
2274         set_cmdline(idx, tsk->comm);
2275
2276         arch_spin_unlock(&trace_cmdline_lock);
2277
2278         return 1;
2279 }
2280
2281 static void __trace_find_cmdline(int pid, char comm[])
2282 {
2283         unsigned map;
2284
2285         if (!pid) {
2286                 strcpy(comm, "<idle>");
2287                 return;
2288         }
2289
2290         if (WARN_ON_ONCE(pid < 0)) {
2291                 strcpy(comm, "<XXX>");
2292                 return;
2293         }
2294
2295         if (pid > PID_MAX_DEFAULT) {
2296                 strcpy(comm, "<...>");
2297                 return;
2298         }
2299
2300         map = savedcmd->map_pid_to_cmdline[pid];
2301         if (map != NO_CMDLINE_MAP)
2302                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2303         else
2304                 strcpy(comm, "<...>");
2305 }
2306
2307 void trace_find_cmdline(int pid, char comm[])
2308 {
2309         preempt_disable();
2310         arch_spin_lock(&trace_cmdline_lock);
2311
2312         __trace_find_cmdline(pid, comm);
2313
2314         arch_spin_unlock(&trace_cmdline_lock);
2315         preempt_enable();
2316 }
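/*
 * Illustrative sketch (not built): how output code typically resolves a
 * recorded pid back to a comm with trace_find_cmdline().
 */
#if 0
static void example_print_comm(struct trace_seq *s, int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        trace_seq_printf(s, "%s-%d", comm, pid);
}
#endif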
2317
2318 int trace_find_tgid(int pid)
2319 {
2320         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2321                 return 0;
2322
2323         return tgid_map[pid];
2324 }
2325
2326 static int trace_save_tgid(struct task_struct *tsk)
2327 {
2328         /* treat recording of idle task as a success */
2329         if (!tsk->pid)
2330                 return 1;
2331
2332         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2333                 return 0;
2334
2335         tgid_map[tsk->pid] = tsk->tgid;
2336         return 1;
2337 }
2338
2339 static bool tracing_record_taskinfo_skip(int flags)
2340 {
2341         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2342                 return true;
2343         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2344                 return true;
2345         if (!__this_cpu_read(trace_taskinfo_save))
2346                 return true;
2347         return false;
2348 }
2349
2350 /**
2351  * tracing_record_taskinfo - record the task info of a task
2352  *
2353  * @task:  task to record
2354  * @flags: TRACE_RECORD_CMDLINE for recording comm
2355  *         TRACE_RECORD_TGID for recording tgid
2356  */
2357 void tracing_record_taskinfo(struct task_struct *task, int flags)
2358 {
2359         bool done;
2360
2361         if (tracing_record_taskinfo_skip(flags))
2362                 return;
2363
2364         /*
2365          * Record as much task information as possible. If some fail, continue
2366          * to try to record the others.
2367          */
2368         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2369         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2370
2371         /* If recording any information failed, try again soon. */
2372         if (!done)
2373                 return;
2374
2375         __this_cpu_write(trace_taskinfo_save, false);
2376 }
2377
2378 /**
2379  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2380  *
2381  * @prev: previous task during sched_switch
2382  * @next: next task during sched_switch
2383  * @flags: TRACE_RECORD_CMDLINE for recording comm
2384  *         TRACE_RECORD_TGID for recording tgid
2385  */
2386 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2387                                           struct task_struct *next, int flags)
2388 {
2389         bool done;
2390
2391         if (tracing_record_taskinfo_skip(flags))
2392                 return;
2393
2394         /*
2395          * Record as much task information as possible. If some fail, continue
2396          * to try to record the others.
2397          */
2398         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2399         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2400         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2401         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2402
2403         /* If recording any information failed, try again soon. */
2404         if (!done)
2405                 return;
2406
2407         __this_cpu_write(trace_taskinfo_save, false);
2408 }
2409
2410 /* Helpers to record a specific task information */
2411 void tracing_record_cmdline(struct task_struct *task)
2412 {
2413         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2414 }
2415
2416 void tracing_record_tgid(struct task_struct *task)
2417 {
2418         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2419 }
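/*
 * Illustrative sketch (not built): a caller that wants the current task's
 * comm and tgid to be resolvable later when the trace is read.
 */
#if 0
static void example_record_current(void)
{
        tracing_record_cmdline(current);
        tracing_record_tgid(current);
}
#endif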
2420
2421 /*
2422  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2423  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2424  * simplifies those functions and keeps them in sync.
2425  */
2426 enum print_line_t trace_handle_return(struct trace_seq *s)
2427 {
2428         return trace_seq_has_overflowed(s) ?
2429                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2430 }
2431 EXPORT_SYMBOL_GPL(trace_handle_return);
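/*
 * Illustrative sketch (not built): a trace_event output callback ending
 * with trace_handle_return(). The event layout and format string are
 * hypothetical.
 */
#if 0
static enum print_line_t
example_event_print(struct trace_iterator *iter, int flags,
                    struct trace_event *event)
{
        struct trace_seq *s = &iter->seq;

        trace_seq_printf(s, "example: pid=%d\n", iter->ent->pid);

        /* PARTIAL_LINE if the trace_seq overflowed, HANDLED otherwise. */
        return trace_handle_return(s);
}
#endif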
2432
2433 void
2434 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2435                              unsigned long flags, int pc)
2436 {
2437         struct task_struct *tsk = current;
2438
2439         entry->preempt_count            = pc & 0xff;
2440         entry->pid                      = (tsk) ? tsk->pid : 0;
2441         entry->type                     = type;
2442         entry->flags =
2443 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2444                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2445 #else
2446                 TRACE_FLAG_IRQS_NOSUPPORT |
2447 #endif
2448                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2449                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2450                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2451                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2452                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2453 }
2454 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2455
2456 struct ring_buffer_event *
2457 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2458                           int type,
2459                           unsigned long len,
2460                           unsigned long flags, int pc)
2461 {
2462         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2463 }
2464
2465 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2466 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2467 static int trace_buffered_event_ref;
2468
2469 /**
2470  * trace_buffered_event_enable - enable buffering events
2471  *
2472  * When events are being filtered, it is quicker to use a temporary
2473  * buffer to write the event data into if there's a likely chance
2474  * that it will not be committed. The discard of the ring buffer
2475  * is not as fast as committing, and is much slower than copying
2476  * a commit.
2477  *
2478  * When an event is to be filtered, allocate per cpu buffers to
2479  * write the event data into, and if the event is filtered and discarded
2480  * it is simply dropped, otherwise, the entire data is to be committed
2481  * in one shot.
2482  */
2483 void trace_buffered_event_enable(void)
2484 {
2485         struct ring_buffer_event *event;
2486         struct page *page;
2487         int cpu;
2488
2489         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2490
2491         if (trace_buffered_event_ref++)
2492                 return;
2493
2494         for_each_tracing_cpu(cpu) {
2495                 page = alloc_pages_node(cpu_to_node(cpu),
2496                                         GFP_KERNEL | __GFP_NORETRY, 0);
2497                 if (!page)
2498                         goto failed;
2499
2500                 event = page_address(page);
2501                 memset(event, 0, sizeof(*event));
2502
2503                 per_cpu(trace_buffered_event, cpu) = event;
2504
2505                 preempt_disable();
2506                 if (cpu == smp_processor_id() &&
2507                     this_cpu_read(trace_buffered_event) !=
2508                     per_cpu(trace_buffered_event, cpu))
2509                         WARN_ON_ONCE(1);
2510                 preempt_enable();
2511         }
2512
2513         return;
2514  failed:
2515         trace_buffered_event_disable();
2516 }
2517
2518 static void enable_trace_buffered_event(void *data)
2519 {
2520         /* Probably not needed, but do it anyway */
2521         smp_rmb();
2522         this_cpu_dec(trace_buffered_event_cnt);
2523 }
2524
2525 static void disable_trace_buffered_event(void *data)
2526 {
2527         this_cpu_inc(trace_buffered_event_cnt);
2528 }
2529
2530 /**
2531  * trace_buffered_event_disable - disable buffering events
2532  *
2533  * When a filter is removed, it is faster to not use the buffered
2534  * events, and to commit directly into the ring buffer. Free up
2535  * the temp buffers when there are no more users. This requires
2536  * special synchronization with current events.
2537  */
2538 void trace_buffered_event_disable(void)
2539 {
2540         int cpu;
2541
2542         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2543
2544         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2545                 return;
2546
2547         if (--trace_buffered_event_ref)
2548                 return;
2549
2550         preempt_disable();
2551         /* For each CPU, set the buffer as used. */
2552         smp_call_function_many(tracing_buffer_mask,
2553                                disable_trace_buffered_event, NULL, 1);
2554         preempt_enable();
2555
2556         /* Wait for all current users to finish */
2557         synchronize_rcu();
2558
2559         for_each_tracing_cpu(cpu) {
2560                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2561                 per_cpu(trace_buffered_event, cpu) = NULL;
2562         }
2563         /*
2564          * Make sure trace_buffered_event is NULL before clearing
2565          * trace_buffered_event_cnt.
2566          */
2567         smp_wmb();
2568
2569         preempt_disable();
2570         /* Do the work on each cpu */
2571         smp_call_function_many(tracing_buffer_mask,
2572                                enable_trace_buffered_event, NULL, 1);
2573         preempt_enable();
2574 }
2575
2576 static struct trace_buffer *temp_buffer;
2577
2578 struct ring_buffer_event *
2579 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2580                           struct trace_event_file *trace_file,
2581                           int type, unsigned long len,
2582                           unsigned long flags, int pc)
2583 {
2584         struct ring_buffer_event *entry;
2585         int val;
2586
2587         *current_rb = trace_file->tr->array_buffer.buffer;
2588
2589         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2590              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2591             (entry = this_cpu_read(trace_buffered_event))) {
2592                 /* Try to use the per cpu buffer first */
2593                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2594                 if (val == 1) {
2595                         trace_event_setup(entry, type, flags, pc);
2596                         entry->array[0] = len;
2597                         return entry;
2598                 }
2599                 this_cpu_dec(trace_buffered_event_cnt);
2600         }
2601
2602         entry = __trace_buffer_lock_reserve(*current_rb,
2603                                             type, len, flags, pc);
2604         /*
2605          * If tracing is off, but we have triggers enabled,
2606          * we still need to look at the event data. Use the temp_buffer
2607          * to store the trace event for the trigger to use. It's recursion
2608          * safe and will not be recorded anywhere.
2609          */
2610         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2611                 *current_rb = temp_buffer;
2612                 entry = __trace_buffer_lock_reserve(*current_rb,
2613                                                     type, len, flags, pc);
2614         }
2615         return entry;
2616 }
2617 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2618
2619 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2620 static DEFINE_MUTEX(tracepoint_printk_mutex);
2621
2622 static void output_printk(struct trace_event_buffer *fbuffer)
2623 {
2624         struct trace_event_call *event_call;
2625         struct trace_event_file *file;
2626         struct trace_event *event;
2627         unsigned long flags;
2628         struct trace_iterator *iter = tracepoint_print_iter;
2629
2630         /* We should never get here if iter is NULL */
2631         if (WARN_ON_ONCE(!iter))
2632                 return;
2633
2634         event_call = fbuffer->trace_file->event_call;
2635         if (!event_call || !event_call->event.funcs ||
2636             !event_call->event.funcs->trace)
2637                 return;
2638
2639         file = fbuffer->trace_file;
2640         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2641             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2642              !filter_match_preds(file->filter, fbuffer->entry)))
2643                 return;
2644
2645         event = &fbuffer->trace_file->event_call->event;
2646
2647         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2648         trace_seq_init(&iter->seq);
2649         iter->ent = fbuffer->entry;
2650         event_call->event.funcs->trace(iter, 0, event);
2651         trace_seq_putc(&iter->seq, 0);
2652         printk("%s", iter->seq.buffer);
2653
2654         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2655 }
2656
2657 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2658                              void __user *buffer, size_t *lenp,
2659                              loff_t *ppos)
2660 {
2661         int save_tracepoint_printk;
2662         int ret;
2663
2664         mutex_lock(&tracepoint_printk_mutex);
2665         save_tracepoint_printk = tracepoint_printk;
2666
2667         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2668
2669         /*
2670          * This will force exiting early, as tracepoint_printk
2671          * is always zero when tracepoint_print_iter is not allocated.
2672          */
2673         if (!tracepoint_print_iter)
2674                 tracepoint_printk = 0;
2675
2676         if (save_tracepoint_printk == tracepoint_printk)
2677                 goto out;
2678
2679         if (tracepoint_printk)
2680                 static_key_enable(&tracepoint_printk_key.key);
2681         else
2682                 static_key_disable(&tracepoint_printk_key.key);
2683
2684  out:
2685         mutex_unlock(&tracepoint_printk_mutex);
2686
2687         return ret;
2688 }
2689
2690 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2691 {
2692         if (static_key_false(&tracepoint_printk_key.key))
2693                 output_printk(fbuffer);
2694
2695         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2696                                     fbuffer->event, fbuffer->entry,
2697                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2698 }
2699 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2700
2701 /*
2702  * Skip 3:
2703  *
2704  *   trace_buffer_unlock_commit_regs()
2705  *   trace_event_buffer_commit()
2706  *   trace_event_raw_event_xxx()
2707  */
2708 # define STACK_SKIP 3
2709
2710 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2711                                      struct trace_buffer *buffer,
2712                                      struct ring_buffer_event *event,
2713                                      unsigned long flags, int pc,
2714                                      struct pt_regs *regs)
2715 {
2716         __buffer_unlock_commit(buffer, event);
2717
2718         /*
2719          * If regs is not set, then skip the necessary functions.
2720          * Note, we can still get here via blktrace, wakeup tracer
2721          * and mmiotrace, but that's ok if they lose a function or
2722          * two. They are not that meaningful.
2723          */
2724         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2725         ftrace_trace_userstack(buffer, flags, pc);
2726 }
2727
2728 /*
2729  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2730  */
2731 void
2732 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2733                                    struct ring_buffer_event *event)
2734 {
2735         __buffer_unlock_commit(buffer, event);
2736 }
2737
2738 static void
2739 trace_process_export(struct trace_export *export,
2740                struct ring_buffer_event *event)
2741 {
2742         struct trace_entry *entry;
2743         unsigned int size = 0;
2744
2745         entry = ring_buffer_event_data(event);
2746         size = ring_buffer_event_length(event);
2747         export->write(export, entry, size);
2748 }
2749
2750 static DEFINE_MUTEX(ftrace_export_lock);
2751
2752 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2753
2754 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2755
2756 static inline void ftrace_exports_enable(void)
2757 {
2758         static_branch_enable(&ftrace_exports_enabled);
2759 }
2760
2761 static inline void ftrace_exports_disable(void)
2762 {
2763         static_branch_disable(&ftrace_exports_enabled);
2764 }
2765
2766 static void ftrace_exports(struct ring_buffer_event *event)
2767 {
2768         struct trace_export *export;
2769
2770         preempt_disable_notrace();
2771
2772         export = rcu_dereference_raw_check(ftrace_exports_list);
2773         while (export) {
2774                 trace_process_export(export, event);
2775                 export = rcu_dereference_raw_check(export->next);
2776         }
2777
2778         preempt_enable_notrace();
2779 }
2780
2781 static inline void
2782 add_trace_export(struct trace_export **list, struct trace_export *export)
2783 {
2784         rcu_assign_pointer(export->next, *list);
2785         /*
2786          * We are adding export to the list, but another
2787          * CPU might be walking that list. We need to make sure
2788          * the export->next pointer is valid before another CPU sees
2789          * the export pointer inserted into the list.
2790          */
2791         rcu_assign_pointer(*list, export);
2792 }
2793
2794 static inline int
2795 rm_trace_export(struct trace_export **list, struct trace_export *export)
2796 {
2797         struct trace_export **p;
2798
2799         for (p = list; *p != NULL; p = &(*p)->next)
2800                 if (*p == export)
2801                         break;
2802
2803         if (*p != export)
2804                 return -1;
2805
2806         rcu_assign_pointer(*p, (*p)->next);
2807
2808         return 0;
2809 }
2810
2811 static inline void
2812 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2813 {
2814         if (*list == NULL)
2815                 ftrace_exports_enable();
2816
2817         add_trace_export(list, export);
2818 }
2819
2820 static inline int
2821 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2822 {
2823         int ret;
2824
2825         ret = rm_trace_export(list, export);
2826         if (*list == NULL)
2827                 ftrace_exports_disable();
2828
2829         return ret;
2830 }
2831
2832 int register_ftrace_export(struct trace_export *export)
2833 {
2834         if (WARN_ON_ONCE(!export->write))
2835                 return -1;
2836
2837         mutex_lock(&ftrace_export_lock);
2838
2839         add_ftrace_export(&ftrace_exports_list, export);
2840
2841         mutex_unlock(&ftrace_export_lock);
2842
2843         return 0;
2844 }
2845 EXPORT_SYMBOL_GPL(register_ftrace_export);
2846
2847 int unregister_ftrace_export(struct trace_export *export)
2848 {
2849         int ret;
2850
2851         mutex_lock(&ftrace_export_lock);
2852
2853         ret = rm_ftrace_export(&ftrace_exports_list, export);
2854
2855         mutex_unlock(&ftrace_export_lock);
2856
2857         return ret;
2858 }
2859 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
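/*
 * Illustrative sketch (not built): forwarding every exported trace event to
 * an external sink through the trace_export interface declared in
 * <linux/trace.h>. The sink itself is hypothetical.
 */
#if 0
static void example_export_write(struct trace_export *export,
                                 const void *entry, unsigned int size)
{
        /* e.g. copy the raw trace entry to a device, firmware log, ... */
}

static struct trace_export example_export = {
        .write  = example_export_write,
};

static int __init example_export_init(void)
{
        return register_ftrace_export(&example_export);
}

static void __exit example_export_exit(void)
{
        unregister_ftrace_export(&example_export);
}
#endif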
2860
2861 void
2862 trace_function(struct trace_array *tr,
2863                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2864                int pc)
2865 {
2866         struct trace_event_call *call = &event_function;
2867         struct trace_buffer *buffer = tr->array_buffer.buffer;
2868         struct ring_buffer_event *event;
2869         struct ftrace_entry *entry;
2870
2871         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2872                                             flags, pc);
2873         if (!event)
2874                 return;
2875         entry   = ring_buffer_event_data(event);
2876         entry->ip                       = ip;
2877         entry->parent_ip                = parent_ip;
2878
2879         if (!call_filter_check_discard(call, entry, buffer, event)) {
2880                 if (static_branch_unlikely(&ftrace_exports_enabled))
2881                         ftrace_exports(event);
2882                 __buffer_unlock_commit(buffer, event);
2883         }
2884 }
2885
2886 #ifdef CONFIG_STACKTRACE
2887
2888 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2889 #define FTRACE_KSTACK_NESTING   4
2890
2891 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2892
2893 struct ftrace_stack {
2894         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2895 };
2896
2897
2898 struct ftrace_stacks {
2899         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2900 };
2901
2902 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2903 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2904
2905 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2906                                  unsigned long flags,
2907                                  int skip, int pc, struct pt_regs *regs)
2908 {
2909         struct trace_event_call *call = &event_kernel_stack;
2910         struct ring_buffer_event *event;
2911         unsigned int size, nr_entries;
2912         struct ftrace_stack *fstack;
2913         struct stack_entry *entry;
2914         int stackidx;
2915
2916         /*
2917          * Add one, for this function and the call to stack_trace_save().
2918          * If regs is set, then these functions will not be in the way.
2919          */
2920 #ifndef CONFIG_UNWINDER_ORC
2921         if (!regs)
2922                 skip++;
2923 #endif
2924
2925         /*
2926          * Since events can happen in NMIs there's no safe way to
2927          * use the per cpu ftrace_stacks. We reserve a nesting level and
2928          * if an interrupt or NMI comes in, it will just use the next
2929          * level (or be dropped if we are nested too deeply).
2930          */
2931         preempt_disable_notrace();
2932
2933         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2934
2935         /* This should never happen. If it does, yell once and skip */
2936         if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2937                 goto out;
2938
2939         /*
2940          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2941          * interrupt will either see the value pre increment or post
2942          * increment. If the interrupt happens pre increment it will have
2943          * restored the counter when it returns.  We just need a barrier to
2944          * keep gcc from moving things around.
2945          */
2946         barrier();
2947
2948         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2949         size = ARRAY_SIZE(fstack->calls);
2950
2951         if (regs) {
2952                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2953                                                    size, skip);
2954         } else {
2955                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2956         }
2957
2958         size = nr_entries * sizeof(unsigned long);
2959         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2960                                             sizeof(*entry) + size, flags, pc);
2961         if (!event)
2962                 goto out;
2963         entry = ring_buffer_event_data(event);
2964
2965         memcpy(&entry->caller, fstack->calls, size);
2966         entry->size = nr_entries;
2967
2968         if (!call_filter_check_discard(call, entry, buffer, event))
2969                 __buffer_unlock_commit(buffer, event);
2970
2971  out:
2972         /* Again, don't let gcc optimize things here */
2973         barrier();
2974         __this_cpu_dec(ftrace_stack_reserve);
2975         preempt_enable_notrace();
2976
2977 }
2978
2979 static inline void ftrace_trace_stack(struct trace_array *tr,
2980                                       struct trace_buffer *buffer,
2981                                       unsigned long flags,
2982                                       int skip, int pc, struct pt_regs *regs)
2983 {
2984         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2985                 return;
2986
2987         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2988 }
2989
2990 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2991                    int pc)
2992 {
2993         struct trace_buffer *buffer = tr->array_buffer.buffer;
2994
2995         if (rcu_is_watching()) {
2996                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2997                 return;
2998         }
2999
3000         /*
3001          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3002          * but if the above rcu_is_watching() failed, then the NMI
3003          * triggered someplace critical, and rcu_irq_enter() should
3004          * not be called from NMI.
3005          */
3006         if (unlikely(in_nmi()))
3007                 return;
3008
3009         rcu_irq_enter_irqson();
3010         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3011         rcu_irq_exit_irqson();
3012 }
3013
3014 /**
3015  * trace_dump_stack - record a stack back trace in the trace buffer
3016  * @skip: Number of functions to skip (helper handlers)
3017  */
3018 void trace_dump_stack(int skip)
3019 {
3020         unsigned long flags;
3021
3022         if (tracing_disabled || tracing_selftest_running)
3023                 return;
3024
3025         local_save_flags(flags);
3026
3027 #ifndef CONFIG_UNWINDER_ORC
3028         /* Skip 1 to skip this function. */
3029         skip++;
3030 #endif
3031         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3032                              flags, skip, preempt_count(), NULL);
3033 }
3034 EXPORT_SYMBOL_GPL(trace_dump_stack);
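/*
 * Illustrative sketch (not built): recording the current call chain into
 * the trace buffer from a suspicious code path.
 */
#if 0
static void example_debug_path(void)
{
        /* Record where we were called from, skipping no extra frames. */
        trace_dump_stack(0);
}
#endif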
3035
3036 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3037 static DEFINE_PER_CPU(int, user_stack_count);
3038
3039 static void
3040 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3041 {
3042         struct trace_event_call *call = &event_user_stack;
3043         struct ring_buffer_event *event;
3044         struct userstack_entry *entry;
3045
3046         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3047                 return;
3048
3049         /*
3050          * NMIs can not handle page faults, even with fixups.
3051          * Saving the user stack can (and often does) fault.
3052          */
3053         if (unlikely(in_nmi()))
3054                 return;
3055
3056         /*
3057          * prevent recursion, since the user stack tracing may
3058          * trigger other kernel events.
3059          */
3060         preempt_disable();
3061         if (__this_cpu_read(user_stack_count))
3062                 goto out;
3063
3064         __this_cpu_inc(user_stack_count);
3065
3066         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3067                                             sizeof(*entry), flags, pc);
3068         if (!event)
3069                 goto out_drop_count;
3070         entry   = ring_buffer_event_data(event);
3071
3072         entry->tgid             = current->tgid;
3073         memset(&entry->caller, 0, sizeof(entry->caller));
3074
3075         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3076         if (!call_filter_check_discard(call, entry, buffer, event))
3077                 __buffer_unlock_commit(buffer, event);
3078
3079  out_drop_count:
3080         __this_cpu_dec(user_stack_count);
3081  out:
3082         preempt_enable();
3083 }
3084 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3085 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3086                                    unsigned long flags, int pc)
3087 {
3088 }
3089 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3090
3091 #endif /* CONFIG_STACKTRACE */
3092
3093 /* created for use with alloc_percpu */
3094 struct trace_buffer_struct {
3095         int nesting;
3096         char buffer[4][TRACE_BUF_SIZE];
3097 };
3098
3099 static struct trace_buffer_struct *trace_percpu_buffer;
3100
3101 /*
3102  * This allows for lockless recording.  If we're nested too deeply, then
3103  * this returns NULL.
3104  */
3105 static char *get_trace_buf(void)
3106 {
3107         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3108
3109         if (!buffer || buffer->nesting >= 4)
3110                 return NULL;
3111
3112         buffer->nesting++;
3113
3114         /* Interrupts must see nesting incremented before we use the buffer */
3115         barrier();
3116         return &buffer->buffer[buffer->nesting][0];
3117 }
3118
3119 static void put_trace_buf(void)
3120 {
3121         /* Don't let the decrement of nesting leak before this */
3122         barrier();
3123         this_cpu_dec(trace_percpu_buffer->nesting);
3124 }
3125
3126 static int alloc_percpu_trace_buffer(void)
3127 {
3128         struct trace_buffer_struct *buffers;
3129
3130         buffers = alloc_percpu(struct trace_buffer_struct);
3131         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3132                 return -ENOMEM;
3133
3134         trace_percpu_buffer = buffers;
3135         return 0;
3136 }
3137
3138 static int buffers_allocated;
3139
3140 void trace_printk_init_buffers(void)
3141 {
3142         if (buffers_allocated)
3143                 return;
3144
3145         if (alloc_percpu_trace_buffer())
3146                 return;
3147
3148         /* trace_printk() is for debug use only. Don't use it in production. */
3149
3150         pr_warn("\n");
3151         pr_warn("**********************************************************\n");
3152         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3153         pr_warn("**                                                      **\n");
3154         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3155         pr_warn("**                                                      **\n");
3156         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3157         pr_warn("** unsafe for production use.                           **\n");
3158         pr_warn("**                                                      **\n");
3159         pr_warn("** If you see this message and you are not debugging    **\n");
3160         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3161         pr_warn("**                                                      **\n");
3162         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3163         pr_warn("**********************************************************\n");
3164
3165         /* Expand the buffers to set size */
3166         tracing_update_buffers();
3167
3168         buffers_allocated = 1;
3169
3170         /*
3171          * trace_printk_init_buffers() can be called by modules.
3172          * If that happens, then we need to start cmdline recording
3173          * directly here. If the global_trace.array_buffer.buffer is already
3174          * allocated here, then this was called by module code.
3175          */
3176         if (global_trace.array_buffer.buffer)
3177                 tracing_start_cmdline_record();
3178 }
3179 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
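/*
 * Illustrative sketch (not built): debug-only use of trace_printk(). Having
 * trace_printk() compiled in is what causes trace_printk_init_buffers() to
 * run and print the banner above. Never use it in production code.
 */
#if 0
static void example_debug_event(int cpu, u64 delta_ns)
{
        trace_printk("cpu=%d latency=%llu ns\n", cpu, delta_ns);
}
#endif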
3180
3181 void trace_printk_start_comm(void)
3182 {
3183         /* Start tracing comms if trace printk is set */
3184         if (!buffers_allocated)
3185                 return;
3186         tracing_start_cmdline_record();
3187 }
3188
3189 static void trace_printk_start_stop_comm(int enabled)
3190 {
3191         if (!buffers_allocated)
3192                 return;
3193
3194         if (enabled)
3195                 tracing_start_cmdline_record();
3196         else
3197                 tracing_stop_cmdline_record();
3198 }
3199
3200 /**
3201  * trace_vbprintk - write binary msg to tracing buffer
3202  * @ip:    The address of the caller
3203  * @fmt:   The string format to write to the buffer
3204  * @args:  Arguments for @fmt
3205  */
3206 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3207 {
3208         struct trace_event_call *call = &event_bprint;
3209         struct ring_buffer_event *event;
3210         struct trace_buffer *buffer;
3211         struct trace_array *tr = &global_trace;
3212         struct bprint_entry *entry;
3213         unsigned long flags;
3214         char *tbuffer;
3215         int len = 0, size, pc;
3216
3217         if (unlikely(tracing_selftest_running || tracing_disabled))
3218                 return 0;
3219
3220         /* Don't pollute graph traces with trace_vprintk internals */
3221         pause_graph_tracing();
3222
3223         pc = preempt_count();
3224         preempt_disable_notrace();
3225
3226         tbuffer = get_trace_buf();
3227         if (!tbuffer) {
3228                 len = 0;
3229                 goto out_nobuffer;
3230         }
3231
3232         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3233
3234         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3235                 goto out_put;
3236
3237         local_save_flags(flags);
3238         size = sizeof(*entry) + sizeof(u32) * len;
3239         buffer = tr->array_buffer.buffer;
3240         ring_buffer_nest_start(buffer);
3241         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3242                                             flags, pc);
3243         if (!event)
3244                 goto out;
3245         entry = ring_buffer_event_data(event);
3246         entry->ip                       = ip;
3247         entry->fmt                      = fmt;
3248
3249         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3250         if (!call_filter_check_discard(call, entry, buffer, event)) {
3251                 __buffer_unlock_commit(buffer, event);
3252                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3253         }
3254
3255 out:
3256         ring_buffer_nest_end(buffer);
3257 out_put:
3258         put_trace_buf();
3259
3260 out_nobuffer:
3261         preempt_enable_notrace();
3262         unpause_graph_tracing();
3263
3264         return len;
3265 }
3266 EXPORT_SYMBOL_GPL(trace_vbprintk);
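
/*
 * Illustrative sketch (hypothetical wrapper, not part of this file):
 * trace_vbprintk() stores the @fmt pointer in the ring buffer entry
 * (entry->fmt above) rather than copying the string, so callers must pass
 * a persistent, constant format and are typically thin varargs wrappers:
 *
 *	static int demo_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *
 *		return ret;
 *	}
 */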
3267
3268 __printf(3, 0)
3269 static int
3270 __trace_array_vprintk(struct trace_buffer *buffer,
3271                       unsigned long ip, const char *fmt, va_list args)
3272 {
3273         struct trace_event_call *call = &event_print;
3274         struct ring_buffer_event *event;
3275         int len = 0, size, pc;
3276         struct print_entry *entry;
3277         unsigned long flags;
3278         char *tbuffer;
3279
3280         if (tracing_disabled || tracing_selftest_running)
3281                 return 0;
3282
3283         /* Don't pollute graph traces with trace_vprintk internals */
3284         pause_graph_tracing();
3285
3286         pc = preempt_count();
3287         preempt_disable_notrace();
3288
3289
3290         tbuffer = get_trace_buf();
3291         if (!tbuffer) {
3292                 len = 0;
3293                 goto out_nobuffer;
3294         }
3295
3296         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3297
3298         local_save_flags(flags);
3299         size = sizeof(*entry) + len + 1;
3300         ring_buffer_nest_start(buffer);
3301         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3302                                             flags, pc);
3303         if (!event)
3304                 goto out;
3305         entry = ring_buffer_event_data(event);
3306         entry->ip = ip;
3307
3308         memcpy(&entry->buf, tbuffer, len + 1);
3309         if (!call_filter_check_discard(call, entry, buffer, event)) {
3310                 __buffer_unlock_commit(buffer, event);
3311                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3312         }
3313
3314 out:
3315         ring_buffer_nest_end(buffer);
3316         put_trace_buf();
3317
3318 out_nobuffer:
3319         preempt_enable_notrace();
3320         unpause_graph_tracing();
3321
3322         return len;
3323 }
3324
3325 __printf(3, 0)
3326 int trace_array_vprintk(struct trace_array *tr,
3327                         unsigned long ip, const char *fmt, va_list args)
3328 {
3329         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3330 }
3331
3332 __printf(3, 0)
3333 int trace_array_printk(struct trace_array *tr,
3334                        unsigned long ip, const char *fmt, ...)
3335 {
3336         int ret;
3337         va_list ap;
3338
3339         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3340                 return 0;
3341
3342         if (!tr)
3343                 return -ENOENT;
3344
3345         va_start(ap, fmt);
3346         ret = trace_array_vprintk(tr, ip, fmt, ap);
3347         va_end(ap);
3348         return ret;
3349 }
3350 EXPORT_SYMBOL_GPL(trace_array_printk);
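
/*
 * Illustrative sketch (hypothetical caller; assumes the instance helper
 * trace_array_get_by_name() available in recent kernels): since
 * trace_array_printk() is exported, a module can write into a specific
 * instance buffer instead of the global one:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		trace_array_printk(tr, _THIS_IP_, "value=%d\n", 42);
 *		trace_array_put(tr);
 *	}
 */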
3351
3352 __printf(3, 4)
3353 int trace_array_printk_buf(struct trace_buffer *buffer,
3354                            unsigned long ip, const char *fmt, ...)
3355 {
3356         int ret;
3357         va_list ap;
3358
3359         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3360                 return 0;
3361
3362         va_start(ap, fmt);
3363         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3364         va_end(ap);
3365         return ret;
3366 }
3367
3368 __printf(2, 0)
3369 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3370 {
3371         return trace_array_vprintk(&global_trace, ip, fmt, args);
3372 }
3373 EXPORT_SYMBOL_GPL(trace_vprintk);
3374
3375 static void trace_iterator_increment(struct trace_iterator *iter)
3376 {
3377         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3378
3379         iter->idx++;
3380         if (buf_iter)
3381                 ring_buffer_read(buf_iter, NULL);
3382 }
3383
3384 static struct trace_entry *
3385 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3386                 unsigned long *lost_events)
3387 {
3388         struct ring_buffer_event *event;
3389         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3390
3391         if (buf_iter)
3392                 event = ring_buffer_iter_peek(buf_iter, ts);
3393         else
3394                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3395                                          lost_events);
3396
3397         if (event) {
3398                 iter->ent_size = ring_buffer_event_length(event);
3399                 return ring_buffer_event_data(event);
3400         }
3401         iter->ent_size = 0;
3402         return NULL;
3403 }
3404
3405 static struct trace_entry *
3406 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3407                   unsigned long *missing_events, u64 *ent_ts)
3408 {
3409         struct trace_buffer *buffer = iter->array_buffer->buffer;
3410         struct trace_entry *ent, *next = NULL;
3411         unsigned long lost_events = 0, next_lost = 0;
3412         int cpu_file = iter->cpu_file;
3413         u64 next_ts = 0, ts;
3414         int next_cpu = -1;
3415         int next_size = 0;
3416         int cpu;
3417
3418         /*
3419          * If we are in a per_cpu trace file, don't bother iterating over
3420          * all CPUs; just peek at that CPU directly.
3421          */
3422         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3423                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3424                         return NULL;
3425                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3426                 if (ent_cpu)
3427                         *ent_cpu = cpu_file;
3428
3429                 return ent;
3430         }
3431
3432         for_each_tracing_cpu(cpu) {
3433
3434                 if (ring_buffer_empty_cpu(buffer, cpu))
3435                         continue;
3436
3437                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3438
3439                 /*
3440                  * Pick the entry with the smallest timestamp:
3441                  */
3442                 if (ent && (!next || ts < next_ts)) {
3443                         next = ent;
3444                         next_cpu = cpu;
3445                         next_ts = ts;
3446                         next_lost = lost_events;
3447                         next_size = iter->ent_size;
3448                 }
3449         }
3450
3451         iter->ent_size = next_size;
3452
3453         if (ent_cpu)
3454                 *ent_cpu = next_cpu;
3455
3456         if (ent_ts)
3457                 *ent_ts = next_ts;
3458
3459         if (missing_events)
3460                 *missing_events = next_lost;
3461
3462         return next;
3463 }
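
/*
 * Illustrative sketch (simplified types, not the real ring buffer API):
 * __find_next_entry() is a k-way merge. Each per-CPU buffer is already
 * ordered by time, so the globally next event is the per-CPU head with
 * the smallest timestamp. The selection step in isolation:
 *
 *	struct demo_head { u64 ts; bool empty; };
 *
 *	static int demo_pick_next_cpu(struct demo_head *heads, int nr)
 *	{
 *		u64 next_ts = 0;
 *		int next_cpu = -1;
 *		int cpu;
 *
 *		for (cpu = 0; cpu < nr; cpu++) {
 *			if (heads[cpu].empty)
 *				continue;
 *			if (next_cpu < 0 || heads[cpu].ts < next_ts) {
 *				next_cpu = cpu;
 *				next_ts = heads[cpu].ts;
 *			}
 *		}
 *
 *		return next_cpu;
 *	}
 *
 *	(a return value of -1 means every buffer was empty)
 */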
3464
3465 /* Find the next real entry, without updating the iterator itself */
3466 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3467                                           int *ent_cpu, u64 *ent_ts)
3468 {
3469         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3470 }
3471
3472 /* Find the next real entry, and increment the iterator to the next entry */
3473 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3474 {
3475         iter->ent = __find_next_entry(iter, &iter->cpu,
3476                                       &iter->lost_events, &iter->ts);
3477
3478         if (iter->ent)
3479                 trace_iterator_increment(iter);
3480
3481         return iter->ent ? iter : NULL;
3482 }
3483
3484 static void trace_consume(struct trace_iterator *iter)
3485 {
3486         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3487                             &iter->lost_events);
3488 }
3489
3490 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3491 {
3492         struct trace_iterator *iter = m->private;
3493         int i = (int)*pos;
3494         void *ent;
3495
3496         WARN_ON_ONCE(iter->leftover);
3497
3498         (*pos)++;
3499
3500         /* can't go backwards */
3501         if (iter->idx > i)
3502                 return NULL;
3503
3504         if (iter->idx < 0)
3505                 ent = trace_find_next_entry_inc(iter);
3506         else
3507                 ent = iter;
3508
3509         while (ent && iter->idx < i)
3510                 ent = trace_find_next_entry_inc(iter);
3511
3512         iter->pos = *pos;
3513
3514         return ent;
3515 }
3516
3517 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3518 {
3519         struct ring_buffer_event *event;
3520         struct ring_buffer_iter *buf_iter;
3521         unsigned long entries = 0;
3522         u64 ts;
3523
3524         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3525
3526         buf_iter = trace_buffer_iter(iter, cpu);
3527         if (!buf_iter)
3528                 return;
3529
3530         ring_buffer_iter_reset(buf_iter);
3531
3532         /*
3533          * With the max latency tracers, a reset may never have taken
3534          * place on a CPU. This is evident by the timestamp being
3535          * before the start of the buffer.
3536          */
3537         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3538                 if (ts >= iter->array_buffer->time_start)
3539                         break;
3540                 entries++;
3541                 ring_buffer_read(buf_iter, NULL);
3542         }
3543
3544         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3545 }
3546
3547 /*
3548  * The current tracer is copied to avoid taking a global lock
3549  * all around.
3550  */
3551 static void *s_start(struct seq_file *m, loff_t *pos)
3552 {
3553         struct trace_iterator *iter = m->private;
3554         struct trace_array *tr = iter->tr;
3555         int cpu_file = iter->cpu_file;
3556         void *p = NULL;
3557         loff_t l = 0;
3558         int cpu;
3559
3560         /*
3561          * copy the tracer to avoid using a global lock all around.
3562          * iter->trace is a copy of current_trace; the name pointer may
3563          * be compared instead of calling strcmp(), as iter->trace->name
3564          * will point to the same string as current_trace->name.
3565          */
3566         mutex_lock(&trace_types_lock);
3567         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3568                 *iter->trace = *tr->current_trace;
3569         mutex_unlock(&trace_types_lock);
3570
3571 #ifdef CONFIG_TRACER_MAX_TRACE
3572         if (iter->snapshot && iter->trace->use_max_tr)
3573                 return ERR_PTR(-EBUSY);
3574 #endif
3575
3576         if (!iter->snapshot)
3577                 atomic_inc(&trace_record_taskinfo_disabled);
3578
3579         if (*pos != iter->pos) {
3580                 iter->ent = NULL;
3581                 iter->cpu = 0;
3582                 iter->idx = -1;
3583
3584                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3585                         for_each_tracing_cpu(cpu)
3586                                 tracing_iter_reset(iter, cpu);
3587                 } else
3588                         tracing_iter_reset(iter, cpu_file);
3589
3590                 iter->leftover = 0;
3591                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3592                         ;
3593
3594         } else {
3595                 /*
3596                  * If we overflowed the seq_file before, then we want
3597                  * to just reuse the trace_seq buffer again.
3598                  */
3599                 if (iter->leftover)
3600                         p = iter;
3601                 else {
3602                         l = *pos - 1;
3603                         p = s_next(m, p, &l);
3604                 }
3605         }
3606
3607         trace_event_read_lock();
3608         trace_access_lock(cpu_file);
3609         return p;
3610 }
3611
3612 static void s_stop(struct seq_file *m, void *p)
3613 {
3614         struct trace_iterator *iter = m->private;
3615
3616 #ifdef CONFIG_TRACER_MAX_TRACE
3617         if (iter->snapshot && iter->trace->use_max_tr)
3618                 return;
3619 #endif
3620
3621         if (!iter->snapshot)
3622                 atomic_dec(&trace_record_taskinfo_disabled);
3623
3624         trace_access_unlock(iter->cpu_file);
3625         trace_event_read_unlock();
3626 }
3627
3628 static void
3629 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3630                       unsigned long *entries, int cpu)
3631 {
3632         unsigned long count;
3633
3634         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3635         /*
3636          * If this buffer has skipped entries, then we hold all
3637          * entries for the trace and we need to ignore the
3638          * ones before the time stamp.
3639          */
3640         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3641                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3642                 /* total is the same as the entries */
3643                 *total = count;
3644         } else
3645                 *total = count +
3646                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3647         *entries = count;
3648 }
3649
3650 static void
3651 get_total_entries(struct array_buffer *buf,
3652                   unsigned long *total, unsigned long *entries)
3653 {
3654         unsigned long t, e;
3655         int cpu;
3656
3657         *total = 0;
3658         *entries = 0;
3659
3660         for_each_tracing_cpu(cpu) {
3661                 get_total_entries_cpu(buf, &t, &e, cpu);
3662                 *total += t;
3663                 *entries += e;
3664         }
3665 }
3666
3667 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3668 {
3669         unsigned long total, entries;
3670
3671         if (!tr)
3672                 tr = &global_trace;
3673
3674         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3675
3676         return entries;
3677 }
3678
3679 unsigned long trace_total_entries(struct trace_array *tr)
3680 {
3681         unsigned long total, entries;
3682
3683         if (!tr)
3684                 tr = &global_trace;
3685
3686         get_total_entries(&tr->array_buffer, &total, &entries);
3687
3688         return entries;
3689 }
3690
3691 static void print_lat_help_header(struct seq_file *m)
3692 {
3693         seq_puts(m, "#                  _------=> CPU#            \n"
3694                     "#                 / _-----=> irqs-off        \n"
3695                     "#                | / _----=> need-resched    \n"
3696                     "#                || / _---=> hardirq/softirq \n"
3697                     "#                ||| / _--=> preempt-depth   \n"
3698                     "#                |||| /     delay            \n"
3699                     "#  cmd     pid   ||||| time  |   caller      \n"
3700                     "#     \\   /      |||||  \\    |   /         \n");
3701 }
3702
3703 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3704 {
3705         unsigned long total;
3706         unsigned long entries;
3707
3708         get_total_entries(buf, &total, &entries);
3709         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3710                    entries, total, num_online_cpus());
3711         seq_puts(m, "#\n");
3712 }
3713
3714 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3715                                    unsigned int flags)
3716 {
3717         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3718
3719         print_event_info(buf, m);
3720
3721         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3722         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3723 }
3724
3725 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3726                                        unsigned int flags)
3727 {
3728         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3729         const char *space = "          ";
3730         int prec = tgid ? 10 : 2;
3731
3732         print_event_info(buf, m);
3733
3734         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3735         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3736         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3737         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3738         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3739         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3740         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3741 }
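
/*
 * Illustrative note: the "%.*s" conversions above print at most 'prec'
 * characters of a constant padding/label string, which is how the header
 * art shifts right by the width of the optional TGID column:
 *
 *	seq_printf(m, "#%.*s|\n",  2, "          ");	prints "#  |"
 *	seq_printf(m, "#%.*s|\n", 10, "          ");	prints "#          |"
 */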
3742
3743 void
3744 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3745 {
3746         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3747         struct array_buffer *buf = iter->array_buffer;
3748         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3749         struct tracer *type = iter->trace;
3750         unsigned long entries;
3751         unsigned long total;
3752         const char *name;
3753 
3754         name = type->name;
3755
3756         get_total_entries(buf, &total, &entries);
3757
3758         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3759                    name, UTS_RELEASE);
3760         seq_puts(m, "# -----------------------------------"
3761                  "---------------------------------\n");
3762         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3763                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3764                    nsecs_to_usecs(data->saved_latency),
3765                    entries,
3766                    total,
3767                    buf->cpu,
3768 #if defined(CONFIG_PREEMPT_NONE)
3769                    "server",
3770 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3771                    "desktop",
3772 #elif defined(CONFIG_PREEMPT)
3773                    "preempt",
3774 #elif defined(CONFIG_PREEMPT_RT)
3775                    "preempt_rt",
3776 #else
3777                    "unknown",
3778 #endif
3779                    /* These are reserved for later use */
3780                    0, 0, 0, 0);
3781 #ifdef CONFIG_SMP
3782         seq_printf(m, " #P:%d)\n", num_online_cpus());
3783 #else
3784         seq_puts(m, ")\n");
3785 #endif
3786         seq_puts(m, "#    -----------------\n");
3787         seq_printf(m, "#    | task: %.16s-%d "
3788                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3789                    data->comm, data->pid,
3790                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3791                    data->policy, data->rt_priority);
3792         seq_puts(m, "#    -----------------\n");
3793
3794         if (data->critical_start) {
3795                 seq_puts(m, "#  => started at: ");
3796                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3797                 trace_print_seq(m, &iter->seq);
3798                 seq_puts(m, "\n#  => ended at:   ");
3799                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3800                 trace_print_seq(m, &iter->seq);
3801                 seq_puts(m, "\n#\n");
3802         }
3803
3804         seq_puts(m, "#\n");
3805 }
3806
3807 static void test_cpu_buff_start(struct trace_iterator *iter)
3808 {
3809         struct trace_seq *s = &iter->seq;
3810         struct trace_array *tr = iter->tr;
3811
3812         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3813                 return;
3814
3815         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3816                 return;
3817
3818         if (cpumask_available(iter->started) &&
3819             cpumask_test_cpu(iter->cpu, iter->started))
3820                 return;
3821
3822         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3823                 return;
3824
3825         if (cpumask_available(iter->started))
3826                 cpumask_set_cpu(iter->cpu, iter->started);
3827
3828         /* Don't print started cpu buffer for the first entry of the trace */
3829         if (iter->idx > 1)
3830                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3831                                 iter->cpu);
3832 }
3833
3834 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3835 {
3836         struct trace_array *tr = iter->tr;
3837         struct trace_seq *s = &iter->seq;
3838         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3839         struct trace_entry *entry;
3840         struct trace_event *event;
3841
3842         entry = iter->ent;
3843
3844         test_cpu_buff_start(iter);
3845
3846         event = ftrace_find_event(entry->type);
3847
3848         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3849                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3850                         trace_print_lat_context(iter);
3851                 else
3852                         trace_print_context(iter);
3853         }
3854
3855         if (trace_seq_has_overflowed(s))
3856                 return TRACE_TYPE_PARTIAL_LINE;
3857
3858         if (event)
3859                 return event->funcs->trace(iter, sym_flags, event);
3860
3861         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3862
3863         return trace_handle_return(s);
3864 }
3865
3866 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3867 {
3868         struct trace_array *tr = iter->tr;
3869         struct trace_seq *s = &iter->seq;
3870         struct trace_entry *entry;
3871         struct trace_event *event;
3872
3873         entry = iter->ent;
3874
3875         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3876                 trace_seq_printf(s, "%d %d %llu ",
3877                                  entry->pid, iter->cpu, iter->ts);
3878
3879         if (trace_seq_has_overflowed(s))
3880                 return TRACE_TYPE_PARTIAL_LINE;
3881
3882         event = ftrace_find_event(entry->type);
3883         if (event)
3884                 return event->funcs->raw(iter, 0, event);
3885
3886         trace_seq_printf(s, "%d ?\n", entry->type);
3887
3888         return trace_handle_return(s);
3889 }
3890
3891 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3892 {
3893         struct trace_array *tr = iter->tr;
3894         struct trace_seq *s = &iter->seq;
3895         unsigned char newline = '\n';
3896         struct trace_entry *entry;
3897         struct trace_event *event;
3898
3899         entry = iter->ent;
3900
3901         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3902                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3903                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3904                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3905                 if (trace_seq_has_overflowed(s))
3906                         return TRACE_TYPE_PARTIAL_LINE;
3907         }
3908
3909         event = ftrace_find_event(entry->type);
3910         if (event) {
3911                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3912                 if (ret != TRACE_TYPE_HANDLED)
3913                         return ret;
3914         }
3915
3916         SEQ_PUT_FIELD(s, newline);
3917
3918         return trace_handle_return(s);
3919 }
3920
3921 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3922 {
3923         struct trace_array *tr = iter->tr;
3924         struct trace_seq *s = &iter->seq;
3925         struct trace_entry *entry;
3926         struct trace_event *event;
3927
3928         entry = iter->ent;
3929
3930         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3931                 SEQ_PUT_FIELD(s, entry->pid);
3932                 SEQ_PUT_FIELD(s, iter->cpu);
3933                 SEQ_PUT_FIELD(s, iter->ts);
3934                 if (trace_seq_has_overflowed(s))
3935                         return TRACE_TYPE_PARTIAL_LINE;
3936         }
3937
3938         event = ftrace_find_event(entry->type);
3939         return event ? event->funcs->binary(iter, 0, event) :
3940                 TRACE_TYPE_HANDLED;
3941 }
3942
3943 int trace_empty(struct trace_iterator *iter)
3944 {
3945         struct ring_buffer_iter *buf_iter;
3946         int cpu;
3947
3948         /* If we are looking at one CPU buffer, only check that one */
3949         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3950                 cpu = iter->cpu_file;
3951                 buf_iter = trace_buffer_iter(iter, cpu);
3952                 if (buf_iter) {
3953                         if (!ring_buffer_iter_empty(buf_iter))
3954                                 return 0;
3955                 } else {
3956                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3957                                 return 0;
3958                 }
3959                 return 1;
3960         }
3961
3962         for_each_tracing_cpu(cpu) {
3963                 buf_iter = trace_buffer_iter(iter, cpu);
3964                 if (buf_iter) {
3965                         if (!ring_buffer_iter_empty(buf_iter))
3966                                 return 0;
3967                 } else {
3968                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3969                                 return 0;
3970                 }
3971         }
3972
3973         return 1;
3974 }
3975
3976 /*  Called with trace_event_read_lock() held. */
3977 enum print_line_t print_trace_line(struct trace_iterator *iter)
3978 {
3979         struct trace_array *tr = iter->tr;
3980         unsigned long trace_flags = tr->trace_flags;
3981         enum print_line_t ret;
3982
3983         if (iter->lost_events) {
3984                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3985                                  iter->cpu, iter->lost_events);
3986                 if (trace_seq_has_overflowed(&iter->seq))
3987                         return TRACE_TYPE_PARTIAL_LINE;
3988         }
3989
3990         if (iter->trace && iter->trace->print_line) {
3991                 ret = iter->trace->print_line(iter);
3992                 if (ret != TRACE_TYPE_UNHANDLED)
3993                         return ret;
3994         }
3995
3996         if (iter->ent->type == TRACE_BPUTS &&
3997                         trace_flags & TRACE_ITER_PRINTK &&
3998                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3999                 return trace_print_bputs_msg_only(iter);
4000
4001         if (iter->ent->type == TRACE_BPRINT &&
4002                         trace_flags & TRACE_ITER_PRINTK &&
4003                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4004                 return trace_print_bprintk_msg_only(iter);
4005
4006         if (iter->ent->type == TRACE_PRINT &&
4007                         trace_flags & TRACE_ITER_PRINTK &&
4008                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4009                 return trace_print_printk_msg_only(iter);
4010
4011         if (trace_flags & TRACE_ITER_BIN)
4012                 return print_bin_fmt(iter);
4013
4014         if (trace_flags & TRACE_ITER_HEX)
4015                 return print_hex_fmt(iter);
4016
4017         if (trace_flags & TRACE_ITER_RAW)
4018                 return print_raw_fmt(iter);
4019
4020         return print_trace_fmt(iter);
4021 }
4022
4023 void trace_latency_header(struct seq_file *m)
4024 {
4025         struct trace_iterator *iter = m->private;
4026         struct trace_array *tr = iter->tr;
4027
4028         /* print nothing if the buffers are empty */
4029         if (trace_empty(iter))
4030                 return;
4031
4032         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4033                 print_trace_header(m, iter);
4034
4035         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4036                 print_lat_help_header(m);
4037 }
4038
4039 void trace_default_header(struct seq_file *m)
4040 {
4041         struct trace_iterator *iter = m->private;
4042         struct trace_array *tr = iter->tr;
4043         unsigned long trace_flags = tr->trace_flags;
4044
4045         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4046                 return;
4047
4048         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4049                 /* print nothing if the buffers are empty */
4050                 if (trace_empty(iter))
4051                         return;
4052                 print_trace_header(m, iter);
4053                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4054                         print_lat_help_header(m);
4055         } else {
4056                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4057                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4058                                 print_func_help_header_irq(iter->array_buffer,
4059                                                            m, trace_flags);
4060                         else
4061                                 print_func_help_header(iter->array_buffer, m,
4062                                                        trace_flags);
4063                 }
4064         }
4065 }
4066
4067 static void test_ftrace_alive(struct seq_file *m)
4068 {
4069         if (!ftrace_is_dead())
4070                 return;
4071         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4072                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4073 }
4074
4075 #ifdef CONFIG_TRACER_MAX_TRACE
4076 static void show_snapshot_main_help(struct seq_file *m)
4077 {
4078         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4079                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4080                     "#                      Takes a snapshot of the main buffer.\n"
4081                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4082                     "#                      (Doesn't have to be '2'; works with any number that\n"
4083                     "#                       is not a '0' or '1')\n");
4084 }
4085
4086 static void show_snapshot_percpu_help(struct seq_file *m)
4087 {
4088         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4089 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4090         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4091                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4092 #else
4093         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4094                     "#                     Must use main snapshot file to allocate.\n");
4095 #endif
4096         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4097                     "#                      (Doesn't have to be '2'; works with any number that\n"
4098                     "#                       is not a '0' or '1')\n");
4099 }
4100
4101 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4102 {
4103         if (iter->tr->allocated_snapshot)
4104                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4105         else
4106                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4107
4108         seq_puts(m, "# Snapshot commands:\n");
4109         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4110                 show_snapshot_main_help(m);
4111         else
4112                 show_snapshot_percpu_help(m);
4113 }
4114 #else
4115 /* Should never be called */
4116 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4117 #endif
4118
4119 static int s_show(struct seq_file *m, void *v)
4120 {
4121         struct trace_iterator *iter = v;
4122         int ret;
4123
4124         if (iter->ent == NULL) {
4125                 if (iter->tr) {
4126                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4127                         seq_puts(m, "#\n");
4128                         test_ftrace_alive(m);
4129                 }
4130                 if (iter->snapshot && trace_empty(iter))
4131                         print_snapshot_help(m, iter);
4132                 else if (iter->trace && iter->trace->print_header)
4133                         iter->trace->print_header(m);
4134                 else
4135                         trace_default_header(m);
4136
4137         } else if (iter->leftover) {
4138                 /*
4139                  * If we filled the seq_file buffer earlier, we
4140                  * want to just show it now.
4141                  */
4142                 ret = trace_print_seq(m, &iter->seq);
4143
4144                 /* ret should this time be zero, but you never know */
4145                 iter->leftover = ret;
4146
4147         } else {
4148                 print_trace_line(iter);
4149                 ret = trace_print_seq(m, &iter->seq);
4150                 /*
4151                  * If we overflow the seq_file buffer, then it will
4152                  * ask us for this data again at start up.
4153                  * Use that instead.
4154                  *  ret is 0 if seq_file write succeeded.
4155                  *        -1 otherwise.
4156                  */
4157                 iter->leftover = ret;
4158         }
4159
4160         return 0;
4161 }
4162
4163 /*
4164  * Should be used after trace_array_get(), trace_types_lock
4165  * ensures that i_cdev was already initialized.
4166  */
4167 static inline int tracing_get_cpu(struct inode *inode)
4168 {
4169         if (inode->i_cdev) /* See trace_create_cpu_file() */
4170                 return (long)inode->i_cdev - 1;
4171         return RING_BUFFER_ALL_CPUS;
4172 }
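
/*
 * Illustrative note (the encode side lives in trace_create_cpu_file(),
 * not shown here, and is paraphrased): the CPU number is stored biased
 * by one in i_cdev so that a NULL i_cdev can stand for "all CPUs":
 *
 *	encode (roughly):	d_inode(dentry)->i_cdev = (void *)(cpu + 1);
 *	decode (above):		cpu = (long)inode->i_cdev - 1;
 */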
4173
4174 static const struct seq_operations tracer_seq_ops = {
4175         .start          = s_start,
4176         .next           = s_next,
4177         .stop           = s_stop,
4178         .show           = s_show,
4179 };
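
/*
 * Illustrative sketch (hypothetical, unrelated to trace.c): seq_read()
 * drives these callbacks as start -> show/next ... -> stop, resuming from
 * *pos on the next read; s_start()/s_next()/s_show()/s_stop() above
 * implement that contract for the trace iterator. A minimal
 * seq_operations user has the same shape (three records; returning NULL
 * from start/next ends the walk):
 *
 *	static void *demo_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return *pos < 3 ? pos : NULL;
 *	}
 *
 *	static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		++*pos;
 *		return *pos < 3 ? pos : NULL;
 *	}
 *
 *	static void demo_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static int demo_show(struct seq_file *m, void *v)
 *	{
 *		seq_printf(m, "record %lld\n", *(loff_t *)v);
 *		return 0;
 *	}
 *
 *	static const struct seq_operations demo_seq_ops = {
 *		.start	= demo_start,
 *		.next	= demo_next,
 *		.stop	= demo_stop,
 *		.show	= demo_show,
 *	};
 */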
4180
4181 static struct trace_iterator *
4182 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4183 {
4184         struct trace_array *tr = inode->i_private;
4185         struct trace_iterator *iter;
4186         int cpu;
4187
4188         if (tracing_disabled)
4189                 return ERR_PTR(-ENODEV);
4190
4191         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4192         if (!iter)
4193                 return ERR_PTR(-ENOMEM);
4194
4195         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4196                                     GFP_KERNEL);
4197         if (!iter->buffer_iter)
4198                 goto release;
4199
4200         /*
4201          * We make a copy of the current tracer to avoid concurrent
4202          * changes to it while we are reading.
4203          */
4204         mutex_lock(&trace_types_lock);
4205         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4206         if (!iter->trace)
4207                 goto fail;
4208
4209         *iter->trace = *tr->current_trace;
4210
4211         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4212                 goto fail;
4213
4214         iter->tr = tr;
4215
4216 #ifdef CONFIG_TRACER_MAX_TRACE
4217         /* Currently only the top directory has a snapshot */
4218         if (tr->current_trace->print_max || snapshot)
4219                 iter->array_buffer = &tr->max_buffer;
4220         else
4221 #endif
4222                 iter->array_buffer = &tr->array_buffer;
4223         iter->snapshot = snapshot;
4224         iter->pos = -1;
4225         iter->cpu_file = tracing_get_cpu(inode);
4226         mutex_init(&iter->mutex);
4227
4228         /* Notify the tracer early; before we stop tracing. */
4229         if (iter->trace->open)
4230                 iter->trace->open(iter);
4231
4232         /* Annotate start of buffers if we had overruns */
4233         if (ring_buffer_overruns(iter->array_buffer->buffer))
4234                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4235
4236         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4237         if (trace_clocks[tr->clock_id].in_ns)
4238                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4239
4240         /* stop the trace while dumping if we are not opening "snapshot" */
4241         if (!iter->snapshot)
4242                 tracing_stop_tr(tr);
4243
4244         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4245                 for_each_tracing_cpu(cpu) {
4246                         iter->buffer_iter[cpu] =
4247                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4248                                                          cpu, GFP_KERNEL);
4249                 }
4250                 ring_buffer_read_prepare_sync();
4251                 for_each_tracing_cpu(cpu) {
4252                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4253                         tracing_iter_reset(iter, cpu);
4254                 }
4255         } else {
4256                 cpu = iter->cpu_file;
4257                 iter->buffer_iter[cpu] =
4258                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4259                                                  cpu, GFP_KERNEL);
4260                 ring_buffer_read_prepare_sync();
4261                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4262                 tracing_iter_reset(iter, cpu);
4263         }
4264
4265         mutex_unlock(&trace_types_lock);
4266
4267         return iter;
4268
4269  fail:
4270         mutex_unlock(&trace_types_lock);
4271         kfree(iter->trace);
4272         kfree(iter->buffer_iter);
4273 release:
4274         seq_release_private(inode, file);
4275         return ERR_PTR(-ENOMEM);
4276 }
4277
4278 int tracing_open_generic(struct inode *inode, struct file *filp)
4279 {
4280         int ret;
4281
4282         ret = tracing_check_open_get_tr(NULL);
4283         if (ret)
4284                 return ret;
4285
4286         filp->private_data = inode->i_private;
4287         return 0;
4288 }
4289
4290 bool tracing_is_disabled(void)
4291 {
4292         return tracing_disabled ? true : false;
4293 }
4294
4295 /*
4296  * Open and update trace_array ref count.
4297  * Must have the current trace_array passed to it.
4298  */
4299 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4300 {
4301         struct trace_array *tr = inode->i_private;
4302         int ret;
4303
4304         ret = tracing_check_open_get_tr(tr);
4305         if (ret)
4306                 return ret;
4307
4308         filp->private_data = inode->i_private;
4309
4310         return 0;
4311 }
4312
4313 static int tracing_release(struct inode *inode, struct file *file)
4314 {
4315         struct trace_array *tr = inode->i_private;
4316         struct seq_file *m = file->private_data;
4317         struct trace_iterator *iter;
4318         int cpu;
4319
4320         if (!(file->f_mode & FMODE_READ)) {
4321                 trace_array_put(tr);
4322                 return 0;
4323         }
4324
4325         /* Writes do not use seq_file */
4326         iter = m->private;
4327         mutex_lock(&trace_types_lock);
4328
4329         for_each_tracing_cpu(cpu) {
4330                 if (iter->buffer_iter[cpu])
4331                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4332         }
4333
4334         if (iter->trace && iter->trace->close)
4335                 iter->trace->close(iter);
4336
4337         if (!iter->snapshot)
4338                 /* reenable tracing if it was previously enabled */
4339                 tracing_start_tr(tr);
4340
4341         __trace_array_put(tr);
4342
4343         mutex_unlock(&trace_types_lock);
4344
4345         mutex_destroy(&iter->mutex);
4346         free_cpumask_var(iter->started);
4347         kfree(iter->trace);
4348         kfree(iter->buffer_iter);
4349         seq_release_private(inode, file);
4350
4351         return 0;
4352 }
4353
4354 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4355 {
4356         struct trace_array *tr = inode->i_private;
4357
4358         trace_array_put(tr);
4359         return 0;
4360 }
4361
4362 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4363 {
4364         struct trace_array *tr = inode->i_private;
4365
4366         trace_array_put(tr);
4367
4368         return single_release(inode, file);
4369 }
4370
4371 static int tracing_open(struct inode *inode, struct file *file)
4372 {
4373         struct trace_array *tr = inode->i_private;
4374         struct trace_iterator *iter;
4375         int ret;
4376
4377         ret = tracing_check_open_get_tr(tr);
4378         if (ret)
4379                 return ret;
4380
4381         /* If this file was open for write, then erase contents */
4382         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4383                 int cpu = tracing_get_cpu(inode);
4384                 struct array_buffer *trace_buf = &tr->array_buffer;
4385
4386 #ifdef CONFIG_TRACER_MAX_TRACE
4387                 if (tr->current_trace->print_max)
4388                         trace_buf = &tr->max_buffer;
4389 #endif
4390
4391                 if (cpu == RING_BUFFER_ALL_CPUS)
4392                         tracing_reset_online_cpus(trace_buf);
4393                 else
4394                         tracing_reset_cpu(trace_buf, cpu);
4395         }
4396
4397         if (file->f_mode & FMODE_READ) {
4398                 iter = __tracing_open(inode, file, false);
4399                 if (IS_ERR(iter))
4400                         ret = PTR_ERR(iter);
4401                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4402                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4403         }
4404
4405         if (ret < 0)
4406                 trace_array_put(tr);
4407
4408         return ret;
4409 }
4410
4411 /*
4412  * Some tracers are not suitable for instance buffers.
4413  * A tracer is always available for the global array (toplevel)
4414  * or if it explicitly states that it is.
4415  */
4416 static bool
4417 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4418 {
4419         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4420 }
4421
4422 /* Find the next tracer that this trace array may use */
4423 static struct tracer *
4424 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4425 {
4426         while (t && !trace_ok_for_array(t, tr))
4427                 t = t->next;
4428
4429         return t;
4430 }
4431
4432 static void *
4433 t_next(struct seq_file *m, void *v, loff_t *pos)
4434 {
4435         struct trace_array *tr = m->private;
4436         struct tracer *t = v;
4437
4438         (*pos)++;
4439
4440         if (t)
4441                 t = get_tracer_for_array(tr, t->next);
4442
4443         return t;
4444 }
4445
4446 static void *t_start(struct seq_file *m, loff_t *pos)
4447 {
4448         struct trace_array *tr = m->private;
4449         struct tracer *t;
4450         loff_t l = 0;
4451
4452         mutex_lock(&trace_types_lock);
4453
4454         t = get_tracer_for_array(tr, trace_types);
4455         for (; t && l < *pos; t = t_next(m, t, &l))
4456                         ;
4457                 ;
4458         return t;
4459 }
4460
4461 static void t_stop(struct seq_file *m, void *p)
4462 {
4463         mutex_unlock(&trace_types_lock);
4464 }
4465
4466 static int t_show(struct seq_file *m, void *v)
4467 {
4468         struct tracer *t = v;
4469
4470         if (!t)
4471                 return 0;
4472
4473         seq_puts(m, t->name);
4474         if (t->next)
4475                 seq_putc(m, ' ');
4476         else
4477                 seq_putc(m, '\n');
4478
4479         return 0;
4480 }
4481
4482 static const struct seq_operations show_traces_seq_ops = {
4483         .start          = t_start,
4484         .next           = t_next,
4485         .stop           = t_stop,
4486         .show           = t_show,
4487 };
4488
4489 static int show_traces_open(struct inode *inode, struct file *file)
4490 {
4491         struct trace_array *tr = inode->i_private;
4492         struct seq_file *m;
4493         int ret;
4494
4495         ret = tracing_check_open_get_tr(tr);
4496         if (ret)
4497                 return ret;
4498
4499         ret = seq_open(file, &show_traces_seq_ops);
4500         if (ret) {
4501                 trace_array_put(tr);
4502                 return ret;
4503         }
4504
4505         m = file->private_data;
4506         m->private = tr;
4507
4508         return 0;
4509 }
4510
4511 static int show_traces_release(struct inode *inode, struct file *file)
4512 {
4513         struct trace_array *tr = inode->i_private;
4514
4515         trace_array_put(tr);
4516         return seq_release(inode, file);
4517 }
4518
4519 static ssize_t
4520 tracing_write_stub(struct file *filp, const char __user *ubuf,
4521                    size_t count, loff_t *ppos)
4522 {
4523         return count;
4524 }
4525
4526 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4527 {
4528         int ret;
4529
4530         if (file->f_mode & FMODE_READ)
4531                 ret = seq_lseek(file, offset, whence);
4532         else
4533                 file->f_pos = ret = 0;
4534
4535         return ret;
4536 }
4537
4538 static const struct file_operations tracing_fops = {
4539         .open           = tracing_open,
4540         .read           = seq_read,
4541         .write          = tracing_write_stub,
4542         .llseek         = tracing_lseek,
4543         .release        = tracing_release,
4544 };
4545
4546 static const struct file_operations show_traces_fops = {
4547         .open           = show_traces_open,
4548         .read           = seq_read,
4549         .llseek         = seq_lseek,
4550         .release        = show_traces_release,
4551 };
4552
4553 static ssize_t
4554 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4555                      size_t count, loff_t *ppos)
4556 {
4557         struct trace_array *tr = file_inode(filp)->i_private;
4558         char *mask_str;
4559         int len;
4560
4561         len = snprintf(NULL, 0, "%*pb\n",
4562                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4563         mask_str = kmalloc(len, GFP_KERNEL);
4564         if (!mask_str)
4565                 return -ENOMEM;
4566
4567         len = snprintf(mask_str, len, "%*pb\n",
4568                        cpumask_pr_args(tr->tracing_cpumask));
4569         if (len >= count) {
4570                 count = -EINVAL;
4571                 goto out_err;
4572         }
4573         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4574
4575 out_err:
4576         kfree(mask_str);
4577
4578         return count;
4579 }
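
/*
 * Illustrative note: "%*pb" is the printk extension that formats a bitmap
 * as a hex mask, and cpumask_pr_args() expands to the (nr_cpu_ids,
 * cpumask_bits(mask)) pair it expects; the first snprintf() above, with a
 * NULL buffer, only measures how long that output will be. On an 8-CPU
 * system with CPUs 0-3 set:
 *
 *	pr_info("%*pb\n",  cpumask_pr_args(mask));	prints "0f"
 *	pr_info("%*pbl\n", cpumask_pr_args(mask));	prints "0-3" (list form)
 */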
4580
4581 int tracing_set_cpumask(struct trace_array *tr,
4582                         cpumask_var_t tracing_cpumask_new)
4583 {
4584         int cpu;
4585
4586         if (!tr)
4587                 return -EINVAL;
4588
4589         local_irq_disable();
4590         arch_spin_lock(&tr->max_lock);
4591         for_each_tracing_cpu(cpu) {
4592                 /*
4593                  * Increase/decrease the disabled counter if we are
4594                  * about to flip a bit in the cpumask:
4595                  */
4596                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4597                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4598                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4599                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4600                 }
4601                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4602                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4603                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4604                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4605                 }
4606         }
4607         arch_spin_unlock(&tr->max_lock);
4608         local_irq_enable();
4609
4610         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4611
4612         return 0;
4613 }
4614
4615 static ssize_t
4616 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4617                       size_t count, loff_t *ppos)
4618 {
4619         struct trace_array *tr = file_inode(filp)->i_private;
4620         cpumask_var_t tracing_cpumask_new;
4621         int err;
4622
4623         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4624                 return -ENOMEM;
4625
4626         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4627         if (err)
4628                 goto err_free;
4629
4630         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4631         if (err)
4632                 goto err_free;
4633
4634         free_cpumask_var(tracing_cpumask_new);
4635
4636         return count;
4637
4638 err_free:
4639         free_cpumask_var(tracing_cpumask_new);
4640
4641         return err;
4642 }
4643
4644 static const struct file_operations tracing_cpumask_fops = {
4645         .open           = tracing_open_generic_tr,
4646         .read           = tracing_cpumask_read,
4647         .write          = tracing_cpumask_write,
4648         .release        = tracing_release_generic_tr,
4649         .llseek         = generic_file_llseek,
4650 };
4651
4652 static int tracing_trace_options_show(struct seq_file *m, void *v)
4653 {
4654         struct tracer_opt *trace_opts;
4655         struct trace_array *tr = m->private;
4656         u32 tracer_flags;
4657         int i;
4658
4659         mutex_lock(&trace_types_lock);
4660         tracer_flags = tr->current_trace->flags->val;
4661         trace_opts = tr->current_trace->flags->opts;
4662
4663         for (i = 0; trace_options[i]; i++) {
4664                 if (tr->trace_flags & (1 << i))
4665                         seq_printf(m, "%s\n", trace_options[i]);
4666                 else
4667                         seq_printf(m, "no%s\n", trace_options[i]);
4668         }
4669
4670         for (i = 0; trace_opts[i].name; i++) {
4671                 if (tracer_flags & trace_opts[i].bit)
4672                         seq_printf(m, "%s\n", trace_opts[i].name);
4673                 else
4674                         seq_printf(m, "no%s\n", trace_opts[i].name);
4675         }
4676         mutex_unlock(&trace_types_lock);
4677
4678         return 0;
4679 }
4680
4681 static int __set_tracer_option(struct trace_array *tr,
4682                                struct tracer_flags *tracer_flags,
4683                                struct tracer_opt *opts, int neg)
4684 {
4685         struct tracer *trace = tracer_flags->trace;
4686         int ret;
4687
4688         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4689         if (ret)
4690                 return ret;
4691
4692         if (neg)
4693                 tracer_flags->val &= ~opts->bit;
4694         else
4695                 tracer_flags->val |= opts->bit;
4696         return 0;
4697 }
4698
4699 /* Try to assign a tracer specific option */
4700 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4701 {
4702         struct tracer *trace = tr->current_trace;
4703         struct tracer_flags *tracer_flags = trace->flags;
4704         struct tracer_opt *opts = NULL;
4705         int i;
4706
4707         for (i = 0; tracer_flags->opts[i].name; i++) {
4708                 opts = &tracer_flags->opts[i];
4709
4710                 if (strcmp(cmp, opts->name) == 0)
4711                         return __set_tracer_option(tr, trace->flags, opts, neg);
4712         }
4713
4714         return -EINVAL;
4715 }
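
/*
 * Illustrative sketch (hypothetical tracer; field initializers written
 * out instead of the usual TRACER_OPT() helper): set_tracer_option()
 * walks the current tracer's flags->opts table, so a tracer publishes
 * private options by providing that table plus a set_flag() callback,
 * wiring them up via .flags and .set_flag in its struct tracer. The
 * empty entry terminates the walk above.
 *
 *	static struct tracer_opt demo_opts[] = {
 *		{ .name = "demo-verbose", .bit = 0x1 },
 *		{ }
 *	};
 *
 *	static struct tracer_flags demo_flags = {
 *		.val	= 0,
 *		.opts	= demo_opts,
 *	};
 *
 *	static int demo_set_flag(struct trace_array *tr, u32 old_flags,
 *				 u32 bit, int set)
 *	{
 *		return 0;
 *	}
 */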
4716
4717 /* Some tracers require overwrite to stay enabled */
4718 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4719 {
4720         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4721                 return -1;
4722
4723         return 0;
4724 }
4725
4726 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4727 {
4728         if ((mask == TRACE_ITER_RECORD_TGID) ||
4729             (mask == TRACE_ITER_RECORD_CMD))
4730                 lockdep_assert_held(&event_mutex);
4731
4732         /* do nothing if flag is already in the requested state */
4733         if (!!(tr->trace_flags & mask) == !!enabled)
4734                 return 0;
4735
4736         /* Give the tracer a chance to approve the change */
4737         if (tr->current_trace->flag_changed)
4738                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4739                         return -EINVAL;
4740
4741         if (enabled)
4742                 tr->trace_flags |= mask;
4743         else
4744                 tr->trace_flags &= ~mask;
4745
4746         if (mask == TRACE_ITER_RECORD_CMD)
4747                 trace_event_enable_cmd_record(enabled);
4748
4749         if (mask == TRACE_ITER_RECORD_TGID) {
4750                 if (!tgid_map)
4751                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4752                                            sizeof(*tgid_map),
4753                                            GFP_KERNEL);
4754                 if (!tgid_map) {
4755                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4756                         return -ENOMEM;
4757                 }
4758
4759                 trace_event_enable_tgid_record(enabled);
4760         }
4761
4762         if (mask == TRACE_ITER_EVENT_FORK)
4763                 trace_event_follow_fork(tr, enabled);
4764
4765         if (mask == TRACE_ITER_FUNC_FORK)
4766                 ftrace_pid_follow_fork(tr, enabled);
4767
4768         if (mask == TRACE_ITER_OVERWRITE) {
4769                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4770 #ifdef CONFIG_TRACER_MAX_TRACE
4771                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4772 #endif
4773         }
4774
4775         if (mask == TRACE_ITER_PRINTK) {
4776                 trace_printk_start_stop_comm(enabled);
4777                 trace_printk_control(enabled);
4778         }
4779
4780         return 0;
4781 }
4782
4783 int trace_set_options(struct trace_array *tr, char *option)
4784 {
4785         char *cmp;
4786         int neg = 0;
4787         int ret;
4788         size_t orig_len = strlen(option);
4789         int len;
4790
4791         cmp = strstrip(option);
4792
4793         len = str_has_prefix(cmp, "no");
4794         if (len)
4795                 neg = 1;
4796
4797         cmp += len;
4798
4799         mutex_lock(&event_mutex);
4800         mutex_lock(&trace_types_lock);
4801
4802         ret = match_string(trace_options, -1, cmp);
4803         /* If no option could be set, test the specific tracer options */
4804         if (ret < 0)
4805                 ret = set_tracer_option(tr, cmp, neg);
4806         else
4807                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4808
4809         mutex_unlock(&trace_types_lock);
4810         mutex_unlock(&event_mutex);
4811
4812         /*
4813          * If the first trailing whitespace is replaced with '\0' by strstrip,
4814          * turn it back into a space.
4815          */
4816         if (orig_len > strlen(option))
4817                 option[strlen(option)] = ' ';
4818
4819         return ret;
4820 }
4821
4822 static void __init apply_trace_boot_options(void)
4823 {
4824         char *buf = trace_boot_options_buf;
4825         char *option;
4826
4827         while (true) {
4828                 option = strsep(&buf, ",");
4829
4830                 if (!option)
4831                         break;
4832
4833                 if (*option)
4834                         trace_set_options(&global_trace, option);
4835
4836                 /* Put back the comma to allow this to be called again */
4837                 if (buf)
4838                         *(buf - 1) = ',';
4839         }
4840 }
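
/*
 * Illustrative usage (option names come from the trace_options[] table;
 * "overwrite" and "trace_printk" are the mainline names for
 * TRACE_ITER_OVERWRITE and TRACE_ITER_PRINTK handled above): a leading
 * "no" clears the flag, and anything unrecognized is retried as a
 * tracer-specific option via set_tracer_option():
 *
 *	trace_set_options(&global_trace, "overwrite");
 *	trace_set_options(&global_trace, "notrace_printk");
 *
 * The boot-time equivalent, parsed from trace_boot_options_buf by
 * apply_trace_boot_options(), is a comma-separated list such as
 * "trace_options=overwrite,notrace_printk" on the kernel command line.
 */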
4841
4842 static ssize_t
4843 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4844                         size_t cnt, loff_t *ppos)
4845 {
4846         struct seq_file *m = filp->private_data;
4847         struct trace_array *tr = m->private;
4848         char buf[64];
4849         int ret;
4850
4851         if (cnt >= sizeof(buf))
4852                 return -EINVAL;
4853
4854         if (copy_from_user(buf, ubuf, cnt))
4855                 return -EFAULT;
4856
4857         buf[cnt] = 0;
4858
4859         ret = trace_set_options(tr, buf);
4860         if (ret < 0)
4861                 return ret;
4862
4863         *ppos += cnt;
4864
4865         return cnt;
4866 }
4867
4868 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4869 {
4870         struct trace_array *tr = inode->i_private;
4871         int ret;
4872
4873         ret = tracing_check_open_get_tr(tr);
4874         if (ret)
4875                 return ret;
4876
4877         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4878         if (ret < 0)
4879                 trace_array_put(tr);
4880
4881         return ret;
4882 }
4883
4884 static const struct file_operations tracing_iter_fops = {
4885         .open           = tracing_trace_options_open,
4886         .read           = seq_read,
4887         .llseek         = seq_lseek,
4888         .release        = tracing_single_release_tr,
4889         .write          = tracing_trace_options_write,
4890 };
4891
4892 static const char readme_msg[] =
4893         "tracing mini-HOWTO:\n\n"
4894         "# echo 0 > tracing_on : quick way to disable tracing\n"
4895         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4896         " Important files:\n"
4897         "  trace\t\t\t- The static contents of the buffer\n"
4898         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4899         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4900         "  current_tracer\t- function and latency tracers\n"
4901         "  available_tracers\t- list of configured tracers for current_tracer\n"
4902         "  error_log\t- error log for failed commands (that support it)\n"
4903         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4904         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4905         "  trace_clock\t\t- change the clock used to order events\n"
4906         "       local:   Per cpu clock but may not be synced across CPUs\n"
4907         "      global:   Synced across CPUs but slows tracing down.\n"
4908         "     counter:   Not a clock, but just an increment\n"
4909         "      uptime:   Jiffy counter from time of boot\n"
4910         "        perf:   Same clock that perf events use\n"
4911 #ifdef CONFIG_X86_64
4912         "     x86-tsc:   TSC cycle counter\n"
4913 #endif
4914         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4915         "       delta:   Delta difference against a buffer-wide timestamp\n"
4916         "    absolute:   Absolute (standalone) timestamp\n"
4917         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4918         "\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
4919         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4920         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4921         "\t\t\t  Remove sub-buffer with rmdir\n"
4922         "  trace_options\t\t- Set format or modify how tracing happens\n"
4923         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4924         "\t\t\t  option name\n"
4925         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store\n"
4926 #ifdef CONFIG_DYNAMIC_FTRACE
4927         "\n  available_filter_functions - list of functions that can be filtered on\n"
4928         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4929         "\t\t\t  functions\n"
4930         "\t     accepts: func_full_name or glob-matching-pattern\n"
4931         "\t     modules: Can select a group via module\n"
4932         "\t      Format: :mod:<module-name>\n"
4933         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4934         "\t    triggers: a command to perform when function is hit\n"
4935         "\t      Format: <function>:<trigger>[:count]\n"
4936         "\t     trigger: traceon, traceoff\n"
4937         "\t\t      enable_event:<system>:<event>\n"
4938         "\t\t      disable_event:<system>:<event>\n"
4939 #ifdef CONFIG_STACKTRACE
4940         "\t\t      stacktrace\n"
4941 #endif
4942 #ifdef CONFIG_TRACER_SNAPSHOT
4943         "\t\t      snapshot\n"
4944 #endif
4945         "\t\t      dump\n"
4946         "\t\t      cpudump\n"
4947         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4948         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4949         "\t     The first one will disable tracing every time do_fault is hit\n"
4950         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4951         "\t       The first time do_trap is hit and it disables tracing, the\n"
4952         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4953         "\t       the counter will not decrement. It only decrements when the\n"
4954         "\t       trigger did work\n"
4955         "\t     To remove a trigger without a count:\n"
4956         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4957         "\t     To remove a trigger with a count:\n"
4958         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4959         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4960         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4961         "\t    modules: Can select a group via module command :mod:\n"
4962         "\t    Does not accept triggers\n"
4963 #endif /* CONFIG_DYNAMIC_FTRACE */
4964 #ifdef CONFIG_FUNCTION_TRACER
4965         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4966         "\t\t    (function)\n"
4967 #endif
4968 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4969         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4970         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4971         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4972 #endif
4973 #ifdef CONFIG_TRACER_SNAPSHOT
4974         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4975         "\t\t\t  snapshot buffer. Read the contents for more\n"
4976         "\t\t\t  information\n"
4977 #endif
4978 #ifdef CONFIG_STACK_TRACER
4979         "  stack_trace\t\t- Shows the max stack trace when active\n"
4980         "  stack_max_size\t- Shows current max stack size that was traced\n"
4981         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4982         "\t\t\t  new trace)\n"
4983 #ifdef CONFIG_DYNAMIC_FTRACE
4984         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4985         "\t\t\t  traces\n"
4986 #endif
4987 #endif /* CONFIG_STACK_TRACER */
4988 #ifdef CONFIG_DYNAMIC_EVENTS
4989         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4990         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4991 #endif
4992 #ifdef CONFIG_KPROBE_EVENTS
4993         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4994         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4995 #endif
4996 #ifdef CONFIG_UPROBE_EVENTS
4997         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4998         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4999 #endif
5000 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5001         "\t  accepts: event-definitions (one definition per line)\n"
5002         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5003         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5004 #ifdef CONFIG_HIST_TRIGGERS
5005         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5006 #endif
5007         "\t           -:[<group>/]<event>\n"
5008 #ifdef CONFIG_KPROBE_EVENTS
5009         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5010   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5011 #endif
5012 #ifdef CONFIG_UPROBE_EVENTS
5013   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5014 #endif
5015         "\t     args: <name>=fetcharg[:type]\n"
5016         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5017 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5018         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5019 #else
5020         "\t           $stack<index>, $stack, $retval, $comm,\n"
5021 #endif
5022         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5023         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5024         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5025         "\t           <type>\\[<array-size>\\]\n"
5026 #ifdef CONFIG_HIST_TRIGGERS
5027         "\t    field: <stype> <name>;\n"
5028         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5029         "\t           [unsigned] char/int/long\n"
5030 #endif
5031 #endif
5032         "  events/\t\t- Directory containing all trace event subsystems:\n"
5033         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5034         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5035         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5036         "\t\t\t  events\n"
5037         "      filter\t\t- If set, only events passing filter are traced\n"
5038         "  events/<system>/<event>/\t- Directory containing control files for\n"
5039         "\t\t\t  <event>:\n"
5040         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5041         "      filter\t\t- If set, only events passing filter are traced\n"
5042         "      trigger\t\t- If set, a command to perform when event is hit\n"
5043         "\t    Format: <trigger>[:count][if <filter>]\n"
5044         "\t   trigger: traceon, traceoff\n"
5045         "\t            enable_event:<system>:<event>\n"
5046         "\t            disable_event:<system>:<event>\n"
5047 #ifdef CONFIG_HIST_TRIGGERS
5048         "\t            enable_hist:<system>:<event>\n"
5049         "\t            disable_hist:<system>:<event>\n"
5050 #endif
5051 #ifdef CONFIG_STACKTRACE
5052         "\t\t    stacktrace\n"
5053 #endif
5054 #ifdef CONFIG_TRACER_SNAPSHOT
5055         "\t\t    snapshot\n"
5056 #endif
5057 #ifdef CONFIG_HIST_TRIGGERS
5058         "\t\t    hist (see below)\n"
5059 #endif
5060         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5061         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5062         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5063         "\t                  events/block/block_unplug/trigger\n"
5064         "\t   The first disables tracing every time block_unplug is hit.\n"
5065         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5066         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5067         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5068         "\t   Like function triggers, the counter is only decremented if it\n"
5069         "\t    enabled or disabled tracing.\n"
5070         "\t   To remove a trigger without a count:\n"
5071         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5072         "\t   To remove a trigger with a count:\n"
5073         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5074         "\t   Filters can be ignored when removing a trigger.\n"
5075 #ifdef CONFIG_HIST_TRIGGERS
5076         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5077         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5078         "\t            [:values=<field1[,field2,...]>]\n"
5079         "\t            [:sort=<field1[,field2,...]>]\n"
5080         "\t            [:size=#entries]\n"
5081         "\t            [:pause][:continue][:clear]\n"
5082         "\t            [:name=histname1]\n"
5083         "\t            [:<handler>.<action>]\n"
5084         "\t            [if <filter>]\n\n"
5085         "\t    When a matching event is hit, an entry is added to a hash\n"
5086         "\t    table using the key(s) and value(s) named, and the value of a\n"
5087         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5088         "\t    correspond to fields in the event's format description.  Keys\n"
5089         "\t    can be any field, or the special string 'stacktrace'.\n"
5090         "\t    Compound keys consisting of up to two fields can be specified\n"
5091         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5092         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5093         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5094         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5095         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5096         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5097         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5098         "\t    its histogram data will be shared with other triggers of the\n"
5099         "\t    same name, and trigger hits will update this common data.\n\n"
5100         "\t    Reading the 'hist' file for the event will dump the hash\n"
5101         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5102         "\t    triggers attached to an event, there will be a table for each\n"
5103         "\t    trigger in the output.  The table displayed for a named\n"
5104         "\t    trigger will be the same as any other instance having the\n"
5105         "\t    same name.  The default format used to display a given field\n"
5106         "\t    can be modified by appending any of the following modifiers\n"
5107         "\t    to the field name, as applicable:\n\n"
5108         "\t            .hex        display a number as a hex value\n"
5109         "\t            .sym        display an address as a symbol\n"
5110         "\t            .sym-offset display an address as a symbol and offset\n"
5111         "\t            .execname   display a common_pid as a program name\n"
5112         "\t            .syscall    display a syscall id as a syscall name\n"
5113         "\t            .log2       display log2 value rather than raw number\n"
5114         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5115         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5116         "\t    trigger or to start a hist trigger but not log any events\n"
5117         "\t    until told to do so.  'continue' can be used to start or\n"
5118         "\t    restart a paused hist trigger.\n\n"
5119         "\t    The 'clear' parameter will clear the contents of a running\n"
5120         "\t    hist trigger and leave its current paused/active state\n"
5121         "\t    unchanged.\n\n"
5122         "\t    The enable_hist and disable_hist triggers can be used to\n"
5123         "\t    have one event conditionally start and stop another event's\n"
5124         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5125         "\t    the enable_event and disable_event triggers.\n\n"
5126         "\t    Hist trigger handlers and actions are executed whenever\n"
5127         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5128         "\t        <handler>.<action>\n\n"
5129         "\t    The available handlers are:\n\n"
5130         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5131         "\t        onmax(var)               - invoke if var exceeds current max\n"
5132         "\t        onchange(var)            - invoke action if var changes\n\n"
5133         "\t    The available actions are:\n\n"
5134         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5135         "\t        save(field,...)                      - save current event fields\n"
5136 #ifdef CONFIG_TRACER_SNAPSHOT
5137         "\t        snapshot()                           - snapshot the trace buffer\n"
5138 #endif
5139 #endif
5140 ;
5141
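/*
 * Serve the static mini-HOWTO above; tracing_readme_fops typically backs
 * the tracefs "README" file.
 */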
5142 static ssize_t
5143 tracing_readme_read(struct file *filp, char __user *ubuf,
5144                        size_t cnt, loff_t *ppos)
5145 {
5146         return simple_read_from_buffer(ubuf, cnt, ppos,
5147                                         readme_msg, strlen(readme_msg));
5148 }
5149
5150 static const struct file_operations tracing_readme_fops = {
5151         .open           = tracing_open_generic,
5152         .read           = tracing_readme_read,
5153         .llseek         = generic_file_llseek,
5154 };
5155
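/*
 * seq_file iterator for the "saved_tgids" file: walk the pid-indexed
 * tgid_map (allocated when the record-tgid option is first enabled) and
 * emit one "<pid> <tgid>" pair per recorded entry.
 */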
5156 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5157 {
5158         int *ptr = v;
5159
5160         if (*pos || m->count)
5161                 ptr++;
5162
5163         (*pos)++;
5164
5165         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5166                 if (trace_find_tgid(*ptr))
5167                         return ptr;
5168         }
5169
5170         return NULL;
5171 }
5172
5173 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5174 {
5175         void *v;
5176         loff_t l = 0;
5177
5178         if (!tgid_map)
5179                 return NULL;
5180
5181         v = &tgid_map[0];
5182         while (l <= *pos) {
5183                 v = saved_tgids_next(m, v, &l);
5184                 if (!v)
5185                         return NULL;
5186         }
5187
5188         return v;
5189 }
5190
5191 static void saved_tgids_stop(struct seq_file *m, void *v)
5192 {
5193 }
5194
5195 static int saved_tgids_show(struct seq_file *m, void *v)
5196 {
5197         int pid = (int *)v - tgid_map;
5198
5199         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5200         return 0;
5201 }
5202
5203 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5204         .start          = saved_tgids_start,
5205         .stop           = saved_tgids_stop,
5206         .next           = saved_tgids_next,
5207         .show           = saved_tgids_show,
5208 };
5209
5210 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5211 {
5212         int ret;
5213
5214         ret = tracing_check_open_get_tr(NULL);
5215         if (ret)
5216                 return ret;
5217
5218         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5219 }
5220
5221
5222 static const struct file_operations tracing_saved_tgids_fops = {
5223         .open           = tracing_saved_tgids_open,
5224         .read           = seq_read,
5225         .llseek         = seq_lseek,
5226         .release        = seq_release,
5227 };
5228
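/*
 * seq_file iterator for the "saved_cmdlines" file: walk the
 * savedcmd->map_cmdline_to_pid array and print "<pid> <comm>" for every
 * slot that holds a recorded pid.
 */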
5229 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5230 {
5231         unsigned int *ptr = v;
5232
5233         if (*pos || m->count)
5234                 ptr++;
5235
5236         (*pos)++;
5237
5238         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5239              ptr++) {
5240                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5241                         continue;
5242
5243                 return ptr;
5244         }
5245
5246         return NULL;
5247 }
5248
5249 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5250 {
5251         void *v;
5252         loff_t l = 0;
5253
5254         preempt_disable();
5255         arch_spin_lock(&trace_cmdline_lock);
5256
5257         v = &savedcmd->map_cmdline_to_pid[0];
5258         while (l <= *pos) {
5259                 v = saved_cmdlines_next(m, v, &l);
5260                 if (!v)
5261                         return NULL;
5262         }
5263
5264         return v;
5265 }
5266
5267 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5268 {
5269         arch_spin_unlock(&trace_cmdline_lock);
5270         preempt_enable();
5271 }
5272
5273 static int saved_cmdlines_show(struct seq_file *m, void *v)
5274 {
5275         char buf[TASK_COMM_LEN];
5276         unsigned int *pid = v;
5277
5278         __trace_find_cmdline(*pid, buf);
5279         seq_printf(m, "%d %s\n", *pid, buf);
5280         return 0;
5281 }
5282
5283 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5284         .start          = saved_cmdlines_start,
5285         .next           = saved_cmdlines_next,
5286         .stop           = saved_cmdlines_stop,
5287         .show           = saved_cmdlines_show,
5288 };
5289
5290 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5291 {
5292         int ret;
5293
5294         ret = tracing_check_open_get_tr(NULL);
5295         if (ret)
5296                 return ret;
5297
5298         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5299 }
5300
5301 static const struct file_operations tracing_saved_cmdlines_fops = {
5302         .open           = tracing_saved_cmdlines_open,
5303         .read           = seq_read,
5304         .llseek         = seq_lseek,
5305         .release        = seq_release,
5306 };
5307
5308 static ssize_t
5309 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5310                                  size_t cnt, loff_t *ppos)
5311 {
5312         char buf[64];
5313         int r;
5314
5315         arch_spin_lock(&trace_cmdline_lock);
5316         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5317         arch_spin_unlock(&trace_cmdline_lock);
5318
5319         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5320 }
5321
5322 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5323 {
5324         kfree(s->saved_cmdlines);
5325         kfree(s->map_cmdline_to_pid);
5326         kfree(s);
5327 }
5328
5329 static int tracing_resize_saved_cmdlines(unsigned int val)
5330 {
5331         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5332
5333         s = kmalloc(sizeof(*s), GFP_KERNEL);
5334         if (!s)
5335                 return -ENOMEM;
5336
5337         if (allocate_cmdlines_buffer(val, s) < 0) {
5338                 kfree(s);
5339                 return -ENOMEM;
5340         }
5341
5342         arch_spin_lock(&trace_cmdline_lock);
5343         savedcmd_temp = savedcmd;
5344         savedcmd = s;
5345         arch_spin_unlock(&trace_cmdline_lock);
5346         free_saved_cmdlines_buffer(savedcmd_temp);
5347
5348         return 0;
5349 }
5350
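/*
 * Write handler for "saved_cmdlines_size".  For example,
 * "echo 4096 > saved_cmdlines_size" replaces the comm cache with one that
 * holds 4096 entries; the old buffer is swapped out under
 * trace_cmdline_lock and then freed.
 */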
5351 static ssize_t
5352 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5353                                   size_t cnt, loff_t *ppos)
5354 {
5355         unsigned long val;
5356         int ret;
5357
5358         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5359         if (ret)
5360                 return ret;
5361
5362         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5363         if (!val || val > PID_MAX_DEFAULT)
5364                 return -EINVAL;
5365
5366         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5367         if (ret < 0)
5368                 return ret;
5369
5370         *ppos += cnt;
5371
5372         return cnt;
5373 }
5374
5375 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5376         .open           = tracing_open_generic,
5377         .read           = tracing_saved_cmdlines_size_read,
5378         .write          = tracing_saved_cmdlines_size_write,
5379 };
5380
5381 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
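/*
 * The eval_map seq_file iterators below walk the chunks chained off
 * trace_eval_maps.  A zeroed (tail) item ends a chunk; following its
 * tail.next pointer and stepping past the next chunk's head item lands
 * on the next real map entry.
 */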
5382 static union trace_eval_map_item *
5383 update_eval_map(union trace_eval_map_item *ptr)
5384 {
5385         if (!ptr->map.eval_string) {
5386                 if (ptr->tail.next) {
5387                         ptr = ptr->tail.next;
5388                         /* Set ptr to the next real item (skip head) */
5389                         ptr++;
5390                 } else
5391                         return NULL;
5392         }
5393         return ptr;
5394 }
5395
5396 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5397 {
5398         union trace_eval_map_item *ptr = v;
5399
5400         /*
5401          * Paranoid! If ptr points to end, we don't want to increment past it.
5402          * This really should never happen.
5403          */
5404         (*pos)++;
5405         ptr = update_eval_map(ptr);
5406         if (WARN_ON_ONCE(!ptr))
5407                 return NULL;
5408
5409         ptr++;
5410         ptr = update_eval_map(ptr);
5411
5412         return ptr;
5413 }
5414
5415 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5416 {
5417         union trace_eval_map_item *v;
5418         loff_t l = 0;
5419
5420         mutex_lock(&trace_eval_mutex);
5421
5422         v = trace_eval_maps;
5423         if (v)
5424                 v++;
5425
5426         while (v && l < *pos) {
5427                 v = eval_map_next(m, v, &l);
5428         }
5429
5430         return v;
5431 }
5432
5433 static void eval_map_stop(struct seq_file *m, void *v)
5434 {
5435         mutex_unlock(&trace_eval_mutex);
5436 }
5437
5438 static int eval_map_show(struct seq_file *m, void *v)
5439 {
5440         union trace_eval_map_item *ptr = v;
5441
5442         seq_printf(m, "%s %ld (%s)\n",
5443                    ptr->map.eval_string, ptr->map.eval_value,
5444                    ptr->map.system);
5445
5446         return 0;
5447 }
5448
5449 static const struct seq_operations tracing_eval_map_seq_ops = {
5450         .start          = eval_map_start,
5451         .next           = eval_map_next,
5452         .stop           = eval_map_stop,
5453         .show           = eval_map_show,
5454 };
5455
5456 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5457 {
5458         int ret;
5459
5460         ret = tracing_check_open_get_tr(NULL);
5461         if (ret)
5462                 return ret;
5463
5464         return seq_open(filp, &tracing_eval_map_seq_ops);
5465 }
5466
5467 static const struct file_operations tracing_eval_map_fops = {
5468         .open           = tracing_eval_map_open,
5469         .read           = seq_read,
5470         .llseek         = seq_lseek,
5471         .release        = seq_release,
5472 };
5473
5474 static inline union trace_eval_map_item *
5475 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5476 {
5477         /* Return tail of array given the head */
5478         return ptr + ptr->head.length + 1;
5479 }
5480
5481 static void
5482 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5483                            int len)
5484 {
5485         struct trace_eval_map **stop;
5486         struct trace_eval_map **map;
5487         union trace_eval_map_item *map_array;
5488         union trace_eval_map_item *ptr;
5489
5490         stop = start + len;
5491
5492         /*
5493          * The trace_eval_maps contains the map plus a head and tail item,
5494          * where the head holds the module and length of array, and the
5495          * tail holds a pointer to the next list.
5496          */
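        /*
         * Roughly, one allocated chunk looks like this:
         *
         *   map_array[0]          head { .mod = mod, .length = len }
         *   map_array[1 .. len]   a copy of each trace_eval_map in *start
         *   map_array[len + 1]    tail { .next = <next chunk or NULL> }
         *
         * trace_eval_jmp_to_tail() uses head.length to hop from a head
         * straight to its tail item.
         */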
5497         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5498         if (!map_array) {
5499                 pr_warn("Unable to allocate trace eval mapping\n");
5500                 return;
5501         }
5502
5503         mutex_lock(&trace_eval_mutex);
5504
5505         if (!trace_eval_maps)
5506                 trace_eval_maps = map_array;
5507         else {
5508                 ptr = trace_eval_maps;
5509                 for (;;) {
5510                         ptr = trace_eval_jmp_to_tail(ptr);
5511                         if (!ptr->tail.next)
5512                                 break;
5513                         ptr = ptr->tail.next;
5514
5515                 }
5516                 ptr->tail.next = map_array;
5517         }
5518         map_array->head.mod = mod;
5519         map_array->head.length = len;
5520         map_array++;
5521
5522         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5523                 map_array->map = **map;
5524                 map_array++;
5525         }
5526         memset(map_array, 0, sizeof(*map_array));
5527
5528         mutex_unlock(&trace_eval_mutex);
5529 }
5530
5531 static void trace_create_eval_file(struct dentry *d_tracer)
5532 {
5533         trace_create_file("eval_map", 0444, d_tracer,
5534                           NULL, &tracing_eval_map_fops);
5535 }
5536
5537 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5538 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5539 static inline void trace_insert_eval_map_file(struct module *mod,
5540                               struct trace_eval_map **start, int len) { }
5541 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5542
5543 static void trace_insert_eval_map(struct module *mod,
5544                                   struct trace_eval_map **start, int len)
5545 {
5546         struct trace_eval_map **map;
5547
5548         if (len <= 0)
5549                 return;
5550
5551         map = start;
5552
5553         trace_event_eval_update(map, len);
5554
5555         trace_insert_eval_map_file(mod, start, len);
5556 }
5557
5558 static ssize_t
5559 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5560                        size_t cnt, loff_t *ppos)
5561 {
5562         struct trace_array *tr = filp->private_data;
5563         char buf[MAX_TRACER_SIZE+2];
5564         int r;
5565
5566         mutex_lock(&trace_types_lock);
5567         r = sprintf(buf, "%s\n", tr->current_trace->name);
5568         mutex_unlock(&trace_types_lock);
5569
5570         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5571 }
5572
5573 int tracer_init(struct tracer *t, struct trace_array *tr)
5574 {
5575         tracing_reset_online_cpus(&tr->array_buffer);
5576         return t->init(tr);
5577 }
5578
5579 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5580 {
5581         int cpu;
5582
5583         for_each_tracing_cpu(cpu)
5584                 per_cpu_ptr(buf->data, cpu)->entries = val;
5585 }
5586
5587 #ifdef CONFIG_TRACER_MAX_TRACE
5588 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5589 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5590                                         struct array_buffer *size_buf, int cpu_id)
5591 {
5592         int cpu, ret = 0;
5593
5594         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5595                 for_each_tracing_cpu(cpu) {
5596                         ret = ring_buffer_resize(trace_buf->buffer,
5597                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5598                         if (ret < 0)
5599                                 break;
5600                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5601                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5602                 }
5603         } else {
5604                 ret = ring_buffer_resize(trace_buf->buffer,
5605                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5606                 if (ret == 0)
5607                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5608                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5609         }
5610
5611         return ret;
5612 }
5613 #endif /* CONFIG_TRACER_MAX_TRACE */
5614
5615 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5616                                         unsigned long size, int cpu)
5617 {
5618         int ret;
5619
5620         /*
5621          * If kernel or user changes the size of the ring buffer
5622          * we use the size that was given, and we can forget about
5623          * expanding it later.
5624          */
5625         ring_buffer_expanded = true;
5626
5627         /* May be called before buffers are initialized */
5628         if (!tr->array_buffer.buffer)
5629                 return 0;
5630
5631         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5632         if (ret < 0)
5633                 return ret;
5634
5635 #ifdef CONFIG_TRACER_MAX_TRACE
5636         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5637             !tr->current_trace->use_max_tr)
5638                 goto out;
5639
5640         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5641         if (ret < 0) {
5642                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5643                                                      &tr->array_buffer, cpu);
5644                 if (r < 0) {
5645                         /*
5646                          * We are left with a max buffer of a
5647                          * different size than the main buffer.
5648                          * The max buffer is our "snapshot" buffer.
5649                          * When a tracer needs a snapshot (one of the
5650                          * latency tracers), it swaps the max buffer
5651                          * with the saved snapshot. We succeeded in
5652                          * updating the size of the main buffer, but
5653                          * failed to update the size of the max buffer.
5654                          * When we then tried to reset the main buffer
5655                          * to its original size, we failed there too.
5656                          * This is very unlikely to happen, but if it
5657                          * does, warn and kill all tracing.
5658                          */
5659                         WARN_ON(1);
5660                         tracing_disabled = 1;
5661                 }
5662                 return ret;
5663         }
5664
5665         if (cpu == RING_BUFFER_ALL_CPUS)
5666                 set_buffer_entries(&tr->max_buffer, size);
5667         else
5668                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5669
5670  out:
5671 #endif /* CONFIG_TRACER_MAX_TRACE */
5672
5673         if (cpu == RING_BUFFER_ALL_CPUS)
5674                 set_buffer_entries(&tr->array_buffer, size);
5675         else
5676                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5677
5678         return ret;
5679 }
5680
5681 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5682                                   unsigned long size, int cpu_id)
5683 {
5684         int ret = size;
5685
5686         mutex_lock(&trace_types_lock);
5687
5688         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5689                 /* make sure, this cpu is enabled in the mask */
5690                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5691                         ret = -EINVAL;
5692                         goto out;
5693                 }
5694         }
5695
5696         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5697         if (ret < 0)
5698                 ret = -ENOMEM;
5699
5700 out:
5701         mutex_unlock(&trace_types_lock);
5702
5703         return ret;
5704 }
5705
5706
5707 /**
5708  * tracing_update_buffers - used by tracing facility to expand ring buffers
5709  *
5710  * To save memory when tracing is never used on a system that has it
5711  * configured in, the ring buffers are set to a minimum size. But once
5712  * a user starts to use the tracing facility, they need to grow
5713  * to their default size.
5714  *
5715  * This function is to be called when a tracer is about to be used.
5716  */
5717 int tracing_update_buffers(void)
5718 {
5719         int ret = 0;
5720
5721         mutex_lock(&trace_types_lock);
5722         if (!ring_buffer_expanded)
5723                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5724                                                 RING_BUFFER_ALL_CPUS);
5725         mutex_unlock(&trace_types_lock);
5726
5727         return ret;
5728 }
5729
5730 struct trace_option_dentry;
5731
5732 static void
5733 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5734
5735 /*
5736  * Used to clear out the tracer before deletion of an instance.
5737  * Must have trace_types_lock held.
5738  */
5739 static void tracing_set_nop(struct trace_array *tr)
5740 {
5741         if (tr->current_trace == &nop_trace)
5742                 return;
5743
5744         tr->current_trace->enabled--;
5745
5746         if (tr->current_trace->reset)
5747                 tr->current_trace->reset(tr);
5748
5749         tr->current_trace = &nop_trace;
5750 }
5751
5752 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5753 {
5754         /* Only enable if the directory has been created already. */
5755         if (!tr->dir)
5756                 return;
5757
5758         create_trace_option_files(tr, t);
5759 }
5760
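/*
 * Switch @tr to the tracer named @buf.  This is what a write to the
 * "current_tracer" file ends up calling, e.g. "echo function > current_tracer".
 * The old tracer is reset and replaced by nop_trace first, and the
 * max/snapshot buffer is allocated or freed to match the new tracer's
 * use_max_tr setting.
 */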
5761 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5762 {
5763         struct tracer *t;
5764 #ifdef CONFIG_TRACER_MAX_TRACE
5765         bool had_max_tr;
5766 #endif
5767         int ret = 0;
5768
5769         mutex_lock(&trace_types_lock);
5770
5771         if (!ring_buffer_expanded) {
5772                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5773                                                 RING_BUFFER_ALL_CPUS);
5774                 if (ret < 0)
5775                         goto out;
5776                 ret = 0;
5777         }
5778
5779         for (t = trace_types; t; t = t->next) {
5780                 if (strcmp(t->name, buf) == 0)
5781                         break;
5782         }
5783         if (!t) {
5784                 ret = -EINVAL;
5785                 goto out;
5786         }
5787         if (t == tr->current_trace)
5788                 goto out;
5789
5790 #ifdef CONFIG_TRACER_SNAPSHOT
5791         if (t->use_max_tr) {
5792                 arch_spin_lock(&tr->max_lock);
5793                 if (tr->cond_snapshot)
5794                         ret = -EBUSY;
5795                 arch_spin_unlock(&tr->max_lock);
5796                 if (ret)
5797                         goto out;
5798         }
5799 #endif
5800         /* Some tracers won't work when set on the kernel command line */
5801         if (system_state < SYSTEM_RUNNING && t->noboot) {
5802                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5803                         t->name);
5804                 goto out;
5805         }
5806
5807         /* Some tracers are only allowed for the top level buffer */
5808         if (!trace_ok_for_array(t, tr)) {
5809                 ret = -EINVAL;
5810                 goto out;
5811         }
5812
5813         /* If trace pipe files are being read, we can't change the tracer */
5814         if (tr->current_trace->ref) {
5815                 ret = -EBUSY;
5816                 goto out;
5817         }
5818
5819         trace_branch_disable();
5820
5821         tr->current_trace->enabled--;
5822
5823         if (tr->current_trace->reset)
5824                 tr->current_trace->reset(tr);
5825
5826         /* Current trace needs to be nop_trace before synchronize_rcu */
5827         tr->current_trace = &nop_trace;
5828
5829 #ifdef CONFIG_TRACER_MAX_TRACE
5830         had_max_tr = tr->allocated_snapshot;
5831
5832         if (had_max_tr && !t->use_max_tr) {
5833                 /*
5834                  * We need to make sure that the update_max_tr sees that
5835                  * current_trace changed to nop_trace to keep it from
5836                  * swapping the buffers after we resize it.
5837                  * update_max_tr() is called with interrupts disabled,
5838                  * so a synchronize_rcu() is sufficient.
5839                  */
5840                 synchronize_rcu();
5841                 free_snapshot(tr);
5842         }
5843 #endif
5844
5845 #ifdef CONFIG_TRACER_MAX_TRACE
5846         if (t->use_max_tr && !had_max_tr) {
5847                 ret = tracing_alloc_snapshot_instance(tr);
5848                 if (ret < 0)
5849                         goto out;
5850         }
5851 #endif
5852
5853         if (t->init) {
5854                 ret = tracer_init(t, tr);
5855                 if (ret)
5856                         goto out;
5857         }
5858
5859         tr->current_trace = t;
5860         tr->current_trace->enabled++;
5861         trace_branch_enable(tr);
5862  out:
5863         mutex_unlock(&trace_types_lock);
5864
5865         return ret;
5866 }
5867
5868 static ssize_t
5869 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5870                         size_t cnt, loff_t *ppos)
5871 {
5872         struct trace_array *tr = filp->private_data;
5873         char buf[MAX_TRACER_SIZE+1];
5874         int i;
5875         size_t ret;
5876         int err;
5877
5878         ret = cnt;
5879
5880         if (cnt > MAX_TRACER_SIZE)
5881                 cnt = MAX_TRACER_SIZE;
5882
5883         if (copy_from_user(buf, ubuf, cnt))
5884                 return -EFAULT;
5885
5886         buf[cnt] = 0;
5887
5888         /* strip ending whitespace. */
5889         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5890                 buf[i] = 0;
5891
5892         err = tracing_set_tracer(tr, buf);
5893         if (err)
5894                 return err;
5895
5896         *ppos += ret;
5897
5898         return ret;
5899 }
5900
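/*
 * Helpers for the latency files (tracing_thresh, tracing_max_latency):
 * values are stored in nanoseconds but read and written as microseconds,
 * and a stored value of (unsigned long)-1 is shown as "-1".
 */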
5901 static ssize_t
5902 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5903                    size_t cnt, loff_t *ppos)
5904 {
5905         char buf[64];
5906         int r;
5907
5908         r = snprintf(buf, sizeof(buf), "%ld\n",
5909                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5910         if (r > sizeof(buf))
5911                 r = sizeof(buf);
5912         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5913 }
5914
5915 static ssize_t
5916 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5917                     size_t cnt, loff_t *ppos)
5918 {
5919         unsigned long val;
5920         int ret;
5921
5922         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5923         if (ret)
5924                 return ret;
5925
5926         *ptr = val * 1000;
5927
5928         return cnt;
5929 }
5930
5931 static ssize_t
5932 tracing_thresh_read(struct file *filp, char __user *ubuf,
5933                     size_t cnt, loff_t *ppos)
5934 {
5935         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5936 }
5937
5938 static ssize_t
5939 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5940                      size_t cnt, loff_t *ppos)
5941 {
5942         struct trace_array *tr = filp->private_data;
5943         int ret;
5944
5945         mutex_lock(&trace_types_lock);
5946         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5947         if (ret < 0)
5948                 goto out;
5949
5950         if (tr->current_trace->update_thresh) {
5951                 ret = tr->current_trace->update_thresh(tr);
5952                 if (ret < 0)
5953                         goto out;
5954         }
5955
5956         ret = cnt;
5957 out:
5958         mutex_unlock(&trace_types_lock);
5959
5960         return ret;
5961 }
5962
5963 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5964
5965 static ssize_t
5966 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5967                      size_t cnt, loff_t *ppos)
5968 {
5969         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5970 }
5971
5972 static ssize_t
5973 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5974                       size_t cnt, loff_t *ppos)
5975 {
5976         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5977 }
5978
5979 #endif
5980
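/*
 * Open handler for "trace_pipe".  Allocates a private trace_iterator,
 * marks every CPU as already "started" so the pipe does not show the
 * start of the buffer, and takes a reference on the current tracer so
 * it cannot be switched while the pipe is open.
 */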
5981 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5982 {
5983         struct trace_array *tr = inode->i_private;
5984         struct trace_iterator *iter;
5985         int ret;
5986
5987         ret = tracing_check_open_get_tr(tr);
5988         if (ret)
5989                 return ret;
5990
5991         mutex_lock(&trace_types_lock);
5992
5993         /* create a buffer to store the information to pass to userspace */
5994         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5995         if (!iter) {
5996                 ret = -ENOMEM;
5997                 __trace_array_put(tr);
5998                 goto out;
5999         }
6000
6001         trace_seq_init(&iter->seq);
6002         iter->trace = tr->current_trace;
6003
6004         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6005                 ret = -ENOMEM;
6006                 goto fail;
6007         }
6008
6009         /* trace pipe does not show start of buffer */
6010         cpumask_setall(iter->started);
6011
6012         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6013                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6014
6015         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6016         if (trace_clocks[tr->clock_id].in_ns)
6017                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6018
6019         iter->tr = tr;
6020         iter->array_buffer = &tr->array_buffer;
6021         iter->cpu_file = tracing_get_cpu(inode);
6022         mutex_init(&iter->mutex);
6023         filp->private_data = iter;
6024
6025         if (iter->trace->pipe_open)
6026                 iter->trace->pipe_open(iter);
6027
6028         nonseekable_open(inode, filp);
6029
6030         tr->current_trace->ref++;
6031 out:
6032         mutex_unlock(&trace_types_lock);
6033         return ret;
6034
6035 fail:
6036         kfree(iter);
6037         __trace_array_put(tr);
6038         mutex_unlock(&trace_types_lock);
6039         return ret;
6040 }
6041
6042 static int tracing_release_pipe(struct inode *inode, struct file *file)
6043 {
6044         struct trace_iterator *iter = file->private_data;
6045         struct trace_array *tr = inode->i_private;
6046
6047         mutex_lock(&trace_types_lock);
6048
6049         tr->current_trace->ref--;
6050
6051         if (iter->trace->pipe_close)
6052                 iter->trace->pipe_close(iter);
6053
6054         mutex_unlock(&trace_types_lock);
6055
6056         free_cpumask_var(iter->started);
6057         mutex_destroy(&iter->mutex);
6058         kfree(iter);
6059
6060         trace_array_put(tr);
6061
6062         return 0;
6063 }
6064
6065 static __poll_t
6066 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6067 {
6068         struct trace_array *tr = iter->tr;
6069
6070         /* Iterators are static, they should be filled or empty */
6071         if (trace_buffer_iter(iter, iter->cpu_file))
6072                 return EPOLLIN | EPOLLRDNORM;
6073
6074         if (tr->trace_flags & TRACE_ITER_BLOCK)
6075                 /*
6076                  * Always select as readable when in blocking mode
6077                  */
6078                 return EPOLLIN | EPOLLRDNORM;
6079         else
6080                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6081                                              filp, poll_table);
6082 }
6083
6084 static __poll_t
6085 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6086 {
6087         struct trace_iterator *iter = filp->private_data;
6088
6089         return trace_poll(iter, filp, poll_table);
6090 }
6091
6092 /* Must be called with iter->mutex held. */
6093 static int tracing_wait_pipe(struct file *filp)
6094 {
6095         struct trace_iterator *iter = filp->private_data;
6096         int ret;
6097
6098         while (trace_empty(iter)) {
6099
6100                 if (filp->f_flags & O_NONBLOCK)
6101                         return -EAGAIN;
6103
6104                 /*
6105                  * We block until we read something and tracing is disabled.
6106                  * We still block if tracing is disabled, but we have never
6107                  * read anything. This allows a user to cat this file, and
6108                  * then enable tracing. But after we have read something,
6109                  * we give an EOF when tracing is again disabled.
6110                  *
6111                  * iter->pos will be 0 if we haven't read anything.
6112                  */
6113                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6114                         break;
6115
6116                 mutex_unlock(&iter->mutex);
6117
6118                 ret = wait_on_pipe(iter, 0);
6119
6120                 mutex_lock(&iter->mutex);
6121
6122                 if (ret)
6123                         return ret;
6124         }
6125
6126         return 1;
6127 }
6128
6129 /*
6130  * Consumer reader.
6131  */
6132 static ssize_t
6133 tracing_read_pipe(struct file *filp, char __user *ubuf,
6134                   size_t cnt, loff_t *ppos)
6135 {
6136         struct trace_iterator *iter = filp->private_data;
6137         ssize_t sret;
6138
6139         /*
6140          * Avoid more than one consumer on a single file descriptor
6141          * This is just a matter of trace coherency; the ring buffer itself
6142          * is protected.
6143          */
6144         mutex_lock(&iter->mutex);
6145
6146         /* return any leftover data */
6147         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6148         if (sret != -EBUSY)
6149                 goto out;
6150
6151         trace_seq_init(&iter->seq);
6152
6153         if (iter->trace->read) {
6154                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6155                 if (sret)
6156                         goto out;
6157         }
6158
6159 waitagain:
6160         sret = tracing_wait_pipe(filp);
6161         if (sret <= 0)
6162                 goto out;
6163
6164         /* stop when tracing is finished */
6165         if (trace_empty(iter)) {
6166                 sret = 0;
6167                 goto out;
6168         }
6169
6170         if (cnt >= PAGE_SIZE)
6171                 cnt = PAGE_SIZE - 1;
6172
6173         /* reset all but tr, trace, and overruns */
6174         memset(&iter->seq, 0,
6175                sizeof(struct trace_iterator) -
6176                offsetof(struct trace_iterator, seq));
6177         cpumask_clear(iter->started);
6178         trace_seq_init(&iter->seq);
6179         iter->pos = -1;
6180
6181         trace_event_read_lock();
6182         trace_access_lock(iter->cpu_file);
6183         while (trace_find_next_entry_inc(iter) != NULL) {
6184                 enum print_line_t ret;
6185                 int save_len = iter->seq.seq.len;
6186
6187                 ret = print_trace_line(iter);
6188                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6189                         /* don't print partial lines */
6190                         iter->seq.seq.len = save_len;
6191                         break;
6192                 }
6193                 if (ret != TRACE_TYPE_NO_CONSUME)
6194                         trace_consume(iter);
6195
6196                 if (trace_seq_used(&iter->seq) >= cnt)
6197                         break;
6198
6199                 /*
6200                  * Setting the full flag means we reached the trace_seq buffer
6201                  * size and we should have left via the partial output condition above.
6202                  * One of the trace_seq_* functions is not used properly.
6203                  */
6204                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6205                           iter->ent->type);
6206         }
6207         trace_access_unlock(iter->cpu_file);
6208         trace_event_read_unlock();
6209
6210         /* Now copy what we have to the user */
6211         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6212         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6213                 trace_seq_init(&iter->seq);
6214
6215         /*
6216          * If there was nothing to send to user, in spite of consuming trace
6217          * entries, go back to wait for more entries.
6218          */
6219         if (sret == -EBUSY)
6220                 goto waitagain;
6221
6222 out:
6223         mutex_unlock(&iter->mutex);
6224
6225         return sret;
6226 }
6227
6228 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6229                                      unsigned int idx)
6230 {
6231         __free_page(spd->pages[idx]);
6232 }
6233
6234 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6235         .confirm                = generic_pipe_buf_confirm,
6236         .release                = generic_pipe_buf_release,
6237         .steal                  = generic_pipe_buf_steal,
6238         .get                    = generic_pipe_buf_get,
6239 };
6240
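/*
 * Format trace entries into iter->seq for one splice page.  Stops when
 * the page-sized seq buffer fills, when the remaining byte budget @rem
 * is exhausted, or when the buffer runs out of entries; returns the
 * budget that is still left.
 */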
6241 static size_t
6242 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6243 {
6244         size_t count;
6245         int save_len;
6246         int ret;
6247
6248         /* Seq buffer is page-sized, exactly what we need. */
6249         for (;;) {
6250                 save_len = iter->seq.seq.len;
6251                 ret = print_trace_line(iter);
6252
6253                 if (trace_seq_has_overflowed(&iter->seq)) {
6254                         iter->seq.seq.len = save_len;
6255                         break;
6256                 }
6257
6258                 /*
6259                  * This should not be hit, because it should only
6260                  * be set if the iter->seq overflowed. But check it
6261                  * anyway to be safe.
6262                  */
6263                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6264                         iter->seq.seq.len = save_len;
6265                         break;
6266                 }
6267
6268                 count = trace_seq_used(&iter->seq) - save_len;
6269                 if (rem < count) {
6270                         rem = 0;
6271                         iter->seq.seq.len = save_len;
6272                         break;
6273                 }
6274
6275                 if (ret != TRACE_TYPE_NO_CONSUME)
6276                         trace_consume(iter);
6277                 rem -= count;
6278                 if (!trace_find_next_entry_inc(iter))   {
6279                         rem = 0;
6280                         iter->ent = NULL;
6281                         break;
6282                 }
6283         }
6284
6285         return rem;
6286 }
6287
6288 static ssize_t tracing_splice_read_pipe(struct file *filp,
6289                                         loff_t *ppos,
6290                                         struct pipe_inode_info *pipe,
6291                                         size_t len,
6292                                         unsigned int flags)
6293 {
6294         struct page *pages_def[PIPE_DEF_BUFFERS];
6295         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6296         struct trace_iterator *iter = filp->private_data;
6297         struct splice_pipe_desc spd = {
6298                 .pages          = pages_def,
6299                 .partial        = partial_def,
6300                 .nr_pages       = 0, /* This gets updated below. */
6301                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6302                 .ops            = &tracing_pipe_buf_ops,
6303                 .spd_release    = tracing_spd_release_pipe,
6304         };
6305         ssize_t ret;
6306         size_t rem;
6307         unsigned int i;
6308
6309         if (splice_grow_spd(pipe, &spd))
6310                 return -ENOMEM;
6311
6312         mutex_lock(&iter->mutex);
6313
6314         if (iter->trace->splice_read) {
6315                 ret = iter->trace->splice_read(iter, filp,
6316                                                ppos, pipe, len, flags);
6317                 if (ret)
6318                         goto out_err;
6319         }
6320
6321         ret = tracing_wait_pipe(filp);
6322         if (ret <= 0)
6323                 goto out_err;
6324
6325         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6326                 ret = -EFAULT;
6327                 goto out_err;
6328         }
6329
6330         trace_event_read_lock();
6331         trace_access_lock(iter->cpu_file);
6332
6333         /* Fill as many pages as possible. */
6334         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6335                 spd.pages[i] = alloc_page(GFP_KERNEL);
6336                 if (!spd.pages[i])
6337                         break;
6338
6339                 rem = tracing_fill_pipe_page(rem, iter);
6340
6341                 /* Copy the data into the page, so we can start over. */
6342                 ret = trace_seq_to_buffer(&iter->seq,
6343                                           page_address(spd.pages[i]),
6344                                           trace_seq_used(&iter->seq));
6345                 if (ret < 0) {
6346                         __free_page(spd.pages[i]);
6347                         break;
6348                 }
6349                 spd.partial[i].offset = 0;
6350                 spd.partial[i].len = trace_seq_used(&iter->seq);
6351
6352                 trace_seq_init(&iter->seq);
6353         }
6354
6355         trace_access_unlock(iter->cpu_file);
6356         trace_event_read_unlock();
6357         mutex_unlock(&iter->mutex);
6358
6359         spd.nr_pages = i;
6360
6361         if (i)
6362                 ret = splice_to_pipe(pipe, &spd);
6363         else
6364                 ret = 0;
6365 out:
6366         splice_shrink_spd(&spd);
6367         return ret;
6368
6369 out_err:
6370         mutex_unlock(&iter->mutex);
6371         goto out;
6372 }
6373
6374 static ssize_t
6375 tracing_entries_read(struct file *filp, char __user *ubuf,
6376                      size_t cnt, loff_t *ppos)
6377 {
6378         struct inode *inode = file_inode(filp);
6379         struct trace_array *tr = inode->i_private;
6380         int cpu = tracing_get_cpu(inode);
6381         char buf[64];
6382         int r = 0;
6383         ssize_t ret;
6384
6385         mutex_lock(&trace_types_lock);
6386
6387         if (cpu == RING_BUFFER_ALL_CPUS) {
6388                 int cpu, buf_size_same;
6389                 unsigned long size;
6390
6391                 size = 0;
6392                 buf_size_same = 1;
6393                 /* check if all cpu sizes are the same */
6394                 for_each_tracing_cpu(cpu) {
6395                         /* fill in the size from first enabled cpu */
6396                         if (size == 0)
6397                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6398                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6399                                 buf_size_same = 0;
6400                                 break;
6401                         }
6402                 }
6403
6404                 if (buf_size_same) {
6405                         if (!ring_buffer_expanded)
6406                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6407                                             size >> 10,
6408                                             trace_buf_size >> 10);
6409                         else
6410                                 r = sprintf(buf, "%lu\n", size >> 10);
6411                 } else
6412                         r = sprintf(buf, "X\n");
6413         } else
6414                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6415
6416         mutex_unlock(&trace_types_lock);
6417
6418         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6419         return ret;
6420 }
6421
6422 static ssize_t
6423 tracing_entries_write(struct file *filp, const char __user *ubuf,
6424                       size_t cnt, loff_t *ppos)
6425 {
6426         struct inode *inode = file_inode(filp);
6427         struct trace_array *tr = inode->i_private;
6428         unsigned long val;
6429         int ret;
6430
6431         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6432         if (ret)
6433                 return ret;
6434
6435         /* must have at least 1 entry */
6436         if (!val)
6437                 return -EINVAL;
6438
6439         /* value is in KB */
6440         val <<= 10;
6441         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6442         if (ret < 0)
6443                 return ret;
6444
6445         *ppos += cnt;
6446
6447         return cnt;
6448 }
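/*
 * Example usage of the buffer_size_kb files backed by
 * tracing_entries_read()/tracing_entries_write() above, assuming tracefs
 * is mounted at /sys/kernel/tracing (the mount point is an assumption):
 *
 *	# show the current per-CPU ring buffer size in KB
 *	cat /sys/kernel/tracing/buffer_size_kb
 *	# resize each per-CPU buffer to 4096 KB (the value is taken in KB)
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * The per_cpu/cpuN/buffer_size_kb files use the same handlers to resize
 * just one CPU's buffer.
 */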
6449
6450 static ssize_t
6451 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6452                                 size_t cnt, loff_t *ppos)
6453 {
6454         struct trace_array *tr = filp->private_data;
6455         char buf[64];
6456         int r, cpu;
6457         unsigned long size = 0, expanded_size = 0;
6458
6459         mutex_lock(&trace_types_lock);
6460         for_each_tracing_cpu(cpu) {
6461                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6462                 if (!ring_buffer_expanded)
6463                         expanded_size += trace_buf_size >> 10;
6464         }
6465         if (ring_buffer_expanded)
6466                 r = sprintf(buf, "%lu\n", size);
6467         else
6468                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6469         mutex_unlock(&trace_types_lock);
6470
6471         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6472 }
6473
6474 static ssize_t
6475 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6476                           size_t cnt, loff_t *ppos)
6477 {
6478         /*
6479          * There is no need to read what the user has written; this function
6480          * just makes sure that there is no error when "echo" is used.
6481          */
6482
6483         *ppos += cnt;
6484
6485         return cnt;
6486 }
6487
6488 static int
6489 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6490 {
6491         struct trace_array *tr = inode->i_private;
6492
6493         /* disable tracing? */
6494         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6495                 tracer_tracing_off(tr);
6496         /* resize the ring buffer to 0 */
6497         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6498
6499         trace_array_put(tr);
6500
6501         return 0;
6502 }
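/*
 * The free_buffer file pairs the write handler above with this release
 * handler: writing to it is effectively a no-op, but closing the file
 * shrinks the ring buffer to zero (and, with TRACE_ITER_STOP_ON_FREE set,
 * turns tracing off first). A minimal sketch of the intended use,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	exec 3>/sys/kernel/tracing/free_buffer
 *	# ... run the workload that needs the trace buffers ...
 *	exec 3>&-	# closing the fd frees the buffers
 */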
6503
6504 static ssize_t
6505 tracing_mark_write(struct file *filp, const char __user *ubuf,
6506                                         size_t cnt, loff_t *fpos)
6507 {
6508         struct trace_array *tr = filp->private_data;
6509         struct ring_buffer_event *event;
6510         enum event_trigger_type tt = ETT_NONE;
6511         struct trace_buffer *buffer;
6512         struct print_entry *entry;
6513         unsigned long irq_flags;
6514         ssize_t written;
6515         int size;
6516         int len;
6517
6518 /* Used in tracing_mark_raw_write() as well */
6519 #define FAULTED_STR "<faulted>"
6520 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6521
6522         if (tracing_disabled)
6523                 return -EINVAL;
6524
6525         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6526                 return -EINVAL;
6527
6528         if (cnt > TRACE_BUF_SIZE)
6529                 cnt = TRACE_BUF_SIZE;
6530
6531         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6532
6533         local_save_flags(irq_flags);
6534         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6535
6536         /* If less than "<faulted>", then make sure we can still add that */
6537         if (cnt < FAULTED_SIZE)
6538                 size += FAULTED_SIZE - cnt;
6539
6540         buffer = tr->array_buffer.buffer;
6541         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6542                                             irq_flags, preempt_count());
6543         if (unlikely(!event))
6544                 /* Ring buffer disabled, return as if not open for write */
6545                 return -EBADF;
6546
6547         entry = ring_buffer_event_data(event);
6548         entry->ip = _THIS_IP_;
6549
6550         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6551         if (len) {
6552                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6553                 cnt = FAULTED_SIZE;
6554                 written = -EFAULT;
6555         } else
6556                 written = cnt;
6557         len = cnt;
6558
6559         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6560                 /* do not add \n before testing triggers, but add \0 */
6561                 entry->buf[cnt] = '\0';
6562                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6563         }
6564
6565         if (entry->buf[cnt - 1] != '\n') {
6566                 entry->buf[cnt] = '\n';
6567                 entry->buf[cnt + 1] = '\0';
6568         } else
6569                 entry->buf[cnt] = '\0';
6570
6571         __buffer_unlock_commit(buffer, event);
6572
6573         if (tt)
6574                 event_triggers_post_call(tr->trace_marker_file, tt);
6575
6576         if (written > 0)
6577                 *fpos += written;
6578
6579         return written;
6580 }
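/*
 * Example usage of the trace_marker file handled above, assuming tracefs
 * is mounted at /sys/kernel/tracing:
 *
 *	echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * The string is recorded as a TRACE_PRINT event and shows up inline with
 * the other events when reading the trace or trace_pipe files.
 */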
6581
6582 /* Limit it for now to 3K (including tag) */
6583 #define RAW_DATA_MAX_SIZE (1024*3)
6584
6585 static ssize_t
6586 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6587                                         size_t cnt, loff_t *fpos)
6588 {
6589         struct trace_array *tr = filp->private_data;
6590         struct ring_buffer_event *event;
6591         struct trace_buffer *buffer;
6592         struct raw_data_entry *entry;
6593         unsigned long irq_flags;
6594         ssize_t written;
6595         int size;
6596         int len;
6597
6598 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6599
6600         if (tracing_disabled)
6601                 return -EINVAL;
6602
6603         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6604                 return -EINVAL;
6605
6606         /* The marker must at least have a tag id */
6607         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6608                 return -EINVAL;
6609
6610         if (cnt > TRACE_BUF_SIZE)
6611                 cnt = TRACE_BUF_SIZE;
6612
6613         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6614
6615         local_save_flags(irq_flags);
6616         size = sizeof(*entry) + cnt;
6617         if (cnt < FAULT_SIZE_ID)
6618                 size += FAULT_SIZE_ID - cnt;
6619
6620         buffer = tr->array_buffer.buffer;
6621         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6622                                             irq_flags, preempt_count());
6623         if (!event)
6624                 /* Ring buffer disabled, return as if not open for write */
6625                 return -EBADF;
6626
6627         entry = ring_buffer_event_data(event);
6628
6629         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6630         if (len) {
6631                 entry->id = -1;
6632                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6633                 written = -EFAULT;
6634         } else
6635                 written = cnt;
6636
6637         __buffer_unlock_commit(buffer, event);
6638
6639         if (written > 0)
6640                 *fpos += written;
6641
6642         return written;
6643 }
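/*
 * A minimal user-space sketch for the trace_marker_raw file handled
 * above, assuming tracefs is mounted at /sys/kernel/tracing (needs
 * <fcntl.h> and <unistd.h>). The first sizeof(unsigned int) bytes of
 * each write are taken as the tag id and the rest as the raw payload,
 * with the whole write capped at RAW_DATA_MAX_SIZE:
 *
 *	struct {
 *		unsigned int id;
 *		char payload[32];
 *	} msg = { .id = 42, .payload = "raw sample" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, &msg, sizeof(msg));
 *		close(fd);
 *	}
 */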
6644
6645 static int tracing_clock_show(struct seq_file *m, void *v)
6646 {
6647         struct trace_array *tr = m->private;
6648         int i;
6649
6650         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6651                 seq_printf(m,
6652                         "%s%s%s%s", i ? " " : "",
6653                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6654                         i == tr->clock_id ? "]" : "");
6655         seq_putc(m, '\n');
6656
6657         return 0;
6658 }
6659
6660 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6661 {
6662         int i;
6663
6664         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6665                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6666                         break;
6667         }
6668         if (i == ARRAY_SIZE(trace_clocks))
6669                 return -EINVAL;
6670
6671         mutex_lock(&trace_types_lock);
6672
6673         tr->clock_id = i;
6674
6675         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6676
6677         /*
6678          * New clock may not be consistent with the previous clock.
6679          * Reset the buffer so that it doesn't have incomparable timestamps.
6680          */
6681         tracing_reset_online_cpus(&tr->array_buffer);
6682
6683 #ifdef CONFIG_TRACER_MAX_TRACE
6684         if (tr->max_buffer.buffer)
6685                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6686         tracing_reset_online_cpus(&tr->max_buffer);
6687 #endif
6688
6689         mutex_unlock(&trace_types_lock);
6690
6691         return 0;
6692 }
6693
6694 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6695                                    size_t cnt, loff_t *fpos)
6696 {
6697         struct seq_file *m = filp->private_data;
6698         struct trace_array *tr = m->private;
6699         char buf[64];
6700         const char *clockstr;
6701         int ret;
6702
6703         if (cnt >= sizeof(buf))
6704                 return -EINVAL;
6705
6706         if (copy_from_user(buf, ubuf, cnt))
6707                 return -EFAULT;
6708
6709         buf[cnt] = 0;
6710
6711         clockstr = strstrip(buf);
6712
6713         ret = tracing_set_clock(tr, clockstr);
6714         if (ret)
6715                 return ret;
6716
6717         *fpos += cnt;
6718
6719         return cnt;
6720 }
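/*
 * Example usage of the trace_clock file, assuming tracefs is mounted at
 * /sys/kernel/tracing ("global" is used here only as an example; any
 * name listed by the read side is accepted):
 *
 *	# the clock currently in use is shown in brackets
 *	cat /sys/kernel/tracing/trace_clock
 *	# switch clocks; as noted in tracing_set_clock() above, this also
 *	# resets the ring buffer
 *	echo global > /sys/kernel/tracing/trace_clock
 */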
6721
6722 static int tracing_clock_open(struct inode *inode, struct file *file)
6723 {
6724         struct trace_array *tr = inode->i_private;
6725         int ret;
6726
6727         ret = tracing_check_open_get_tr(tr);
6728         if (ret)
6729                 return ret;
6730
6731         ret = single_open(file, tracing_clock_show, inode->i_private);
6732         if (ret < 0)
6733                 trace_array_put(tr);
6734
6735         return ret;
6736 }
6737
6738 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6739 {
6740         struct trace_array *tr = m->private;
6741
6742         mutex_lock(&trace_types_lock);
6743
6744         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6745                 seq_puts(m, "delta [absolute]\n");
6746         else
6747                 seq_puts(m, "[delta] absolute\n");
6748
6749         mutex_unlock(&trace_types_lock);
6750
6751         return 0;
6752 }
6753
6754 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6755 {
6756         struct trace_array *tr = inode->i_private;
6757         int ret;
6758
6759         ret = tracing_check_open_get_tr(tr);
6760         if (ret)
6761                 return ret;
6762
6763         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6764         if (ret < 0)
6765                 trace_array_put(tr);
6766
6767         return ret;
6768 }
6769
6770 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6771 {
6772         int ret = 0;
6773
6774         mutex_lock(&trace_types_lock);
6775
6776         if (abs && tr->time_stamp_abs_ref++)
6777                 goto out;
6778
6779         if (!abs) {
6780                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6781                         ret = -EINVAL;
6782                         goto out;
6783                 }
6784
6785                 if (--tr->time_stamp_abs_ref)
6786                         goto out;
6787         }
6788
6789         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6790
6791 #ifdef CONFIG_TRACER_MAX_TRACE
6792         if (tr->max_buffer.buffer)
6793                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6794 #endif
6795  out:
6796         mutex_unlock(&trace_types_lock);
6797
6798         return ret;
6799 }
6800
6801 struct ftrace_buffer_info {
6802         struct trace_iterator   iter;
6803         void                    *spare;
6804         unsigned int            spare_cpu;
6805         unsigned int            read;
6806 };
6807
6808 #ifdef CONFIG_TRACER_SNAPSHOT
6809 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6810 {
6811         struct trace_array *tr = inode->i_private;
6812         struct trace_iterator *iter;
6813         struct seq_file *m;
6814         int ret;
6815
6816         ret = tracing_check_open_get_tr(tr);
6817         if (ret)
6818                 return ret;
6819
6820         if (file->f_mode & FMODE_READ) {
6821                 iter = __tracing_open(inode, file, true);
6822                 if (IS_ERR(iter))
6823                         ret = PTR_ERR(iter);
6824         } else {
6825                 /* Writes still need the seq_file to hold the private data */
6826                 ret = -ENOMEM;
6827                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6828                 if (!m)
6829                         goto out;
6830                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6831                 if (!iter) {
6832                         kfree(m);
6833                         goto out;
6834                 }
6835                 ret = 0;
6836
6837                 iter->tr = tr;
6838                 iter->array_buffer = &tr->max_buffer;
6839                 iter->cpu_file = tracing_get_cpu(inode);
6840                 m->private = iter;
6841                 file->private_data = m;
6842         }
6843 out:
6844         if (ret < 0)
6845                 trace_array_put(tr);
6846
6847         return ret;
6848 }
6849
6850 static ssize_t
6851 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6852                        loff_t *ppos)
6853 {
6854         struct seq_file *m = filp->private_data;
6855         struct trace_iterator *iter = m->private;
6856         struct trace_array *tr = iter->tr;
6857         unsigned long val;
6858         int ret;
6859
6860         ret = tracing_update_buffers();
6861         if (ret < 0)
6862                 return ret;
6863
6864         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6865         if (ret)
6866                 return ret;
6867
6868         mutex_lock(&trace_types_lock);
6869
6870         if (tr->current_trace->use_max_tr) {
6871                 ret = -EBUSY;
6872                 goto out;
6873         }
6874
6875         arch_spin_lock(&tr->max_lock);
6876         if (tr->cond_snapshot)
6877                 ret = -EBUSY;
6878         arch_spin_unlock(&tr->max_lock);
6879         if (ret)
6880                 goto out;
6881
6882         switch (val) {
6883         case 0:
6884                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6885                         ret = -EINVAL;
6886                         break;
6887                 }
6888                 if (tr->allocated_snapshot)
6889                         free_snapshot(tr);
6890                 break;
6891         case 1:
6892 /* Only allow per-cpu swap if the ring buffer supports it */
6893 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6894                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6895                         ret = -EINVAL;
6896                         break;
6897                 }
6898 #endif
6899                 if (tr->allocated_snapshot)
6900                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6901                                         &tr->array_buffer, iter->cpu_file);
6902                 else
6903                         ret = tracing_alloc_snapshot_instance(tr);
6904                 if (ret < 0)
6905                         break;
6906                 local_irq_disable();
6907                 /* Now, we're going to swap */
6908                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6909                         update_max_tr(tr, current, smp_processor_id(), NULL);
6910                 else
6911                         update_max_tr_single(tr, current, iter->cpu_file);
6912                 local_irq_enable();
6913                 break;
6914         default:
6915                 if (tr->allocated_snapshot) {
6916                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6917                                 tracing_reset_online_cpus(&tr->max_buffer);
6918                         else
6919                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6920                 }
6921                 break;
6922         }
6923
6924         if (ret >= 0) {
6925                 *ppos += cnt;
6926                 ret = cnt;
6927         }
6928 out:
6929         mutex_unlock(&trace_types_lock);
6930         return ret;
6931 }
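/*
 * Example usage of the snapshot file handled above, assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *	echo 1 > /sys/kernel/tracing/snapshot	# allocate (if needed) and
 *						# swap in the snapshot buffer
 *	cat /sys/kernel/tracing/snapshot	# read the snapshotted trace
 *	echo 2 > /sys/kernel/tracing/snapshot	# clear the snapshot buffer
 *						# (any value > 1 does this)
 *	echo 0 > /sys/kernel/tracing/snapshot	# free the snapshot buffer
 */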
6932
6933 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6934 {
6935         struct seq_file *m = file->private_data;
6936         int ret;
6937
6938         ret = tracing_release(inode, file);
6939
6940         if (file->f_mode & FMODE_READ)
6941                 return ret;
6942
6943         /* If write only, the seq_file is just a stub */
6944         if (m)
6945                 kfree(m->private);
6946         kfree(m);
6947
6948         return 0;
6949 }
6950
6951 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6952 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6953                                     size_t count, loff_t *ppos);
6954 static int tracing_buffers_release(struct inode *inode, struct file *file);
6955 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6956                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6957
6958 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6959 {
6960         struct ftrace_buffer_info *info;
6961         int ret;
6962
6963         /* The following checks for tracefs lockdown */
6964         ret = tracing_buffers_open(inode, filp);
6965         if (ret < 0)
6966                 return ret;
6967
6968         info = filp->private_data;
6969
6970         if (info->iter.trace->use_max_tr) {
6971                 tracing_buffers_release(inode, filp);
6972                 return -EBUSY;
6973         }
6974
6975         info->iter.snapshot = true;
6976         info->iter.array_buffer = &info->iter.tr->max_buffer;
6977
6978         return ret;
6979 }
6980
6981 #endif /* CONFIG_TRACER_SNAPSHOT */
6982
6983
6984 static const struct file_operations tracing_thresh_fops = {
6985         .open           = tracing_open_generic,
6986         .read           = tracing_thresh_read,
6987         .write          = tracing_thresh_write,
6988         .llseek         = generic_file_llseek,
6989 };
6990
6991 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6992 static const struct file_operations tracing_max_lat_fops = {
6993         .open           = tracing_open_generic,
6994         .read           = tracing_max_lat_read,
6995         .write          = tracing_max_lat_write,
6996         .llseek         = generic_file_llseek,
6997 };
6998 #endif
6999
7000 static const struct file_operations set_tracer_fops = {
7001         .open           = tracing_open_generic,
7002         .read           = tracing_set_trace_read,
7003         .write          = tracing_set_trace_write,
7004         .llseek         = generic_file_llseek,
7005 };
7006
7007 static const struct file_operations tracing_pipe_fops = {
7008         .open           = tracing_open_pipe,
7009         .poll           = tracing_poll_pipe,
7010         .read           = tracing_read_pipe,
7011         .splice_read    = tracing_splice_read_pipe,
7012         .release        = tracing_release_pipe,
7013         .llseek         = no_llseek,
7014 };
7015
7016 static const struct file_operations tracing_entries_fops = {
7017         .open           = tracing_open_generic_tr,
7018         .read           = tracing_entries_read,
7019         .write          = tracing_entries_write,
7020         .llseek         = generic_file_llseek,
7021         .release        = tracing_release_generic_tr,
7022 };
7023
7024 static const struct file_operations tracing_total_entries_fops = {
7025         .open           = tracing_open_generic_tr,
7026         .read           = tracing_total_entries_read,
7027         .llseek         = generic_file_llseek,
7028         .release        = tracing_release_generic_tr,
7029 };
7030
7031 static const struct file_operations tracing_free_buffer_fops = {
7032         .open           = tracing_open_generic_tr,
7033         .write          = tracing_free_buffer_write,
7034         .release        = tracing_free_buffer_release,
7035 };
7036
7037 static const struct file_operations tracing_mark_fops = {
7038         .open           = tracing_open_generic_tr,
7039         .write          = tracing_mark_write,
7040         .llseek         = generic_file_llseek,
7041         .release        = tracing_release_generic_tr,
7042 };
7043
7044 static const struct file_operations tracing_mark_raw_fops = {
7045         .open           = tracing_open_generic_tr,
7046         .write          = tracing_mark_raw_write,
7047         .llseek         = generic_file_llseek,
7048         .release        = tracing_release_generic_tr,
7049 };
7050
7051 static const struct file_operations trace_clock_fops = {
7052         .open           = tracing_clock_open,
7053         .read           = seq_read,
7054         .llseek         = seq_lseek,
7055         .release        = tracing_single_release_tr,
7056         .write          = tracing_clock_write,
7057 };
7058
7059 static const struct file_operations trace_time_stamp_mode_fops = {
7060         .open           = tracing_time_stamp_mode_open,
7061         .read           = seq_read,
7062         .llseek         = seq_lseek,
7063         .release        = tracing_single_release_tr,
7064 };
7065
7066 #ifdef CONFIG_TRACER_SNAPSHOT
7067 static const struct file_operations snapshot_fops = {
7068         .open           = tracing_snapshot_open,
7069         .read           = seq_read,
7070         .write          = tracing_snapshot_write,
7071         .llseek         = tracing_lseek,
7072         .release        = tracing_snapshot_release,
7073 };
7074
7075 static const struct file_operations snapshot_raw_fops = {
7076         .open           = snapshot_raw_open,
7077         .read           = tracing_buffers_read,
7078         .release        = tracing_buffers_release,
7079         .splice_read    = tracing_buffers_splice_read,
7080         .llseek         = no_llseek,
7081 };
7082
7083 #endif /* CONFIG_TRACER_SNAPSHOT */
7084
7085 #define TRACING_LOG_ERRS_MAX    8
7086 #define TRACING_LOG_LOC_MAX     128
7087
7088 #define CMD_PREFIX "  Command: "
7089
7090 struct err_info {
7091         const char      **errs; /* ptr to loc-specific array of err strings */
7092         u8              type;   /* index into errs -> specific err string */
7093         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7094         u64             ts;
7095 };
7096
7097 struct tracing_log_err {
7098         struct list_head        list;
7099         struct err_info         info;
7100         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7101         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7102 };
7103
7104 static DEFINE_MUTEX(tracing_err_log_lock);
7105
7106 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7107 {
7108         struct tracing_log_err *err;
7109
7110         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7111                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7112                 if (!err)
7113                         err = ERR_PTR(-ENOMEM);
7114                 tr->n_err_log_entries++;
7115
7116                 return err;
7117         }
7118
7119         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7120         list_del(&err->list);
7121
7122         return err;
7123 }
7124
7125 /**
7126  * err_pos - find the position of a string within a command for error careting
7127  * @cmd: The tracing command that caused the error
7128  * @str: The string to position the caret at within @cmd
7129  *
7130  * Finds the position of the first occurrence of @str within @cmd.  The
7131  * return value can be passed to tracing_log_err() for caret placement
7132  * within @cmd.
7133  *
7134  * Returns the index within @cmd of the first occurrence of @str or 0
7135  * if @str was not found.
7136  */
7137 unsigned int err_pos(char *cmd, const char *str)
7138 {
7139         char *found;
7140
7141         if (WARN_ON(!strlen(cmd)))
7142                 return 0;
7143
7144         found = strstr(cmd, str);
7145         if (found)
7146                 return found - cmd;
7147
7148         return 0;
7149 }
7150
7151 /**
7152  * tracing_log_err - write an error to the tracing error log
7153  * @tr: The associated trace array for the error (NULL for top level array)
7154  * @loc: A string describing where the error occurred
7155  * @cmd: The tracing command that caused the error
7156  * @errs: The array of loc-specific static error strings
7157  * @type: The index into errs[], which produces the specific static err string
7158  * @pos: The position the caret should be placed in the cmd
7159  *
7160  * Writes an error into tracing/error_log of the form:
7161  *
7162  * <loc>: error: <text>
7163  *   Command: <cmd>
7164  *              ^
7165  *
7166  * tracing/error_log is a small log file containing the last
7167  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7168  * unless there has been a tracing error, and the error log can be
7169  * cleared and have its memory freed by writing the empty string in
7170  * truncation mode to it, i.e. echo > tracing/error_log.
7171  *
7172  * NOTE: the @errs array along with the @type param are used to
7173  * produce a static error string - this string is not copied and saved
7174  * when the error is logged - only a pointer to it is saved.  See
7175  * existing callers for examples of how static strings are typically
7176  * defined for use with tracing_log_err().
7177  */
7178 void tracing_log_err(struct trace_array *tr,
7179                      const char *loc, const char *cmd,
7180                      const char **errs, u8 type, u8 pos)
7181 {
7182         struct tracing_log_err *err;
7183
7184         if (!tr)
7185                 tr = &global_trace;
7186
7187         mutex_lock(&tracing_err_log_lock);
7188         err = get_tracing_log_err(tr);
7189         if (PTR_ERR(err) == -ENOMEM) {
7190                 mutex_unlock(&tracing_err_log_lock);
7191                 return;
7192         }
7193
7194         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7195         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7196
7197         err->info.errs = errs;
7198         err->info.type = type;
7199         err->info.pos = pos;
7200         err->info.ts = local_clock();
7201
7202         list_add_tail(&err->list, &tr->err_log);
7203         mutex_unlock(&tracing_err_log_lock);
7204 }
7205
7206 static void clear_tracing_err_log(struct trace_array *tr)
7207 {
7208         struct tracing_log_err *err, *next;
7209
7210         mutex_lock(&tracing_err_log_lock);
7211         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7212                 list_del(&err->list);
7213                 kfree(err);
7214         }
7215
7216         tr->n_err_log_entries = 0;
7217         mutex_unlock(&tracing_err_log_lock);
7218 }
7219
7220 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7221 {
7222         struct trace_array *tr = m->private;
7223
7224         mutex_lock(&tracing_err_log_lock);
7225
7226         return seq_list_start(&tr->err_log, *pos);
7227 }
7228
7229 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7230 {
7231         struct trace_array *tr = m->private;
7232
7233         return seq_list_next(v, &tr->err_log, pos);
7234 }
7235
7236 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7237 {
7238         mutex_unlock(&tracing_err_log_lock);
7239 }
7240
7241 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7242 {
7243         u8 i;
7244
7245         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7246                 seq_putc(m, ' ');
7247         for (i = 0; i < pos; i++)
7248                 seq_putc(m, ' ');
7249         seq_puts(m, "^\n");
7250 }
7251
7252 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7253 {
7254         struct tracing_log_err *err = v;
7255
7256         if (err) {
7257                 const char *err_text = err->info.errs[err->info.type];
7258                 u64 sec = err->info.ts;
7259                 u32 nsec;
7260
7261                 nsec = do_div(sec, NSEC_PER_SEC);
7262                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7263                            err->loc, err_text);
7264                 seq_printf(m, "%s", err->cmd);
7265                 tracing_err_log_show_pos(m, err->info.pos);
7266         }
7267
7268         return 0;
7269 }
7270
7271 static const struct seq_operations tracing_err_log_seq_ops = {
7272         .start  = tracing_err_log_seq_start,
7273         .next   = tracing_err_log_seq_next,
7274         .stop   = tracing_err_log_seq_stop,
7275         .show   = tracing_err_log_seq_show
7276 };
7277
7278 static int tracing_err_log_open(struct inode *inode, struct file *file)
7279 {
7280         struct trace_array *tr = inode->i_private;
7281         int ret = 0;
7282
7283         ret = tracing_check_open_get_tr(tr);
7284         if (ret)
7285                 return ret;
7286
7287         /* If this file was opened for write, then erase contents */
7288         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7289                 clear_tracing_err_log(tr);
7290
7291         if (file->f_mode & FMODE_READ) {
7292                 ret = seq_open(file, &tracing_err_log_seq_ops);
7293                 if (!ret) {
7294                         struct seq_file *m = file->private_data;
7295                         m->private = tr;
7296                 } else {
7297                         trace_array_put(tr);
7298                 }
7299         }
7300         return ret;
7301 }
7302
7303 static ssize_t tracing_err_log_write(struct file *file,
7304                                      const char __user *buffer,
7305                                      size_t count, loff_t *ppos)
7306 {
7307         return count;
7308 }
7309
7310 static int tracing_err_log_release(struct inode *inode, struct file *file)
7311 {
7312         struct trace_array *tr = inode->i_private;
7313
7314         trace_array_put(tr);
7315
7316         if (file->f_mode & FMODE_READ)
7317                 seq_release(inode, file);
7318
7319         return 0;
7320 }
7321
7322 static const struct file_operations tracing_err_log_fops = {
7323         .open           = tracing_err_log_open,
7324         .write          = tracing_err_log_write,
7325         .read           = seq_read,
7326         .llseek         = seq_lseek,
7327         .release        = tracing_err_log_release,
7328 };
7329
7330 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7331 {
7332         struct trace_array *tr = inode->i_private;
7333         struct ftrace_buffer_info *info;
7334         int ret;
7335
7336         ret = tracing_check_open_get_tr(tr);
7337         if (ret)
7338                 return ret;
7339
7340         info = kzalloc(sizeof(*info), GFP_KERNEL);
7341         if (!info) {
7342                 trace_array_put(tr);
7343                 return -ENOMEM;
7344         }
7345
7346         mutex_lock(&trace_types_lock);
7347
7348         info->iter.tr           = tr;
7349         info->iter.cpu_file     = tracing_get_cpu(inode);
7350         info->iter.trace        = tr->current_trace;
7351         info->iter.array_buffer = &tr->array_buffer;
7352         info->spare             = NULL;
7353         /* Force reading ring buffer for first read */
7354         info->read              = (unsigned int)-1;
7355
7356         filp->private_data = info;
7357
7358         tr->current_trace->ref++;
7359
7360         mutex_unlock(&trace_types_lock);
7361
7362         ret = nonseekable_open(inode, filp);
7363         if (ret < 0)
7364                 trace_array_put(tr);
7365
7366         return ret;
7367 }
7368
7369 static __poll_t
7370 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7371 {
7372         struct ftrace_buffer_info *info = filp->private_data;
7373         struct trace_iterator *iter = &info->iter;
7374
7375         return trace_poll(iter, filp, poll_table);
7376 }
7377
7378 static ssize_t
7379 tracing_buffers_read(struct file *filp, char __user *ubuf,
7380                      size_t count, loff_t *ppos)
7381 {
7382         struct ftrace_buffer_info *info = filp->private_data;
7383         struct trace_iterator *iter = &info->iter;
7384         ssize_t ret = 0;
7385         ssize_t size;
7386
7387         if (!count)
7388                 return 0;
7389
7390 #ifdef CONFIG_TRACER_MAX_TRACE
7391         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7392                 return -EBUSY;
7393 #endif
7394
7395         if (!info->spare) {
7396                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7397                                                           iter->cpu_file);
7398                 if (IS_ERR(info->spare)) {
7399                         ret = PTR_ERR(info->spare);
7400                         info->spare = NULL;
7401                 } else {
7402                         info->spare_cpu = iter->cpu_file;
7403                 }
7404         }
7405         if (!info->spare)
7406                 return ret;
7407
7408         /* Do we have previous read data to read? */
7409         if (info->read < PAGE_SIZE)
7410                 goto read;
7411
7412  again:
7413         trace_access_lock(iter->cpu_file);
7414         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7415                                     &info->spare,
7416                                     count,
7417                                     iter->cpu_file, 0);
7418         trace_access_unlock(iter->cpu_file);
7419
7420         if (ret < 0) {
7421                 if (trace_empty(iter)) {
7422                         if ((filp->f_flags & O_NONBLOCK))
7423                                 return -EAGAIN;
7424
7425                         ret = wait_on_pipe(iter, 0);
7426                         if (ret)
7427                                 return ret;
7428
7429                         goto again;
7430                 }
7431                 return 0;
7432         }
7433
7434         info->read = 0;
7435  read:
7436         size = PAGE_SIZE - info->read;
7437         if (size > count)
7438                 size = count;
7439
7440         ret = copy_to_user(ubuf, info->spare + info->read, size);
7441         if (ret == size)
7442                 return -EFAULT;
7443
7444         size -= ret;
7445
7446         *ppos += size;
7447         info->read += size;
7448
7449         return size;
7450 }
7451
7452 static int tracing_buffers_release(struct inode *inode, struct file *file)
7453 {
7454         struct ftrace_buffer_info *info = file->private_data;
7455         struct trace_iterator *iter = &info->iter;
7456
7457         mutex_lock(&trace_types_lock);
7458
7459         iter->tr->current_trace->ref--;
7460
7461         __trace_array_put(iter->tr);
7462
7463         if (info->spare)
7464                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7465                                            info->spare_cpu, info->spare);
7466         kfree(info);
7467
7468         mutex_unlock(&trace_types_lock);
7469
7470         return 0;
7471 }
7472
7473 struct buffer_ref {
7474         struct trace_buffer     *buffer;
7475         void                    *page;
7476         int                     cpu;
7477         refcount_t              refcount;
7478 };
7479
7480 static void buffer_ref_release(struct buffer_ref *ref)
7481 {
7482         if (!refcount_dec_and_test(&ref->refcount))
7483                 return;
7484         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7485         kfree(ref);
7486 }
7487
7488 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7489                                     struct pipe_buffer *buf)
7490 {
7491         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7492
7493         buffer_ref_release(ref);
7494         buf->private = 0;
7495 }
7496
7497 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7498                                 struct pipe_buffer *buf)
7499 {
7500         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7501
7502         if (refcount_read(&ref->refcount) > INT_MAX/2)
7503                 return false;
7504
7505         refcount_inc(&ref->refcount);
7506         return true;
7507 }
7508
7509 /* Pipe buffer operations for a buffer. */
7510 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7511         .confirm                = generic_pipe_buf_confirm,
7512         .release                = buffer_pipe_buf_release,
7513         .steal                  = generic_pipe_buf_nosteal,
7514         .get                    = buffer_pipe_buf_get,
7515 };
7516
7517 /*
7518  * Callback from splice_to_pipe(), if we need to release some pages
7519  * at the end of the spd in case we errored out while filling the pipe.
7520  */
7521 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7522 {
7523         struct buffer_ref *ref =
7524                 (struct buffer_ref *)spd->partial[i].private;
7525
7526         buffer_ref_release(ref);
7527         spd->partial[i].private = 0;
7528 }
7529
7530 static ssize_t
7531 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7532                             struct pipe_inode_info *pipe, size_t len,
7533                             unsigned int flags)
7534 {
7535         struct ftrace_buffer_info *info = file->private_data;
7536         struct trace_iterator *iter = &info->iter;
7537         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7538         struct page *pages_def[PIPE_DEF_BUFFERS];
7539         struct splice_pipe_desc spd = {
7540                 .pages          = pages_def,
7541                 .partial        = partial_def,
7542                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7543                 .ops            = &buffer_pipe_buf_ops,
7544                 .spd_release    = buffer_spd_release,
7545         };
7546         struct buffer_ref *ref;
7547         int entries, i;
7548         ssize_t ret = 0;
7549
7550 #ifdef CONFIG_TRACER_MAX_TRACE
7551         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7552                 return -EBUSY;
7553 #endif
7554
7555         if (*ppos & (PAGE_SIZE - 1))
7556                 return -EINVAL;
7557
7558         if (len & (PAGE_SIZE - 1)) {
7559                 if (len < PAGE_SIZE)
7560                         return -EINVAL;
7561                 len &= PAGE_MASK;
7562         }
7563
7564         if (splice_grow_spd(pipe, &spd))
7565                 return -ENOMEM;
7566
7567  again:
7568         trace_access_lock(iter->cpu_file);
7569         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7570
7571         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7572                 struct page *page;
7573                 int r;
7574
7575                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7576                 if (!ref) {
7577                         ret = -ENOMEM;
7578                         break;
7579                 }
7580
7581                 refcount_set(&ref->refcount, 1);
7582                 ref->buffer = iter->array_buffer->buffer;
7583                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7584                 if (IS_ERR(ref->page)) {
7585                         ret = PTR_ERR(ref->page);
7586                         ref->page = NULL;
7587                         kfree(ref);
7588                         break;
7589                 }
7590                 ref->cpu = iter->cpu_file;
7591
7592                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7593                                           len, iter->cpu_file, 1);
7594                 if (r < 0) {
7595                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7596                                                    ref->page);
7597                         kfree(ref);
7598                         break;
7599                 }
7600
7601                 page = virt_to_page(ref->page);
7602
7603                 spd.pages[i] = page;
7604                 spd.partial[i].len = PAGE_SIZE;
7605                 spd.partial[i].offset = 0;
7606                 spd.partial[i].private = (unsigned long)ref;
7607                 spd.nr_pages++;
7608                 *ppos += PAGE_SIZE;
7609
7610                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7611         }
7612
7613         trace_access_unlock(iter->cpu_file);
7614         spd.nr_pages = i;
7615
7616         /* did we read anything? */
7617         if (!spd.nr_pages) {
7618                 if (ret)
7619                         goto out;
7620
7621                 ret = -EAGAIN;
7622                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7623                         goto out;
7624
7625                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7626                 if (ret)
7627                         goto out;
7628
7629                 goto again;
7630         }
7631
7632         ret = splice_to_pipe(pipe, &spd);
7633 out:
7634         splice_shrink_spd(&spd);
7635
7636         return ret;
7637 }
7638
7639 static const struct file_operations tracing_buffers_fops = {
7640         .open           = tracing_buffers_open,
7641         .read           = tracing_buffers_read,
7642         .poll           = tracing_buffers_poll,
7643         .release        = tracing_buffers_release,
7644         .splice_read    = tracing_buffers_splice_read,
7645         .llseek         = no_llseek,
7646 };
7647
7648 static ssize_t
7649 tracing_stats_read(struct file *filp, char __user *ubuf,
7650                    size_t count, loff_t *ppos)
7651 {
7652         struct inode *inode = file_inode(filp);
7653         struct trace_array *tr = inode->i_private;
7654         struct array_buffer *trace_buf = &tr->array_buffer;
7655         int cpu = tracing_get_cpu(inode);
7656         struct trace_seq *s;
7657         unsigned long cnt;
7658         unsigned long long t;
7659         unsigned long usec_rem;
7660
7661         s = kmalloc(sizeof(*s), GFP_KERNEL);
7662         if (!s)
7663                 return -ENOMEM;
7664
7665         trace_seq_init(s);
7666
7667         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7668         trace_seq_printf(s, "entries: %ld\n", cnt);
7669
7670         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7671         trace_seq_printf(s, "overrun: %ld\n", cnt);
7672
7673         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7674         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7675
7676         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7677         trace_seq_printf(s, "bytes: %ld\n", cnt);
7678
7679         if (trace_clocks[tr->clock_id].in_ns) {
7680                 /* local or global for trace_clock */
7681                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7682                 usec_rem = do_div(t, USEC_PER_SEC);
7683                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7684                                                                 t, usec_rem);
7685
7686                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7687                 usec_rem = do_div(t, USEC_PER_SEC);
7688                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7689         } else {
7690                 /* counter or tsc mode for trace_clock */
7691                 trace_seq_printf(s, "oldest event ts: %llu\n",
7692                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7693
7694                 trace_seq_printf(s, "now ts: %llu\n",
7695                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7696         }
7697
7698         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7699         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7700
7701         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7702         trace_seq_printf(s, "read events: %ld\n", cnt);
7703
7704         count = simple_read_from_buffer(ubuf, count, ppos,
7705                                         s->buffer, trace_seq_used(s));
7706
7707         kfree(s);
7708
7709         return count;
7710 }
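/*
 * The per_cpu/cpuN/stats file produced by the handler above reads back
 * one counter per line; illustrative output (the numbers below are made
 * up):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 53248
 *	oldest event ts:  1234.567890
 *	now ts:  1240.123456
 *	dropped events: 0
 *	read events: 512
 */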
7711
7712 static const struct file_operations tracing_stats_fops = {
7713         .open           = tracing_open_generic_tr,
7714         .read           = tracing_stats_read,
7715         .llseek         = generic_file_llseek,
7716         .release        = tracing_release_generic_tr,
7717 };
7718
7719 #ifdef CONFIG_DYNAMIC_FTRACE
7720
7721 static ssize_t
7722 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7723                   size_t cnt, loff_t *ppos)
7724 {
7725         ssize_t ret;
7726         char *buf;
7727         int r;
7728
7729         /* 256 should be plenty to hold the amount needed */
7730         buf = kmalloc(256, GFP_KERNEL);
7731         if (!buf)
7732                 return -ENOMEM;
7733
7734         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7735                       ftrace_update_tot_cnt,
7736                       ftrace_number_of_pages,
7737                       ftrace_number_of_groups);
7738
7739         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7740         kfree(buf);
7741         return ret;
7742 }
7743
7744 static const struct file_operations tracing_dyn_info_fops = {
7745         .open           = tracing_open_generic,
7746         .read           = tracing_read_dyn_info,
7747         .llseek         = generic_file_llseek,
7748 };
7749 #endif /* CONFIG_DYNAMIC_FTRACE */
7750
7751 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7752 static void
7753 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7754                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7755                 void *data)
7756 {
7757         tracing_snapshot_instance(tr);
7758 }
7759
7760 static void
7761 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7762                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7763                       void *data)
7764 {
7765         struct ftrace_func_mapper *mapper = data;
7766         long *count = NULL;
7767
7768         if (mapper)
7769                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7770
7771         if (count) {
7772
7773                 if (*count <= 0)
7774                         return;
7775
7776                 (*count)--;
7777         }
7778
7779         tracing_snapshot_instance(tr);
7780 }
7781
7782 static int
7783 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7784                       struct ftrace_probe_ops *ops, void *data)
7785 {
7786         struct ftrace_func_mapper *mapper = data;
7787         long *count = NULL;
7788
7789         seq_printf(m, "%ps:", (void *)ip);
7790
7791         seq_puts(m, "snapshot");
7792
7793         if (mapper)
7794                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7795
7796         if (count)
7797                 seq_printf(m, ":count=%ld\n", *count);
7798         else
7799                 seq_puts(m, ":unlimited\n");
7800
7801         return 0;
7802 }
7803
7804 static int
7805 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7806                      unsigned long ip, void *init_data, void **data)
7807 {
7808         struct ftrace_func_mapper *mapper = *data;
7809
7810         if (!mapper) {
7811                 mapper = allocate_ftrace_func_mapper();
7812                 if (!mapper)
7813                         return -ENOMEM;
7814                 *data = mapper;
7815         }
7816
7817         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7818 }
7819
7820 static void
7821 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7822                      unsigned long ip, void *data)
7823 {
7824         struct ftrace_func_mapper *mapper = data;
7825
7826         if (!ip) {
7827                 if (!mapper)
7828                         return;
7829                 free_ftrace_func_mapper(mapper, NULL);
7830                 return;
7831         }
7832
7833         ftrace_func_mapper_remove_ip(mapper, ip);
7834 }
7835
7836 static struct ftrace_probe_ops snapshot_probe_ops = {
7837         .func                   = ftrace_snapshot,
7838         .print                  = ftrace_snapshot_print,
7839 };
7840
7841 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7842         .func                   = ftrace_count_snapshot,
7843         .print                  = ftrace_snapshot_print,
7844         .init                   = ftrace_snapshot_init,
7845         .free                   = ftrace_snapshot_free,
7846 };
7847
7848 static int
7849 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7850                                char *glob, char *cmd, char *param, int enable)
7851 {
7852         struct ftrace_probe_ops *ops;
7853         void *count = (void *)-1;
7854         char *number;
7855         int ret;
7856
7857         if (!tr)
7858                 return -ENODEV;
7859
7860         /* hash funcs only work with set_ftrace_filter */
7861         if (!enable)
7862                 return -EINVAL;
7863
7864         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7865
7866         if (glob[0] == '!')
7867                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7868
7869         if (!param)
7870                 goto out_reg;
7871
7872         number = strsep(&param, ":");
7873
7874         if (!strlen(number))
7875                 goto out_reg;
7876
7877         /*
7878          * We use the callback data field (which is a pointer)
7879          * as our counter.
7880          */
7881         ret = kstrtoul(number, 0, (unsigned long *)&count);
7882         if (ret)
7883                 return ret;
7884
7885  out_reg:
7886         ret = tracing_alloc_snapshot_instance(tr);
7887         if (ret < 0)
7888                 goto out;
7889
7890         ret = register_ftrace_function_probe(glob, tr, ops, count);
7891
7892  out:
7893         return ret < 0 ? ret : 0;
7894 }
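/*
 * Example usage of the "snapshot" command registered below, written to
 * set_ftrace_filter (assuming tracefs at /sys/kernel/tracing and using
 * schedule() purely as an example function):
 *
 *	# take a snapshot every time schedule() is hit
 *	echo 'schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *	# snapshot only the first 5 hits
 *	echo 'schedule:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *	# remove the probe again
 *	echo '!schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 */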
7895
7896 static struct ftrace_func_command ftrace_snapshot_cmd = {
7897         .name                   = "snapshot",
7898         .func                   = ftrace_trace_snapshot_callback,
7899 };
7900
7901 static __init int register_snapshot_cmd(void)
7902 {
7903         return register_ftrace_command(&ftrace_snapshot_cmd);
7904 }
7905 #else
7906 static inline __init int register_snapshot_cmd(void) { return 0; }
7907 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7908
7909 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7910 {
7911         if (WARN_ON(!tr->dir))
7912                 return ERR_PTR(-ENODEV);
7913
7914         /* Top directory uses NULL as the parent */
7915         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7916                 return NULL;
7917
7918         /* All sub buffers have a descriptor */
7919         return tr->dir;
7920 }
7921
7922 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7923 {
7924         struct dentry *d_tracer;
7925
7926         if (tr->percpu_dir)
7927                 return tr->percpu_dir;
7928
7929         d_tracer = tracing_get_dentry(tr);
7930         if (IS_ERR(d_tracer))
7931                 return NULL;
7932
7933         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7934
7935         MEM_FAIL(!tr->percpu_dir,
7936                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7937
7938         return tr->percpu_dir;
7939 }
7940
7941 static struct dentry *
7942 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7943                       void *data, long cpu, const struct file_operations *fops)
7944 {
7945         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7946
7947         if (ret) /* See tracing_get_cpu() */
7948                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7949         return ret;
7950 }
7951
7952 static void
7953 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7954 {
7955         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7956         struct dentry *d_cpu;
7957         char cpu_dir[30]; /* 30 characters should be more than enough */
7958
7959         if (!d_percpu)
7960                 return;
7961
7962         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7963         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7964         if (!d_cpu) {
7965                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7966                 return;
7967         }
7968
7969         /* per cpu trace_pipe */
7970         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7971                                 tr, cpu, &tracing_pipe_fops);
7972
7973         /* per cpu trace */
7974         trace_create_cpu_file("trace", 0644, d_cpu,
7975                                 tr, cpu, &tracing_fops);
7976
7977         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7978                                 tr, cpu, &tracing_buffers_fops);
7979
7980         trace_create_cpu_file("stats", 0444, d_cpu,
7981                                 tr, cpu, &tracing_stats_fops);
7982
7983         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7984                                 tr, cpu, &tracing_entries_fops);
7985
7986 #ifdef CONFIG_TRACER_SNAPSHOT
7987         trace_create_cpu_file("snapshot", 0644, d_cpu,
7988                                 tr, cpu, &snapshot_fops);
7989
7990         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7991                                 tr, cpu, &snapshot_raw_fops);
7992 #endif
7993 }
7994
7995 #ifdef CONFIG_FTRACE_SELFTEST
7996 /* Let selftest have access to static functions in this file */
7997 #include "trace_selftest.c"
7998 #endif
7999
8000 static ssize_t
8001 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8002                         loff_t *ppos)
8003 {
8004         struct trace_option_dentry *topt = filp->private_data;
8005         char *buf;
8006
8007         if (topt->flags->val & topt->opt->bit)
8008                 buf = "1\n";
8009         else
8010                 buf = "0\n";
8011
8012         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8013 }
8014
8015 static ssize_t
8016 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8017                          loff_t *ppos)
8018 {
8019         struct trace_option_dentry *topt = filp->private_data;
8020         unsigned long val;
8021         int ret;
8022
8023         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8024         if (ret)
8025                 return ret;
8026
8027         if (val != 0 && val != 1)
8028                 return -EINVAL;
8029
8030         if (!!(topt->flags->val & topt->opt->bit) != val) {
8031                 mutex_lock(&trace_types_lock);
8032                 ret = __set_tracer_option(topt->tr, topt->flags,
8033                                           topt->opt, !val);
8034                 mutex_unlock(&trace_types_lock);
8035                 if (ret)
8036                         return ret;
8037         }
8038
8039         *ppos += cnt;
8040
8041         return cnt;
8042 }
8043
8044
8045 static const struct file_operations trace_options_fops = {
8046         .open = tracing_open_generic,
8047         .read = trace_options_read,
8048         .write = trace_options_write,
8049         .llseek = generic_file_llseek,
8050 };
8051
8052 /*
8053  * In order to pass in both the trace_array descriptor as well as the index
8054  * to the flag that the trace option file represents, the trace_array
8055  * has a character array of trace_flags_index[], which holds the index
8056  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8057  * The address of this character array is passed to the flag option file
8058  * read/write callbacks.
8059  *
8060  * In order to extract both the index and the trace_array descriptor,
8061  * get_tr_index() uses the following algorithm.
8062  *
8063  *   idx = *ptr;
8064  *
8065  * The pointer itself points at the index value (and remember,
8066  * index[1] == 1), so dereferencing it yields the flag's bit index.
8067  *
8068  * Then, to get the trace_array descriptor, subtract that index
8069  * from the pointer to reach the start of the index array.
8070  *
8071  *   ptr - idx == &index[0]
8072  *
8073  * Then a simple container_of() from that pointer gets us to the
8074  * trace_array descriptor.
8075  */
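/*
 * A worked example of the above (illustrative only): if a flag file was
 * created with data == &tr->trace_flags_index[3], then, since
 * trace_flags_index[3] == 3:
 *
 *   idx = *(unsigned char *)data;                 == 3
 *   data - idx                                    == &tr->trace_flags_index[0]
 *   container_of(data - idx, struct trace_array,
 *                trace_flags_index)               == tr
 */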
8076 static void get_tr_index(void *data, struct trace_array **ptr,
8077                          unsigned int *pindex)
8078 {
8079         *pindex = *(unsigned char *)data;
8080
8081         *ptr = container_of(data - *pindex, struct trace_array,
8082                             trace_flags_index);
8083 }
8084
8085 static ssize_t
8086 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8087                         loff_t *ppos)
8088 {
8089         void *tr_index = filp->private_data;
8090         struct trace_array *tr;
8091         unsigned int index;
8092         char *buf;
8093
8094         get_tr_index(tr_index, &tr, &index);
8095
8096         if (tr->trace_flags & (1 << index))
8097                 buf = "1\n";
8098         else
8099                 buf = "0\n";
8100
8101         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8102 }
8103
8104 static ssize_t
8105 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8106                          loff_t *ppos)
8107 {
8108         void *tr_index = filp->private_data;
8109         struct trace_array *tr;
8110         unsigned int index;
8111         unsigned long val;
8112         int ret;
8113
8114         get_tr_index(tr_index, &tr, &index);
8115
8116         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8117         if (ret)
8118                 return ret;
8119
8120         if (val != 0 && val != 1)
8121                 return -EINVAL;
8122
8123         mutex_lock(&event_mutex);
8124         mutex_lock(&trace_types_lock);
8125         ret = set_tracer_flag(tr, 1 << index, val);
8126         mutex_unlock(&trace_types_lock);
8127         mutex_unlock(&event_mutex);
8128
8129         if (ret < 0)
8130                 return ret;
8131
8132         *ppos += cnt;
8133
8134         return cnt;
8135 }
8136
8137 static const struct file_operations trace_options_core_fops = {
8138         .open = tracing_open_generic,
8139         .read = trace_options_core_read,
8140         .write = trace_options_core_write,
8141         .llseek = generic_file_llseek,
8142 };
8143
8144 struct dentry *trace_create_file(const char *name,
8145                                  umode_t mode,
8146                                  struct dentry *parent,
8147                                  void *data,
8148                                  const struct file_operations *fops)
8149 {
8150         struct dentry *ret;
8151
8152         ret = tracefs_create_file(name, mode, parent, data, fops);
8153         if (!ret)
8154                 pr_warn("Could not create tracefs '%s' entry\n", name);
8155
8156         return ret;
8157 }
8158
8159
8160 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8161 {
8162         struct dentry *d_tracer;
8163
8164         if (tr->options)
8165                 return tr->options;
8166
8167         d_tracer = tracing_get_dentry(tr);
8168         if (IS_ERR(d_tracer))
8169                 return NULL;
8170
8171         tr->options = tracefs_create_dir("options", d_tracer);
8172         if (!tr->options) {
8173                 pr_warn("Could not create tracefs directory 'options'\n");
8174                 return NULL;
8175         }
8176
8177         return tr->options;
8178 }
8179
8180 static void
8181 create_trace_option_file(struct trace_array *tr,
8182                          struct trace_option_dentry *topt,
8183                          struct tracer_flags *flags,
8184                          struct tracer_opt *opt)
8185 {
8186         struct dentry *t_options;
8187
8188         t_options = trace_options_init_dentry(tr);
8189         if (!t_options)
8190                 return;
8191
8192         topt->flags = flags;
8193         topt->opt = opt;
8194         topt->tr = tr;
8195
8196         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8197                                     &trace_options_fops);
8198
8199 }
8200
8201 static void
8202 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8203 {
8204         struct trace_option_dentry *topts;
8205         struct trace_options *tr_topts;
8206         struct tracer_flags *flags;
8207         struct tracer_opt *opts;
8208         int cnt;
8209         int i;
8210
8211         if (!tracer)
8212                 return;
8213
8214         flags = tracer->flags;
8215
8216         if (!flags || !flags->opts)
8217                 return;
8218
8219         /*
8220          * If this is an instance, only create flags for tracers
8221          * the instance may have.
8222          */
8223         if (!trace_ok_for_array(tracer, tr))
8224                 return;
8225
8226         for (i = 0; i < tr->nr_topts; i++) {
8227                 /* Make sure there are no duplicate flags. */
8228                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8229                         return;
8230         }
8231
8232         opts = flags->opts;
8233
8234         for (cnt = 0; opts[cnt].name; cnt++)
8235                 ;
8236
8237         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8238         if (!topts)
8239                 return;
8240
8241         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8242                             GFP_KERNEL);
8243         if (!tr_topts) {
8244                 kfree(topts);
8245                 return;
8246         }
8247
8248         tr->topts = tr_topts;
8249         tr->topts[tr->nr_topts].tracer = tracer;
8250         tr->topts[tr->nr_topts].topts = topts;
8251         tr->nr_topts++;
8252
8253         for (cnt = 0; opts[cnt].name; cnt++) {
8254                 create_trace_option_file(tr, &topts[cnt], flags,
8255                                          &opts[cnt]);
8256                 MEM_FAIL(topts[cnt].entry == NULL,
8257                           "Failed to create trace option: %s",
8258                           opts[cnt].name);
8259         }
8260 }
8261
8262 static struct dentry *
8263 create_trace_option_core_file(struct trace_array *tr,
8264                               const char *option, long index)
8265 {
8266         struct dentry *t_options;
8267
8268         t_options = trace_options_init_dentry(tr);
8269         if (!t_options)
8270                 return NULL;
8271
8272         return trace_create_file(option, 0644, t_options,
8273                                  (void *)&tr->trace_flags_index[index],
8274                                  &trace_options_core_fops);
8275 }
8276
8277 static void create_trace_options_dir(struct trace_array *tr)
8278 {
8279         struct dentry *t_options;
8280         bool top_level = tr == &global_trace;
8281         int i;
8282
8283         t_options = trace_options_init_dentry(tr);
8284         if (!t_options)
8285                 return;
8286
8287         for (i = 0; trace_options[i]; i++) {
8288                 if (top_level ||
8289                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8290                         create_trace_option_core_file(tr, trace_options[i], i);
8291         }
8292 }
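/*
 * Illustrative: the per-flag files created above live in the instance's
 * "options" directory, so (assuming the usual tracefs mount point, and
 * "sym-offset" as an example flag name):
 *
 *   echo 1 > /sys/kernel/tracing/options/sym-offset
 *
 * ends up in trace_options_core_write() with val == 1 for that flag's bit.
 */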
8293
8294 static ssize_t
8295 rb_simple_read(struct file *filp, char __user *ubuf,
8296                size_t cnt, loff_t *ppos)
8297 {
8298         struct trace_array *tr = filp->private_data;
8299         char buf[64];
8300         int r;
8301
8302         r = tracer_tracing_is_on(tr);
8303         r = sprintf(buf, "%d\n", r);
8304
8305         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8306 }
8307
8308 static ssize_t
8309 rb_simple_write(struct file *filp, const char __user *ubuf,
8310                 size_t cnt, loff_t *ppos)
8311 {
8312         struct trace_array *tr = filp->private_data;
8313         struct trace_buffer *buffer = tr->array_buffer.buffer;
8314         unsigned long val;
8315         int ret;
8316
8317         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8318         if (ret)
8319                 return ret;
8320
8321         if (buffer) {
8322                 mutex_lock(&trace_types_lock);
8323                 if (!!val == tracer_tracing_is_on(tr)) {
8324                         val = 0; /* do nothing */
8325                 } else if (val) {
8326                         tracer_tracing_on(tr);
8327                         if (tr->current_trace->start)
8328                                 tr->current_trace->start(tr);
8329                 } else {
8330                         tracer_tracing_off(tr);
8331                         if (tr->current_trace->stop)
8332                                 tr->current_trace->stop(tr);
8333                 }
8334                 mutex_unlock(&trace_types_lock);
8335         }
8336
8337         (*ppos)++;
8338
8339         return cnt;
8340 }
8341
8342 static const struct file_operations rb_simple_fops = {
8343         .open           = tracing_open_generic_tr,
8344         .read           = rb_simple_read,
8345         .write          = rb_simple_write,
8346         .release        = tracing_release_generic_tr,
8347         .llseek         = default_llseek,
8348 };
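/*
 * Illustrative: rb_simple_fops backs the per-instance "tracing_on" file
 * created in init_tracer_tracefs() below, e.g. (usual mount point assumed):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on   # stop writing to the ring buffer
 *   echo 1 > /sys/kernel/tracing/tracing_on   # resume writing
 */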
8349
8350 static ssize_t
8351 buffer_percent_read(struct file *filp, char __user *ubuf,
8352                     size_t cnt, loff_t *ppos)
8353 {
8354         struct trace_array *tr = filp->private_data;
8355         char buf[64];
8356         int r;
8357
8358         r = tr->buffer_percent;
8359         r = sprintf(buf, "%d\n", r);
8360
8361         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8362 }
8363
8364 static ssize_t
8365 buffer_percent_write(struct file *filp, const char __user *ubuf,
8366                      size_t cnt, loff_t *ppos)
8367 {
8368         struct trace_array *tr = filp->private_data;
8369         unsigned long val;
8370         int ret;
8371
8372         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8373         if (ret)
8374                 return ret;
8375
8376         if (val > 100)
8377                 return -EINVAL;
8378
8379         if (!val)
8380                 val = 1;
8381
8382         tr->buffer_percent = val;
8383
8384         (*ppos)++;
8385
8386         return cnt;
8387 }
8388
8389 static const struct file_operations buffer_percent_fops = {
8390         .open           = tracing_open_generic_tr,
8391         .read           = buffer_percent_read,
8392         .write          = buffer_percent_write,
8393         .release        = tracing_release_generic_tr,
8394         .llseek         = default_llseek,
8395 };
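/*
 * Illustrative: "buffer_percent" (created in init_tracer_tracefs() below)
 * is the fill watermark, as a percentage of the ring buffer, that wakes
 * blocked readers of trace_pipe_raw, e.g.:
 *
 *   echo 50 > /sys/kernel/tracing/buffer_percent
 */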
8396
8397 static struct dentry *trace_instance_dir;
8398
8399 static void
8400 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8401
8402 static int
8403 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8404 {
8405         enum ring_buffer_flags rb_flags;
8406
8407         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8408
8409         buf->tr = tr;
8410
8411         buf->buffer = ring_buffer_alloc(size, rb_flags);
8412         if (!buf->buffer)
8413                 return -ENOMEM;
8414
8415         buf->data = alloc_percpu(struct trace_array_cpu);
8416         if (!buf->data) {
8417                 ring_buffer_free(buf->buffer);
8418                 buf->buffer = NULL;
8419                 return -ENOMEM;
8420         }
8421
8422         /* Allocate the first page for all buffers */
8423         set_buffer_entries(buf,
8424                            ring_buffer_size(buf->buffer, 0));
8425
8426         return 0;
8427 }
8428
8429 static int allocate_trace_buffers(struct trace_array *tr, int size)
8430 {
8431         int ret;
8432
8433         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8434         if (ret)
8435                 return ret;
8436
8437 #ifdef CONFIG_TRACER_MAX_TRACE
8438         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8439                                     allocate_snapshot ? size : 1);
8440         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8441                 ring_buffer_free(tr->array_buffer.buffer);
8442                 tr->array_buffer.buffer = NULL;
8443                 free_percpu(tr->array_buffer.data);
8444                 tr->array_buffer.data = NULL;
8445                 return -ENOMEM;
8446         }
8447         tr->allocated_snapshot = allocate_snapshot;
8448
8449         /*
8450          * Only the top level trace array gets its snapshot allocated
8451          * from the kernel command line.
8452          */
8453         allocate_snapshot = false;
8454 #endif
8455         return 0;
8456 }
8457
8458 static void free_trace_buffer(struct array_buffer *buf)
8459 {
8460         if (buf->buffer) {
8461                 ring_buffer_free(buf->buffer);
8462                 buf->buffer = NULL;
8463                 free_percpu(buf->data);
8464                 buf->data = NULL;
8465         }
8466 }
8467
8468 static void free_trace_buffers(struct trace_array *tr)
8469 {
8470         if (!tr)
8471                 return;
8472
8473         free_trace_buffer(&tr->array_buffer);
8474
8475 #ifdef CONFIG_TRACER_MAX_TRACE
8476         free_trace_buffer(&tr->max_buffer);
8477 #endif
8478 }
8479
8480 static void init_trace_flags_index(struct trace_array *tr)
8481 {
8482         int i;
8483
8484         /* Used by the trace options files */
8485         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8486                 tr->trace_flags_index[i] = i;
8487 }
8488
8489 static void __update_tracer_options(struct trace_array *tr)
8490 {
8491         struct tracer *t;
8492
8493         for (t = trace_types; t; t = t->next)
8494                 add_tracer_options(tr, t);
8495 }
8496
8497 static void update_tracer_options(struct trace_array *tr)
8498 {
8499         mutex_lock(&trace_types_lock);
8500         __update_tracer_options(tr);
8501         mutex_unlock(&trace_types_lock);
8502 }
8503
8504 /* Must have trace_types_lock held */
8505 struct trace_array *trace_array_find(const char *instance)
8506 {
8507         struct trace_array *tr, *found = NULL;
8508
8509         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8510                 if (tr->name && strcmp(tr->name, instance) == 0) {
8511                         found = tr;
8512                         break;
8513                 }
8514         }
8515
8516         return found;
8517 }
8518
8519 struct trace_array *trace_array_find_get(const char *instance)
8520 {
8521         struct trace_array *tr;
8522
8523         mutex_lock(&trace_types_lock);
8524         tr = trace_array_find(instance);
8525         if (tr)
8526                 tr->ref++;
8527         mutex_unlock(&trace_types_lock);
8528
8529         return tr;
8530 }
8531
8532 static struct trace_array *trace_array_create(const char *name)
8533 {
8534         struct trace_array *tr;
8535         int ret;
8536
8537         ret = -ENOMEM;
8538         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8539         if (!tr)
8540                 return ERR_PTR(ret);
8541
8542         tr->name = kstrdup(name, GFP_KERNEL);
8543         if (!tr->name)
8544                 goto out_free_tr;
8545
8546         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8547                 goto out_free_tr;
8548
8549         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8550
8551         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8552
8553         raw_spin_lock_init(&tr->start_lock);
8554
8555         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8556
8557         tr->current_trace = &nop_trace;
8558
8559         INIT_LIST_HEAD(&tr->systems);
8560         INIT_LIST_HEAD(&tr->events);
8561         INIT_LIST_HEAD(&tr->hist_vars);
8562         INIT_LIST_HEAD(&tr->err_log);
8563
8564         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8565                 goto out_free_tr;
8566
8567         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8568         if (!tr->dir)
8569                 goto out_free_tr;
8570
8571         ret = event_trace_add_tracer(tr->dir, tr);
8572         if (ret) {
8573                 tracefs_remove(tr->dir);
8574                 goto out_free_tr;
8575         }
8576
8577         ftrace_init_trace_array(tr);
8578
8579         init_tracer_tracefs(tr, tr->dir);
8580         init_trace_flags_index(tr);
8581         __update_tracer_options(tr);
8582
8583         list_add(&tr->list, &ftrace_trace_arrays);
8584
8585         tr->ref++;
8586
8587
8588         return tr;
8589
8590  out_free_tr:
8591         free_trace_buffers(tr);
8592         free_cpumask_var(tr->tracing_cpumask);
8593         kfree(tr->name);
8594         kfree(tr);
8595
8596         return ERR_PTR(ret);
8597 }
8598
8599 static int instance_mkdir(const char *name)
8600 {
8601         struct trace_array *tr;
8602         int ret;
8603
8604         mutex_lock(&event_mutex);
8605         mutex_lock(&trace_types_lock);
8606
8607         ret = -EEXIST;
8608         if (trace_array_find(name))
8609                 goto out_unlock;
8610
8611         tr = trace_array_create(name);
8612
8613         ret = PTR_ERR_OR_ZERO(tr);
8614
8615 out_unlock:
8616         mutex_unlock(&trace_types_lock);
8617         mutex_unlock(&event_mutex);
8618         return ret;
8619 }
8620
8621 /**
8622  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8623  * @name: The name of the trace array to be looked up/created.
8624  *
8625  * Returns a pointer to the trace array with the given name, or
8626  * NULL if it cannot be created.
8627  *
8628  * NOTE: This function increments the reference counter associated with the
8629  * trace array returned. This makes sure it cannot be freed while in use.
8630  * Use trace_array_put() once the trace array is no longer needed.
8631  * If the trace_array is to be freed, trace_array_destroy() needs to
8632  * be called after the trace_array_put(), or simply let user space delete
8633  * it from the tracefs instances directory. But until the
8634  * trace_array_put() is called, user space can not delete it.
8635  *
8636  */
8637 struct trace_array *trace_array_get_by_name(const char *name)
8638 {
8639         struct trace_array *tr;
8640
8641         mutex_lock(&event_mutex);
8642         mutex_lock(&trace_types_lock);
8643
8644         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8645                 if (tr->name && strcmp(tr->name, name) == 0)
8646                         goto out_unlock;
8647         }
8648
8649         tr = trace_array_create(name);
8650
8651         if (IS_ERR(tr))
8652                 tr = NULL;
8653 out_unlock:
8654         if (tr)
8655                 tr->ref++;
8656
8657         mutex_unlock(&trace_types_lock);
8658         mutex_unlock(&event_mutex);
8659         return tr;
8660 }
8661 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
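/*
 * Illustrative usage sketch, based on the kernel-doc above ("my_instance"
 * is just an example name):
 *
 *   struct trace_array *tr = trace_array_get_by_name("my_instance");
 *   if (tr) {
 *           ...
 *           trace_array_put(tr);
 *           trace_array_destroy(tr);    (only if the instance should go away)
 *   }
 */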
8662
8663 static int __remove_instance(struct trace_array *tr)
8664 {
8665         int i;
8666
8667         /* Reference counter for a newly created trace array = 1. */
8668         if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8669                 return -EBUSY;
8670
8671         list_del(&tr->list);
8672
8673         /* Disable all the flags that were enabled coming in */
8674         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8675                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8676                         set_tracer_flag(tr, 1 << i, 0);
8677         }
8678
8679         tracing_set_nop(tr);
8680         clear_ftrace_function_probes(tr);
8681         event_trace_del_tracer(tr);
8682         ftrace_clear_pids(tr);
8683         ftrace_destroy_function_files(tr);
8684         tracefs_remove(tr->dir);
8685         free_trace_buffers(tr);
8686
8687         for (i = 0; i < tr->nr_topts; i++) {
8688                 kfree(tr->topts[i].topts);
8689         }
8690         kfree(tr->topts);
8691
8692         free_cpumask_var(tr->tracing_cpumask);
8693         kfree(tr->name);
8694         kfree(tr);
8695         tr = NULL;
8696
8697         return 0;
8698 }
8699
8700 int trace_array_destroy(struct trace_array *this_tr)
8701 {
8702         struct trace_array *tr;
8703         int ret;
8704
8705         if (!this_tr)
8706                 return -EINVAL;
8707
8708         mutex_lock(&event_mutex);
8709         mutex_lock(&trace_types_lock);
8710
8711         ret = -ENODEV;
8712
8713         /* Making sure trace array exists before destroying it. */
8714         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8715                 if (tr == this_tr) {
8716                         ret = __remove_instance(tr);
8717                         break;
8718                 }
8719         }
8720
8721         mutex_unlock(&trace_types_lock);
8722         mutex_unlock(&event_mutex);
8723
8724         return ret;
8725 }
8726 EXPORT_SYMBOL_GPL(trace_array_destroy);
8727
8728 static int instance_rmdir(const char *name)
8729 {
8730         struct trace_array *tr;
8731         int ret;
8732
8733         mutex_lock(&event_mutex);
8734         mutex_lock(&trace_types_lock);
8735
8736         ret = -ENODEV;
8737         tr = trace_array_find(name);
8738         if (tr)
8739                 ret = __remove_instance(tr);
8740
8741         mutex_unlock(&trace_types_lock);
8742         mutex_unlock(&event_mutex);
8743
8744         return ret;
8745 }
8746
8747 static __init void create_trace_instances(struct dentry *d_tracer)
8748 {
8749         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8750                                                          instance_mkdir,
8751                                                          instance_rmdir);
8752         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8753                 return;
8754 }
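/*
 * Illustrative: the directory registered above lets user space manage
 * instances directly (assuming the usual tracefs mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo   ->  instance_mkdir("foo")
 *   rmdir /sys/kernel/tracing/instances/foo   ->  instance_rmdir("foo")
 */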
8755
8756 static void
8757 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8758 {
8759         struct trace_event_file *file;
8760         int cpu;
8761
8762         trace_create_file("available_tracers", 0444, d_tracer,
8763                         tr, &show_traces_fops);
8764
8765         trace_create_file("current_tracer", 0644, d_tracer,
8766                         tr, &set_tracer_fops);
8767
8768         trace_create_file("tracing_cpumask", 0644, d_tracer,
8769                           tr, &tracing_cpumask_fops);
8770
8771         trace_create_file("trace_options", 0644, d_tracer,
8772                           tr, &tracing_iter_fops);
8773
8774         trace_create_file("trace", 0644, d_tracer,
8775                           tr, &tracing_fops);
8776
8777         trace_create_file("trace_pipe", 0444, d_tracer,
8778                           tr, &tracing_pipe_fops);
8779
8780         trace_create_file("buffer_size_kb", 0644, d_tracer,
8781                           tr, &tracing_entries_fops);
8782
8783         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8784                           tr, &tracing_total_entries_fops);
8785
8786         trace_create_file("free_buffer", 0200, d_tracer,
8787                           tr, &tracing_free_buffer_fops);
8788
8789         trace_create_file("trace_marker", 0220, d_tracer,
8790                           tr, &tracing_mark_fops);
8791
8792         file = __find_event_file(tr, "ftrace", "print");
8793         if (file && file->dir)
8794                 trace_create_file("trigger", 0644, file->dir, file,
8795                                   &event_trigger_fops);
8796         tr->trace_marker_file = file;
8797
8798         trace_create_file("trace_marker_raw", 0220, d_tracer,
8799                           tr, &tracing_mark_raw_fops);
8800
8801         trace_create_file("trace_clock", 0644, d_tracer, tr,
8802                           &trace_clock_fops);
8803
8804         trace_create_file("tracing_on", 0644, d_tracer,
8805                           tr, &rb_simple_fops);
8806
8807         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8808                           &trace_time_stamp_mode_fops);
8809
8810         tr->buffer_percent = 50;
8811
8812         trace_create_file("buffer_percent", 0644, d_tracer,
8813                         tr, &buffer_percent_fops);
8814
8815         create_trace_options_dir(tr);
8816
8817 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8818         trace_create_maxlat_file(tr, d_tracer);
8819 #endif
8820
8821         if (ftrace_create_function_files(tr, d_tracer))
8822                 MEM_FAIL(1, "Could not allocate function filter files");
8823
8824 #ifdef CONFIG_TRACER_SNAPSHOT
8825         trace_create_file("snapshot", 0644, d_tracer,
8826                           tr, &snapshot_fops);
8827 #endif
8828
8829         trace_create_file("error_log", 0644, d_tracer,
8830                           tr, &tracing_err_log_fops);
8831
8832         for_each_tracing_cpu(cpu)
8833                 tracing_init_tracefs_percpu(tr, cpu);
8834
8835         ftrace_init_tracefs(tr, d_tracer);
8836 }
8837
8838 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8839 {
8840         struct vfsmount *mnt;
8841         struct file_system_type *type;
8842
8843         /*
8844          * To maintain backward compatibility for tools that mount
8845          * debugfs to get to the tracing facility, tracefs is automatically
8846          * mounted to the debugfs/tracing directory.
8847          */
8848         type = get_fs_type("tracefs");
8849         if (!type)
8850                 return NULL;
8851         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8852         put_filesystem(type);
8853         if (IS_ERR(mnt))
8854                 return NULL;
8855         mntget(mnt);
8856
8857         return mnt;
8858 }
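/*
 * Illustrative: with the automount callback above in place, legacy tooling
 * that only knows about debugfs, e.g.
 *
 *   mount -t debugfs nodev /sys/kernel/debug
 *   cat /sys/kernel/debug/tracing/trace
 *
 * keeps working, because walking into debugfs/tracing mounts tracefs there.
 */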
8859
8860 /**
8861  * tracing_init_dentry - initialize top level trace array
8862  *
8863  * This is called when creating files or directories in the tracing
8864  * directory. It is called via fs_initcall() by any of the boot up code
8865  * and expects to return the dentry of the top level tracing directory.
8866  */
8867 struct dentry *tracing_init_dentry(void)
8868 {
8869         struct trace_array *tr = &global_trace;
8870
8871         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8872                 pr_warn("Tracing disabled due to lockdown\n");
8873                 return ERR_PTR(-EPERM);
8874         }
8875
8876         /* The top level trace array uses NULL as parent */
8877         if (tr->dir)
8878                 return NULL;
8879
8880         if (WARN_ON(!tracefs_initialized()) ||
8881                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8882                  WARN_ON(!debugfs_initialized())))
8883                 return ERR_PTR(-ENODEV);
8884
8885         /*
8886          * As there may still be users that expect the tracing
8887          * files to exist in debugfs/tracing, we must automount
8888          * the tracefs file system there, so older tools still
8889          * work with the newer kernel.
8890          */
8891         tr->dir = debugfs_create_automount("tracing", NULL,
8892                                            trace_automount, NULL);
8893
8894         return NULL;
8895 }
8896
8897 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8898 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8899
8900 static void __init trace_eval_init(void)
8901 {
8902         int len;
8903
8904         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8905         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8906 }
8907
8908 #ifdef CONFIG_MODULES
8909 static void trace_module_add_evals(struct module *mod)
8910 {
8911         if (!mod->num_trace_evals)
8912                 return;
8913
8914         /*
8915          * Modules with bad taint do not have events created, do
8916          * not bother with enums either.
8917          */
8918         if (trace_module_has_bad_taint(mod))
8919                 return;
8920
8921         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8922 }
8923
8924 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8925 static void trace_module_remove_evals(struct module *mod)
8926 {
8927         union trace_eval_map_item *map;
8928         union trace_eval_map_item **last = &trace_eval_maps;
8929
8930         if (!mod->num_trace_evals)
8931                 return;
8932
8933         mutex_lock(&trace_eval_mutex);
8934
8935         map = trace_eval_maps;
8936
8937         while (map) {
8938                 if (map->head.mod == mod)
8939                         break;
8940                 map = trace_eval_jmp_to_tail(map);
8941                 last = &map->tail.next;
8942                 map = map->tail.next;
8943         }
8944         if (!map)
8945                 goto out;
8946
8947         *last = trace_eval_jmp_to_tail(map)->tail.next;
8948         kfree(map);
8949  out:
8950         mutex_unlock(&trace_eval_mutex);
8951 }
8952 #else
8953 static inline void trace_module_remove_evals(struct module *mod) { }
8954 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8955
8956 static int trace_module_notify(struct notifier_block *self,
8957                                unsigned long val, void *data)
8958 {
8959         struct module *mod = data;
8960
8961         switch (val) {
8962         case MODULE_STATE_COMING:
8963                 trace_module_add_evals(mod);
8964                 break;
8965         case MODULE_STATE_GOING:
8966                 trace_module_remove_evals(mod);
8967                 break;
8968         }
8969
8970         return 0;
8971 }
8972
8973 static struct notifier_block trace_module_nb = {
8974         .notifier_call = trace_module_notify,
8975         .priority = 0,
8976 };
8977 #endif /* CONFIG_MODULES */
8978
8979 static __init int tracer_init_tracefs(void)
8980 {
8981         struct dentry *d_tracer;
8982
8983         trace_access_lock_init();
8984
8985         d_tracer = tracing_init_dentry();
8986         if (IS_ERR(d_tracer))
8987                 return 0;
8988
8989         event_trace_init();
8990
8991         init_tracer_tracefs(&global_trace, d_tracer);
8992         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8993
8994         trace_create_file("tracing_thresh", 0644, d_tracer,
8995                         &global_trace, &tracing_thresh_fops);
8996
8997         trace_create_file("README", 0444, d_tracer,
8998                         NULL, &tracing_readme_fops);
8999
9000         trace_create_file("saved_cmdlines", 0444, d_tracer,
9001                         NULL, &tracing_saved_cmdlines_fops);
9002
9003         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9004                           NULL, &tracing_saved_cmdlines_size_fops);
9005
9006         trace_create_file("saved_tgids", 0444, d_tracer,
9007                         NULL, &tracing_saved_tgids_fops);
9008
9009         trace_eval_init();
9010
9011         trace_create_eval_file(d_tracer);
9012
9013 #ifdef CONFIG_MODULES
9014         register_module_notifier(&trace_module_nb);
9015 #endif
9016
9017 #ifdef CONFIG_DYNAMIC_FTRACE
9018         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9019                         NULL, &tracing_dyn_info_fops);
9020 #endif
9021
9022         create_trace_instances(d_tracer);
9023
9024         update_tracer_options(&global_trace);
9025
9026         return 0;
9027 }
9028
9029 static int trace_panic_handler(struct notifier_block *this,
9030                                unsigned long event, void *unused)
9031 {
9032         if (ftrace_dump_on_oops)
9033                 ftrace_dump(ftrace_dump_on_oops);
9034         return NOTIFY_OK;
9035 }
9036
9037 static struct notifier_block trace_panic_notifier = {
9038         .notifier_call  = trace_panic_handler,
9039         .next           = NULL,
9040         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9041 };
9042
9043 static int trace_die_handler(struct notifier_block *self,
9044                              unsigned long val,
9045                              void *data)
9046 {
9047         switch (val) {
9048         case DIE_OOPS:
9049                 if (ftrace_dump_on_oops)
9050                         ftrace_dump(ftrace_dump_on_oops);
9051                 break;
9052         default:
9053                 break;
9054         }
9055         return NOTIFY_OK;
9056 }
9057
9058 static struct notifier_block trace_die_notifier = {
9059         .notifier_call = trace_die_handler,
9060         .priority = 200
9061 };
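/*
 * Illustrative: the two notifiers above only dump the buffers when
 * ftrace_dump_on_oops is set, e.g. via the "ftrace_dump_on_oops" (or
 * "ftrace_dump_on_oops=orig_cpu") kernel command line parameter, or at
 * run time through /proc/sys/kernel/ftrace_dump_on_oops.
 */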
9062
9063 /*
9064  * printk is set to a max of 1024; we really don't need it that big.
9065  * Nothing should be printing 1000 characters anyway.
9066  */
9067 #define TRACE_MAX_PRINT         1000
9068
9069 /*
9070  * Define here KERN_TRACE so that we have one place to modify
9071  * it if we decide to change what log level the ftrace dump
9072  * should be at.
9073  */
9074 #define KERN_TRACE              KERN_EMERG
9075
9076 void
9077 trace_printk_seq(struct trace_seq *s)
9078 {
9079         /* Probably should print a warning here. */
9080         if (s->seq.len >= TRACE_MAX_PRINT)
9081                 s->seq.len = TRACE_MAX_PRINT;
9082
9083         /*
9084          * More paranoid code. Although the buffer size is set to
9085          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9086          * an extra layer of protection.
9087          */
9088         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9089                 s->seq.len = s->seq.size - 1;
9090
9091         /* Should be zero-terminated, but we are paranoid. */
9092         s->buffer[s->seq.len] = 0;
9093
9094         printk(KERN_TRACE "%s", s->buffer);
9095
9096         trace_seq_init(s);
9097 }
9098
9099 void trace_init_global_iter(struct trace_iterator *iter)
9100 {
9101         iter->tr = &global_trace;
9102         iter->trace = iter->tr->current_trace;
9103         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9104         iter->array_buffer = &global_trace.array_buffer;
9105
9106         if (iter->trace && iter->trace->open)
9107                 iter->trace->open(iter);
9108
9109         /* Annotate start of buffers if we had overruns */
9110         if (ring_buffer_overruns(iter->array_buffer->buffer))
9111                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9112
9113         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9114         if (trace_clocks[iter->tr->clock_id].in_ns)
9115                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9116 }
9117
9118 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9119 {
9120         /* use static because iter can be a bit big for the stack */
9121         static struct trace_iterator iter;
9122         static atomic_t dump_running;
9123         struct trace_array *tr = &global_trace;
9124         unsigned int old_userobj;
9125         unsigned long flags;
9126         int cnt = 0, cpu;
9127
9128         /* Only allow one dump user at a time. */
9129         if (atomic_inc_return(&dump_running) != 1) {
9130                 atomic_dec(&dump_running);
9131                 return;
9132         }
9133
9134         /*
9135          * Always turn off tracing when we dump.
9136          * We don't need to show trace output of what happens
9137          * between multiple crashes.
9138          *
9139          * If the user does a sysrq-z, then they can re-enable
9140          * tracing with echo 1 > tracing_on.
9141          */
9142         tracing_off();
9143
9144         local_irq_save(flags);
9145         printk_nmi_direct_enter();
9146
9147         /* Simulate the iterator */
9148         trace_init_global_iter(&iter);
9149
9150         for_each_tracing_cpu(cpu) {
9151                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9152         }
9153
9154         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9155
9156         /* don't look at user memory in panic mode */
9157         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9158
9159         switch (oops_dump_mode) {
9160         case DUMP_ALL:
9161                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9162                 break;
9163         case DUMP_ORIG:
9164                 iter.cpu_file = raw_smp_processor_id();
9165                 break;
9166         case DUMP_NONE:
9167                 goto out_enable;
9168         default:
9169                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9170                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9171         }
9172
9173         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9174
9175         /* Did function tracer already get disabled? */
9176         if (ftrace_is_dead()) {
9177                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9178                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9179         }
9180
9181         /*
9182          * We need to stop all tracing on all CPUs to read
9183          * the next buffer. This is a bit expensive, but is
9184          * not done often. We print everything we can read,
9185          * and then release the locks again.
9186          */
9187
9188         while (!trace_empty(&iter)) {
9189
9190                 if (!cnt)
9191                         printk(KERN_TRACE "---------------------------------\n");
9192
9193                 cnt++;
9194
9195                 trace_iterator_reset(&iter);
9196                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9197
9198                 if (trace_find_next_entry_inc(&iter) != NULL) {
9199                         int ret;
9200
9201                         ret = print_trace_line(&iter);
9202                         if (ret != TRACE_TYPE_NO_CONSUME)
9203                                 trace_consume(&iter);
9204                 }
9205                 touch_nmi_watchdog();
9206
9207                 trace_printk_seq(&iter.seq);
9208         }
9209
9210         if (!cnt)
9211                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9212         else
9213                 printk(KERN_TRACE "---------------------------------\n");
9214
9215  out_enable:
9216         tr->trace_flags |= old_userobj;
9217
9218         for_each_tracing_cpu(cpu) {
9219                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9220         }
9221         atomic_dec(&dump_running);
9222         printk_nmi_direct_exit();
9223         local_irq_restore(flags);
9224 }
9225 EXPORT_SYMBOL_GPL(ftrace_dump);
9226
9227 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9228 {
9229         char **argv;
9230         int argc, ret;
9231
9232         argc = 0;
9233         ret = 0;
9234         argv = argv_split(GFP_KERNEL, buf, &argc);
9235         if (!argv)
9236                 return -ENOMEM;
9237
9238         if (argc)
9239                 ret = createfn(argc, argv);
9240
9241         argv_free(argv);
9242
9243         return ret;
9244 }
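/*
 * Illustrative: trace_run_command() only splits the line; a write such as
 *
 *   "p:myprobe do_sys_open"    (hypothetical dynamic-event definition)
 *
 * reaches createfn() as argc == 2, argv == { "p:myprobe", "do_sys_open" }.
 */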
9245
9246 #define WRITE_BUFSIZE  4096
9247
9248 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9249                                 size_t count, loff_t *ppos,
9250                                 int (*createfn)(int, char **))
9251 {
9252         char *kbuf, *buf, *tmp;
9253         int ret = 0;
9254         size_t done = 0;
9255         size_t size;
9256
9257         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9258         if (!kbuf)
9259                 return -ENOMEM;
9260
9261         while (done < count) {
9262                 size = count - done;
9263
9264                 if (size >= WRITE_BUFSIZE)
9265                         size = WRITE_BUFSIZE - 1;
9266
9267                 if (copy_from_user(kbuf, buffer + done, size)) {
9268                         ret = -EFAULT;
9269                         goto out;
9270                 }
9271                 kbuf[size] = '\0';
9272                 buf = kbuf;
9273                 do {
9274                         tmp = strchr(buf, '\n');
9275                         if (tmp) {
9276                                 *tmp = '\0';
9277                                 size = tmp - buf + 1;
9278                         } else {
9279                                 size = strlen(buf);
9280                                 if (done + size < count) {
9281                                         if (buf != kbuf)
9282                                                 break;
9283                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9284                                         pr_warn("Line length is too long: Should be less than %d\n",
9285                                                 WRITE_BUFSIZE - 2);
9286                                         ret = -EINVAL;
9287                                         goto out;
9288                                 }
9289                         }
9290                         done += size;
9291
9292                         /* Remove comments */
9293                         tmp = strchr(buf, '#');
9294
9295                         if (tmp)
9296                                 *tmp = '\0';
9297
9298                         ret = trace_run_command(buf, createfn);
9299                         if (ret)
9300                                 goto out;
9301                         buf += size;
9302
9303                 } while (done < count);
9304         }
9305         ret = done;
9306
9307 out:
9308         kfree(kbuf);
9309
9310         return ret;
9311 }
9312
9313 __init static int tracer_alloc_buffers(void)
9314 {
9315         int ring_buf_size;
9316         int ret = -ENOMEM;
9317
9318
9319         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9320                 pr_warn("Tracing disabled due to lockdown\n");
9321                 return -EPERM;
9322         }
9323
9324         /*
9325          * Make sure we don't accidentally add more trace options
9326          * than we have bits for.
9327          */
9328         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9329
9330         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9331                 goto out;
9332
9333         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9334                 goto out_free_buffer_mask;
9335
9336         /* Only allocate trace_printk buffers if a trace_printk exists */
9337         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9338                 /* Must be called before global_trace.buffer is allocated */
9339                 trace_printk_init_buffers();
9340
9341         /* To save memory, keep the ring buffer size to its minimum */
9342         if (ring_buffer_expanded)
9343                 ring_buf_size = trace_buf_size;
9344         else
9345                 ring_buf_size = 1;
9346
9347         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9348         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9349
9350         raw_spin_lock_init(&global_trace.start_lock);
9351
9352         /*
9353          * The prepare callback allocates some memory for the ring buffer. We
9354          * don't free the buffer if the CPU goes down. If we were to free
9355          * the buffer, then the user would lose any trace that was in the
9356          * buffer. The memory will be removed once the "instance" is removed.
9357          */
9358         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9359                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9360                                       NULL);
9361         if (ret < 0)
9362                 goto out_free_cpumask;
9363         /* Used for event triggers */
9364         ret = -ENOMEM;
9365         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9366         if (!temp_buffer)
9367                 goto out_rm_hp_state;
9368
9369         if (trace_create_savedcmd() < 0)
9370                 goto out_free_temp_buffer;
9371
9372         /* TODO: make the number of buffers hot pluggable with CPUS */
9373         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9374                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9375                 goto out_free_savedcmd;
9376         }
9377
9378         if (global_trace.buffer_disabled)
9379                 tracing_off();
9380
9381         if (trace_boot_clock) {
9382                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9383                 if (ret < 0)
9384                         pr_warn("Trace clock %s not defined, going back to default\n",
9385                                 trace_boot_clock);
9386         }
9387
9388         /*
9389          * register_tracer() might reference current_trace, so it
9390          * needs to be set before we register anything. This is
9391          * just a bootstrap of current_trace anyway.
9392          */
9393         global_trace.current_trace = &nop_trace;
9394
9395         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9396
9397         ftrace_init_global_array_ops(&global_trace);
9398
9399         init_trace_flags_index(&global_trace);
9400
9401         register_tracer(&nop_trace);
9402
9403         /* Function tracing may start here (via kernel command line) */
9404         init_function_trace();
9405
9406         /* All seems OK, enable tracing */
9407         tracing_disabled = 0;
9408
9409         atomic_notifier_chain_register(&panic_notifier_list,
9410                                        &trace_panic_notifier);
9411
9412         register_die_notifier(&trace_die_notifier);
9413
9414         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9415
9416         INIT_LIST_HEAD(&global_trace.systems);
9417         INIT_LIST_HEAD(&global_trace.events);
9418         INIT_LIST_HEAD(&global_trace.hist_vars);
9419         INIT_LIST_HEAD(&global_trace.err_log);
9420         list_add(&global_trace.list, &ftrace_trace_arrays);
9421
9422         apply_trace_boot_options();
9423
9424         register_snapshot_cmd();
9425
9426         return 0;
9427
9428 out_free_savedcmd:
9429         free_saved_cmdlines_buffer(savedcmd);
9430 out_free_temp_buffer:
9431         ring_buffer_free(temp_buffer);
9432 out_rm_hp_state:
9433         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9434 out_free_cpumask:
9435         free_cpumask_var(global_trace.tracing_cpumask);
9436 out_free_buffer_mask:
9437         free_cpumask_var(tracing_buffer_mask);
9438 out:
9439         return ret;
9440 }
9441
9442 void __init early_trace_init(void)
9443 {
9444         if (tracepoint_printk) {
9445                 tracepoint_print_iter =
9446                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9447                 if (MEM_FAIL(!tracepoint_print_iter,
9448                              "Failed to allocate trace iterator\n"))
9449                         tracepoint_printk = 0;
9450                 else
9451                         static_key_enable(&tracepoint_printk_key.key);
9452         }
9453         tracer_alloc_buffers();
9454 }
9455
9456 void __init trace_init(void)
9457 {
9458         trace_event_init();
9459 }
9460
9461 __init static int clear_boot_tracer(void)
9462 {
9463         /*
9464          * The name of the default bootup tracer lives in an init
9465          * section that is about to be freed. This function runs at
9466          * late_initcall time; if the boot tracer never got registered,
9467          * clear the pointer so that a later registration does not
9468          * access the soon-to-be-freed buffer.
9469          */
9470         if (!default_bootup_tracer)
9471                 return 0;
9472
9473         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9474                default_bootup_tracer);
9475         default_bootup_tracer = NULL;
9476
9477         return 0;
9478 }
9479
9480 fs_initcall(tracer_init_tracefs);
9481 late_initcall_sync(clear_boot_tracer);
9482
9483 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9484 __init static int tracing_set_default_clock(void)
9485 {
9486         /* sched_clock_stable() is determined in late_initcall */
9487         if (!trace_boot_clock && !sched_clock_stable()) {
9488                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9489                         pr_warn("Can not set tracing clock due to lockdown\n");
9490                         return -EPERM;
9491                 }
9492
9493                 printk(KERN_WARNING
9494                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9495                        "If you want to keep using the local clock, then add:\n"
9496                        "  \"trace_clock=local\"\n"
9497                        "on the kernel command line\n");
9498                 tracing_set_clock(&global_trace, "global");
9499         }
9500
9501         return 0;
9502 }
9503 late_initcall_sync(tracing_set_default_clock);
9504 #endif