OSDN Git Service

tracing: Use swap macro in update_max_tr
[tomoyo/tomoyo-test1.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46
47 #include "trace.h"
48 #include "trace_output.h"
49
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will lurk into the ring-buffer to count the
59  * entries inserted during the selftest although some concurrent
60  * insertions into the ring-buffer such as trace_printk could occurred
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77         { }
78 };
79
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83         return 0;
84 }
85
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 but will turn to zero if the initialization
96  * of the tracer is successful. But that is the only place that sets
97  * this back to zero.
98  */
99 static int tracing_disabled = 1;
100
101 cpumask_var_t __read_mostly     tracing_buffer_mask;
102
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputing it to a
110  * serial console.
111  *
112  * It is default off, but you can enable it with either specifying
113  * "ftrace_dump_on_oops" in the kernel command line, or setting
114  * /proc/sys/kernel/ftrace_dump_on_oops
115  * Set 1 if you want to dump buffers of all CPUs
116  * Set 2 if you want to dump the buffer of the CPU that triggered oops
117  */
118
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127         struct module                   *mod;
128         unsigned long                   length;
129 };
130
131 union trace_eval_map_item;
132
133 struct trace_eval_map_tail {
134         /*
135          * "end" is first and points to NULL as it must be different
136          * than "mod" or "eval_string"
137          */
138         union trace_eval_map_item       *next;
139         const char                      *end;   /* points to NULL */
140 };
141
142 static DEFINE_MUTEX(trace_eval_mutex);
143
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152         struct trace_eval_map           map;
153         struct trace_eval_map_head      head;
154         struct trace_eval_map_tail      tail;
155 };
156
157 static union trace_eval_map_item *trace_eval_maps;
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161
162 #define MAX_TRACER_SIZE         100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165
166 static bool allocate_snapshot;
167
168 static int __init set_cmdline_ftrace(char *str)
169 {
170         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171         default_bootup_tracer = bootup_tracer_buf;
172         /* We are using ftrace early, expand it */
173         ring_buffer_expanded = true;
174         return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
177
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180         if (*str++ != '=' || !*str) {
181                 ftrace_dump_on_oops = DUMP_ALL;
182                 return 1;
183         }
184
185         if (!strcmp("orig_cpu", str)) {
186                 ftrace_dump_on_oops = DUMP_ORIG;
187                 return 1;
188         }
189
190         return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193
194 static int __init stop_trace_on_warning(char *str)
195 {
196         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197                 __disable_trace_on_warning = 1;
198         return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201
202 static int __init boot_alloc_snapshot(char *str)
203 {
204         allocate_snapshot = true;
205         /* We also need the main ring buffer expanded */
206         ring_buffer_expanded = true;
207         return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
210
211
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213
214 static int __init set_trace_boot_options(char *str)
215 {
216         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217         return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223
224 static int __init set_trace_boot_clock(char *str)
225 {
226         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227         trace_boot_clock = trace_boot_clock_buf;
228         return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
231
232 static int __init set_tracepoint_printk(char *str)
233 {
234         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235                 tracepoint_printk = 1;
236         return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239
240 unsigned long long ns2usecs(u64 nsec)
241 {
242         nsec += 500;
243         do_div(nsec, 1000);
244         return nsec;
245 }
246
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS                                             \
249         (FUNCTION_DEFAULT_FLAGS |                                       \
250          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
251          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
252          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
253          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
257                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268         .trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270
271 LIST_HEAD(ftrace_trace_arrays);
272
273 int trace_array_get(struct trace_array *this_tr)
274 {
275         struct trace_array *tr;
276         int ret = -ENODEV;
277
278         mutex_lock(&trace_types_lock);
279         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280                 if (tr == this_tr) {
281                         tr->ref++;
282                         ret = 0;
283                         break;
284                 }
285         }
286         mutex_unlock(&trace_types_lock);
287
288         return ret;
289 }
290
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293         WARN_ON(!this_tr->ref);
294         this_tr->ref--;
295 }
296
297 void trace_array_put(struct trace_array *this_tr)
298 {
299         mutex_lock(&trace_types_lock);
300         __trace_array_put(this_tr);
301         mutex_unlock(&trace_types_lock);
302 }
303
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305                               struct ring_buffer *buffer,
306                               struct ring_buffer_event *event)
307 {
308         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309             !filter_match_preds(call->filter, rec)) {
310                 __trace_event_discard_commit(buffer, event);
311                 return 1;
312         }
313
314         return 0;
315 }
316
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319         vfree(pid_list->pids);
320         kfree(pid_list);
321 }
322
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333         /*
334          * If pid_max changed after filtered_pids was created, we
335          * by default ignore all pids greater than the previous pid_max.
336          */
337         if (search_pid >= filtered_pids->pid_max)
338                 return false;
339
340         return test_bit(search_pid, filtered_pids->pids);
341 }
342
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355         /*
356          * Return false, because if filtered_pids does not exist,
357          * all pids are good to trace.
358          */
359         if (!filtered_pids)
360                 return false;
361
362         return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364
365 /**
366  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * If adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork and tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which would happen on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378                                   struct task_struct *self,
379                                   struct task_struct *task)
380 {
381         if (!pid_list)
382                 return;
383
384         /* For forks, we only add if the forking task is listed */
385         if (self) {
386                 if (!trace_find_filtered_pid(pid_list, self->pid))
387                         return;
388         }
389
390         /* Sorry, but we don't support pid_max changing after setting */
391         if (task->pid >= pid_list->pid_max)
392                 return;
393
394         /* "self" is set for forks, and NULL for exits */
395         if (self)
396                 set_bit(task->pid, pid_list->pids);
397         else
398                 clear_bit(task->pid, pid_list->pids);
399 }
400
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415         unsigned long pid = (unsigned long)v;
416
417         (*pos)++;
418
419         /* pid already is +1 of the actual prevous bit */
420         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421
422         /* Return pid + 1 to allow zero to be represented */
423         if (pid < pid_list->pid_max)
424                 return (void *)(pid + 1);
425
426         return NULL;
427 }
428
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442         unsigned long pid;
443         loff_t l = 0;
444
445         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446         if (pid >= pid_list->pid_max)
447                 return NULL;
448
449         /* Return pid + 1 so that zero can be the exit value */
450         for (pid++; pid && l < *pos;
451              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452                 ;
453         return (void *)pid;
454 }
455
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466         unsigned long pid = (unsigned long)v - 1;
467
468         seq_printf(m, "%lu\n", pid);
469         return 0;
470 }
471
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE            127
474
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476                     struct trace_pid_list **new_pid_list,
477                     const char __user *ubuf, size_t cnt)
478 {
479         struct trace_pid_list *pid_list;
480         struct trace_parser parser;
481         unsigned long val;
482         int nr_pids = 0;
483         ssize_t read = 0;
484         ssize_t ret = 0;
485         loff_t pos;
486         pid_t pid;
487
488         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489                 return -ENOMEM;
490
491         /*
492          * Always recreate a new array. The write is an all or nothing
493          * operation. Always create a new array when adding new pids by
494          * the user. If the operation fails, then the current list is
495          * not modified.
496          */
497         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498         if (!pid_list)
499                 return -ENOMEM;
500
501         pid_list->pid_max = READ_ONCE(pid_max);
502
503         /* Only truncating will shrink pid_max */
504         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505                 pid_list->pid_max = filtered_pids->pid_max;
506
507         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508         if (!pid_list->pids) {
509                 kfree(pid_list);
510                 return -ENOMEM;
511         }
512
513         if (filtered_pids) {
514                 /* copy the current bits to the new max */
515                 for_each_set_bit(pid, filtered_pids->pids,
516                                  filtered_pids->pid_max) {
517                         set_bit(pid, pid_list->pids);
518                         nr_pids++;
519                 }
520         }
521
522         while (cnt > 0) {
523
524                 pos = 0;
525
526                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
527                 if (ret < 0 || !trace_parser_loaded(&parser))
528                         break;
529
530                 read += ret;
531                 ubuf += ret;
532                 cnt -= ret;
533
534                 ret = -EINVAL;
535                 if (kstrtoul(parser.buffer, 0, &val))
536                         break;
537                 if (val >= pid_list->pid_max)
538                         break;
539
540                 pid = (pid_t)val;
541
542                 set_bit(pid, pid_list->pids);
543                 nr_pids++;
544
545                 trace_parser_clear(&parser);
546                 ret = 0;
547         }
548         trace_parser_put(&parser);
549
550         if (ret < 0) {
551                 trace_free_pid_list(pid_list);
552                 return ret;
553         }
554
555         if (!nr_pids) {
556                 /* Cleared the list of pids */
557                 trace_free_pid_list(pid_list);
558                 read = ret;
559                 pid_list = NULL;
560         }
561
562         *new_pid_list = pid_list;
563
564         return read;
565 }
566
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569         u64 ts;
570
571         /* Early boot up does not have a buffer yet */
572         if (!buf->buffer)
573                 return trace_clock_local();
574
575         ts = ring_buffer_time_stamp(buf->buffer, cpu);
576         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577
578         return ts;
579 }
580
581 u64 ftrace_now(int cpu)
582 {
583         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" to be used in fast paths such as for
591  * the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597         /*
598          * For quick access (irqsoff uses this in fast path), just
599          * return the mirror variable of the state of the ring buffer.
600          * It's a little racy, but we don't really care.
601          */
602         smp_rmb();
603         return !global_trace.buffer_disabled;
604 }
605
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to a low number of 16384.
612  * If the dump on oops happens, it will be much appreciated
613  * to not have to wait for all that output. Anyway this can be
614  * boot time and run time configurable.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
617
618 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer            *trace_types __read_mostly;
622
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627
628 /*
629  * serialize the access of the ring buffer
630  *
631  * ring buffer serializes readers, but it is low level protection.
632  * The validity of the events (which returns by ring_buffer_peek() ..etc)
633  * are not protected by ring buffer.
634  *
635  * The content of events may become garbage if we allow other process consumes
636  * these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not reader page) in ring buffer, and this page will be rewrited
639  *      by events producer.
640  *   B) The page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to system.
642  *
643  * These primitives allow multi process access to different cpu ring buffer
644  * concurrently.
645  *
646  * These primitives don't distinguish read-only and read-consume access.
647  * Multi read-only access are also serialized.
648  */
649
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653
654 static inline void trace_access_lock(int cpu)
655 {
656         if (cpu == RING_BUFFER_ALL_CPUS) {
657                 /* gain it for accessing the whole ring buffer. */
658                 down_write(&all_cpu_access_lock);
659         } else {
660                 /* gain it for accessing a cpu ring buffer. */
661
662                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663                 down_read(&all_cpu_access_lock);
664
665                 /* Secondly block other access to this @cpu ring buffer. */
666                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
667         }
668 }
669
670 static inline void trace_access_unlock(int cpu)
671 {
672         if (cpu == RING_BUFFER_ALL_CPUS) {
673                 up_write(&all_cpu_access_lock);
674         } else {
675                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676                 up_read(&all_cpu_access_lock);
677         }
678 }
679
680 static inline void trace_access_lock_init(void)
681 {
682         int cpu;
683
684         for_each_possible_cpu(cpu)
685                 mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687
688 #else
689
690 static DEFINE_MUTEX(access_lock);
691
692 static inline void trace_access_lock(int cpu)
693 {
694         (void)cpu;
695         mutex_lock(&access_lock);
696 }
697
698 static inline void trace_access_unlock(int cpu)
699 {
700         (void)cpu;
701         mutex_unlock(&access_lock);
702 }
703
704 static inline void trace_access_lock_init(void)
705 {
706 }
707
708 #endif
709
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712                                  unsigned long flags,
713                                  int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715                                       struct ring_buffer *buffer,
716                                       unsigned long flags,
717                                       int skip, int pc, struct pt_regs *regs);
718
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721                                         unsigned long flags,
722                                         int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726                                       struct ring_buffer *buffer,
727                                       unsigned long flags,
728                                       int skip, int pc, struct pt_regs *regs)
729 {
730 }
731
732 #endif
733
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736                   int type, unsigned long flags, int pc)
737 {
738         struct trace_entry *ent = ring_buffer_event_data(event);
739
740         tracing_generic_entry_update(ent, flags, pc);
741         ent->type = type;
742 }
743
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746                           int type,
747                           unsigned long len,
748                           unsigned long flags, int pc)
749 {
750         struct ring_buffer_event *event;
751
752         event = ring_buffer_lock_reserve(buffer, len);
753         if (event != NULL)
754                 trace_event_setup(event, type, flags, pc);
755
756         return event;
757 }
758
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761         if (tr->trace_buffer.buffer)
762                 ring_buffer_record_on(tr->trace_buffer.buffer);
763         /*
764          * This flag is looked at when buffers haven't been allocated
765          * yet, or by some tracers (like irqsoff), that just want to
766          * know if the ring buffer has been disabled, but it can handle
767          * races of where it gets disabled but we still do a record.
768          * As the check is in the fast path of the tracers, it is more
769          * important to be fast than accurate.
770          */
771         tr->buffer_disabled = 0;
772         /* Make the flag seen by readers */
773         smp_wmb();
774 }
775
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784         tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787
788
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792         __this_cpu_write(trace_taskinfo_save, true);
793
794         /* If this is the temp buffer, we need to commit fully */
795         if (this_cpu_read(trace_buffered_event) == event) {
796                 /* Length is in event->array[0] */
797                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
798                 /* Release the temp buffer */
799                 this_cpu_dec(trace_buffered_event_cnt);
800         } else
801                 ring_buffer_unlock_commit(buffer, event);
802 }
803
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:    The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812         struct ring_buffer_event *event;
813         struct ring_buffer *buffer;
814         struct print_entry *entry;
815         unsigned long irq_flags;
816         int alloc;
817         int pc;
818
819         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820                 return 0;
821
822         pc = preempt_count();
823
824         if (unlikely(tracing_selftest_running || tracing_disabled))
825                 return 0;
826
827         alloc = sizeof(*entry) + size + 2; /* possible \n added */
828
829         local_save_flags(irq_flags);
830         buffer = global_trace.trace_buffer.buffer;
831         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
832                                             irq_flags, pc);
833         if (!event)
834                 return 0;
835
836         entry = ring_buffer_event_data(event);
837         entry->ip = ip;
838
839         memcpy(&entry->buf, str, size);
840
841         /* Add a newline if necessary */
842         if (entry->buf[size - 1] != '\n') {
843                 entry->buf[size] = '\n';
844                 entry->buf[size + 1] = '\0';
845         } else
846                 entry->buf[size] = '\0';
847
848         __buffer_unlock_commit(buffer, event);
849         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850
851         return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
854
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:    The address of the caller
858  * @str:   The constant string to write to the buffer to
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862         struct ring_buffer_event *event;
863         struct ring_buffer *buffer;
864         struct bputs_entry *entry;
865         unsigned long irq_flags;
866         int size = sizeof(struct bputs_entry);
867         int pc;
868
869         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870                 return 0;
871
872         pc = preempt_count();
873
874         if (unlikely(tracing_selftest_running || tracing_disabled))
875                 return 0;
876
877         local_save_flags(irq_flags);
878         buffer = global_trace.trace_buffer.buffer;
879         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880                                             irq_flags, pc);
881         if (!event)
882                 return 0;
883
884         entry = ring_buffer_event_data(event);
885         entry->ip                       = ip;
886         entry->str                      = str;
887
888         __buffer_unlock_commit(buffer, event);
889         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890
891         return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
894
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 void tracing_snapshot_instance(struct trace_array *tr)
897 {
898         struct tracer *tracer = tr->current_trace;
899         unsigned long flags;
900
901         if (in_nmi()) {
902                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903                 internal_trace_puts("*** snapshot is being ignored        ***\n");
904                 return;
905         }
906
907         if (!tr->allocated_snapshot) {
908                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909                 internal_trace_puts("*** stopping trace here!   ***\n");
910                 tracing_off();
911                 return;
912         }
913
914         /* Note, snapshot can not be used when the tracer uses it */
915         if (tracer->use_max_tr) {
916                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918                 return;
919         }
920
921         local_irq_save(flags);
922         update_max_tr(tr, current, smp_processor_id());
923         local_irq_restore(flags);
924 }
925
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot with either
934  * a tracing_snapshot_alloc(), or by doing it manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942         struct trace_array *tr = &global_trace;
943
944         tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949                                         struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951
952 int tracing_alloc_snapshot_instance(struct trace_array *tr)
953 {
954         int ret;
955
956         if (!tr->allocated_snapshot) {
957
958                 /* allocate spare buffer */
959                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
960                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961                 if (ret < 0)
962                         return ret;
963
964                 tr->allocated_snapshot = true;
965         }
966
967         return 0;
968 }
969
970 static void free_snapshot(struct trace_array *tr)
971 {
972         /*
973          * We don't free the ring buffer. instead, resize it because
974          * The max_tr ring buffer has some state (e.g. ring->clock) and
975          * we want preserve it.
976          */
977         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978         set_buffer_entries(&tr->max_buffer, 1);
979         tracing_reset_online_cpus(&tr->max_buffer);
980         tr->allocated_snapshot = false;
981 }
982
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995         struct trace_array *tr = &global_trace;
996         int ret;
997
998         ret = tracing_alloc_snapshot_instance(tr);
999         WARN_ON(ret < 0);
1000
1001         return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018         int ret;
1019
1020         ret = tracing_alloc_snapshot();
1021         if (ret < 0)
1022                 return;
1023
1024         tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036         return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041         /* Give warning */
1042         tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
1046
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049         if (tr->trace_buffer.buffer)
1050                 ring_buffer_record_off(tr->trace_buffer.buffer);
1051         /*
1052          * This flag is looked at when buffers haven't been allocated
1053          * yet, or by some tracers (like irqsoff), that just want to
1054          * know if the ring buffer has been disabled, but it can handle
1055          * races of where it gets disabled but we still do a record.
1056          * As the check is in the fast path of the tracers, it is more
1057          * important to be fast than accurate.
1058          */
1059         tr->buffer_disabled = 1;
1060         /* Make the flag seen by readers */
1061         smp_wmb();
1062 }
1063
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074         tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
1077
1078 void disable_trace_on_warning(void)
1079 {
1080         if (__disable_trace_on_warning)
1081                 tracing_off();
1082 }
1083
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr : the trace array to know if ring buffer is enabled
1087  *
1088  * Shows real state of the ring buffer if it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092         if (tr->trace_buffer.buffer)
1093                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094         return !tr->buffer_disabled;
1095 }
1096
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102         return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
1105
1106 static int __init set_buf_size(char *str)
1107 {
1108         unsigned long buf_size;
1109
1110         if (!str)
1111                 return 0;
1112         buf_size = memparse(str, &str);
1113         /* nr_entries can not be zero */
1114         if (buf_size == 0)
1115                 return 0;
1116         trace_buf_size = buf_size;
1117         return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123         unsigned long threshold;
1124         int ret;
1125
1126         if (!str)
1127                 return 0;
1128         ret = kstrtoul(str, 0, &threshold);
1129         if (ret < 0)
1130                 return 0;
1131         tracing_thresh = threshold * 1000;
1132         return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
1135
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138         return nsecs / 1000;
1139 }
1140
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149
1150 /* These must match the bit postions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152         TRACE_FLAGS
1153         NULL
1154 };
1155
1156 static struct {
1157         u64 (*func)(void);
1158         const char *name;
1159         int in_ns;              /* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161         { trace_clock_local,            "local",        1 },
1162         { trace_clock_global,           "global",       1 },
1163         { trace_clock_counter,          "counter",      0 },
1164         { trace_clock_jiffies,          "uptime",       0 },
1165         { trace_clock,                  "perf",         1 },
1166         { ktime_get_mono_fast_ns,       "mono",         1 },
1167         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1168         { ktime_get_boot_fast_ns,       "boot",         1 },
1169         ARCH_TRACE_CLOCKS
1170 };
1171
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174         if (trace_clocks[tr->clock_id].in_ns)
1175                 return true;
1176
1177         return false;
1178 }
1179
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185         memset(parser, 0, sizeof(*parser));
1186
1187         parser->buffer = kmalloc(size, GFP_KERNEL);
1188         if (!parser->buffer)
1189                 return 1;
1190
1191         parser->size = size;
1192         return 0;
1193 }
1194
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200         kfree(parser->buffer);
1201         parser->buffer = NULL;
1202 }
1203
1204 /*
1205  * trace_get_user - reads the user input string separated by  space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216         size_t cnt, loff_t *ppos)
1217 {
1218         char ch;
1219         size_t read = 0;
1220         ssize_t ret;
1221
1222         if (!*ppos)
1223                 trace_parser_clear(parser);
1224
1225         ret = get_user(ch, ubuf++);
1226         if (ret)
1227                 goto out;
1228
1229         read++;
1230         cnt--;
1231
1232         /*
1233          * The parser is not finished with the last write,
1234          * continue reading the user input without skipping spaces.
1235          */
1236         if (!parser->cont) {
1237                 /* skip white space */
1238                 while (cnt && isspace(ch)) {
1239                         ret = get_user(ch, ubuf++);
1240                         if (ret)
1241                                 goto out;
1242                         read++;
1243                         cnt--;
1244                 }
1245
1246                 parser->idx = 0;
1247
1248                 /* only spaces were written */
1249                 if (isspace(ch) || !ch) {
1250                         *ppos += read;
1251                         ret = read;
1252                         goto out;
1253                 }
1254         }
1255
1256         /* read the non-space input */
1257         while (cnt && !isspace(ch) && ch) {
1258                 if (parser->idx < parser->size - 1)
1259                         parser->buffer[parser->idx++] = ch;
1260                 else {
1261                         ret = -EINVAL;
1262                         goto out;
1263                 }
1264                 ret = get_user(ch, ubuf++);
1265                 if (ret)
1266                         goto out;
1267                 read++;
1268                 cnt--;
1269         }
1270
1271         /* We either got finished input or we have to wait for another call. */
1272         if (isspace(ch) || !ch) {
1273                 parser->buffer[parser->idx] = 0;
1274                 parser->cont = false;
1275         } else if (parser->idx < parser->size - 1) {
1276                 parser->cont = true;
1277                 parser->buffer[parser->idx++] = ch;
1278                 /* Make sure the parsed string always terminates with '\0'. */
1279                 parser->buffer[parser->idx] = 0;
1280         } else {
1281                 ret = -EINVAL;
1282                 goto out;
1283         }
1284
1285         *ppos += read;
1286         ret = read;
1287
1288 out:
1289         return ret;
1290 }
1291
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295         int len;
1296
1297         if (trace_seq_used(s) <= s->seq.readpos)
1298                 return -EBUSY;
1299
1300         len = trace_seq_used(s) - s->seq.readpos;
1301         if (cnt > len)
1302                 cnt = len;
1303         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304
1305         s->seq.readpos += cnt;
1306         return cnt;
1307 }
1308
1309 unsigned long __read_mostly     tracing_thresh;
1310
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320         struct trace_buffer *trace_buf = &tr->trace_buffer;
1321         struct trace_buffer *max_buf = &tr->max_buffer;
1322         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324
1325         max_buf->cpu = cpu;
1326         max_buf->time_start = data->preempt_timestamp;
1327
1328         max_data->saved_latency = tr->max_latency;
1329         max_data->critical_start = data->critical_start;
1330         max_data->critical_end = data->critical_end;
1331
1332         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333         max_data->pid = tsk->pid;
1334         /*
1335          * If tsk == current, then use current_uid(), as that does not use
1336          * RCU. The irq tracer can be called out of RCU scope.
1337          */
1338         if (tsk == current)
1339                 max_data->uid = current_uid();
1340         else
1341                 max_data->uid = task_uid(tsk);
1342
1343         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344         max_data->policy = tsk->policy;
1345         max_data->rt_priority = tsk->rt_priority;
1346
1347         /* record this tasks comm */
1348         tracing_record_cmdline(tsk);
1349 }
1350
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363         if (tr->stop_count)
1364                 return;
1365
1366         WARN_ON_ONCE(!irqs_disabled());
1367
1368         if (!tr->allocated_snapshot) {
1369                 /* Only the nop tracer should hit this when disabling */
1370                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1371                 return;
1372         }
1373
1374         arch_spin_lock(&tr->max_lock);
1375
1376         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1377
1378         __update_max_tr(tr, tsk, cpu);
1379         arch_spin_unlock(&tr->max_lock);
1380 }
1381
1382 /**
1383  * update_max_tr_single - only copy one trace over, and reset the rest
1384  * @tr - tracer
1385  * @tsk - task with the latency
1386  * @cpu - the cpu of the buffer to copy.
1387  *
1388  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1389  */
1390 void
1391 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1392 {
1393         int ret;
1394
1395         if (tr->stop_count)
1396                 return;
1397
1398         WARN_ON_ONCE(!irqs_disabled());
1399         if (!tr->allocated_snapshot) {
1400                 /* Only the nop tracer should hit this when disabling */
1401                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1402                 return;
1403         }
1404
1405         arch_spin_lock(&tr->max_lock);
1406
1407         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1408
1409         if (ret == -EBUSY) {
1410                 /*
1411                  * We failed to swap the buffer due to a commit taking
1412                  * place on this CPU. We fail to record, but we reset
1413                  * the max trace buffer (no one writes directly to it)
1414                  * and flag that it failed.
1415                  */
1416                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1417                         "Failed to swap buffers due to commit in progress\n");
1418         }
1419
1420         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1421
1422         __update_max_tr(tr, tsk, cpu);
1423         arch_spin_unlock(&tr->max_lock);
1424 }
1425 #endif /* CONFIG_TRACER_MAX_TRACE */
1426
1427 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1428 {
1429         /* Iterators are static, they should be filled or empty */
1430         if (trace_buffer_iter(iter, iter->cpu_file))
1431                 return 0;
1432
1433         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1434                                 full);
1435 }
1436
1437 #ifdef CONFIG_FTRACE_STARTUP_TEST
1438 static bool selftests_can_run;
1439
1440 struct trace_selftests {
1441         struct list_head                list;
1442         struct tracer                   *type;
1443 };
1444
1445 static LIST_HEAD(postponed_selftests);
1446
1447 static int save_selftest(struct tracer *type)
1448 {
1449         struct trace_selftests *selftest;
1450
1451         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1452         if (!selftest)
1453                 return -ENOMEM;
1454
1455         selftest->type = type;
1456         list_add(&selftest->list, &postponed_selftests);
1457         return 0;
1458 }
1459
1460 static int run_tracer_selftest(struct tracer *type)
1461 {
1462         struct trace_array *tr = &global_trace;
1463         struct tracer *saved_tracer = tr->current_trace;
1464         int ret;
1465
1466         if (!type->selftest || tracing_selftest_disabled)
1467                 return 0;
1468
1469         /*
1470          * If a tracer registers early in boot up (before scheduling is
1471          * initialized and such), then do not run its selftests yet.
1472          * Instead, run it a little later in the boot process.
1473          */
1474         if (!selftests_can_run)
1475                 return save_selftest(type);
1476
1477         /*
1478          * Run a selftest on this tracer.
1479          * Here we reset the trace buffer, and set the current
1480          * tracer to be this tracer. The tracer can then run some
1481          * internal tracing to verify that everything is in order.
1482          * If we fail, we do not register this tracer.
1483          */
1484         tracing_reset_online_cpus(&tr->trace_buffer);
1485
1486         tr->current_trace = type;
1487
1488 #ifdef CONFIG_TRACER_MAX_TRACE
1489         if (type->use_max_tr) {
1490                 /* If we expanded the buffers, make sure the max is expanded too */
1491                 if (ring_buffer_expanded)
1492                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1493                                            RING_BUFFER_ALL_CPUS);
1494                 tr->allocated_snapshot = true;
1495         }
1496 #endif
1497
1498         /* the test is responsible for initializing and enabling */
1499         pr_info("Testing tracer %s: ", type->name);
1500         ret = type->selftest(type, tr);
1501         /* the test is responsible for resetting too */
1502         tr->current_trace = saved_tracer;
1503         if (ret) {
1504                 printk(KERN_CONT "FAILED!\n");
1505                 /* Add the warning after printing 'FAILED' */
1506                 WARN_ON(1);
1507                 return -1;
1508         }
1509         /* Only reset on passing, to avoid touching corrupted buffers */
1510         tracing_reset_online_cpus(&tr->trace_buffer);
1511
1512 #ifdef CONFIG_TRACER_MAX_TRACE
1513         if (type->use_max_tr) {
1514                 tr->allocated_snapshot = false;
1515
1516                 /* Shrink the max buffer again */
1517                 if (ring_buffer_expanded)
1518                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1519                                            RING_BUFFER_ALL_CPUS);
1520         }
1521 #endif
1522
1523         printk(KERN_CONT "PASSED\n");
1524         return 0;
1525 }
1526
1527 static __init int init_trace_selftests(void)
1528 {
1529         struct trace_selftests *p, *n;
1530         struct tracer *t, **last;
1531         int ret;
1532
1533         selftests_can_run = true;
1534
1535         mutex_lock(&trace_types_lock);
1536
1537         if (list_empty(&postponed_selftests))
1538                 goto out;
1539
1540         pr_info("Running postponed tracer tests:\n");
1541
1542         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1543                 ret = run_tracer_selftest(p->type);
1544                 /* If the test fails, then warn and remove from available_tracers */
1545                 if (ret < 0) {
1546                         WARN(1, "tracer: %s failed selftest, disabling\n",
1547                              p->type->name);
1548                         last = &trace_types;
1549                         for (t = trace_types; t; t = t->next) {
1550                                 if (t == p->type) {
1551                                         *last = t->next;
1552                                         break;
1553                                 }
1554                                 last = &t->next;
1555                         }
1556                 }
1557                 list_del(&p->list);
1558                 kfree(p);
1559         }
1560
1561  out:
1562         mutex_unlock(&trace_types_lock);
1563
1564         return 0;
1565 }
1566 core_initcall(init_trace_selftests);
1567 #else
1568 static inline int run_tracer_selftest(struct tracer *type)
1569 {
1570         return 0;
1571 }
1572 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1573
1574 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1575
1576 static void __init apply_trace_boot_options(void);
1577
1578 /**
1579  * register_tracer - register a tracer with the ftrace system.
1580  * @type - the plugin for the tracer
1581  *
1582  * Register a new plugin tracer.
1583  */
1584 int __init register_tracer(struct tracer *type)
1585 {
1586         struct tracer *t;
1587         int ret = 0;
1588
1589         if (!type->name) {
1590                 pr_info("Tracer must have a name\n");
1591                 return -1;
1592         }
1593
1594         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1595                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1596                 return -1;
1597         }
1598
1599         mutex_lock(&trace_types_lock);
1600
1601         tracing_selftest_running = true;
1602
1603         for (t = trace_types; t; t = t->next) {
1604                 if (strcmp(type->name, t->name) == 0) {
1605                         /* already found */
1606                         pr_info("Tracer %s already registered\n",
1607                                 type->name);
1608                         ret = -1;
1609                         goto out;
1610                 }
1611         }
1612
1613         if (!type->set_flag)
1614                 type->set_flag = &dummy_set_flag;
1615         if (!type->flags) {
1616                 /*allocate a dummy tracer_flags*/
1617                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1618                 if (!type->flags) {
1619                         ret = -ENOMEM;
1620                         goto out;
1621                 }
1622                 type->flags->val = 0;
1623                 type->flags->opts = dummy_tracer_opt;
1624         } else
1625                 if (!type->flags->opts)
1626                         type->flags->opts = dummy_tracer_opt;
1627
1628         /* store the tracer for __set_tracer_option */
1629         type->flags->trace = type;
1630
1631         ret = run_tracer_selftest(type);
1632         if (ret < 0)
1633                 goto out;
1634
1635         type->next = trace_types;
1636         trace_types = type;
1637         add_tracer_options(&global_trace, type);
1638
1639  out:
1640         tracing_selftest_running = false;
1641         mutex_unlock(&trace_types_lock);
1642
1643         if (ret || !default_bootup_tracer)
1644                 goto out_unlock;
1645
1646         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1647                 goto out_unlock;
1648
1649         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1650         /* Do we want this tracer to start on bootup? */
1651         tracing_set_tracer(&global_trace, type->name);
1652         default_bootup_tracer = NULL;
1653
1654         apply_trace_boot_options();
1655
1656         /* disable other selftests, since this will break it. */
1657         tracing_selftest_disabled = true;
1658 #ifdef CONFIG_FTRACE_STARTUP_TEST
1659         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1660                type->name);
1661 #endif
1662
1663  out_unlock:
1664         return ret;
1665 }
1666
1667 void tracing_reset(struct trace_buffer *buf, int cpu)
1668 {
1669         struct ring_buffer *buffer = buf->buffer;
1670
1671         if (!buffer)
1672                 return;
1673
1674         ring_buffer_record_disable(buffer);
1675
1676         /* Make sure all commits have finished */
1677         synchronize_sched();
1678         ring_buffer_reset_cpu(buffer, cpu);
1679
1680         ring_buffer_record_enable(buffer);
1681 }
1682
1683 void tracing_reset_online_cpus(struct trace_buffer *buf)
1684 {
1685         struct ring_buffer *buffer = buf->buffer;
1686         int cpu;
1687
1688         if (!buffer)
1689                 return;
1690
1691         ring_buffer_record_disable(buffer);
1692
1693         /* Make sure all commits have finished */
1694         synchronize_sched();
1695
1696         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1697
1698         for_each_online_cpu(cpu)
1699                 ring_buffer_reset_cpu(buffer, cpu);
1700
1701         ring_buffer_record_enable(buffer);
1702 }
1703
1704 /* Must have trace_types_lock held */
1705 void tracing_reset_all_online_cpus(void)
1706 {
1707         struct trace_array *tr;
1708
1709         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1710                 if (!tr->clear_trace)
1711                         continue;
1712                 tr->clear_trace = false;
1713                 tracing_reset_online_cpus(&tr->trace_buffer);
1714 #ifdef CONFIG_TRACER_MAX_TRACE
1715                 tracing_reset_online_cpus(&tr->max_buffer);
1716 #endif
1717         }
1718 }
1719
1720 static int *tgid_map;
1721
1722 #define SAVED_CMDLINES_DEFAULT 128
1723 #define NO_CMDLINE_MAP UINT_MAX
1724 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1725 struct saved_cmdlines_buffer {
1726         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1727         unsigned *map_cmdline_to_pid;
1728         unsigned cmdline_num;
1729         int cmdline_idx;
1730         char *saved_cmdlines;
1731 };
1732 static struct saved_cmdlines_buffer *savedcmd;
1733
1734 /* temporary disable recording */
1735 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1736
1737 static inline char *get_saved_cmdlines(int idx)
1738 {
1739         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1740 }
1741
1742 static inline void set_cmdline(int idx, const char *cmdline)
1743 {
1744         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1745 }
1746
1747 static int allocate_cmdlines_buffer(unsigned int val,
1748                                     struct saved_cmdlines_buffer *s)
1749 {
1750         s->map_cmdline_to_pid = kmalloc_array(val,
1751                                               sizeof(*s->map_cmdline_to_pid),
1752                                               GFP_KERNEL);
1753         if (!s->map_cmdline_to_pid)
1754                 return -ENOMEM;
1755
1756         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1757         if (!s->saved_cmdlines) {
1758                 kfree(s->map_cmdline_to_pid);
1759                 return -ENOMEM;
1760         }
1761
1762         s->cmdline_idx = 0;
1763         s->cmdline_num = val;
1764         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1765                sizeof(s->map_pid_to_cmdline));
1766         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1767                val * sizeof(*s->map_cmdline_to_pid));
1768
1769         return 0;
1770 }
1771
1772 static int trace_create_savedcmd(void)
1773 {
1774         int ret;
1775
1776         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1777         if (!savedcmd)
1778                 return -ENOMEM;
1779
1780         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1781         if (ret < 0) {
1782                 kfree(savedcmd);
1783                 savedcmd = NULL;
1784                 return -ENOMEM;
1785         }
1786
1787         return 0;
1788 }
1789
1790 int is_tracing_stopped(void)
1791 {
1792         return global_trace.stop_count;
1793 }
1794
1795 /**
1796  * tracing_start - quick start of the tracer
1797  *
1798  * If tracing is enabled but was stopped by tracing_stop,
1799  * this will start the tracer back up.
1800  */
1801 void tracing_start(void)
1802 {
1803         struct ring_buffer *buffer;
1804         unsigned long flags;
1805
1806         if (tracing_disabled)
1807                 return;
1808
1809         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1810         if (--global_trace.stop_count) {
1811                 if (global_trace.stop_count < 0) {
1812                         /* Someone screwed up their debugging */
1813                         WARN_ON_ONCE(1);
1814                         global_trace.stop_count = 0;
1815                 }
1816                 goto out;
1817         }
1818
1819         /* Prevent the buffers from switching */
1820         arch_spin_lock(&global_trace.max_lock);
1821
1822         buffer = global_trace.trace_buffer.buffer;
1823         if (buffer)
1824                 ring_buffer_record_enable(buffer);
1825
1826 #ifdef CONFIG_TRACER_MAX_TRACE
1827         buffer = global_trace.max_buffer.buffer;
1828         if (buffer)
1829                 ring_buffer_record_enable(buffer);
1830 #endif
1831
1832         arch_spin_unlock(&global_trace.max_lock);
1833
1834  out:
1835         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1836 }
1837
1838 static void tracing_start_tr(struct trace_array *tr)
1839 {
1840         struct ring_buffer *buffer;
1841         unsigned long flags;
1842
1843         if (tracing_disabled)
1844                 return;
1845
1846         /* If global, we need to also start the max tracer */
1847         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1848                 return tracing_start();
1849
1850         raw_spin_lock_irqsave(&tr->start_lock, flags);
1851
1852         if (--tr->stop_count) {
1853                 if (tr->stop_count < 0) {
1854                         /* Someone screwed up their debugging */
1855                         WARN_ON_ONCE(1);
1856                         tr->stop_count = 0;
1857                 }
1858                 goto out;
1859         }
1860
1861         buffer = tr->trace_buffer.buffer;
1862         if (buffer)
1863                 ring_buffer_record_enable(buffer);
1864
1865  out:
1866         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1867 }
1868
1869 /**
1870  * tracing_stop - quick stop of the tracer
1871  *
1872  * Light weight way to stop tracing. Use in conjunction with
1873  * tracing_start.
1874  */
1875 void tracing_stop(void)
1876 {
1877         struct ring_buffer *buffer;
1878         unsigned long flags;
1879
1880         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1881         if (global_trace.stop_count++)
1882                 goto out;
1883
1884         /* Prevent the buffers from switching */
1885         arch_spin_lock(&global_trace.max_lock);
1886
1887         buffer = global_trace.trace_buffer.buffer;
1888         if (buffer)
1889                 ring_buffer_record_disable(buffer);
1890
1891 #ifdef CONFIG_TRACER_MAX_TRACE
1892         buffer = global_trace.max_buffer.buffer;
1893         if (buffer)
1894                 ring_buffer_record_disable(buffer);
1895 #endif
1896
1897         arch_spin_unlock(&global_trace.max_lock);
1898
1899  out:
1900         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1901 }
1902
1903 static void tracing_stop_tr(struct trace_array *tr)
1904 {
1905         struct ring_buffer *buffer;
1906         unsigned long flags;
1907
1908         /* If global, we need to also stop the max tracer */
1909         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1910                 return tracing_stop();
1911
1912         raw_spin_lock_irqsave(&tr->start_lock, flags);
1913         if (tr->stop_count++)
1914                 goto out;
1915
1916         buffer = tr->trace_buffer.buffer;
1917         if (buffer)
1918                 ring_buffer_record_disable(buffer);
1919
1920  out:
1921         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1922 }
1923
1924 static int trace_save_cmdline(struct task_struct *tsk)
1925 {
1926         unsigned pid, idx;
1927
1928         /* treat recording of idle task as a success */
1929         if (!tsk->pid)
1930                 return 1;
1931
1932         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1933                 return 0;
1934
1935         /*
1936          * It's not the end of the world if we don't get
1937          * the lock, but we also don't want to spin
1938          * nor do we want to disable interrupts,
1939          * so if we miss here, then better luck next time.
1940          */
1941         if (!arch_spin_trylock(&trace_cmdline_lock))
1942                 return 0;
1943
1944         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1945         if (idx == NO_CMDLINE_MAP) {
1946                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1947
1948                 /*
1949                  * Check whether the cmdline buffer at idx has a pid
1950                  * mapped. We are going to overwrite that entry so we
1951                  * need to clear the map_pid_to_cmdline. Otherwise we
1952                  * would read the new comm for the old pid.
1953                  */
1954                 pid = savedcmd->map_cmdline_to_pid[idx];
1955                 if (pid != NO_CMDLINE_MAP)
1956                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1957
1958                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1959                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1960
1961                 savedcmd->cmdline_idx = idx;
1962         }
1963
1964         set_cmdline(idx, tsk->comm);
1965
1966         arch_spin_unlock(&trace_cmdline_lock);
1967
1968         return 1;
1969 }
1970
1971 static void __trace_find_cmdline(int pid, char comm[])
1972 {
1973         unsigned map;
1974
1975         if (!pid) {
1976                 strcpy(comm, "<idle>");
1977                 return;
1978         }
1979
1980         if (WARN_ON_ONCE(pid < 0)) {
1981                 strcpy(comm, "<XXX>");
1982                 return;
1983         }
1984
1985         if (pid > PID_MAX_DEFAULT) {
1986                 strcpy(comm, "<...>");
1987                 return;
1988         }
1989
1990         map = savedcmd->map_pid_to_cmdline[pid];
1991         if (map != NO_CMDLINE_MAP)
1992                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1993         else
1994                 strcpy(comm, "<...>");
1995 }
1996
1997 void trace_find_cmdline(int pid, char comm[])
1998 {
1999         preempt_disable();
2000         arch_spin_lock(&trace_cmdline_lock);
2001
2002         __trace_find_cmdline(pid, comm);
2003
2004         arch_spin_unlock(&trace_cmdline_lock);
2005         preempt_enable();
2006 }
2007
2008 int trace_find_tgid(int pid)
2009 {
2010         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2011                 return 0;
2012
2013         return tgid_map[pid];
2014 }
2015
2016 static int trace_save_tgid(struct task_struct *tsk)
2017 {
2018         /* treat recording of idle task as a success */
2019         if (!tsk->pid)
2020                 return 1;
2021
2022         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2023                 return 0;
2024
2025         tgid_map[tsk->pid] = tsk->tgid;
2026         return 1;
2027 }
2028
2029 static bool tracing_record_taskinfo_skip(int flags)
2030 {
2031         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2032                 return true;
2033         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2034                 return true;
2035         if (!__this_cpu_read(trace_taskinfo_save))
2036                 return true;
2037         return false;
2038 }
2039
2040 /**
2041  * tracing_record_taskinfo - record the task info of a task
2042  *
2043  * @task  - task to record
2044  * @flags - TRACE_RECORD_CMDLINE for recording comm
2045  *        - TRACE_RECORD_TGID for recording tgid
2046  */
2047 void tracing_record_taskinfo(struct task_struct *task, int flags)
2048 {
2049         bool done;
2050
2051         if (tracing_record_taskinfo_skip(flags))
2052                 return;
2053
2054         /*
2055          * Record as much task information as possible. If some fail, continue
2056          * to try to record the others.
2057          */
2058         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2059         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2060
2061         /* If recording any information failed, retry again soon. */
2062         if (!done)
2063                 return;
2064
2065         __this_cpu_write(trace_taskinfo_save, false);
2066 }
2067
2068 /**
2069  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2070  *
2071  * @prev - previous task during sched_switch
2072  * @next - next task during sched_switch
2073  * @flags - TRACE_RECORD_CMDLINE for recording comm
2074  *          TRACE_RECORD_TGID for recording tgid
2075  */
2076 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2077                                           struct task_struct *next, int flags)
2078 {
2079         bool done;
2080
2081         if (tracing_record_taskinfo_skip(flags))
2082                 return;
2083
2084         /*
2085          * Record as much task information as possible. If some fail, continue
2086          * to try to record the others.
2087          */
2088         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2089         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2090         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2091         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2092
2093         /* If recording any information failed, retry again soon. */
2094         if (!done)
2095                 return;
2096
2097         __this_cpu_write(trace_taskinfo_save, false);
2098 }
2099
2100 /* Helpers to record a specific task information */
2101 void tracing_record_cmdline(struct task_struct *task)
2102 {
2103         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2104 }
2105
2106 void tracing_record_tgid(struct task_struct *task)
2107 {
2108         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2109 }
2110
2111 /*
2112  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2113  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2114  * simplifies those functions and keeps them in sync.
2115  */
2116 enum print_line_t trace_handle_return(struct trace_seq *s)
2117 {
2118         return trace_seq_has_overflowed(s) ?
2119                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2120 }
2121 EXPORT_SYMBOL_GPL(trace_handle_return);
2122
2123 void
2124 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2125                              int pc)
2126 {
2127         struct task_struct *tsk = current;
2128
2129         entry->preempt_count            = pc & 0xff;
2130         entry->pid                      = (tsk) ? tsk->pid : 0;
2131         entry->flags =
2132 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2133                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2134 #else
2135                 TRACE_FLAG_IRQS_NOSUPPORT |
2136 #endif
2137                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2138                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2139                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2140                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2141                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2142 }
2143 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2144
2145 struct ring_buffer_event *
2146 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2147                           int type,
2148                           unsigned long len,
2149                           unsigned long flags, int pc)
2150 {
2151         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2152 }
2153
2154 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2155 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2156 static int trace_buffered_event_ref;
2157
2158 /**
2159  * trace_buffered_event_enable - enable buffering events
2160  *
2161  * When events are being filtered, it is quicker to use a temporary
2162  * buffer to write the event data into if there's a likely chance
2163  * that it will not be committed. The discard of the ring buffer
2164  * is not as fast as committing, and is much slower than copying
2165  * a commit.
2166  *
2167  * When an event is to be filtered, allocate per cpu buffers to
2168  * write the event data into, and if the event is filtered and discarded
2169  * it is simply dropped, otherwise, the entire data is to be committed
2170  * in one shot.
2171  */
2172 void trace_buffered_event_enable(void)
2173 {
2174         struct ring_buffer_event *event;
2175         struct page *page;
2176         int cpu;
2177
2178         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2179
2180         if (trace_buffered_event_ref++)
2181                 return;
2182
2183         for_each_tracing_cpu(cpu) {
2184                 page = alloc_pages_node(cpu_to_node(cpu),
2185                                         GFP_KERNEL | __GFP_NORETRY, 0);
2186                 if (!page)
2187                         goto failed;
2188
2189                 event = page_address(page);
2190                 memset(event, 0, sizeof(*event));
2191
2192                 per_cpu(trace_buffered_event, cpu) = event;
2193
2194                 preempt_disable();
2195                 if (cpu == smp_processor_id() &&
2196                     this_cpu_read(trace_buffered_event) !=
2197                     per_cpu(trace_buffered_event, cpu))
2198                         WARN_ON_ONCE(1);
2199                 preempt_enable();
2200         }
2201
2202         return;
2203  failed:
2204         trace_buffered_event_disable();
2205 }
2206
2207 static void enable_trace_buffered_event(void *data)
2208 {
2209         /* Probably not needed, but do it anyway */
2210         smp_rmb();
2211         this_cpu_dec(trace_buffered_event_cnt);
2212 }
2213
2214 static void disable_trace_buffered_event(void *data)
2215 {
2216         this_cpu_inc(trace_buffered_event_cnt);
2217 }
2218
2219 /**
2220  * trace_buffered_event_disable - disable buffering events
2221  *
2222  * When a filter is removed, it is faster to not use the buffered
2223  * events, and to commit directly into the ring buffer. Free up
2224  * the temp buffers when there are no more users. This requires
2225  * special synchronization with current events.
2226  */
2227 void trace_buffered_event_disable(void)
2228 {
2229         int cpu;
2230
2231         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2232
2233         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2234                 return;
2235
2236         if (--trace_buffered_event_ref)
2237                 return;
2238
2239         preempt_disable();
2240         /* For each CPU, set the buffer as used. */
2241         smp_call_function_many(tracing_buffer_mask,
2242                                disable_trace_buffered_event, NULL, 1);
2243         preempt_enable();
2244
2245         /* Wait for all current users to finish */
2246         synchronize_sched();
2247
2248         for_each_tracing_cpu(cpu) {
2249                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2250                 per_cpu(trace_buffered_event, cpu) = NULL;
2251         }
2252         /*
2253          * Make sure trace_buffered_event is NULL before clearing
2254          * trace_buffered_event_cnt.
2255          */
2256         smp_wmb();
2257
2258         preempt_disable();
2259         /* Do the work on each cpu */
2260         smp_call_function_many(tracing_buffer_mask,
2261                                enable_trace_buffered_event, NULL, 1);
2262         preempt_enable();
2263 }
2264
2265 static struct ring_buffer *temp_buffer;
2266
2267 struct ring_buffer_event *
2268 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2269                           struct trace_event_file *trace_file,
2270                           int type, unsigned long len,
2271                           unsigned long flags, int pc)
2272 {
2273         struct ring_buffer_event *entry;
2274         int val;
2275
2276         *current_rb = trace_file->tr->trace_buffer.buffer;
2277
2278         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2279              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2280             (entry = this_cpu_read(trace_buffered_event))) {
2281                 /* Try to use the per cpu buffer first */
2282                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2283                 if (val == 1) {
2284                         trace_event_setup(entry, type, flags, pc);
2285                         entry->array[0] = len;
2286                         return entry;
2287                 }
2288                 this_cpu_dec(trace_buffered_event_cnt);
2289         }
2290
2291         entry = __trace_buffer_lock_reserve(*current_rb,
2292                                             type, len, flags, pc);
2293         /*
2294          * If tracing is off, but we have triggers enabled
2295          * we still need to look at the event data. Use the temp_buffer
2296          * to store the trace event for the tigger to use. It's recusive
2297          * safe and will not be recorded anywhere.
2298          */
2299         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2300                 *current_rb = temp_buffer;
2301                 entry = __trace_buffer_lock_reserve(*current_rb,
2302                                                     type, len, flags, pc);
2303         }
2304         return entry;
2305 }
2306 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2307
2308 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2309 static DEFINE_MUTEX(tracepoint_printk_mutex);
2310
2311 static void output_printk(struct trace_event_buffer *fbuffer)
2312 {
2313         struct trace_event_call *event_call;
2314         struct trace_event *event;
2315         unsigned long flags;
2316         struct trace_iterator *iter = tracepoint_print_iter;
2317
2318         /* We should never get here if iter is NULL */
2319         if (WARN_ON_ONCE(!iter))
2320                 return;
2321
2322         event_call = fbuffer->trace_file->event_call;
2323         if (!event_call || !event_call->event.funcs ||
2324             !event_call->event.funcs->trace)
2325                 return;
2326
2327         event = &fbuffer->trace_file->event_call->event;
2328
2329         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2330         trace_seq_init(&iter->seq);
2331         iter->ent = fbuffer->entry;
2332         event_call->event.funcs->trace(iter, 0, event);
2333         trace_seq_putc(&iter->seq, 0);
2334         printk("%s", iter->seq.buffer);
2335
2336         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2337 }
2338
2339 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2340                              void __user *buffer, size_t *lenp,
2341                              loff_t *ppos)
2342 {
2343         int save_tracepoint_printk;
2344         int ret;
2345
2346         mutex_lock(&tracepoint_printk_mutex);
2347         save_tracepoint_printk = tracepoint_printk;
2348
2349         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2350
2351         /*
2352          * This will force exiting early, as tracepoint_printk
2353          * is always zero when tracepoint_printk_iter is not allocated
2354          */
2355         if (!tracepoint_print_iter)
2356                 tracepoint_printk = 0;
2357
2358         if (save_tracepoint_printk == tracepoint_printk)
2359                 goto out;
2360
2361         if (tracepoint_printk)
2362                 static_key_enable(&tracepoint_printk_key.key);
2363         else
2364                 static_key_disable(&tracepoint_printk_key.key);
2365
2366  out:
2367         mutex_unlock(&tracepoint_printk_mutex);
2368
2369         return ret;
2370 }
2371
2372 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2373 {
2374         if (static_key_false(&tracepoint_printk_key.key))
2375                 output_printk(fbuffer);
2376
2377         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2378                                     fbuffer->event, fbuffer->entry,
2379                                     fbuffer->flags, fbuffer->pc);
2380 }
2381 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2382
2383 /*
2384  * Skip 3:
2385  *
2386  *   trace_buffer_unlock_commit_regs()
2387  *   trace_event_buffer_commit()
2388  *   trace_event_raw_event_xxx()
2389  */
2390 # define STACK_SKIP 3
2391
2392 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2393                                      struct ring_buffer *buffer,
2394                                      struct ring_buffer_event *event,
2395                                      unsigned long flags, int pc,
2396                                      struct pt_regs *regs)
2397 {
2398         __buffer_unlock_commit(buffer, event);
2399
2400         /*
2401          * If regs is not set, then skip the necessary functions.
2402          * Note, we can still get here via blktrace, wakeup tracer
2403          * and mmiotrace, but that's ok if they lose a function or
2404          * two. They are not that meaningful.
2405          */
2406         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2407         ftrace_trace_userstack(buffer, flags, pc);
2408 }
2409
2410 /*
2411  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2412  */
2413 void
2414 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2415                                    struct ring_buffer_event *event)
2416 {
2417         __buffer_unlock_commit(buffer, event);
2418 }
2419
2420 static void
2421 trace_process_export(struct trace_export *export,
2422                struct ring_buffer_event *event)
2423 {
2424         struct trace_entry *entry;
2425         unsigned int size = 0;
2426
2427         entry = ring_buffer_event_data(event);
2428         size = ring_buffer_event_length(event);
2429         export->write(export, entry, size);
2430 }
2431
2432 static DEFINE_MUTEX(ftrace_export_lock);
2433
2434 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2435
2436 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2437
2438 static inline void ftrace_exports_enable(void)
2439 {
2440         static_branch_enable(&ftrace_exports_enabled);
2441 }
2442
2443 static inline void ftrace_exports_disable(void)
2444 {
2445         static_branch_disable(&ftrace_exports_enabled);
2446 }
2447
2448 void ftrace_exports(struct ring_buffer_event *event)
2449 {
2450         struct trace_export *export;
2451
2452         preempt_disable_notrace();
2453
2454         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2455         while (export) {
2456                 trace_process_export(export, event);
2457                 export = rcu_dereference_raw_notrace(export->next);
2458         }
2459
2460         preempt_enable_notrace();
2461 }
2462
2463 static inline void
2464 add_trace_export(struct trace_export **list, struct trace_export *export)
2465 {
2466         rcu_assign_pointer(export->next, *list);
2467         /*
2468          * We are entering export into the list but another
2469          * CPU might be walking that list. We need to make sure
2470          * the export->next pointer is valid before another CPU sees
2471          * the export pointer included into the list.
2472          */
2473         rcu_assign_pointer(*list, export);
2474 }
2475
2476 static inline int
2477 rm_trace_export(struct trace_export **list, struct trace_export *export)
2478 {
2479         struct trace_export **p;
2480
2481         for (p = list; *p != NULL; p = &(*p)->next)
2482                 if (*p == export)
2483                         break;
2484
2485         if (*p != export)
2486                 return -1;
2487
2488         rcu_assign_pointer(*p, (*p)->next);
2489
2490         return 0;
2491 }
2492
2493 static inline void
2494 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2495 {
2496         if (*list == NULL)
2497                 ftrace_exports_enable();
2498
2499         add_trace_export(list, export);
2500 }
2501
2502 static inline int
2503 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2504 {
2505         int ret;
2506
2507         ret = rm_trace_export(list, export);
2508         if (*list == NULL)
2509                 ftrace_exports_disable();
2510
2511         return ret;
2512 }
2513
2514 int register_ftrace_export(struct trace_export *export)
2515 {
2516         if (WARN_ON_ONCE(!export->write))
2517                 return -1;
2518
2519         mutex_lock(&ftrace_export_lock);
2520
2521         add_ftrace_export(&ftrace_exports_list, export);
2522
2523         mutex_unlock(&ftrace_export_lock);
2524
2525         return 0;
2526 }
2527 EXPORT_SYMBOL_GPL(register_ftrace_export);
2528
2529 int unregister_ftrace_export(struct trace_export *export)
2530 {
2531         int ret;
2532
2533         mutex_lock(&ftrace_export_lock);
2534
2535         ret = rm_ftrace_export(&ftrace_exports_list, export);
2536
2537         mutex_unlock(&ftrace_export_lock);
2538
2539         return ret;
2540 }
2541 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2542
2543 void
2544 trace_function(struct trace_array *tr,
2545                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2546                int pc)
2547 {
2548         struct trace_event_call *call = &event_function;
2549         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2550         struct ring_buffer_event *event;
2551         struct ftrace_entry *entry;
2552
2553         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2554                                             flags, pc);
2555         if (!event)
2556                 return;
2557         entry   = ring_buffer_event_data(event);
2558         entry->ip                       = ip;
2559         entry->parent_ip                = parent_ip;
2560
2561         if (!call_filter_check_discard(call, entry, buffer, event)) {
2562                 if (static_branch_unlikely(&ftrace_exports_enabled))
2563                         ftrace_exports(event);
2564                 __buffer_unlock_commit(buffer, event);
2565         }
2566 }
2567
2568 #ifdef CONFIG_STACKTRACE
2569
2570 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2571 struct ftrace_stack {
2572         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2573 };
2574
2575 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2576 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2577
2578 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2579                                  unsigned long flags,
2580                                  int skip, int pc, struct pt_regs *regs)
2581 {
2582         struct trace_event_call *call = &event_kernel_stack;
2583         struct ring_buffer_event *event;
2584         struct stack_entry *entry;
2585         struct stack_trace trace;
2586         int use_stack;
2587         int size = FTRACE_STACK_ENTRIES;
2588
2589         trace.nr_entries        = 0;
2590         trace.skip              = skip;
2591
2592         /*
2593          * Add one, for this function and the call to save_stack_trace()
2594          * If regs is set, then these functions will not be in the way.
2595          */
2596 #ifndef CONFIG_UNWINDER_ORC
2597         if (!regs)
2598                 trace.skip++;
2599 #endif
2600
2601         /*
2602          * Since events can happen in NMIs there's no safe way to
2603          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2604          * or NMI comes in, it will just have to use the default
2605          * FTRACE_STACK_SIZE.
2606          */
2607         preempt_disable_notrace();
2608
2609         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2610         /*
2611          * We don't need any atomic variables, just a barrier.
2612          * If an interrupt comes in, we don't care, because it would
2613          * have exited and put the counter back to what we want.
2614          * We just need a barrier to keep gcc from moving things
2615          * around.
2616          */
2617         barrier();
2618         if (use_stack == 1) {
2619                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2620                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2621
2622                 if (regs)
2623                         save_stack_trace_regs(regs, &trace);
2624                 else
2625                         save_stack_trace(&trace);
2626
2627                 if (trace.nr_entries > size)
2628                         size = trace.nr_entries;
2629         } else
2630                 /* From now on, use_stack is a boolean */
2631                 use_stack = 0;
2632
2633         size *= sizeof(unsigned long);
2634
2635         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2636                                             sizeof(*entry) + size, flags, pc);
2637         if (!event)
2638                 goto out;
2639         entry = ring_buffer_event_data(event);
2640
2641         memset(&entry->caller, 0, size);
2642
2643         if (use_stack)
2644                 memcpy(&entry->caller, trace.entries,
2645                        trace.nr_entries * sizeof(unsigned long));
2646         else {
2647                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2648                 trace.entries           = entry->caller;
2649                 if (regs)
2650                         save_stack_trace_regs(regs, &trace);
2651                 else
2652                         save_stack_trace(&trace);
2653         }
2654
2655         entry->size = trace.nr_entries;
2656
2657         if (!call_filter_check_discard(call, entry, buffer, event))
2658                 __buffer_unlock_commit(buffer, event);
2659
2660  out:
2661         /* Again, don't let gcc optimize things here */
2662         barrier();
2663         __this_cpu_dec(ftrace_stack_reserve);
2664         preempt_enable_notrace();
2665
2666 }
2667
2668 static inline void ftrace_trace_stack(struct trace_array *tr,
2669                                       struct ring_buffer *buffer,
2670                                       unsigned long flags,
2671                                       int skip, int pc, struct pt_regs *regs)
2672 {
2673         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2674                 return;
2675
2676         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2677 }
2678
2679 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2680                    int pc)
2681 {
2682         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2683
2684         if (rcu_is_watching()) {
2685                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2686                 return;
2687         }
2688
2689         /*
2690          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2691          * but if the above rcu_is_watching() failed, then the NMI
2692          * triggered someplace critical, and rcu_irq_enter() should
2693          * not be called from NMI.
2694          */
2695         if (unlikely(in_nmi()))
2696                 return;
2697
2698         rcu_irq_enter_irqson();
2699         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2700         rcu_irq_exit_irqson();
2701 }
2702
2703 /**
2704  * trace_dump_stack - record a stack back trace in the trace buffer
2705  * @skip: Number of functions to skip (helper handlers)
2706  */
2707 void trace_dump_stack(int skip)
2708 {
2709         unsigned long flags;
2710
2711         if (tracing_disabled || tracing_selftest_running)
2712                 return;
2713
2714         local_save_flags(flags);
2715
2716 #ifndef CONFIG_UNWINDER_ORC
2717         /* Skip 1 to skip this function. */
2718         skip++;
2719 #endif
2720         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2721                              flags, skip, preempt_count(), NULL);
2722 }
2723
2724 static DEFINE_PER_CPU(int, user_stack_count);
2725
2726 void
2727 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2728 {
2729         struct trace_event_call *call = &event_user_stack;
2730         struct ring_buffer_event *event;
2731         struct userstack_entry *entry;
2732         struct stack_trace trace;
2733
2734         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2735                 return;
2736
2737         /*
2738          * NMIs can not handle page faults, even with fix ups.
2739          * The save user stack can (and often does) fault.
2740          */
2741         if (unlikely(in_nmi()))
2742                 return;
2743
2744         /*
2745          * prevent recursion, since the user stack tracing may
2746          * trigger other kernel events.
2747          */
2748         preempt_disable();
2749         if (__this_cpu_read(user_stack_count))
2750                 goto out;
2751
2752         __this_cpu_inc(user_stack_count);
2753
2754         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2755                                             sizeof(*entry), flags, pc);
2756         if (!event)
2757                 goto out_drop_count;
2758         entry   = ring_buffer_event_data(event);
2759
2760         entry->tgid             = current->tgid;
2761         memset(&entry->caller, 0, sizeof(entry->caller));
2762
2763         trace.nr_entries        = 0;
2764         trace.max_entries       = FTRACE_STACK_ENTRIES;
2765         trace.skip              = 0;
2766         trace.entries           = entry->caller;
2767
2768         save_stack_trace_user(&trace);
2769         if (!call_filter_check_discard(call, entry, buffer, event))
2770                 __buffer_unlock_commit(buffer, event);
2771
2772  out_drop_count:
2773         __this_cpu_dec(user_stack_count);
2774  out:
2775         preempt_enable();
2776 }
2777
2778 #ifdef UNUSED
2779 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2780 {
2781         ftrace_trace_userstack(tr, flags, preempt_count());
2782 }
2783 #endif /* UNUSED */
2784
2785 #endif /* CONFIG_STACKTRACE */
2786
2787 /* created for use with alloc_percpu */
2788 struct trace_buffer_struct {
2789         int nesting;
2790         char buffer[4][TRACE_BUF_SIZE];
2791 };
2792
2793 static struct trace_buffer_struct *trace_percpu_buffer;
2794
2795 /*
2796  * Thise allows for lockless recording.  If we're nested too deeply, then
2797  * this returns NULL.
2798  */
2799 static char *get_trace_buf(void)
2800 {
2801         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2802
2803         if (!buffer || buffer->nesting >= 4)
2804                 return NULL;
2805
2806         buffer->nesting++;
2807
2808         /* Interrupts must see nesting incremented before we use the buffer */
2809         barrier();
2810         return &buffer->buffer[buffer->nesting][0];
2811 }
2812
2813 static void put_trace_buf(void)
2814 {
2815         /* Don't let the decrement of nesting leak before this */
2816         barrier();
2817         this_cpu_dec(trace_percpu_buffer->nesting);
2818 }
2819
2820 static int alloc_percpu_trace_buffer(void)
2821 {
2822         struct trace_buffer_struct *buffers;
2823
2824         buffers = alloc_percpu(struct trace_buffer_struct);
2825         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2826                 return -ENOMEM;
2827
2828         trace_percpu_buffer = buffers;
2829         return 0;
2830 }
2831
2832 static int buffers_allocated;
2833
2834 void trace_printk_init_buffers(void)
2835 {
2836         if (buffers_allocated)
2837                 return;
2838
2839         if (alloc_percpu_trace_buffer())
2840                 return;
2841
2842         /* trace_printk() is for debug use only. Don't use it in production. */
2843
2844         pr_warn("\n");
2845         pr_warn("**********************************************************\n");
2846         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2847         pr_warn("**                                                      **\n");
2848         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2849         pr_warn("**                                                      **\n");
2850         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2851         pr_warn("** unsafe for production use.                           **\n");
2852         pr_warn("**                                                      **\n");
2853         pr_warn("** If you see this message and you are not debugging    **\n");
2854         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2855         pr_warn("**                                                      **\n");
2856         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2857         pr_warn("**********************************************************\n");
2858
2859         /* Expand the buffers to set size */
2860         tracing_update_buffers();
2861
2862         buffers_allocated = 1;
2863
2864         /*
2865          * trace_printk_init_buffers() can be called by modules.
2866          * If that happens, then we need to start cmdline recording
2867          * directly here. If the global_trace.buffer is already
2868          * allocated here, then this was called by module code.
2869          */
2870         if (global_trace.trace_buffer.buffer)
2871                 tracing_start_cmdline_record();
2872 }
2873
2874 void trace_printk_start_comm(void)
2875 {
2876         /* Start tracing comms if trace printk is set */
2877         if (!buffers_allocated)
2878                 return;
2879         tracing_start_cmdline_record();
2880 }
2881
2882 static void trace_printk_start_stop_comm(int enabled)
2883 {
2884         if (!buffers_allocated)
2885                 return;
2886
2887         if (enabled)
2888                 tracing_start_cmdline_record();
2889         else
2890                 tracing_stop_cmdline_record();
2891 }
2892
2893 /**
2894  * trace_vbprintk - write binary msg to tracing buffer
2895  *
2896  */
2897 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2898 {
2899         struct trace_event_call *call = &event_bprint;
2900         struct ring_buffer_event *event;
2901         struct ring_buffer *buffer;
2902         struct trace_array *tr = &global_trace;
2903         struct bprint_entry *entry;
2904         unsigned long flags;
2905         char *tbuffer;
2906         int len = 0, size, pc;
2907
2908         if (unlikely(tracing_selftest_running || tracing_disabled))
2909                 return 0;
2910
2911         /* Don't pollute graph traces with trace_vprintk internals */
2912         pause_graph_tracing();
2913
2914         pc = preempt_count();
2915         preempt_disable_notrace();
2916
2917         tbuffer = get_trace_buf();
2918         if (!tbuffer) {
2919                 len = 0;
2920                 goto out_nobuffer;
2921         }
2922
2923         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2924
2925         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2926                 goto out;
2927
2928         local_save_flags(flags);
2929         size = sizeof(*entry) + sizeof(u32) * len;
2930         buffer = tr->trace_buffer.buffer;
2931         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2932                                             flags, pc);
2933         if (!event)
2934                 goto out;
2935         entry = ring_buffer_event_data(event);
2936         entry->ip                       = ip;
2937         entry->fmt                      = fmt;
2938
2939         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2940         if (!call_filter_check_discard(call, entry, buffer, event)) {
2941                 __buffer_unlock_commit(buffer, event);
2942                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2943         }
2944
2945 out:
2946         put_trace_buf();
2947
2948 out_nobuffer:
2949         preempt_enable_notrace();
2950         unpause_graph_tracing();
2951
2952         return len;
2953 }
2954 EXPORT_SYMBOL_GPL(trace_vbprintk);
2955
2956 static int
2957 __trace_array_vprintk(struct ring_buffer *buffer,
2958                       unsigned long ip, const char *fmt, va_list args)
2959 {
2960         struct trace_event_call *call = &event_print;
2961         struct ring_buffer_event *event;
2962         int len = 0, size, pc;
2963         struct print_entry *entry;
2964         unsigned long flags;
2965         char *tbuffer;
2966
2967         if (tracing_disabled || tracing_selftest_running)
2968                 return 0;
2969
2970         /* Don't pollute graph traces with trace_vprintk internals */
2971         pause_graph_tracing();
2972
2973         pc = preempt_count();
2974         preempt_disable_notrace();
2975
2976
2977         tbuffer = get_trace_buf();
2978         if (!tbuffer) {
2979                 len = 0;
2980                 goto out_nobuffer;
2981         }
2982
2983         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2984
2985         local_save_flags(flags);
2986         size = sizeof(*entry) + len + 1;
2987         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2988                                             flags, pc);
2989         if (!event)
2990                 goto out;
2991         entry = ring_buffer_event_data(event);
2992         entry->ip = ip;
2993
2994         memcpy(&entry->buf, tbuffer, len + 1);
2995         if (!call_filter_check_discard(call, entry, buffer, event)) {
2996                 __buffer_unlock_commit(buffer, event);
2997                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2998         }
2999
3000 out:
3001         put_trace_buf();
3002
3003 out_nobuffer:
3004         preempt_enable_notrace();
3005         unpause_graph_tracing();
3006
3007         return len;
3008 }
3009
3010 int trace_array_vprintk(struct trace_array *tr,
3011                         unsigned long ip, const char *fmt, va_list args)
3012 {
3013         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3014 }
3015
3016 int trace_array_printk(struct trace_array *tr,
3017                        unsigned long ip, const char *fmt, ...)
3018 {
3019         int ret;
3020         va_list ap;
3021
3022         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3023                 return 0;
3024
3025         va_start(ap, fmt);
3026         ret = trace_array_vprintk(tr, ip, fmt, ap);
3027         va_end(ap);
3028         return ret;
3029 }
3030
3031 int trace_array_printk_buf(struct ring_buffer *buffer,
3032                            unsigned long ip, const char *fmt, ...)
3033 {
3034         int ret;
3035         va_list ap;
3036
3037         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3038                 return 0;
3039
3040         va_start(ap, fmt);
3041         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3042         va_end(ap);
3043         return ret;
3044 }
3045
3046 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3047 {
3048         return trace_array_vprintk(&global_trace, ip, fmt, args);
3049 }
3050 EXPORT_SYMBOL_GPL(trace_vprintk);
3051
3052 static void trace_iterator_increment(struct trace_iterator *iter)
3053 {
3054         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3055
3056         iter->idx++;
3057         if (buf_iter)
3058                 ring_buffer_read(buf_iter, NULL);
3059 }
3060
3061 static struct trace_entry *
3062 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3063                 unsigned long *lost_events)
3064 {
3065         struct ring_buffer_event *event;
3066         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3067
3068         if (buf_iter)
3069                 event = ring_buffer_iter_peek(buf_iter, ts);
3070         else
3071                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3072                                          lost_events);
3073
3074         if (event) {
3075                 iter->ent_size = ring_buffer_event_length(event);
3076                 return ring_buffer_event_data(event);
3077         }
3078         iter->ent_size = 0;
3079         return NULL;
3080 }
3081
3082 static struct trace_entry *
3083 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3084                   unsigned long *missing_events, u64 *ent_ts)
3085 {
3086         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3087         struct trace_entry *ent, *next = NULL;
3088         unsigned long lost_events = 0, next_lost = 0;
3089         int cpu_file = iter->cpu_file;
3090         u64 next_ts = 0, ts;
3091         int next_cpu = -1;
3092         int next_size = 0;
3093         int cpu;
3094
3095         /*
3096          * If we are in a per_cpu trace file, don't bother by iterating over
3097          * all cpu and peek directly.
3098          */
3099         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3100                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3101                         return NULL;
3102                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3103                 if (ent_cpu)
3104                         *ent_cpu = cpu_file;
3105
3106                 return ent;
3107         }
3108
3109         for_each_tracing_cpu(cpu) {
3110
3111                 if (ring_buffer_empty_cpu(buffer, cpu))
3112                         continue;
3113
3114                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3115
3116                 /*
3117                  * Pick the entry with the smallest timestamp:
3118                  */
3119                 if (ent && (!next || ts < next_ts)) {
3120                         next = ent;
3121                         next_cpu = cpu;
3122                         next_ts = ts;
3123                         next_lost = lost_events;
3124                         next_size = iter->ent_size;
3125                 }
3126         }
3127
3128         iter->ent_size = next_size;
3129
3130         if (ent_cpu)
3131                 *ent_cpu = next_cpu;
3132
3133         if (ent_ts)
3134                 *ent_ts = next_ts;
3135
3136         if (missing_events)
3137                 *missing_events = next_lost;
3138
3139         return next;
3140 }
3141
3142 /* Find the next real entry, without updating the iterator itself */
3143 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3144                                           int *ent_cpu, u64 *ent_ts)
3145 {
3146         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3147 }
3148
3149 /* Find the next real entry, and increment the iterator to the next entry */
3150 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3151 {
3152         iter->ent = __find_next_entry(iter, &iter->cpu,
3153                                       &iter->lost_events, &iter->ts);
3154
3155         if (iter->ent)
3156                 trace_iterator_increment(iter);
3157
3158         return iter->ent ? iter : NULL;
3159 }
3160
3161 static void trace_consume(struct trace_iterator *iter)
3162 {
3163         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3164                             &iter->lost_events);
3165 }
3166
3167 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3168 {
3169         struct trace_iterator *iter = m->private;
3170         int i = (int)*pos;
3171         void *ent;
3172
3173         WARN_ON_ONCE(iter->leftover);
3174
3175         (*pos)++;
3176
3177         /* can't go backwards */
3178         if (iter->idx > i)
3179                 return NULL;
3180
3181         if (iter->idx < 0)
3182                 ent = trace_find_next_entry_inc(iter);
3183         else
3184                 ent = iter;
3185
3186         while (ent && iter->idx < i)
3187                 ent = trace_find_next_entry_inc(iter);
3188
3189         iter->pos = *pos;
3190
3191         return ent;
3192 }
3193
3194 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3195 {
3196         struct ring_buffer_event *event;
3197         struct ring_buffer_iter *buf_iter;
3198         unsigned long entries = 0;
3199         u64 ts;
3200
3201         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3202
3203         buf_iter = trace_buffer_iter(iter, cpu);
3204         if (!buf_iter)
3205                 return;
3206
3207         ring_buffer_iter_reset(buf_iter);
3208
3209         /*
3210          * We could have the case with the max latency tracers
3211          * that a reset never took place on a cpu. This is evident
3212          * by the timestamp being before the start of the buffer.
3213          */
3214         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3215                 if (ts >= iter->trace_buffer->time_start)
3216                         break;
3217                 entries++;
3218                 ring_buffer_read(buf_iter, NULL);
3219         }
3220
3221         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3222 }
3223
3224 /*
3225  * The current tracer is copied to avoid a global locking
3226  * all around.
3227  */
3228 static void *s_start(struct seq_file *m, loff_t *pos)
3229 {
3230         struct trace_iterator *iter = m->private;
3231         struct trace_array *tr = iter->tr;
3232         int cpu_file = iter->cpu_file;
3233         void *p = NULL;
3234         loff_t l = 0;
3235         int cpu;
3236
3237         /*
3238          * copy the tracer to avoid using a global lock all around.
3239          * iter->trace is a copy of current_trace, the pointer to the
3240          * name may be used instead of a strcmp(), as iter->trace->name
3241          * will point to the same string as current_trace->name.
3242          */
3243         mutex_lock(&trace_types_lock);
3244         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3245                 *iter->trace = *tr->current_trace;
3246         mutex_unlock(&trace_types_lock);
3247
3248 #ifdef CONFIG_TRACER_MAX_TRACE
3249         if (iter->snapshot && iter->trace->use_max_tr)
3250                 return ERR_PTR(-EBUSY);
3251 #endif
3252
3253         if (!iter->snapshot)
3254                 atomic_inc(&trace_record_taskinfo_disabled);
3255
3256         if (*pos != iter->pos) {
3257                 iter->ent = NULL;
3258                 iter->cpu = 0;
3259                 iter->idx = -1;
3260
3261                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3262                         for_each_tracing_cpu(cpu)
3263                                 tracing_iter_reset(iter, cpu);
3264                 } else
3265                         tracing_iter_reset(iter, cpu_file);
3266
3267                 iter->leftover = 0;
3268                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3269                         ;
3270
3271         } else {
3272                 /*
3273                  * If we overflowed the seq_file before, then we want
3274                  * to just reuse the trace_seq buffer again.
3275                  */
3276                 if (iter->leftover)
3277                         p = iter;
3278                 else {
3279                         l = *pos - 1;
3280                         p = s_next(m, p, &l);
3281                 }
3282         }
3283
3284         trace_event_read_lock();
3285         trace_access_lock(cpu_file);
3286         return p;
3287 }
3288
3289 static void s_stop(struct seq_file *m, void *p)
3290 {
3291         struct trace_iterator *iter = m->private;
3292
3293 #ifdef CONFIG_TRACER_MAX_TRACE
3294         if (iter->snapshot && iter->trace->use_max_tr)
3295                 return;
3296 #endif
3297
3298         if (!iter->snapshot)
3299                 atomic_dec(&trace_record_taskinfo_disabled);
3300
3301         trace_access_unlock(iter->cpu_file);
3302         trace_event_read_unlock();
3303 }
3304
3305 static void
3306 get_total_entries(struct trace_buffer *buf,
3307                   unsigned long *total, unsigned long *entries)
3308 {
3309         unsigned long count;
3310         int cpu;
3311
3312         *total = 0;
3313         *entries = 0;
3314
3315         for_each_tracing_cpu(cpu) {
3316                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3317                 /*
3318                  * If this buffer has skipped entries, then we hold all
3319                  * entries for the trace and we need to ignore the
3320                  * ones before the time stamp.
3321                  */
3322                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3323                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3324                         /* total is the same as the entries */
3325                         *total += count;
3326                 } else
3327                         *total += count +
3328                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3329                 *entries += count;
3330         }
3331 }
3332
3333 static void print_lat_help_header(struct seq_file *m)
3334 {
3335         seq_puts(m, "#                  _------=> CPU#            \n"
3336                     "#                 / _-----=> irqs-off        \n"
3337                     "#                | / _----=> need-resched    \n"
3338                     "#                || / _---=> hardirq/softirq \n"
3339                     "#                ||| / _--=> preempt-depth   \n"
3340                     "#                |||| /     delay            \n"
3341                     "#  cmd     pid   ||||| time  |   caller      \n"
3342                     "#     \\   /      |||||  \\    |   /         \n");
3343 }
3344
3345 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3346 {
3347         unsigned long total;
3348         unsigned long entries;
3349
3350         get_total_entries(buf, &total, &entries);
3351         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3352                    entries, total, num_online_cpus());
3353         seq_puts(m, "#\n");
3354 }
3355
3356 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3357                                    unsigned int flags)
3358 {
3359         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3360
3361         print_event_info(buf, m);
3362
3363         seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3364         seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
3365 }
3366
3367 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3368                                        unsigned int flags)
3369 {
3370         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3371         const char tgid_space[] = "          ";
3372         const char space[] = "  ";
3373
3374         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3375                    tgid ? tgid_space : space);
3376         seq_printf(m, "#                          %s / _----=> need-resched\n",
3377                    tgid ? tgid_space : space);
3378         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3379                    tgid ? tgid_space : space);
3380         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3381                    tgid ? tgid_space : space);
3382         seq_printf(m, "#                          %s||| /     delay\n",
3383                    tgid ? tgid_space : space);
3384         seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3385                    tgid ? "   TGID   " : space);
3386         seq_printf(m, "#              | |       | %s||||       |         |\n",
3387                    tgid ? "     |    " : space);
3388 }
3389
3390 void
3391 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3392 {
3393         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3394         struct trace_buffer *buf = iter->trace_buffer;
3395         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3396         struct tracer *type = iter->trace;
3397         unsigned long entries;
3398         unsigned long total;
3399         const char *name = "preemption";
3400
3401         name = type->name;
3402
3403         get_total_entries(buf, &total, &entries);
3404
3405         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3406                    name, UTS_RELEASE);
3407         seq_puts(m, "# -----------------------------------"
3408                  "---------------------------------\n");
3409         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3410                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3411                    nsecs_to_usecs(data->saved_latency),
3412                    entries,
3413                    total,
3414                    buf->cpu,
3415 #if defined(CONFIG_PREEMPT_NONE)
3416                    "server",
3417 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3418                    "desktop",
3419 #elif defined(CONFIG_PREEMPT)
3420                    "preempt",
3421 #else
3422                    "unknown",
3423 #endif
3424                    /* These are reserved for later use */
3425                    0, 0, 0, 0);
3426 #ifdef CONFIG_SMP
3427         seq_printf(m, " #P:%d)\n", num_online_cpus());
3428 #else
3429         seq_puts(m, ")\n");
3430 #endif
3431         seq_puts(m, "#    -----------------\n");
3432         seq_printf(m, "#    | task: %.16s-%d "
3433                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3434                    data->comm, data->pid,
3435                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3436                    data->policy, data->rt_priority);
3437         seq_puts(m, "#    -----------------\n");
3438
3439         if (data->critical_start) {
3440                 seq_puts(m, "#  => started at: ");
3441                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3442                 trace_print_seq(m, &iter->seq);
3443                 seq_puts(m, "\n#  => ended at:   ");
3444                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3445                 trace_print_seq(m, &iter->seq);
3446                 seq_puts(m, "\n#\n");
3447         }
3448
3449         seq_puts(m, "#\n");
3450 }
3451
3452 static void test_cpu_buff_start(struct trace_iterator *iter)
3453 {
3454         struct trace_seq *s = &iter->seq;
3455         struct trace_array *tr = iter->tr;
3456
3457         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3458                 return;
3459
3460         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3461                 return;
3462
3463         if (cpumask_available(iter->started) &&
3464             cpumask_test_cpu(iter->cpu, iter->started))
3465                 return;
3466
3467         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3468                 return;
3469
3470         if (cpumask_available(iter->started))
3471                 cpumask_set_cpu(iter->cpu, iter->started);
3472
3473         /* Don't print started cpu buffer for the first entry of the trace */
3474         if (iter->idx > 1)
3475                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3476                                 iter->cpu);
3477 }
3478
3479 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3480 {
3481         struct trace_array *tr = iter->tr;
3482         struct trace_seq *s = &iter->seq;
3483         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3484         struct trace_entry *entry;
3485         struct trace_event *event;
3486
3487         entry = iter->ent;
3488
3489         test_cpu_buff_start(iter);
3490
3491         event = ftrace_find_event(entry->type);
3492
3493         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3494                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3495                         trace_print_lat_context(iter);
3496                 else
3497                         trace_print_context(iter);
3498         }
3499
3500         if (trace_seq_has_overflowed(s))
3501                 return TRACE_TYPE_PARTIAL_LINE;
3502
3503         if (event)
3504                 return event->funcs->trace(iter, sym_flags, event);
3505
3506         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3507
3508         return trace_handle_return(s);
3509 }
3510
3511 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3512 {
3513         struct trace_array *tr = iter->tr;
3514         struct trace_seq *s = &iter->seq;
3515         struct trace_entry *entry;
3516         struct trace_event *event;
3517
3518         entry = iter->ent;
3519
3520         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3521                 trace_seq_printf(s, "%d %d %llu ",
3522                                  entry->pid, iter->cpu, iter->ts);
3523
3524         if (trace_seq_has_overflowed(s))
3525                 return TRACE_TYPE_PARTIAL_LINE;
3526
3527         event = ftrace_find_event(entry->type);
3528         if (event)
3529                 return event->funcs->raw(iter, 0, event);
3530
3531         trace_seq_printf(s, "%d ?\n", entry->type);
3532
3533         return trace_handle_return(s);
3534 }
3535
3536 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3537 {
3538         struct trace_array *tr = iter->tr;
3539         struct trace_seq *s = &iter->seq;
3540         unsigned char newline = '\n';
3541         struct trace_entry *entry;
3542         struct trace_event *event;
3543
3544         entry = iter->ent;
3545
3546         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3547                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3548                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3549                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3550                 if (trace_seq_has_overflowed(s))
3551                         return TRACE_TYPE_PARTIAL_LINE;
3552         }
3553
3554         event = ftrace_find_event(entry->type);
3555         if (event) {
3556                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3557                 if (ret != TRACE_TYPE_HANDLED)
3558                         return ret;
3559         }
3560
3561         SEQ_PUT_FIELD(s, newline);
3562
3563         return trace_handle_return(s);
3564 }
3565
3566 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3567 {
3568         struct trace_array *tr = iter->tr;
3569         struct trace_seq *s = &iter->seq;
3570         struct trace_entry *entry;
3571         struct trace_event *event;
3572
3573         entry = iter->ent;
3574
3575         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3576                 SEQ_PUT_FIELD(s, entry->pid);
3577                 SEQ_PUT_FIELD(s, iter->cpu);
3578                 SEQ_PUT_FIELD(s, iter->ts);
3579                 if (trace_seq_has_overflowed(s))
3580                         return TRACE_TYPE_PARTIAL_LINE;
3581         }
3582
3583         event = ftrace_find_event(entry->type);
3584         return event ? event->funcs->binary(iter, 0, event) :
3585                 TRACE_TYPE_HANDLED;
3586 }
3587
3588 int trace_empty(struct trace_iterator *iter)
3589 {
3590         struct ring_buffer_iter *buf_iter;
3591         int cpu;
3592
3593         /* If we are looking at one CPU buffer, only check that one */
3594         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3595                 cpu = iter->cpu_file;
3596                 buf_iter = trace_buffer_iter(iter, cpu);
3597                 if (buf_iter) {
3598                         if (!ring_buffer_iter_empty(buf_iter))
3599                                 return 0;
3600                 } else {
3601                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3602                                 return 0;
3603                 }
3604                 return 1;
3605         }
3606
3607         for_each_tracing_cpu(cpu) {
3608                 buf_iter = trace_buffer_iter(iter, cpu);
3609                 if (buf_iter) {
3610                         if (!ring_buffer_iter_empty(buf_iter))
3611                                 return 0;
3612                 } else {
3613                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3614                                 return 0;
3615                 }
3616         }
3617
3618         return 1;
3619 }
3620
3621 /*  Called with trace_event_read_lock() held. */
3622 enum print_line_t print_trace_line(struct trace_iterator *iter)
3623 {
3624         struct trace_array *tr = iter->tr;
3625         unsigned long trace_flags = tr->trace_flags;
3626         enum print_line_t ret;
3627
3628         if (iter->lost_events) {
3629                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3630                                  iter->cpu, iter->lost_events);
3631                 if (trace_seq_has_overflowed(&iter->seq))
3632                         return TRACE_TYPE_PARTIAL_LINE;
3633         }
3634
3635         if (iter->trace && iter->trace->print_line) {
3636                 ret = iter->trace->print_line(iter);
3637                 if (ret != TRACE_TYPE_UNHANDLED)
3638                         return ret;
3639         }
3640
3641         if (iter->ent->type == TRACE_BPUTS &&
3642                         trace_flags & TRACE_ITER_PRINTK &&
3643                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3644                 return trace_print_bputs_msg_only(iter);
3645
3646         if (iter->ent->type == TRACE_BPRINT &&
3647                         trace_flags & TRACE_ITER_PRINTK &&
3648                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3649                 return trace_print_bprintk_msg_only(iter);
3650
3651         if (iter->ent->type == TRACE_PRINT &&
3652                         trace_flags & TRACE_ITER_PRINTK &&
3653                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3654                 return trace_print_printk_msg_only(iter);
3655
3656         if (trace_flags & TRACE_ITER_BIN)
3657                 return print_bin_fmt(iter);
3658
3659         if (trace_flags & TRACE_ITER_HEX)
3660                 return print_hex_fmt(iter);
3661
3662         if (trace_flags & TRACE_ITER_RAW)
3663                 return print_raw_fmt(iter);
3664
3665         return print_trace_fmt(iter);
3666 }
3667
3668 void trace_latency_header(struct seq_file *m)
3669 {
3670         struct trace_iterator *iter = m->private;
3671         struct trace_array *tr = iter->tr;
3672
3673         /* print nothing if the buffers are empty */
3674         if (trace_empty(iter))
3675                 return;
3676
3677         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3678                 print_trace_header(m, iter);
3679
3680         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3681                 print_lat_help_header(m);
3682 }
3683
3684 void trace_default_header(struct seq_file *m)
3685 {
3686         struct trace_iterator *iter = m->private;
3687         struct trace_array *tr = iter->tr;
3688         unsigned long trace_flags = tr->trace_flags;
3689
3690         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3691                 return;
3692
3693         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3694                 /* print nothing if the buffers are empty */
3695                 if (trace_empty(iter))
3696                         return;
3697                 print_trace_header(m, iter);
3698                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3699                         print_lat_help_header(m);
3700         } else {
3701                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3702                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3703                                 print_func_help_header_irq(iter->trace_buffer,
3704                                                            m, trace_flags);
3705                         else
3706                                 print_func_help_header(iter->trace_buffer, m,
3707                                                        trace_flags);
3708                 }
3709         }
3710 }
3711
3712 static void test_ftrace_alive(struct seq_file *m)
3713 {
3714         if (!ftrace_is_dead())
3715                 return;
3716         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3717                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3718 }
3719
3720 #ifdef CONFIG_TRACER_MAX_TRACE
3721 static void show_snapshot_main_help(struct seq_file *m)
3722 {
3723         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3724                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3725                     "#                      Takes a snapshot of the main buffer.\n"
3726                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3727                     "#                      (Doesn't have to be '2' works with any number that\n"
3728                     "#                       is not a '0' or '1')\n");
3729 }
3730
3731 static void show_snapshot_percpu_help(struct seq_file *m)
3732 {
3733         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3734 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3735         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3736                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3737 #else
3738         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3739                     "#                     Must use main snapshot file to allocate.\n");
3740 #endif
3741         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3742                     "#                      (Doesn't have to be '2' works with any number that\n"
3743                     "#                       is not a '0' or '1')\n");
3744 }
3745
3746 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3747 {
3748         if (iter->tr->allocated_snapshot)
3749                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3750         else
3751                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3752
3753         seq_puts(m, "# Snapshot commands:\n");
3754         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3755                 show_snapshot_main_help(m);
3756         else
3757                 show_snapshot_percpu_help(m);
3758 }
3759 #else
3760 /* Should never be called */
3761 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3762 #endif
3763
3764 static int s_show(struct seq_file *m, void *v)
3765 {
3766         struct trace_iterator *iter = v;
3767         int ret;
3768
3769         if (iter->ent == NULL) {
3770                 if (iter->tr) {
3771                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3772                         seq_puts(m, "#\n");
3773                         test_ftrace_alive(m);
3774                 }
3775                 if (iter->snapshot && trace_empty(iter))
3776                         print_snapshot_help(m, iter);
3777                 else if (iter->trace && iter->trace->print_header)
3778                         iter->trace->print_header(m);
3779                 else
3780                         trace_default_header(m);
3781
3782         } else if (iter->leftover) {
3783                 /*
3784                  * If we filled the seq_file buffer earlier, we
3785                  * want to just show it now.
3786                  */
3787                 ret = trace_print_seq(m, &iter->seq);
3788
3789                 /* ret should this time be zero, but you never know */
3790                 iter->leftover = ret;
3791
3792         } else {
3793                 print_trace_line(iter);
3794                 ret = trace_print_seq(m, &iter->seq);
3795                 /*
3796                  * If we overflow the seq_file buffer, then it will
3797                  * ask us for this data again at start up.
3798                  * Use that instead.
3799                  *  ret is 0 if seq_file write succeeded.
3800                  *        -1 otherwise.
3801                  */
3802                 iter->leftover = ret;
3803         }
3804
3805         return 0;
3806 }
3807
3808 /*
3809  * Should be used after trace_array_get(), trace_types_lock
3810  * ensures that i_cdev was already initialized.
3811  */
3812 static inline int tracing_get_cpu(struct inode *inode)
3813 {
3814         if (inode->i_cdev) /* See trace_create_cpu_file() */
3815                 return (long)inode->i_cdev - 1;
3816         return RING_BUFFER_ALL_CPUS;
3817 }
3818
3819 static const struct seq_operations tracer_seq_ops = {
3820         .start          = s_start,
3821         .next           = s_next,
3822         .stop           = s_stop,
3823         .show           = s_show,
3824 };
3825
3826 static struct trace_iterator *
3827 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3828 {
3829         struct trace_array *tr = inode->i_private;
3830         struct trace_iterator *iter;
3831         int cpu;
3832
3833         if (tracing_disabled)
3834                 return ERR_PTR(-ENODEV);
3835
3836         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3837         if (!iter)
3838                 return ERR_PTR(-ENOMEM);
3839
3840         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3841                                     GFP_KERNEL);
3842         if (!iter->buffer_iter)
3843                 goto release;
3844
3845         /*
3846          * We make a copy of the current tracer to avoid concurrent
3847          * changes on it while we are reading.
3848          */
3849         mutex_lock(&trace_types_lock);
3850         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3851         if (!iter->trace)
3852                 goto fail;
3853
3854         *iter->trace = *tr->current_trace;
3855
3856         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3857                 goto fail;
3858
3859         iter->tr = tr;
3860
3861 #ifdef CONFIG_TRACER_MAX_TRACE
3862         /* Currently only the top directory has a snapshot */
3863         if (tr->current_trace->print_max || snapshot)
3864                 iter->trace_buffer = &tr->max_buffer;
3865         else
3866 #endif
3867                 iter->trace_buffer = &tr->trace_buffer;
3868         iter->snapshot = snapshot;
3869         iter->pos = -1;
3870         iter->cpu_file = tracing_get_cpu(inode);
3871         mutex_init(&iter->mutex);
3872
3873         /* Notify the tracer early; before we stop tracing. */
3874         if (iter->trace && iter->trace->open)
3875                 iter->trace->open(iter);
3876
3877         /* Annotate start of buffers if we had overruns */
3878         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3879                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3880
3881         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3882         if (trace_clocks[tr->clock_id].in_ns)
3883                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3884
3885         /* stop the trace while dumping if we are not opening "snapshot" */
3886         if (!iter->snapshot)
3887                 tracing_stop_tr(tr);
3888
3889         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3890                 for_each_tracing_cpu(cpu) {
3891                         iter->buffer_iter[cpu] =
3892                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3893                 }
3894                 ring_buffer_read_prepare_sync();
3895                 for_each_tracing_cpu(cpu) {
3896                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3897                         tracing_iter_reset(iter, cpu);
3898                 }
3899         } else {
3900                 cpu = iter->cpu_file;
3901                 iter->buffer_iter[cpu] =
3902                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3903                 ring_buffer_read_prepare_sync();
3904                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3905                 tracing_iter_reset(iter, cpu);
3906         }
3907
3908         mutex_unlock(&trace_types_lock);
3909
3910         return iter;
3911
3912  fail:
3913         mutex_unlock(&trace_types_lock);
3914         kfree(iter->trace);
3915         kfree(iter->buffer_iter);
3916 release:
3917         seq_release_private(inode, file);
3918         return ERR_PTR(-ENOMEM);
3919 }
3920
3921 int tracing_open_generic(struct inode *inode, struct file *filp)
3922 {
3923         if (tracing_disabled)
3924                 return -ENODEV;
3925
3926         filp->private_data = inode->i_private;
3927         return 0;
3928 }
3929
3930 bool tracing_is_disabled(void)
3931 {
3932         return (tracing_disabled) ? true: false;
3933 }
3934
3935 /*
3936  * Open and update trace_array ref count.
3937  * Must have the current trace_array passed to it.
3938  */
3939 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3940 {
3941         struct trace_array *tr = inode->i_private;
3942
3943         if (tracing_disabled)
3944                 return -ENODEV;
3945
3946         if (trace_array_get(tr) < 0)
3947                 return -ENODEV;
3948
3949         filp->private_data = inode->i_private;
3950
3951         return 0;
3952 }
3953
3954 static int tracing_release(struct inode *inode, struct file *file)
3955 {
3956         struct trace_array *tr = inode->i_private;
3957         struct seq_file *m = file->private_data;
3958         struct trace_iterator *iter;
3959         int cpu;
3960
3961         if (!(file->f_mode & FMODE_READ)) {
3962                 trace_array_put(tr);
3963                 return 0;
3964         }
3965
3966         /* Writes do not use seq_file */
3967         iter = m->private;
3968         mutex_lock(&trace_types_lock);
3969
3970         for_each_tracing_cpu(cpu) {
3971                 if (iter->buffer_iter[cpu])
3972                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3973         }
3974
3975         if (iter->trace && iter->trace->close)
3976                 iter->trace->close(iter);
3977
3978         if (!iter->snapshot)
3979                 /* reenable tracing if it was previously enabled */
3980                 tracing_start_tr(tr);
3981
3982         __trace_array_put(tr);
3983
3984         mutex_unlock(&trace_types_lock);
3985
3986         mutex_destroy(&iter->mutex);
3987         free_cpumask_var(iter->started);
3988         kfree(iter->trace);
3989         kfree(iter->buffer_iter);
3990         seq_release_private(inode, file);
3991
3992         return 0;
3993 }
3994
3995 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3996 {
3997         struct trace_array *tr = inode->i_private;
3998
3999         trace_array_put(tr);
4000         return 0;
4001 }
4002
4003 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4004 {
4005         struct trace_array *tr = inode->i_private;
4006
4007         trace_array_put(tr);
4008
4009         return single_release(inode, file);
4010 }
4011
4012 static int tracing_open(struct inode *inode, struct file *file)
4013 {
4014         struct trace_array *tr = inode->i_private;
4015         struct trace_iterator *iter;
4016         int ret = 0;
4017
4018         if (trace_array_get(tr) < 0)
4019                 return -ENODEV;
4020
4021         /* If this file was open for write, then erase contents */
4022         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4023                 int cpu = tracing_get_cpu(inode);
4024                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4025
4026 #ifdef CONFIG_TRACER_MAX_TRACE
4027                 if (tr->current_trace->print_max)
4028                         trace_buf = &tr->max_buffer;
4029 #endif
4030
4031                 if (cpu == RING_BUFFER_ALL_CPUS)
4032                         tracing_reset_online_cpus(trace_buf);
4033                 else
4034                         tracing_reset(trace_buf, cpu);
4035         }
4036
4037         if (file->f_mode & FMODE_READ) {
4038                 iter = __tracing_open(inode, file, false);
4039                 if (IS_ERR(iter))
4040                         ret = PTR_ERR(iter);
4041                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4042                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4043         }
4044
4045         if (ret < 0)
4046                 trace_array_put(tr);
4047
4048         return ret;
4049 }
4050
4051 /*
4052  * Some tracers are not suitable for instance buffers.
4053  * A tracer is always available for the global array (toplevel)
4054  * or if it explicitly states that it is.
4055  */
4056 static bool
4057 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4058 {
4059         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4060 }
4061
4062 /* Find the next tracer that this trace array may use */
4063 static struct tracer *
4064 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4065 {
4066         while (t && !trace_ok_for_array(t, tr))
4067                 t = t->next;
4068
4069         return t;
4070 }
4071
4072 static void *
4073 t_next(struct seq_file *m, void *v, loff_t *pos)
4074 {
4075         struct trace_array *tr = m->private;
4076         struct tracer *t = v;
4077
4078         (*pos)++;
4079
4080         if (t)
4081                 t = get_tracer_for_array(tr, t->next);
4082
4083         return t;
4084 }
4085
4086 static void *t_start(struct seq_file *m, loff_t *pos)
4087 {
4088         struct trace_array *tr = m->private;
4089         struct tracer *t;
4090         loff_t l = 0;
4091
4092         mutex_lock(&trace_types_lock);
4093
4094         t = get_tracer_for_array(tr, trace_types);
4095         for (; t && l < *pos; t = t_next(m, t, &l))
4096                         ;
4097
4098         return t;
4099 }
4100
4101 static void t_stop(struct seq_file *m, void *p)
4102 {
4103         mutex_unlock(&trace_types_lock);
4104 }
4105
4106 static int t_show(struct seq_file *m, void *v)
4107 {
4108         struct tracer *t = v;
4109
4110         if (!t)
4111                 return 0;
4112
4113         seq_puts(m, t->name);
4114         if (t->next)
4115                 seq_putc(m, ' ');
4116         else
4117                 seq_putc(m, '\n');
4118
4119         return 0;
4120 }
4121
4122 static const struct seq_operations show_traces_seq_ops = {
4123         .start          = t_start,
4124         .next           = t_next,
4125         .stop           = t_stop,
4126         .show           = t_show,
4127 };
4128
4129 static int show_traces_open(struct inode *inode, struct file *file)
4130 {
4131         struct trace_array *tr = inode->i_private;
4132         struct seq_file *m;
4133         int ret;
4134
4135         if (tracing_disabled)
4136                 return -ENODEV;
4137
4138         ret = seq_open(file, &show_traces_seq_ops);
4139         if (ret)
4140                 return ret;
4141
4142         m = file->private_data;
4143         m->private = tr;
4144
4145         return 0;
4146 }
4147
4148 static ssize_t
4149 tracing_write_stub(struct file *filp, const char __user *ubuf,
4150                    size_t count, loff_t *ppos)
4151 {
4152         return count;
4153 }
4154
4155 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4156 {
4157         int ret;
4158
4159         if (file->f_mode & FMODE_READ)
4160                 ret = seq_lseek(file, offset, whence);
4161         else
4162                 file->f_pos = ret = 0;
4163
4164         return ret;
4165 }
4166
4167 static const struct file_operations tracing_fops = {
4168         .open           = tracing_open,
4169         .read           = seq_read,
4170         .write          = tracing_write_stub,
4171         .llseek         = tracing_lseek,
4172         .release        = tracing_release,
4173 };
4174
4175 static const struct file_operations show_traces_fops = {
4176         .open           = show_traces_open,
4177         .read           = seq_read,
4178         .release        = seq_release,
4179         .llseek         = seq_lseek,
4180 };
4181
4182 static ssize_t
4183 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4184                      size_t count, loff_t *ppos)
4185 {
4186         struct trace_array *tr = file_inode(filp)->i_private;
4187         char *mask_str;
4188         int len;
4189
4190         len = snprintf(NULL, 0, "%*pb\n",
4191                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4192         mask_str = kmalloc(len, GFP_KERNEL);
4193         if (!mask_str)
4194                 return -ENOMEM;
4195
4196         len = snprintf(mask_str, len, "%*pb\n",
4197                        cpumask_pr_args(tr->tracing_cpumask));
4198         if (len >= count) {
4199                 count = -EINVAL;
4200                 goto out_err;
4201         }
4202         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4203
4204 out_err:
4205         kfree(mask_str);
4206
4207         return count;
4208 }
4209
4210 static ssize_t
4211 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4212                       size_t count, loff_t *ppos)
4213 {
4214         struct trace_array *tr = file_inode(filp)->i_private;
4215         cpumask_var_t tracing_cpumask_new;
4216         int err, cpu;
4217
4218         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4219                 return -ENOMEM;
4220
4221         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4222         if (err)
4223                 goto err_unlock;
4224
4225         local_irq_disable();
4226         arch_spin_lock(&tr->max_lock);
4227         for_each_tracing_cpu(cpu) {
4228                 /*
4229                  * Increase/decrease the disabled counter if we are
4230                  * about to flip a bit in the cpumask:
4231                  */
4232                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4233                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4234                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4235                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4236                 }
4237                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4238                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4239                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4240                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4241                 }
4242         }
4243         arch_spin_unlock(&tr->max_lock);
4244         local_irq_enable();
4245
4246         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4247         free_cpumask_var(tracing_cpumask_new);
4248
4249         return count;
4250
4251 err_unlock:
4252         free_cpumask_var(tracing_cpumask_new);
4253
4254         return err;
4255 }
4256
4257 static const struct file_operations tracing_cpumask_fops = {
4258         .open           = tracing_open_generic_tr,
4259         .read           = tracing_cpumask_read,
4260         .write          = tracing_cpumask_write,
4261         .release        = tracing_release_generic_tr,
4262         .llseek         = generic_file_llseek,
4263 };
4264
4265 static int tracing_trace_options_show(struct seq_file *m, void *v)
4266 {
4267         struct tracer_opt *trace_opts;
4268         struct trace_array *tr = m->private;
4269         u32 tracer_flags;
4270         int i;
4271
4272         mutex_lock(&trace_types_lock);
4273         tracer_flags = tr->current_trace->flags->val;
4274         trace_opts = tr->current_trace->flags->opts;
4275
4276         for (i = 0; trace_options[i]; i++) {
4277                 if (tr->trace_flags & (1 << i))
4278                         seq_printf(m, "%s\n", trace_options[i]);
4279                 else
4280                         seq_printf(m, "no%s\n", trace_options[i]);
4281         }
4282
4283         for (i = 0; trace_opts[i].name; i++) {
4284                 if (tracer_flags & trace_opts[i].bit)
4285                         seq_printf(m, "%s\n", trace_opts[i].name);
4286                 else
4287                         seq_printf(m, "no%s\n", trace_opts[i].name);
4288         }
4289         mutex_unlock(&trace_types_lock);
4290
4291         return 0;
4292 }
4293
4294 static int __set_tracer_option(struct trace_array *tr,
4295                                struct tracer_flags *tracer_flags,
4296                                struct tracer_opt *opts, int neg)
4297 {
4298         struct tracer *trace = tracer_flags->trace;
4299         int ret;
4300
4301         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4302         if (ret)
4303                 return ret;
4304
4305         if (neg)
4306                 tracer_flags->val &= ~opts->bit;
4307         else
4308                 tracer_flags->val |= opts->bit;
4309         return 0;
4310 }
4311
4312 /* Try to assign a tracer specific option */
4313 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4314 {
4315         struct tracer *trace = tr->current_trace;
4316         struct tracer_flags *tracer_flags = trace->flags;
4317         struct tracer_opt *opts = NULL;
4318         int i;
4319
4320         for (i = 0; tracer_flags->opts[i].name; i++) {
4321                 opts = &tracer_flags->opts[i];
4322
4323                 if (strcmp(cmp, opts->name) == 0)
4324                         return __set_tracer_option(tr, trace->flags, opts, neg);
4325         }
4326
4327         return -EINVAL;
4328 }
4329
4330 /* Some tracers require overwrite to stay enabled */
4331 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4332 {
4333         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4334                 return -1;
4335
4336         return 0;
4337 }
4338
4339 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4340 {
4341         /* do nothing if flag is already set */
4342         if (!!(tr->trace_flags & mask) == !!enabled)
4343                 return 0;
4344
4345         /* Give the tracer a chance to approve the change */
4346         if (tr->current_trace->flag_changed)
4347                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4348                         return -EINVAL;
4349
4350         if (enabled)
4351                 tr->trace_flags |= mask;
4352         else
4353                 tr->trace_flags &= ~mask;
4354
4355         if (mask == TRACE_ITER_RECORD_CMD)
4356                 trace_event_enable_cmd_record(enabled);
4357
4358         if (mask == TRACE_ITER_RECORD_TGID) {
4359                 if (!tgid_map)
4360                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4361                                            sizeof(*tgid_map),
4362                                            GFP_KERNEL);
4363                 if (!tgid_map) {
4364                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4365                         return -ENOMEM;
4366                 }
4367
4368                 trace_event_enable_tgid_record(enabled);
4369         }
4370
4371         if (mask == TRACE_ITER_EVENT_FORK)
4372                 trace_event_follow_fork(tr, enabled);
4373
4374         if (mask == TRACE_ITER_FUNC_FORK)
4375                 ftrace_pid_follow_fork(tr, enabled);
4376
4377         if (mask == TRACE_ITER_OVERWRITE) {
4378                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4379 #ifdef CONFIG_TRACER_MAX_TRACE
4380                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4381 #endif
4382         }
4383
4384         if (mask == TRACE_ITER_PRINTK) {
4385                 trace_printk_start_stop_comm(enabled);
4386                 trace_printk_control(enabled);
4387         }
4388
4389         return 0;
4390 }
4391
4392 static int trace_set_options(struct trace_array *tr, char *option)
4393 {
4394         char *cmp;
4395         int neg = 0;
4396         int ret;
4397         size_t orig_len = strlen(option);
4398
4399         cmp = strstrip(option);
4400
4401         if (strncmp(cmp, "no", 2) == 0) {
4402                 neg = 1;
4403                 cmp += 2;
4404         }
4405
4406         mutex_lock(&trace_types_lock);
4407
4408         ret = match_string(trace_options, -1, cmp);
4409         /* If no option could be set, test the specific tracer options */
4410         if (ret < 0)
4411                 ret = set_tracer_option(tr, cmp, neg);
4412         else
4413                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4414
4415         mutex_unlock(&trace_types_lock);
4416
4417         /*
4418          * If the first trailing whitespace is replaced with '\0' by strstrip,
4419          * turn it back into a space.
4420          */
4421         if (orig_len > strlen(option))
4422                 option[strlen(option)] = ' ';
4423
4424         return ret;
4425 }
4426
4427 static void __init apply_trace_boot_options(void)
4428 {
4429         char *buf = trace_boot_options_buf;
4430         char *option;
4431
4432         while (true) {
4433                 option = strsep(&buf, ",");
4434
4435                 if (!option)
4436                         break;
4437
4438                 if (*option)
4439                         trace_set_options(&global_trace, option);
4440
4441                 /* Put back the comma to allow this to be called again */
4442                 if (buf)
4443                         *(buf - 1) = ',';
4444         }
4445 }
4446
4447 static ssize_t
4448 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4449                         size_t cnt, loff_t *ppos)
4450 {
4451         struct seq_file *m = filp->private_data;
4452         struct trace_array *tr = m->private;
4453         char buf[64];
4454         int ret;
4455
4456         if (cnt >= sizeof(buf))
4457                 return -EINVAL;
4458
4459         if (copy_from_user(buf, ubuf, cnt))
4460                 return -EFAULT;
4461
4462         buf[cnt] = 0;
4463
4464         ret = trace_set_options(tr, buf);
4465         if (ret < 0)
4466                 return ret;
4467
4468         *ppos += cnt;
4469
4470         return cnt;
4471 }
4472
4473 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4474 {
4475         struct trace_array *tr = inode->i_private;
4476         int ret;
4477
4478         if (tracing_disabled)
4479                 return -ENODEV;
4480
4481         if (trace_array_get(tr) < 0)
4482                 return -ENODEV;
4483
4484         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4485         if (ret < 0)
4486                 trace_array_put(tr);
4487
4488         return ret;
4489 }
4490
4491 static const struct file_operations tracing_iter_fops = {
4492         .open           = tracing_trace_options_open,
4493         .read           = seq_read,
4494         .llseek         = seq_lseek,
4495         .release        = tracing_single_release_tr,
4496         .write          = tracing_trace_options_write,
4497 };
4498
4499 static const char readme_msg[] =
4500         "tracing mini-HOWTO:\n\n"
4501         "# echo 0 > tracing_on : quick way to disable tracing\n"
4502         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4503         " Important files:\n"
4504         "  trace\t\t\t- The static contents of the buffer\n"
4505         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4506         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4507         "  current_tracer\t- function and latency tracers\n"
4508         "  available_tracers\t- list of configured tracers for current_tracer\n"
4509         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4510         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4511         "  trace_clock\t\t-change the clock used to order events\n"
4512         "       local:   Per cpu clock but may not be synced across CPUs\n"
4513         "      global:   Synced across CPUs but slows tracing down.\n"
4514         "     counter:   Not a clock, but just an increment\n"
4515         "      uptime:   Jiffy counter from time of boot\n"
4516         "        perf:   Same clock that perf events use\n"
4517 #ifdef CONFIG_X86_64
4518         "     x86-tsc:   TSC cycle counter\n"
4519 #endif
4520         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
4521         "       delta:   Delta difference against a buffer-wide timestamp\n"
4522         "    absolute:   Absolute (standalone) timestamp\n"
4523         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4524         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4525         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4526         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4527         "\t\t\t  Remove sub-buffer with rmdir\n"
4528         "  trace_options\t\t- Set format or modify how tracing happens\n"
4529         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4530         "\t\t\t  option name\n"
4531         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4532 #ifdef CONFIG_DYNAMIC_FTRACE
4533         "\n  available_filter_functions - list of functions that can be filtered on\n"
4534         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4535         "\t\t\t  functions\n"
4536         "\t     accepts: func_full_name or glob-matching-pattern\n"
4537         "\t     modules: Can select a group via module\n"
4538         "\t      Format: :mod:<module-name>\n"
4539         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4540         "\t    triggers: a command to perform when function is hit\n"
4541         "\t      Format: <function>:<trigger>[:count]\n"
4542         "\t     trigger: traceon, traceoff\n"
4543         "\t\t      enable_event:<system>:<event>\n"
4544         "\t\t      disable_event:<system>:<event>\n"
4545 #ifdef CONFIG_STACKTRACE
4546         "\t\t      stacktrace\n"
4547 #endif
4548 #ifdef CONFIG_TRACER_SNAPSHOT
4549         "\t\t      snapshot\n"
4550 #endif
4551         "\t\t      dump\n"
4552         "\t\t      cpudump\n"
4553         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4554         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4555         "\t     The first one will disable tracing every time do_fault is hit\n"
4556         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4557         "\t       The first time do trap is hit and it disables tracing, the\n"
4558         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4559         "\t       the counter will not decrement. It only decrements when the\n"
4560         "\t       trigger did work\n"
4561         "\t     To remove trigger without count:\n"
4562         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4563         "\t     To remove trigger with a count:\n"
4564         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4565         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4566         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4567         "\t    modules: Can select a group via module command :mod:\n"
4568         "\t    Does not accept triggers\n"
4569 #endif /* CONFIG_DYNAMIC_FTRACE */
4570 #ifdef CONFIG_FUNCTION_TRACER
4571         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4572         "\t\t    (function)\n"
4573 #endif
4574 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4575         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4576         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4577         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4578 #endif
4579 #ifdef CONFIG_TRACER_SNAPSHOT
4580         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4581         "\t\t\t  snapshot buffer. Read the contents for more\n"
4582         "\t\t\t  information\n"
4583 #endif
4584 #ifdef CONFIG_STACK_TRACER
4585         "  stack_trace\t\t- Shows the max stack trace when active\n"
4586         "  stack_max_size\t- Shows current max stack size that was traced\n"
4587         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4588         "\t\t\t  new trace)\n"
4589 #ifdef CONFIG_DYNAMIC_FTRACE
4590         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4591         "\t\t\t  traces\n"
4592 #endif
4593 #endif /* CONFIG_STACK_TRACER */
4594 #ifdef CONFIG_KPROBE_EVENTS
4595         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4596         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4597 #endif
4598 #ifdef CONFIG_UPROBE_EVENTS
4599         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4600         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4601 #endif
4602 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4603         "\t  accepts: event-definitions (one definition per line)\n"
4604         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4605         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4606         "\t           -:[<group>/]<event>\n"
4607 #ifdef CONFIG_KPROBE_EVENTS
4608         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4609   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4610 #endif
4611 #ifdef CONFIG_UPROBE_EVENTS
4612         "\t    place: <path>:<offset>\n"
4613 #endif
4614         "\t     args: <name>=fetcharg[:type]\n"
4615         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4616         "\t           $stack<index>, $stack, $retval, $comm\n"
4617         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4618         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4619 #endif
4620         "  events/\t\t- Directory containing all trace event subsystems:\n"
4621         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4622         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4623         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4624         "\t\t\t  events\n"
4625         "      filter\t\t- If set, only events passing filter are traced\n"
4626         "  events/<system>/<event>/\t- Directory containing control files for\n"
4627         "\t\t\t  <event>:\n"
4628         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4629         "      filter\t\t- If set, only events passing filter are traced\n"
4630         "      trigger\t\t- If set, a command to perform when event is hit\n"
4631         "\t    Format: <trigger>[:count][if <filter>]\n"
4632         "\t   trigger: traceon, traceoff\n"
4633         "\t            enable_event:<system>:<event>\n"
4634         "\t            disable_event:<system>:<event>\n"
4635 #ifdef CONFIG_HIST_TRIGGERS
4636         "\t            enable_hist:<system>:<event>\n"
4637         "\t            disable_hist:<system>:<event>\n"
4638 #endif
4639 #ifdef CONFIG_STACKTRACE
4640         "\t\t    stacktrace\n"
4641 #endif
4642 #ifdef CONFIG_TRACER_SNAPSHOT
4643         "\t\t    snapshot\n"
4644 #endif
4645 #ifdef CONFIG_HIST_TRIGGERS
4646         "\t\t    hist (see below)\n"
4647 #endif
4648         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4649         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4650         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4651         "\t                  events/block/block_unplug/trigger\n"
4652         "\t   The first disables tracing every time block_unplug is hit.\n"
4653         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4654         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4655         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4656         "\t   Like function triggers, the counter is only decremented if it\n"
4657         "\t    enabled or disabled tracing.\n"
4658         "\t   To remove a trigger without a count:\n"
4659         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4660         "\t   To remove a trigger with a count:\n"
4661         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4662         "\t   Filters can be ignored when removing a trigger.\n"
4663 #ifdef CONFIG_HIST_TRIGGERS
4664         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4665         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4666         "\t            [:values=<field1[,field2,...]>]\n"
4667         "\t            [:sort=<field1[,field2,...]>]\n"
4668         "\t            [:size=#entries]\n"
4669         "\t            [:pause][:continue][:clear]\n"
4670         "\t            [:name=histname1]\n"
4671         "\t            [if <filter>]\n\n"
4672         "\t    When a matching event is hit, an entry is added to a hash\n"
4673         "\t    table using the key(s) and value(s) named, and the value of a\n"
4674         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4675         "\t    correspond to fields in the event's format description.  Keys\n"
4676         "\t    can be any field, or the special string 'stacktrace'.\n"
4677         "\t    Compound keys consisting of up to two fields can be specified\n"
4678         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4679         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4680         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4681         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4682         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4683         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4684         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4685         "\t    its histogram data will be shared with other triggers of the\n"
4686         "\t    same name, and trigger hits will update this common data.\n\n"
4687         "\t    Reading the 'hist' file for the event will dump the hash\n"
4688         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4689         "\t    triggers attached to an event, there will be a table for each\n"
4690         "\t    trigger in the output.  The table displayed for a named\n"
4691         "\t    trigger will be the same as any other instance having the\n"
4692         "\t    same name.  The default format used to display a given field\n"
4693         "\t    can be modified by appending any of the following modifiers\n"
4694         "\t    to the field name, as applicable:\n\n"
4695         "\t            .hex        display a number as a hex value\n"
4696         "\t            .sym        display an address as a symbol\n"
4697         "\t            .sym-offset display an address as a symbol and offset\n"
4698         "\t            .execname   display a common_pid as a program name\n"
4699         "\t            .syscall    display a syscall id as a syscall name\n"
4700         "\t            .log2       display log2 value rather than raw number\n"
4701         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4702         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4703         "\t    trigger or to start a hist trigger but not log any events\n"
4704         "\t    until told to do so.  'continue' can be used to start or\n"
4705         "\t    restart a paused hist trigger.\n\n"
4706         "\t    The 'clear' parameter will clear the contents of a running\n"
4707         "\t    hist trigger and leave its current paused/active state\n"
4708         "\t    unchanged.\n\n"
4709         "\t    The enable_hist and disable_hist triggers can be used to\n"
4710         "\t    have one event conditionally start and stop another event's\n"
4711         "\t    already-attached hist trigger.  The syntax is analagous to\n"
4712         "\t    the enable_event and disable_event triggers.\n"
4713 #endif
4714 ;
4715
4716 static ssize_t
4717 tracing_readme_read(struct file *filp, char __user *ubuf,
4718                        size_t cnt, loff_t *ppos)
4719 {
4720         return simple_read_from_buffer(ubuf, cnt, ppos,
4721                                         readme_msg, strlen(readme_msg));
4722 }
4723
4724 static const struct file_operations tracing_readme_fops = {
4725         .open           = tracing_open_generic,
4726         .read           = tracing_readme_read,
4727         .llseek         = generic_file_llseek,
4728 };
4729
4730 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4731 {
4732         int *ptr = v;
4733
4734         if (*pos || m->count)
4735                 ptr++;
4736
4737         (*pos)++;
4738
4739         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4740                 if (trace_find_tgid(*ptr))
4741                         return ptr;
4742         }
4743
4744         return NULL;
4745 }
4746
4747 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4748 {
4749         void *v;
4750         loff_t l = 0;
4751
4752         if (!tgid_map)
4753                 return NULL;
4754
4755         v = &tgid_map[0];
4756         while (l <= *pos) {
4757                 v = saved_tgids_next(m, v, &l);
4758                 if (!v)
4759                         return NULL;
4760         }
4761
4762         return v;
4763 }
4764
4765 static void saved_tgids_stop(struct seq_file *m, void *v)
4766 {
4767 }
4768
4769 static int saved_tgids_show(struct seq_file *m, void *v)
4770 {
4771         int pid = (int *)v - tgid_map;
4772
4773         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4774         return 0;
4775 }
4776
4777 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4778         .start          = saved_tgids_start,
4779         .stop           = saved_tgids_stop,
4780         .next           = saved_tgids_next,
4781         .show           = saved_tgids_show,
4782 };
4783
4784 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4785 {
4786         if (tracing_disabled)
4787                 return -ENODEV;
4788
4789         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4790 }
4791
4792
4793 static const struct file_operations tracing_saved_tgids_fops = {
4794         .open           = tracing_saved_tgids_open,
4795         .read           = seq_read,
4796         .llseek         = seq_lseek,
4797         .release        = seq_release,
4798 };
4799
4800 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4801 {
4802         unsigned int *ptr = v;
4803
4804         if (*pos || m->count)
4805                 ptr++;
4806
4807         (*pos)++;
4808
4809         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4810              ptr++) {
4811                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4812                         continue;
4813
4814                 return ptr;
4815         }
4816
4817         return NULL;
4818 }
4819
4820 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4821 {
4822         void *v;
4823         loff_t l = 0;
4824
4825         preempt_disable();
4826         arch_spin_lock(&trace_cmdline_lock);
4827
4828         v = &savedcmd->map_cmdline_to_pid[0];
4829         while (l <= *pos) {
4830                 v = saved_cmdlines_next(m, v, &l);
4831                 if (!v)
4832                         return NULL;
4833         }
4834
4835         return v;
4836 }
4837
4838 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4839 {
4840         arch_spin_unlock(&trace_cmdline_lock);
4841         preempt_enable();
4842 }
4843
4844 static int saved_cmdlines_show(struct seq_file *m, void *v)
4845 {
4846         char buf[TASK_COMM_LEN];
4847         unsigned int *pid = v;
4848
4849         __trace_find_cmdline(*pid, buf);
4850         seq_printf(m, "%d %s\n", *pid, buf);
4851         return 0;
4852 }
4853
4854 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4855         .start          = saved_cmdlines_start,
4856         .next           = saved_cmdlines_next,
4857         .stop           = saved_cmdlines_stop,
4858         .show           = saved_cmdlines_show,
4859 };
4860
4861 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4862 {
4863         if (tracing_disabled)
4864                 return -ENODEV;
4865
4866         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4867 }
4868
4869 static const struct file_operations tracing_saved_cmdlines_fops = {
4870         .open           = tracing_saved_cmdlines_open,
4871         .read           = seq_read,
4872         .llseek         = seq_lseek,
4873         .release        = seq_release,
4874 };
4875
4876 static ssize_t
4877 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4878                                  size_t cnt, loff_t *ppos)
4879 {
4880         char buf[64];
4881         int r;
4882
4883         arch_spin_lock(&trace_cmdline_lock);
4884         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4885         arch_spin_unlock(&trace_cmdline_lock);
4886
4887         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4888 }
4889
4890 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4891 {
4892         kfree(s->saved_cmdlines);
4893         kfree(s->map_cmdline_to_pid);
4894         kfree(s);
4895 }
4896
4897 static int tracing_resize_saved_cmdlines(unsigned int val)
4898 {
4899         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4900
4901         s = kmalloc(sizeof(*s), GFP_KERNEL);
4902         if (!s)
4903                 return -ENOMEM;
4904
4905         if (allocate_cmdlines_buffer(val, s) < 0) {
4906                 kfree(s);
4907                 return -ENOMEM;
4908         }
4909
4910         arch_spin_lock(&trace_cmdline_lock);
4911         savedcmd_temp = savedcmd;
4912         savedcmd = s;
4913         arch_spin_unlock(&trace_cmdline_lock);
4914         free_saved_cmdlines_buffer(savedcmd_temp);
4915
4916         return 0;
4917 }
4918
4919 static ssize_t
4920 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4921                                   size_t cnt, loff_t *ppos)
4922 {
4923         unsigned long val;
4924         int ret;
4925
4926         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4927         if (ret)
4928                 return ret;
4929
4930         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
4931         if (!val || val > PID_MAX_DEFAULT)
4932                 return -EINVAL;
4933
4934         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4935         if (ret < 0)
4936                 return ret;
4937
4938         *ppos += cnt;
4939
4940         return cnt;
4941 }
4942
4943 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4944         .open           = tracing_open_generic,
4945         .read           = tracing_saved_cmdlines_size_read,
4946         .write          = tracing_saved_cmdlines_size_write,
4947 };
4948
4949 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4950 static union trace_eval_map_item *
4951 update_eval_map(union trace_eval_map_item *ptr)
4952 {
4953         if (!ptr->map.eval_string) {
4954                 if (ptr->tail.next) {
4955                         ptr = ptr->tail.next;
4956                         /* Set ptr to the next real item (skip head) */
4957                         ptr++;
4958                 } else
4959                         return NULL;
4960         }
4961         return ptr;
4962 }
4963
4964 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4965 {
4966         union trace_eval_map_item *ptr = v;
4967
4968         /*
4969          * Paranoid! If ptr points to end, we don't want to increment past it.
4970          * This really should never happen.
4971          */
4972         ptr = update_eval_map(ptr);
4973         if (WARN_ON_ONCE(!ptr))
4974                 return NULL;
4975
4976         ptr++;
4977
4978         (*pos)++;
4979
4980         ptr = update_eval_map(ptr);
4981
4982         return ptr;
4983 }
4984
4985 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4986 {
4987         union trace_eval_map_item *v;
4988         loff_t l = 0;
4989
4990         mutex_lock(&trace_eval_mutex);
4991
4992         v = trace_eval_maps;
4993         if (v)
4994                 v++;
4995
4996         while (v && l < *pos) {
4997                 v = eval_map_next(m, v, &l);
4998         }
4999
5000         return v;
5001 }
5002
5003 static void eval_map_stop(struct seq_file *m, void *v)
5004 {
5005         mutex_unlock(&trace_eval_mutex);
5006 }
5007
5008 static int eval_map_show(struct seq_file *m, void *v)
5009 {
5010         union trace_eval_map_item *ptr = v;
5011
5012         seq_printf(m, "%s %ld (%s)\n",
5013                    ptr->map.eval_string, ptr->map.eval_value,
5014                    ptr->map.system);
5015
5016         return 0;
5017 }
5018
5019 static const struct seq_operations tracing_eval_map_seq_ops = {
5020         .start          = eval_map_start,
5021         .next           = eval_map_next,
5022         .stop           = eval_map_stop,
5023         .show           = eval_map_show,
5024 };
5025
5026 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5027 {
5028         if (tracing_disabled)
5029                 return -ENODEV;
5030
5031         return seq_open(filp, &tracing_eval_map_seq_ops);
5032 }
5033
5034 static const struct file_operations tracing_eval_map_fops = {
5035         .open           = tracing_eval_map_open,
5036         .read           = seq_read,
5037         .llseek         = seq_lseek,
5038         .release        = seq_release,
5039 };
5040
5041 static inline union trace_eval_map_item *
5042 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5043 {
5044         /* Return tail of array given the head */
5045         return ptr + ptr->head.length + 1;
5046 }
5047
5048 static void
5049 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5050                            int len)
5051 {
5052         struct trace_eval_map **stop;
5053         struct trace_eval_map **map;
5054         union trace_eval_map_item *map_array;
5055         union trace_eval_map_item *ptr;
5056
5057         stop = start + len;
5058
5059         /*
5060          * The trace_eval_maps contains the map plus a head and tail item,
5061          * where the head holds the module and length of array, and the
5062          * tail holds a pointer to the next list.
5063          */
5064         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5065         if (!map_array) {
5066                 pr_warn("Unable to allocate trace eval mapping\n");
5067                 return;
5068         }
5069
5070         mutex_lock(&trace_eval_mutex);
5071
5072         if (!trace_eval_maps)
5073                 trace_eval_maps = map_array;
5074         else {
5075                 ptr = trace_eval_maps;
5076                 for (;;) {
5077                         ptr = trace_eval_jmp_to_tail(ptr);
5078                         if (!ptr->tail.next)
5079                                 break;
5080                         ptr = ptr->tail.next;
5081
5082                 }
5083                 ptr->tail.next = map_array;
5084         }
5085         map_array->head.mod = mod;
5086         map_array->head.length = len;
5087         map_array++;
5088
5089         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5090                 map_array->map = **map;
5091                 map_array++;
5092         }
5093         memset(map_array, 0, sizeof(*map_array));
5094
5095         mutex_unlock(&trace_eval_mutex);
5096 }
5097
5098 static void trace_create_eval_file(struct dentry *d_tracer)
5099 {
5100         trace_create_file("eval_map", 0444, d_tracer,
5101                           NULL, &tracing_eval_map_fops);
5102 }
5103
5104 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5105 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5106 static inline void trace_insert_eval_map_file(struct module *mod,
5107                               struct trace_eval_map **start, int len) { }
5108 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5109
5110 static void trace_insert_eval_map(struct module *mod,
5111                                   struct trace_eval_map **start, int len)
5112 {
5113         struct trace_eval_map **map;
5114
5115         if (len <= 0)
5116                 return;
5117
5118         map = start;
5119
5120         trace_event_eval_update(map, len);
5121
5122         trace_insert_eval_map_file(mod, start, len);
5123 }
5124
5125 static ssize_t
5126 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5127                        size_t cnt, loff_t *ppos)
5128 {
5129         struct trace_array *tr = filp->private_data;
5130         char buf[MAX_TRACER_SIZE+2];
5131         int r;
5132
5133         mutex_lock(&trace_types_lock);
5134         r = sprintf(buf, "%s\n", tr->current_trace->name);
5135         mutex_unlock(&trace_types_lock);
5136
5137         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5138 }
5139
5140 int tracer_init(struct tracer *t, struct trace_array *tr)
5141 {
5142         tracing_reset_online_cpus(&tr->trace_buffer);
5143         return t->init(tr);
5144 }
5145
5146 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5147 {
5148         int cpu;
5149
5150         for_each_tracing_cpu(cpu)
5151                 per_cpu_ptr(buf->data, cpu)->entries = val;
5152 }
5153
5154 #ifdef CONFIG_TRACER_MAX_TRACE
5155 /* resize @tr's buffer to the size of @size_tr's entries */
5156 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5157                                         struct trace_buffer *size_buf, int cpu_id)
5158 {
5159         int cpu, ret = 0;
5160
5161         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5162                 for_each_tracing_cpu(cpu) {
5163                         ret = ring_buffer_resize(trace_buf->buffer,
5164                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5165                         if (ret < 0)
5166                                 break;
5167                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5168                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5169                 }
5170         } else {
5171                 ret = ring_buffer_resize(trace_buf->buffer,
5172                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5173                 if (ret == 0)
5174                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5175                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5176         }
5177
5178         return ret;
5179 }
5180 #endif /* CONFIG_TRACER_MAX_TRACE */
5181
5182 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5183                                         unsigned long size, int cpu)
5184 {
5185         int ret;
5186
5187         /*
5188          * If kernel or user changes the size of the ring buffer
5189          * we use the size that was given, and we can forget about
5190          * expanding it later.
5191          */
5192         ring_buffer_expanded = true;
5193
5194         /* May be called before buffers are initialized */
5195         if (!tr->trace_buffer.buffer)
5196                 return 0;
5197
5198         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5199         if (ret < 0)
5200                 return ret;
5201
5202 #ifdef CONFIG_TRACER_MAX_TRACE
5203         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5204             !tr->current_trace->use_max_tr)
5205                 goto out;
5206
5207         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5208         if (ret < 0) {
5209                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5210                                                      &tr->trace_buffer, cpu);
5211                 if (r < 0) {
5212                         /*
5213                          * AARGH! We are left with different
5214                          * size max buffer!!!!
5215                          * The max buffer is our "snapshot" buffer.
5216                          * When a tracer needs a snapshot (one of the
5217                          * latency tracers), it swaps the max buffer
5218                          * with the saved snap shot. We succeeded to
5219                          * update the size of the main buffer, but failed to
5220                          * update the size of the max buffer. But when we tried
5221                          * to reset the main buffer to the original size, we
5222                          * failed there too. This is very unlikely to
5223                          * happen, but if it does, warn and kill all
5224                          * tracing.
5225                          */
5226                         WARN_ON(1);
5227                         tracing_disabled = 1;
5228                 }
5229                 return ret;
5230         }
5231
5232         if (cpu == RING_BUFFER_ALL_CPUS)
5233                 set_buffer_entries(&tr->max_buffer, size);
5234         else
5235                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5236
5237  out:
5238 #endif /* CONFIG_TRACER_MAX_TRACE */
5239
5240         if (cpu == RING_BUFFER_ALL_CPUS)
5241                 set_buffer_entries(&tr->trace_buffer, size);
5242         else
5243                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5244
5245         return ret;
5246 }
5247
5248 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5249                                           unsigned long size, int cpu_id)
5250 {
5251         int ret = size;
5252
5253         mutex_lock(&trace_types_lock);
5254
5255         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5256                 /* make sure, this cpu is enabled in the mask */
5257                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5258                         ret = -EINVAL;
5259                         goto out;
5260                 }
5261         }
5262
5263         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5264         if (ret < 0)
5265                 ret = -ENOMEM;
5266
5267 out:
5268         mutex_unlock(&trace_types_lock);
5269
5270         return ret;
5271 }
5272
5273
5274 /**
5275  * tracing_update_buffers - used by tracing facility to expand ring buffers
5276  *
5277  * To save on memory when the tracing is never used on a system with it
5278  * configured in. The ring buffers are set to a minimum size. But once
5279  * a user starts to use the tracing facility, then they need to grow
5280  * to their default size.
5281  *
5282  * This function is to be called when a tracer is about to be used.
5283  */
5284 int tracing_update_buffers(void)
5285 {
5286         int ret = 0;
5287
5288         mutex_lock(&trace_types_lock);
5289         if (!ring_buffer_expanded)
5290                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5291                                                 RING_BUFFER_ALL_CPUS);
5292         mutex_unlock(&trace_types_lock);
5293
5294         return ret;
5295 }
5296
5297 struct trace_option_dentry;
5298
5299 static void
5300 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5301
5302 /*
5303  * Used to clear out the tracer before deletion of an instance.
5304  * Must have trace_types_lock held.
5305  */
5306 static void tracing_set_nop(struct trace_array *tr)
5307 {
5308         if (tr->current_trace == &nop_trace)
5309                 return;
5310         
5311         tr->current_trace->enabled--;
5312
5313         if (tr->current_trace->reset)
5314                 tr->current_trace->reset(tr);
5315
5316         tr->current_trace = &nop_trace;
5317 }
5318
5319 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5320 {
5321         /* Only enable if the directory has been created already. */
5322         if (!tr->dir)
5323                 return;
5324
5325         create_trace_option_files(tr, t);
5326 }
5327
5328 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5329 {
5330         struct tracer *t;
5331 #ifdef CONFIG_TRACER_MAX_TRACE
5332         bool had_max_tr;
5333 #endif
5334         int ret = 0;
5335
5336         mutex_lock(&trace_types_lock);
5337
5338         if (!ring_buffer_expanded) {
5339                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5340                                                 RING_BUFFER_ALL_CPUS);
5341                 if (ret < 0)
5342                         goto out;
5343                 ret = 0;
5344         }
5345
5346         for (t = trace_types; t; t = t->next) {
5347                 if (strcmp(t->name, buf) == 0)
5348                         break;
5349         }
5350         if (!t) {
5351                 ret = -EINVAL;
5352                 goto out;
5353         }
5354         if (t == tr->current_trace)
5355                 goto out;
5356
5357         /* Some tracers won't work on kernel command line */
5358         if (system_state < SYSTEM_RUNNING && t->noboot) {
5359                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5360                         t->name);
5361                 goto out;
5362         }
5363
5364         /* Some tracers are only allowed for the top level buffer */
5365         if (!trace_ok_for_array(t, tr)) {
5366                 ret = -EINVAL;
5367                 goto out;
5368         }
5369
5370         /* If trace pipe files are being read, we can't change the tracer */
5371         if (tr->current_trace->ref) {
5372                 ret = -EBUSY;
5373                 goto out;
5374         }
5375
5376         trace_branch_disable();
5377
5378         tr->current_trace->enabled--;
5379
5380         if (tr->current_trace->reset)
5381                 tr->current_trace->reset(tr);
5382
5383         /* Current trace needs to be nop_trace before synchronize_sched */
5384         tr->current_trace = &nop_trace;
5385
5386 #ifdef CONFIG_TRACER_MAX_TRACE
5387         had_max_tr = tr->allocated_snapshot;
5388
5389         if (had_max_tr && !t->use_max_tr) {
5390                 /*
5391                  * We need to make sure that the update_max_tr sees that
5392                  * current_trace changed to nop_trace to keep it from
5393                  * swapping the buffers after we resize it.
5394                  * The update_max_tr is called from interrupts disabled
5395                  * so a synchronized_sched() is sufficient.
5396                  */
5397                 synchronize_sched();
5398                 free_snapshot(tr);
5399         }
5400 #endif
5401
5402 #ifdef CONFIG_TRACER_MAX_TRACE
5403         if (t->use_max_tr && !had_max_tr) {
5404                 ret = tracing_alloc_snapshot_instance(tr);
5405                 if (ret < 0)
5406                         goto out;
5407         }
5408 #endif
5409
5410         if (t->init) {
5411                 ret = tracer_init(t, tr);
5412                 if (ret)
5413                         goto out;
5414         }
5415
5416         tr->current_trace = t;
5417         tr->current_trace->enabled++;
5418         trace_branch_enable(tr);
5419  out:
5420         mutex_unlock(&trace_types_lock);
5421
5422         return ret;
5423 }
5424
5425 static ssize_t
5426 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5427                         size_t cnt, loff_t *ppos)
5428 {
5429         struct trace_array *tr = filp->private_data;
5430         char buf[MAX_TRACER_SIZE+1];
5431         int i;
5432         size_t ret;
5433         int err;
5434
5435         ret = cnt;
5436
5437         if (cnt > MAX_TRACER_SIZE)
5438                 cnt = MAX_TRACER_SIZE;
5439
5440         if (copy_from_user(buf, ubuf, cnt))
5441                 return -EFAULT;
5442
5443         buf[cnt] = 0;
5444
5445         /* strip ending whitespace. */
5446         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5447                 buf[i] = 0;
5448
5449         err = tracing_set_tracer(tr, buf);
5450         if (err)
5451                 return err;
5452
5453         *ppos += ret;
5454
5455         return ret;
5456 }
5457
5458 static ssize_t
5459 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5460                    size_t cnt, loff_t *ppos)
5461 {
5462         char buf[64];
5463         int r;
5464
5465         r = snprintf(buf, sizeof(buf), "%ld\n",
5466                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5467         if (r > sizeof(buf))
5468                 r = sizeof(buf);
5469         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5470 }
5471
5472 static ssize_t
5473 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5474                     size_t cnt, loff_t *ppos)
5475 {
5476         unsigned long val;
5477         int ret;
5478
5479         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5480         if (ret)
5481                 return ret;
5482
5483         *ptr = val * 1000;
5484
5485         return cnt;
5486 }
5487
5488 static ssize_t
5489 tracing_thresh_read(struct file *filp, char __user *ubuf,
5490                     size_t cnt, loff_t *ppos)
5491 {
5492         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5493 }
5494
5495 static ssize_t
5496 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5497                      size_t cnt, loff_t *ppos)
5498 {
5499         struct trace_array *tr = filp->private_data;
5500         int ret;
5501
5502         mutex_lock(&trace_types_lock);
5503         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5504         if (ret < 0)
5505                 goto out;
5506
5507         if (tr->current_trace->update_thresh) {
5508                 ret = tr->current_trace->update_thresh(tr);
5509                 if (ret < 0)
5510                         goto out;
5511         }
5512
5513         ret = cnt;
5514 out:
5515         mutex_unlock(&trace_types_lock);
5516
5517         return ret;
5518 }
5519
5520 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5521
5522 static ssize_t
5523 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5524                      size_t cnt, loff_t *ppos)
5525 {
5526         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5527 }
5528
5529 static ssize_t
5530 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5531                       size_t cnt, loff_t *ppos)
5532 {
5533         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5534 }
5535
5536 #endif
5537
5538 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5539 {
5540         struct trace_array *tr = inode->i_private;
5541         struct trace_iterator *iter;
5542         int ret = 0;
5543
5544         if (tracing_disabled)
5545                 return -ENODEV;
5546
5547         if (trace_array_get(tr) < 0)
5548                 return -ENODEV;
5549
5550         mutex_lock(&trace_types_lock);
5551
5552         /* create a buffer to store the information to pass to userspace */
5553         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5554         if (!iter) {
5555                 ret = -ENOMEM;
5556                 __trace_array_put(tr);
5557                 goto out;
5558         }
5559
5560         trace_seq_init(&iter->seq);
5561         iter->trace = tr->current_trace;
5562
5563         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5564                 ret = -ENOMEM;
5565                 goto fail;
5566         }
5567
5568         /* trace pipe does not show start of buffer */
5569         cpumask_setall(iter->started);
5570
5571         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5572                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5573
5574         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5575         if (trace_clocks[tr->clock_id].in_ns)
5576                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5577
5578         iter->tr = tr;
5579         iter->trace_buffer = &tr->trace_buffer;
5580         iter->cpu_file = tracing_get_cpu(inode);
5581         mutex_init(&iter->mutex);
5582         filp->private_data = iter;
5583
5584         if (iter->trace->pipe_open)
5585                 iter->trace->pipe_open(iter);
5586
5587         nonseekable_open(inode, filp);
5588
5589         tr->current_trace->ref++;
5590 out:
5591         mutex_unlock(&trace_types_lock);
5592         return ret;
5593
5594 fail:
5595         kfree(iter->trace);
5596         kfree(iter);
5597         __trace_array_put(tr);
5598         mutex_unlock(&trace_types_lock);
5599         return ret;
5600 }
5601
5602 static int tracing_release_pipe(struct inode *inode, struct file *file)
5603 {
5604         struct trace_iterator *iter = file->private_data;
5605         struct trace_array *tr = inode->i_private;
5606
5607         mutex_lock(&trace_types_lock);
5608
5609         tr->current_trace->ref--;
5610
5611         if (iter->trace->pipe_close)
5612                 iter->trace->pipe_close(iter);
5613
5614         mutex_unlock(&trace_types_lock);
5615
5616         free_cpumask_var(iter->started);
5617         mutex_destroy(&iter->mutex);
5618         kfree(iter);
5619
5620         trace_array_put(tr);
5621
5622         return 0;
5623 }
5624
5625 static __poll_t
5626 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5627 {
5628         struct trace_array *tr = iter->tr;
5629
5630         /* Iterators are static, they should be filled or empty */
5631         if (trace_buffer_iter(iter, iter->cpu_file))
5632                 return EPOLLIN | EPOLLRDNORM;
5633
5634         if (tr->trace_flags & TRACE_ITER_BLOCK)
5635                 /*
5636                  * Always select as readable when in blocking mode
5637                  */
5638                 return EPOLLIN | EPOLLRDNORM;
5639         else
5640                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5641                                              filp, poll_table);
5642 }
5643
5644 static __poll_t
5645 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5646 {
5647         struct trace_iterator *iter = filp->private_data;
5648
5649         return trace_poll(iter, filp, poll_table);
5650 }
5651
5652 /* Must be called with iter->mutex held. */
5653 static int tracing_wait_pipe(struct file *filp)
5654 {
5655         struct trace_iterator *iter = filp->private_data;
5656         int ret;
5657
5658         while (trace_empty(iter)) {
5659
5660                 if ((filp->f_flags & O_NONBLOCK)) {
5661                         return -EAGAIN;
5662                 }
5663
5664                 /*
5665                  * We block until we read something and tracing is disabled.
5666                  * We still block if tracing is disabled, but we have never
5667                  * read anything. This allows a user to cat this file, and
5668                  * then enable tracing. But after we have read something,
5669                  * we give an EOF when tracing is again disabled.
5670                  *
5671                  * iter->pos will be 0 if we haven't read anything.
5672                  */
5673                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5674                         break;
5675
5676                 mutex_unlock(&iter->mutex);
5677
5678                 ret = wait_on_pipe(iter, false);
5679
5680                 mutex_lock(&iter->mutex);
5681
5682                 if (ret)
5683                         return ret;
5684         }
5685
5686         return 1;
5687 }
5688
5689 /*
5690  * Consumer reader.
5691  */
5692 static ssize_t
5693 tracing_read_pipe(struct file *filp, char __user *ubuf,
5694                   size_t cnt, loff_t *ppos)
5695 {
5696         struct trace_iterator *iter = filp->private_data;
5697         ssize_t sret;
5698
5699         /*
5700          * Avoid more than one consumer on a single file descriptor
5701          * This is just a matter of traces coherency, the ring buffer itself
5702          * is protected.
5703          */
5704         mutex_lock(&iter->mutex);
5705
5706         /* return any leftover data */
5707         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5708         if (sret != -EBUSY)
5709                 goto out;
5710
5711         trace_seq_init(&iter->seq);
5712
5713         if (iter->trace->read) {
5714                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5715                 if (sret)
5716                         goto out;
5717         }
5718
5719 waitagain:
5720         sret = tracing_wait_pipe(filp);
5721         if (sret <= 0)
5722                 goto out;
5723
5724         /* stop when tracing is finished */
5725         if (trace_empty(iter)) {
5726                 sret = 0;
5727                 goto out;
5728         }
5729
5730         if (cnt >= PAGE_SIZE)
5731                 cnt = PAGE_SIZE - 1;
5732
5733         /* reset all but tr, trace, and overruns */
5734         memset(&iter->seq, 0,
5735                sizeof(struct trace_iterator) -
5736                offsetof(struct trace_iterator, seq));
5737         cpumask_clear(iter->started);
5738         iter->pos = -1;
5739
5740         trace_event_read_lock();
5741         trace_access_lock(iter->cpu_file);
5742         while (trace_find_next_entry_inc(iter) != NULL) {
5743                 enum print_line_t ret;
5744                 int save_len = iter->seq.seq.len;
5745
5746                 ret = print_trace_line(iter);
5747                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5748                         /* don't print partial lines */
5749                         iter->seq.seq.len = save_len;
5750                         break;
5751                 }
5752                 if (ret != TRACE_TYPE_NO_CONSUME)
5753                         trace_consume(iter);
5754
5755                 if (trace_seq_used(&iter->seq) >= cnt)
5756                         break;
5757
5758                 /*
5759                  * Setting the full flag means we reached the trace_seq buffer
5760                  * size and we should leave by partial output condition above.
5761                  * One of the trace_seq_* functions is not used properly.
5762                  */
5763                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5764                           iter->ent->type);
5765         }
5766         trace_access_unlock(iter->cpu_file);
5767         trace_event_read_unlock();
5768
5769         /* Now copy what we have to the user */
5770         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5771         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5772                 trace_seq_init(&iter->seq);
5773
5774         /*
5775          * If there was nothing to send to user, in spite of consuming trace
5776          * entries, go back to wait for more entries.
5777          */
5778         if (sret == -EBUSY)
5779                 goto waitagain;
5780
5781 out:
5782         mutex_unlock(&iter->mutex);
5783
5784         return sret;
5785 }
5786
5787 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5788                                      unsigned int idx)
5789 {
5790         __free_page(spd->pages[idx]);
5791 }
5792
5793 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5794         .can_merge              = 0,
5795         .confirm                = generic_pipe_buf_confirm,
5796         .release                = generic_pipe_buf_release,
5797         .steal                  = generic_pipe_buf_steal,
5798         .get                    = generic_pipe_buf_get,
5799 };
5800
5801 static size_t
5802 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5803 {
5804         size_t count;
5805         int save_len;
5806         int ret;
5807
5808         /* Seq buffer is page-sized, exactly what we need. */
5809         for (;;) {
5810                 save_len = iter->seq.seq.len;
5811                 ret = print_trace_line(iter);
5812
5813                 if (trace_seq_has_overflowed(&iter->seq)) {
5814                         iter->seq.seq.len = save_len;
5815                         break;
5816                 }
5817
5818                 /*
5819                  * This should not be hit, because it should only
5820                  * be set if the iter->seq overflowed. But check it
5821                  * anyway to be safe.
5822                  */
5823                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5824                         iter->seq.seq.len = save_len;
5825                         break;
5826                 }
5827
5828                 count = trace_seq_used(&iter->seq) - save_len;
5829                 if (rem < count) {
5830                         rem = 0;
5831                         iter->seq.seq.len = save_len;
5832                         break;
5833                 }
5834
5835                 if (ret != TRACE_TYPE_NO_CONSUME)
5836                         trace_consume(iter);
5837                 rem -= count;
5838                 if (!trace_find_next_entry_inc(iter))   {
5839                         rem = 0;
5840                         iter->ent = NULL;
5841                         break;
5842                 }
5843         }
5844
5845         return rem;
5846 }
5847
5848 static ssize_t tracing_splice_read_pipe(struct file *filp,
5849                                         loff_t *ppos,
5850                                         struct pipe_inode_info *pipe,
5851                                         size_t len,
5852                                         unsigned int flags)
5853 {
5854         struct page *pages_def[PIPE_DEF_BUFFERS];
5855         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5856         struct trace_iterator *iter = filp->private_data;
5857         struct splice_pipe_desc spd = {
5858                 .pages          = pages_def,
5859                 .partial        = partial_def,
5860                 .nr_pages       = 0, /* This gets updated below. */
5861                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5862                 .ops            = &tracing_pipe_buf_ops,
5863                 .spd_release    = tracing_spd_release_pipe,
5864         };
5865         ssize_t ret;
5866         size_t rem;
5867         unsigned int i;
5868
5869         if (splice_grow_spd(pipe, &spd))
5870                 return -ENOMEM;
5871
5872         mutex_lock(&iter->mutex);
5873
5874         if (iter->trace->splice_read) {
5875                 ret = iter->trace->splice_read(iter, filp,
5876                                                ppos, pipe, len, flags);
5877                 if (ret)
5878                         goto out_err;
5879         }
5880
5881         ret = tracing_wait_pipe(filp);
5882         if (ret <= 0)
5883                 goto out_err;
5884
5885         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5886                 ret = -EFAULT;
5887                 goto out_err;
5888         }
5889
5890         trace_event_read_lock();
5891         trace_access_lock(iter->cpu_file);
5892
5893         /* Fill as many pages as possible. */
5894         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5895                 spd.pages[i] = alloc_page(GFP_KERNEL);
5896                 if (!spd.pages[i])
5897                         break;
5898
5899                 rem = tracing_fill_pipe_page(rem, iter);
5900
5901                 /* Copy the data into the page, so we can start over. */
5902                 ret = trace_seq_to_buffer(&iter->seq,
5903                                           page_address(spd.pages[i]),
5904                                           trace_seq_used(&iter->seq));
5905                 if (ret < 0) {
5906                         __free_page(spd.pages[i]);
5907                         break;
5908                 }
5909                 spd.partial[i].offset = 0;
5910                 spd.partial[i].len = trace_seq_used(&iter->seq);
5911
5912                 trace_seq_init(&iter->seq);
5913         }
5914
5915         trace_access_unlock(iter->cpu_file);
5916         trace_event_read_unlock();
5917         mutex_unlock(&iter->mutex);
5918
5919         spd.nr_pages = i;
5920
5921         if (i)
5922                 ret = splice_to_pipe(pipe, &spd);
5923         else
5924                 ret = 0;
5925 out:
5926         splice_shrink_spd(&spd);
5927         return ret;
5928
5929 out_err:
5930         mutex_unlock(&iter->mutex);
5931         goto out;
5932 }
5933
5934 static ssize_t
5935 tracing_entries_read(struct file *filp, char __user *ubuf,
5936                      size_t cnt, loff_t *ppos)
5937 {
5938         struct inode *inode = file_inode(filp);
5939         struct trace_array *tr = inode->i_private;
5940         int cpu = tracing_get_cpu(inode);
5941         char buf[64];
5942         int r = 0;
5943         ssize_t ret;
5944
5945         mutex_lock(&trace_types_lock);
5946
5947         if (cpu == RING_BUFFER_ALL_CPUS) {
5948                 int cpu, buf_size_same;
5949                 unsigned long size;
5950
5951                 size = 0;
5952                 buf_size_same = 1;
5953                 /* check if all cpu sizes are same */
5954                 for_each_tracing_cpu(cpu) {
5955                         /* fill in the size from first enabled cpu */
5956                         if (size == 0)
5957                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5958                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5959                                 buf_size_same = 0;
5960                                 break;
5961                         }
5962                 }
5963
5964                 if (buf_size_same) {
5965                         if (!ring_buffer_expanded)
5966                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5967                                             size >> 10,
5968                                             trace_buf_size >> 10);
5969                         else
5970                                 r = sprintf(buf, "%lu\n", size >> 10);
5971                 } else
5972                         r = sprintf(buf, "X\n");
5973         } else
5974                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5975
5976         mutex_unlock(&trace_types_lock);
5977
5978         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5979         return ret;
5980 }
5981
5982 static ssize_t
5983 tracing_entries_write(struct file *filp, const char __user *ubuf,
5984                       size_t cnt, loff_t *ppos)
5985 {
5986         struct inode *inode = file_inode(filp);
5987         struct trace_array *tr = inode->i_private;
5988         unsigned long val;
5989         int ret;
5990
5991         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5992         if (ret)
5993                 return ret;
5994
5995         /* must have at least 1 entry */
5996         if (!val)
5997                 return -EINVAL;
5998
5999         /* value is in KB */
6000         val <<= 10;
6001         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6002         if (ret < 0)
6003                 return ret;
6004
6005         *ppos += cnt;
6006
6007         return cnt;
6008 }
6009
6010 static ssize_t
6011 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6012                                 size_t cnt, loff_t *ppos)
6013 {
6014         struct trace_array *tr = filp->private_data;
6015         char buf[64];
6016         int r, cpu;
6017         unsigned long size = 0, expanded_size = 0;
6018
6019         mutex_lock(&trace_types_lock);
6020         for_each_tracing_cpu(cpu) {
6021                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6022                 if (!ring_buffer_expanded)
6023                         expanded_size += trace_buf_size >> 10;
6024         }
6025         if (ring_buffer_expanded)
6026                 r = sprintf(buf, "%lu\n", size);
6027         else
6028                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6029         mutex_unlock(&trace_types_lock);
6030
6031         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6032 }
6033
6034 static ssize_t
6035 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6036                           size_t cnt, loff_t *ppos)
6037 {
6038         /*
6039          * There is no need to read what the user has written, this function
6040          * is just to make sure that there is no error when "echo" is used
6041          */
6042
6043         *ppos += cnt;
6044
6045         return cnt;
6046 }
6047
6048 static int
6049 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6050 {
6051         struct trace_array *tr = inode->i_private;
6052
6053         /* disable tracing ? */
6054         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6055                 tracer_tracing_off(tr);
6056         /* resize the ring buffer to 0 */
6057         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6058
6059         trace_array_put(tr);
6060
6061         return 0;
6062 }
6063
6064 static ssize_t
6065 tracing_mark_write(struct file *filp, const char __user *ubuf,
6066                                         size_t cnt, loff_t *fpos)
6067 {
6068         struct trace_array *tr = filp->private_data;
6069         struct ring_buffer_event *event;
6070         enum event_trigger_type tt = ETT_NONE;
6071         struct ring_buffer *buffer;
6072         struct print_entry *entry;
6073         unsigned long irq_flags;
6074         const char faulted[] = "<faulted>";
6075         ssize_t written;
6076         int size;
6077         int len;
6078
6079 /* Used in tracing_mark_raw_write() as well */
6080 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6081
6082         if (tracing_disabled)
6083                 return -EINVAL;
6084
6085         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6086                 return -EINVAL;
6087
6088         if (cnt > TRACE_BUF_SIZE)
6089                 cnt = TRACE_BUF_SIZE;
6090
6091         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6092
6093         local_save_flags(irq_flags);
6094         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6095
6096         /* If less than "<faulted>", then make sure we can still add that */
6097         if (cnt < FAULTED_SIZE)
6098                 size += FAULTED_SIZE - cnt;
6099
6100         buffer = tr->trace_buffer.buffer;
6101         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6102                                             irq_flags, preempt_count());
6103         if (unlikely(!event))
6104                 /* Ring buffer disabled, return as if not open for write */
6105                 return -EBADF;
6106
6107         entry = ring_buffer_event_data(event);
6108         entry->ip = _THIS_IP_;
6109
6110         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6111         if (len) {
6112                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6113                 cnt = FAULTED_SIZE;
6114                 written = -EFAULT;
6115         } else
6116                 written = cnt;
6117         len = cnt;
6118
6119         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6120                 /* do not add \n before testing triggers, but add \0 */
6121                 entry->buf[cnt] = '\0';
6122                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6123         }
6124
6125         if (entry->buf[cnt - 1] != '\n') {
6126                 entry->buf[cnt] = '\n';
6127                 entry->buf[cnt + 1] = '\0';
6128         } else
6129                 entry->buf[cnt] = '\0';
6130
6131         __buffer_unlock_commit(buffer, event);
6132
6133         if (tt)
6134                 event_triggers_post_call(tr->trace_marker_file, tt);
6135
6136         if (written > 0)
6137                 *fpos += written;
6138
6139         return written;
6140 }
6141
6142 /* Limit it for now to 3K (including tag) */
6143 #define RAW_DATA_MAX_SIZE (1024*3)
6144
6145 static ssize_t
6146 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6147                                         size_t cnt, loff_t *fpos)
6148 {
6149         struct trace_array *tr = filp->private_data;
6150         struct ring_buffer_event *event;
6151         struct ring_buffer *buffer;
6152         struct raw_data_entry *entry;
6153         const char faulted[] = "<faulted>";
6154         unsigned long irq_flags;
6155         ssize_t written;
6156         int size;
6157         int len;
6158
6159 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6160
6161         if (tracing_disabled)
6162                 return -EINVAL;
6163
6164         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6165                 return -EINVAL;
6166
6167         /* The marker must at least have a tag id */
6168         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6169                 return -EINVAL;
6170
6171         if (cnt > TRACE_BUF_SIZE)
6172                 cnt = TRACE_BUF_SIZE;
6173
6174         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6175
6176         local_save_flags(irq_flags);
6177         size = sizeof(*entry) + cnt;
6178         if (cnt < FAULT_SIZE_ID)
6179                 size += FAULT_SIZE_ID - cnt;
6180
6181         buffer = tr->trace_buffer.buffer;
6182         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6183                                             irq_flags, preempt_count());
6184         if (!event)
6185                 /* Ring buffer disabled, return as if not open for write */
6186                 return -EBADF;
6187
6188         entry = ring_buffer_event_data(event);
6189
6190         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6191         if (len) {
6192                 entry->id = -1;
6193                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6194                 written = -EFAULT;
6195         } else
6196                 written = cnt;
6197
6198         __buffer_unlock_commit(buffer, event);
6199
6200         if (written > 0)
6201                 *fpos += written;
6202
6203         return written;
6204 }
6205
6206 static int tracing_clock_show(struct seq_file *m, void *v)
6207 {
6208         struct trace_array *tr = m->private;
6209         int i;
6210
6211         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6212                 seq_printf(m,
6213                         "%s%s%s%s", i ? " " : "",
6214                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6215                         i == tr->clock_id ? "]" : "");
6216         seq_putc(m, '\n');
6217
6218         return 0;
6219 }
6220
6221 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6222 {
6223         int i;
6224
6225         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6226                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6227                         break;
6228         }
6229         if (i == ARRAY_SIZE(trace_clocks))
6230                 return -EINVAL;
6231
6232         mutex_lock(&trace_types_lock);
6233
6234         tr->clock_id = i;
6235
6236         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6237
6238         /*
6239          * New clock may not be consistent with the previous clock.
6240          * Reset the buffer so that it doesn't have incomparable timestamps.
6241          */
6242         tracing_reset_online_cpus(&tr->trace_buffer);
6243
6244 #ifdef CONFIG_TRACER_MAX_TRACE
6245         if (tr->max_buffer.buffer)
6246                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6247         tracing_reset_online_cpus(&tr->max_buffer);
6248 #endif
6249
6250         mutex_unlock(&trace_types_lock);
6251
6252         return 0;
6253 }
6254
6255 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6256                                    size_t cnt, loff_t *fpos)
6257 {
6258         struct seq_file *m = filp->private_data;
6259         struct trace_array *tr = m->private;
6260         char buf[64];
6261         const char *clockstr;
6262         int ret;
6263
6264         if (cnt >= sizeof(buf))
6265                 return -EINVAL;
6266
6267         if (copy_from_user(buf, ubuf, cnt))
6268                 return -EFAULT;
6269
6270         buf[cnt] = 0;
6271
6272         clockstr = strstrip(buf);
6273
6274         ret = tracing_set_clock(tr, clockstr);
6275         if (ret)
6276                 return ret;
6277
6278         *fpos += cnt;
6279
6280         return cnt;
6281 }
6282
6283 static int tracing_clock_open(struct inode *inode, struct file *file)
6284 {
6285         struct trace_array *tr = inode->i_private;
6286         int ret;
6287
6288         if (tracing_disabled)
6289                 return -ENODEV;
6290
6291         if (trace_array_get(tr))
6292                 return -ENODEV;
6293
6294         ret = single_open(file, tracing_clock_show, inode->i_private);
6295         if (ret < 0)
6296                 trace_array_put(tr);
6297
6298         return ret;
6299 }
6300
6301 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6302 {
6303         struct trace_array *tr = m->private;
6304
6305         mutex_lock(&trace_types_lock);
6306
6307         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6308                 seq_puts(m, "delta [absolute]\n");
6309         else
6310                 seq_puts(m, "[delta] absolute\n");
6311
6312         mutex_unlock(&trace_types_lock);
6313
6314         return 0;
6315 }
6316
6317 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6318 {
6319         struct trace_array *tr = inode->i_private;
6320         int ret;
6321
6322         if (tracing_disabled)
6323                 return -ENODEV;
6324
6325         if (trace_array_get(tr))
6326                 return -ENODEV;
6327
6328         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6329         if (ret < 0)
6330                 trace_array_put(tr);
6331
6332         return ret;
6333 }
6334
6335 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6336 {
6337         int ret = 0;
6338
6339         mutex_lock(&trace_types_lock);
6340
6341         if (abs && tr->time_stamp_abs_ref++)
6342                 goto out;
6343
6344         if (!abs) {
6345                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6346                         ret = -EINVAL;
6347                         goto out;
6348                 }
6349
6350                 if (--tr->time_stamp_abs_ref)
6351                         goto out;
6352         }
6353
6354         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6355
6356 #ifdef CONFIG_TRACER_MAX_TRACE
6357         if (tr->max_buffer.buffer)
6358                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6359 #endif
6360  out:
6361         mutex_unlock(&trace_types_lock);
6362
6363         return ret;
6364 }
6365
6366 struct ftrace_buffer_info {
6367         struct trace_iterator   iter;
6368         void                    *spare;
6369         unsigned int            spare_cpu;
6370         unsigned int            read;
6371 };
6372
6373 #ifdef CONFIG_TRACER_SNAPSHOT
6374 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6375 {
6376         struct trace_array *tr = inode->i_private;
6377         struct trace_iterator *iter;
6378         struct seq_file *m;
6379         int ret = 0;
6380
6381         if (trace_array_get(tr) < 0)
6382                 return -ENODEV;
6383
6384         if (file->f_mode & FMODE_READ) {
6385                 iter = __tracing_open(inode, file, true);
6386                 if (IS_ERR(iter))
6387                         ret = PTR_ERR(iter);
6388         } else {
6389                 /* Writes still need the seq_file to hold the private data */
6390                 ret = -ENOMEM;
6391                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6392                 if (!m)
6393                         goto out;
6394                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6395                 if (!iter) {
6396                         kfree(m);
6397                         goto out;
6398                 }
6399                 ret = 0;
6400
6401                 iter->tr = tr;
6402                 iter->trace_buffer = &tr->max_buffer;
6403                 iter->cpu_file = tracing_get_cpu(inode);
6404                 m->private = iter;
6405                 file->private_data = m;
6406         }
6407 out:
6408         if (ret < 0)
6409                 trace_array_put(tr);
6410
6411         return ret;
6412 }
6413
6414 static ssize_t
6415 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6416                        loff_t *ppos)
6417 {
6418         struct seq_file *m = filp->private_data;
6419         struct trace_iterator *iter = m->private;
6420         struct trace_array *tr = iter->tr;
6421         unsigned long val;
6422         int ret;
6423
6424         ret = tracing_update_buffers();
6425         if (ret < 0)
6426                 return ret;
6427
6428         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6429         if (ret)
6430                 return ret;
6431
6432         mutex_lock(&trace_types_lock);
6433
6434         if (tr->current_trace->use_max_tr) {
6435                 ret = -EBUSY;
6436                 goto out;
6437         }
6438
6439         switch (val) {
6440         case 0:
6441                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6442                         ret = -EINVAL;
6443                         break;
6444                 }
6445                 if (tr->allocated_snapshot)
6446                         free_snapshot(tr);
6447                 break;
6448         case 1:
6449 /* Only allow per-cpu swap if the ring buffer supports it */
6450 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6451                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6452                         ret = -EINVAL;
6453                         break;
6454                 }
6455 #endif
6456                 if (!tr->allocated_snapshot) {
6457                         ret = tracing_alloc_snapshot_instance(tr);
6458                         if (ret < 0)
6459                                 break;
6460                 }
6461                 local_irq_disable();
6462                 /* Now, we're going to swap */
6463                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6464                         update_max_tr(tr, current, smp_processor_id());
6465                 else
6466                         update_max_tr_single(tr, current, iter->cpu_file);
6467                 local_irq_enable();
6468                 break;
6469         default:
6470                 if (tr->allocated_snapshot) {
6471                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6472                                 tracing_reset_online_cpus(&tr->max_buffer);
6473                         else
6474                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6475                 }
6476                 break;
6477         }
6478
6479         if (ret >= 0) {
6480                 *ppos += cnt;
6481                 ret = cnt;
6482         }
6483 out:
6484         mutex_unlock(&trace_types_lock);
6485         return ret;
6486 }
6487
6488 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6489 {
6490         struct seq_file *m = file->private_data;
6491         int ret;
6492
6493         ret = tracing_release(inode, file);
6494
6495         if (file->f_mode & FMODE_READ)
6496                 return ret;
6497
6498         /* If write only, the seq_file is just a stub */
6499         if (m)
6500                 kfree(m->private);
6501         kfree(m);
6502
6503         return 0;
6504 }
6505
6506 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6507 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6508                                     size_t count, loff_t *ppos);
6509 static int tracing_buffers_release(struct inode *inode, struct file *file);
6510 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6511                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6512
6513 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6514 {
6515         struct ftrace_buffer_info *info;
6516         int ret;
6517
6518         ret = tracing_buffers_open(inode, filp);
6519         if (ret < 0)
6520                 return ret;
6521
6522         info = filp->private_data;
6523
6524         if (info->iter.trace->use_max_tr) {
6525                 tracing_buffers_release(inode, filp);
6526                 return -EBUSY;
6527         }
6528
6529         info->iter.snapshot = true;
6530         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6531
6532         return ret;
6533 }
6534
6535 #endif /* CONFIG_TRACER_SNAPSHOT */
6536
6537
6538 static const struct file_operations tracing_thresh_fops = {
6539         .open           = tracing_open_generic,
6540         .read           = tracing_thresh_read,
6541         .write          = tracing_thresh_write,
6542         .llseek         = generic_file_llseek,
6543 };
6544
6545 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6546 static const struct file_operations tracing_max_lat_fops = {
6547         .open           = tracing_open_generic,
6548         .read           = tracing_max_lat_read,
6549         .write          = tracing_max_lat_write,
6550         .llseek         = generic_file_llseek,
6551 };
6552 #endif
6553
6554 static const struct file_operations set_tracer_fops = {
6555         .open           = tracing_open_generic,
6556         .read           = tracing_set_trace_read,
6557         .write          = tracing_set_trace_write,
6558         .llseek         = generic_file_llseek,
6559 };
6560
6561 static const struct file_operations tracing_pipe_fops = {
6562         .open           = tracing_open_pipe,
6563         .poll           = tracing_poll_pipe,
6564         .read           = tracing_read_pipe,
6565         .splice_read    = tracing_splice_read_pipe,
6566         .release        = tracing_release_pipe,
6567         .llseek         = no_llseek,
6568 };
6569
6570 static const struct file_operations tracing_entries_fops = {
6571         .open           = tracing_open_generic_tr,
6572         .read           = tracing_entries_read,
6573         .write          = tracing_entries_write,
6574         .llseek         = generic_file_llseek,
6575         .release        = tracing_release_generic_tr,
6576 };
6577
6578 static const struct file_operations tracing_total_entries_fops = {
6579         .open           = tracing_open_generic_tr,
6580         .read           = tracing_total_entries_read,
6581         .llseek         = generic_file_llseek,
6582         .release        = tracing_release_generic_tr,
6583 };
6584
6585 static const struct file_operations tracing_free_buffer_fops = {
6586         .open           = tracing_open_generic_tr,
6587         .write          = tracing_free_buffer_write,
6588         .release        = tracing_free_buffer_release,
6589 };
6590
6591 static const struct file_operations tracing_mark_fops = {
6592         .open           = tracing_open_generic_tr,
6593         .write          = tracing_mark_write,
6594         .llseek         = generic_file_llseek,
6595         .release        = tracing_release_generic_tr,
6596 };
6597
6598 static const struct file_operations tracing_mark_raw_fops = {
6599         .open           = tracing_open_generic_tr,
6600         .write          = tracing_mark_raw_write,
6601         .llseek         = generic_file_llseek,
6602         .release        = tracing_release_generic_tr,
6603 };
6604
6605 static const struct file_operations trace_clock_fops = {
6606         .open           = tracing_clock_open,
6607         .read           = seq_read,
6608         .llseek         = seq_lseek,
6609         .release        = tracing_single_release_tr,
6610         .write          = tracing_clock_write,
6611 };
6612
6613 static const struct file_operations trace_time_stamp_mode_fops = {
6614         .open           = tracing_time_stamp_mode_open,
6615         .read           = seq_read,
6616         .llseek         = seq_lseek,
6617         .release        = tracing_single_release_tr,
6618 };
6619
6620 #ifdef CONFIG_TRACER_SNAPSHOT
6621 static const struct file_operations snapshot_fops = {
6622         .open           = tracing_snapshot_open,
6623         .read           = seq_read,
6624         .write          = tracing_snapshot_write,
6625         .llseek         = tracing_lseek,
6626         .release        = tracing_snapshot_release,
6627 };
6628
6629 static const struct file_operations snapshot_raw_fops = {
6630         .open           = snapshot_raw_open,
6631         .read           = tracing_buffers_read,
6632         .release        = tracing_buffers_release,
6633         .splice_read    = tracing_buffers_splice_read,
6634         .llseek         = no_llseek,
6635 };
6636
6637 #endif /* CONFIG_TRACER_SNAPSHOT */
6638
6639 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6640 {
6641         struct trace_array *tr = inode->i_private;
6642         struct ftrace_buffer_info *info;
6643         int ret;
6644
6645         if (tracing_disabled)
6646                 return -ENODEV;
6647
6648         if (trace_array_get(tr) < 0)
6649                 return -ENODEV;
6650
6651         info = kzalloc(sizeof(*info), GFP_KERNEL);
6652         if (!info) {
6653                 trace_array_put(tr);
6654                 return -ENOMEM;
6655         }
6656
6657         mutex_lock(&trace_types_lock);
6658
6659         info->iter.tr           = tr;
6660         info->iter.cpu_file     = tracing_get_cpu(inode);
6661         info->iter.trace        = tr->current_trace;
6662         info->iter.trace_buffer = &tr->trace_buffer;
6663         info->spare             = NULL;
6664         /* Force reading ring buffer for first read */
6665         info->read              = (unsigned int)-1;
6666
6667         filp->private_data = info;
6668
6669         tr->current_trace->ref++;
6670
6671         mutex_unlock(&trace_types_lock);
6672
6673         ret = nonseekable_open(inode, filp);
6674         if (ret < 0)
6675                 trace_array_put(tr);
6676
6677         return ret;
6678 }
6679
6680 static __poll_t
6681 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6682 {
6683         struct ftrace_buffer_info *info = filp->private_data;
6684         struct trace_iterator *iter = &info->iter;
6685
6686         return trace_poll(iter, filp, poll_table);
6687 }
6688
6689 static ssize_t
6690 tracing_buffers_read(struct file *filp, char __user *ubuf,
6691                      size_t count, loff_t *ppos)
6692 {
6693         struct ftrace_buffer_info *info = filp->private_data;
6694         struct trace_iterator *iter = &info->iter;
6695         ssize_t ret = 0;
6696         ssize_t size;
6697
6698         if (!count)
6699                 return 0;
6700
6701 #ifdef CONFIG_TRACER_MAX_TRACE
6702         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6703                 return -EBUSY;
6704 #endif
6705
6706         if (!info->spare) {
6707                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6708                                                           iter->cpu_file);
6709                 if (IS_ERR(info->spare)) {
6710                         ret = PTR_ERR(info->spare);
6711                         info->spare = NULL;
6712                 } else {
6713                         info->spare_cpu = iter->cpu_file;
6714                 }
6715         }
6716         if (!info->spare)
6717                 return ret;
6718
6719         /* Do we have previous read data to read? */
6720         if (info->read < PAGE_SIZE)
6721                 goto read;
6722
6723  again:
6724         trace_access_lock(iter->cpu_file);
6725         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6726                                     &info->spare,
6727                                     count,
6728                                     iter->cpu_file, 0);
6729         trace_access_unlock(iter->cpu_file);
6730
6731         if (ret < 0) {
6732                 if (trace_empty(iter)) {
6733                         if ((filp->f_flags & O_NONBLOCK))
6734                                 return -EAGAIN;
6735
6736                         ret = wait_on_pipe(iter, false);
6737                         if (ret)
6738                                 return ret;
6739
6740                         goto again;
6741                 }
6742                 return 0;
6743         }
6744
6745         info->read = 0;
6746  read:
6747         size = PAGE_SIZE - info->read;
6748         if (size > count)
6749                 size = count;
6750
6751         ret = copy_to_user(ubuf, info->spare + info->read, size);
6752         if (ret == size)
6753                 return -EFAULT;
6754
6755         size -= ret;
6756
6757         *ppos += size;
6758         info->read += size;
6759
6760         return size;
6761 }
6762
6763 static int tracing_buffers_release(struct inode *inode, struct file *file)
6764 {
6765         struct ftrace_buffer_info *info = file->private_data;
6766         struct trace_iterator *iter = &info->iter;
6767
6768         mutex_lock(&trace_types_lock);
6769
6770         iter->tr->current_trace->ref--;
6771
6772         __trace_array_put(iter->tr);
6773
6774         if (info->spare)
6775                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6776                                            info->spare_cpu, info->spare);
6777         kfree(info);
6778
6779         mutex_unlock(&trace_types_lock);
6780
6781         return 0;
6782 }
6783
6784 struct buffer_ref {
6785         struct ring_buffer      *buffer;
6786         void                    *page;
6787         int                     cpu;
6788         int                     ref;
6789 };
6790
6791 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6792                                     struct pipe_buffer *buf)
6793 {
6794         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6795
6796         if (--ref->ref)
6797                 return;
6798
6799         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6800         kfree(ref);
6801         buf->private = 0;
6802 }
6803
6804 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6805                                 struct pipe_buffer *buf)
6806 {
6807         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6808
6809         ref->ref++;
6810 }
6811
6812 /* Pipe buffer operations for a buffer. */
6813 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6814         .can_merge              = 0,
6815         .confirm                = generic_pipe_buf_confirm,
6816         .release                = buffer_pipe_buf_release,
6817         .steal                  = generic_pipe_buf_steal,
6818         .get                    = buffer_pipe_buf_get,
6819 };
6820
6821 /*
6822  * Callback from splice_to_pipe(), if we need to release some pages
6823  * at the end of the spd in case we error'ed out in filling the pipe.
6824  */
6825 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6826 {
6827         struct buffer_ref *ref =
6828                 (struct buffer_ref *)spd->partial[i].private;
6829
6830         if (--ref->ref)
6831                 return;
6832
6833         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6834         kfree(ref);
6835         spd->partial[i].private = 0;
6836 }
6837
6838 static ssize_t
6839 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6840                             struct pipe_inode_info *pipe, size_t len,
6841                             unsigned int flags)
6842 {
6843         struct ftrace_buffer_info *info = file->private_data;
6844         struct trace_iterator *iter = &info->iter;
6845         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6846         struct page *pages_def[PIPE_DEF_BUFFERS];
6847         struct splice_pipe_desc spd = {
6848                 .pages          = pages_def,
6849                 .partial        = partial_def,
6850                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6851                 .ops            = &buffer_pipe_buf_ops,
6852                 .spd_release    = buffer_spd_release,
6853         };
6854         struct buffer_ref *ref;
6855         int entries, i;
6856         ssize_t ret = 0;
6857
6858 #ifdef CONFIG_TRACER_MAX_TRACE
6859         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6860                 return -EBUSY;
6861 #endif
6862
6863         if (*ppos & (PAGE_SIZE - 1))
6864                 return -EINVAL;
6865
6866         if (len & (PAGE_SIZE - 1)) {
6867                 if (len < PAGE_SIZE)
6868                         return -EINVAL;
6869                 len &= PAGE_MASK;
6870         }
6871
6872         if (splice_grow_spd(pipe, &spd))
6873                 return -ENOMEM;
6874
6875  again:
6876         trace_access_lock(iter->cpu_file);
6877         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6878
6879         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6880                 struct page *page;
6881                 int r;
6882
6883                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6884                 if (!ref) {
6885                         ret = -ENOMEM;
6886                         break;
6887                 }
6888
6889                 ref->ref = 1;
6890                 ref->buffer = iter->trace_buffer->buffer;
6891                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6892                 if (IS_ERR(ref->page)) {
6893                         ret = PTR_ERR(ref->page);
6894                         ref->page = NULL;
6895                         kfree(ref);
6896                         break;
6897                 }
6898                 ref->cpu = iter->cpu_file;
6899
6900                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6901                                           len, iter->cpu_file, 1);
6902                 if (r < 0) {
6903                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6904                                                    ref->page);
6905                         kfree(ref);
6906                         break;
6907                 }
6908
6909                 page = virt_to_page(ref->page);
6910
6911                 spd.pages[i] = page;
6912                 spd.partial[i].len = PAGE_SIZE;
6913                 spd.partial[i].offset = 0;
6914                 spd.partial[i].private = (unsigned long)ref;
6915                 spd.nr_pages++;
6916                 *ppos += PAGE_SIZE;
6917
6918                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6919         }
6920
6921         trace_access_unlock(iter->cpu_file);
6922         spd.nr_pages = i;
6923
6924         /* did we read anything? */
6925         if (!spd.nr_pages) {
6926                 if (ret)
6927                         goto out;
6928
6929                 ret = -EAGAIN;
6930                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6931                         goto out;
6932
6933                 ret = wait_on_pipe(iter, true);
6934                 if (ret)
6935                         goto out;
6936
6937                 goto again;
6938         }
6939
6940         ret = splice_to_pipe(pipe, &spd);
6941 out:
6942         splice_shrink_spd(&spd);
6943
6944         return ret;
6945 }
6946
6947 static const struct file_operations tracing_buffers_fops = {
6948         .open           = tracing_buffers_open,
6949         .read           = tracing_buffers_read,
6950         .poll           = tracing_buffers_poll,
6951         .release        = tracing_buffers_release,
6952         .splice_read    = tracing_buffers_splice_read,
6953         .llseek         = no_llseek,
6954 };
6955
6956 static ssize_t
6957 tracing_stats_read(struct file *filp, char __user *ubuf,
6958                    size_t count, loff_t *ppos)
6959 {
6960         struct inode *inode = file_inode(filp);
6961         struct trace_array *tr = inode->i_private;
6962         struct trace_buffer *trace_buf = &tr->trace_buffer;
6963         int cpu = tracing_get_cpu(inode);
6964         struct trace_seq *s;
6965         unsigned long cnt;
6966         unsigned long long t;
6967         unsigned long usec_rem;
6968
6969         s = kmalloc(sizeof(*s), GFP_KERNEL);
6970         if (!s)
6971                 return -ENOMEM;
6972
6973         trace_seq_init(s);
6974
6975         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6976         trace_seq_printf(s, "entries: %ld\n", cnt);
6977
6978         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6979         trace_seq_printf(s, "overrun: %ld\n", cnt);
6980
6981         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6982         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6983
6984         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6985         trace_seq_printf(s, "bytes: %ld\n", cnt);
6986
6987         if (trace_clocks[tr->clock_id].in_ns) {
6988                 /* local or global for trace_clock */
6989                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6990                 usec_rem = do_div(t, USEC_PER_SEC);
6991                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6992                                                                 t, usec_rem);
6993
6994                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6995                 usec_rem = do_div(t, USEC_PER_SEC);
6996                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6997         } else {
6998                 /* counter or tsc mode for trace_clock */
6999                 trace_seq_printf(s, "oldest event ts: %llu\n",
7000                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7001
7002                 trace_seq_printf(s, "now ts: %llu\n",
7003                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7004         }
7005
7006         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7007         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7008
7009         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7010         trace_seq_printf(s, "read events: %ld\n", cnt);
7011
7012         count = simple_read_from_buffer(ubuf, count, ppos,
7013                                         s->buffer, trace_seq_used(s));
7014
7015         kfree(s);
7016
7017         return count;
7018 }
7019
7020 static const struct file_operations tracing_stats_fops = {
7021         .open           = tracing_open_generic_tr,
7022         .read           = tracing_stats_read,
7023         .llseek         = generic_file_llseek,
7024         .release        = tracing_release_generic_tr,
7025 };
7026
7027 #ifdef CONFIG_DYNAMIC_FTRACE
7028
7029 static ssize_t
7030 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7031                   size_t cnt, loff_t *ppos)
7032 {
7033         unsigned long *p = filp->private_data;
7034         char buf[64]; /* Not too big for a shallow stack */
7035         int r;
7036
7037         r = scnprintf(buf, 63, "%ld", *p);
7038         buf[r++] = '\n';
7039
7040         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7041 }
7042
7043 static const struct file_operations tracing_dyn_info_fops = {
7044         .open           = tracing_open_generic,
7045         .read           = tracing_read_dyn_info,
7046         .llseek         = generic_file_llseek,
7047 };
7048 #endif /* CONFIG_DYNAMIC_FTRACE */
7049
7050 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7051 static void
7052 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7053                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7054                 void *data)
7055 {
7056         tracing_snapshot_instance(tr);
7057 }
7058
7059 static void
7060 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7061                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7062                       void *data)
7063 {
7064         struct ftrace_func_mapper *mapper = data;
7065         long *count = NULL;
7066
7067         if (mapper)
7068                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7069
7070         if (count) {
7071
7072                 if (*count <= 0)
7073                         return;
7074
7075                 (*count)--;
7076         }
7077
7078         tracing_snapshot_instance(tr);
7079 }
7080
7081 static int
7082 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7083                       struct ftrace_probe_ops *ops, void *data)
7084 {
7085         struct ftrace_func_mapper *mapper = data;
7086         long *count = NULL;
7087
7088         seq_printf(m, "%ps:", (void *)ip);
7089
7090         seq_puts(m, "snapshot");
7091
7092         if (mapper)
7093                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7094
7095         if (count)
7096                 seq_printf(m, ":count=%ld\n", *count);
7097         else
7098                 seq_puts(m, ":unlimited\n");
7099
7100         return 0;
7101 }
7102
7103 static int
7104 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7105                      unsigned long ip, void *init_data, void **data)
7106 {
7107         struct ftrace_func_mapper *mapper = *data;
7108
7109         if (!mapper) {
7110                 mapper = allocate_ftrace_func_mapper();
7111                 if (!mapper)
7112                         return -ENOMEM;
7113                 *data = mapper;
7114         }
7115
7116         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7117 }
7118
7119 static void
7120 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7121                      unsigned long ip, void *data)
7122 {
7123         struct ftrace_func_mapper *mapper = data;
7124
7125         if (!ip) {
7126                 if (!mapper)
7127                         return;
7128                 free_ftrace_func_mapper(mapper, NULL);
7129                 return;
7130         }
7131
7132         ftrace_func_mapper_remove_ip(mapper, ip);
7133 }
7134
7135 static struct ftrace_probe_ops snapshot_probe_ops = {
7136         .func                   = ftrace_snapshot,
7137         .print                  = ftrace_snapshot_print,
7138 };
7139
7140 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7141         .func                   = ftrace_count_snapshot,
7142         .print                  = ftrace_snapshot_print,
7143         .init                   = ftrace_snapshot_init,
7144         .free                   = ftrace_snapshot_free,
7145 };
7146
7147 static int
7148 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7149                                char *glob, char *cmd, char *param, int enable)
7150 {
7151         struct ftrace_probe_ops *ops;
7152         void *count = (void *)-1;
7153         char *number;
7154         int ret;
7155
7156         if (!tr)
7157                 return -ENODEV;
7158
7159         /* hash funcs only work with set_ftrace_filter */
7160         if (!enable)
7161                 return -EINVAL;
7162
7163         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7164
7165         if (glob[0] == '!')
7166                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7167
7168         if (!param)
7169                 goto out_reg;
7170
7171         number = strsep(&param, ":");
7172
7173         if (!strlen(number))
7174                 goto out_reg;
7175
7176         /*
7177          * We use the callback data field (which is a pointer)
7178          * as our counter.
7179          */
7180         ret = kstrtoul(number, 0, (unsigned long *)&count);
7181         if (ret)
7182                 return ret;
7183
7184  out_reg:
7185         ret = tracing_alloc_snapshot_instance(tr);
7186         if (ret < 0)
7187                 goto out;
7188
7189         ret = register_ftrace_function_probe(glob, tr, ops, count);
7190
7191  out:
7192         return ret < 0 ? ret : 0;
7193 }
7194
7195 static struct ftrace_func_command ftrace_snapshot_cmd = {
7196         .name                   = "snapshot",
7197         .func                   = ftrace_trace_snapshot_callback,
7198 };
7199
7200 static __init int register_snapshot_cmd(void)
7201 {
7202         return register_ftrace_command(&ftrace_snapshot_cmd);
7203 }
7204 #else
7205 static inline __init int register_snapshot_cmd(void) { return 0; }
7206 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7207
7208 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7209 {
7210         if (WARN_ON(!tr->dir))
7211                 return ERR_PTR(-ENODEV);
7212
7213         /* Top directory uses NULL as the parent */
7214         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7215                 return NULL;
7216
7217         /* All sub buffers have a descriptor */
7218         return tr->dir;
7219 }
7220
7221 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7222 {
7223         struct dentry *d_tracer;
7224
7225         if (tr->percpu_dir)
7226                 return tr->percpu_dir;
7227
7228         d_tracer = tracing_get_dentry(tr);
7229         if (IS_ERR(d_tracer))
7230                 return NULL;
7231
7232         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7233
7234         WARN_ONCE(!tr->percpu_dir,
7235                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7236
7237         return tr->percpu_dir;
7238 }
7239
7240 static struct dentry *
7241 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7242                       void *data, long cpu, const struct file_operations *fops)
7243 {
7244         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7245
7246         if (ret) /* See tracing_get_cpu() */
7247                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7248         return ret;
7249 }
7250
7251 static void
7252 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7253 {
7254         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7255         struct dentry *d_cpu;
7256         char cpu_dir[30]; /* 30 characters should be more than enough */
7257
7258         if (!d_percpu)
7259                 return;
7260
7261         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7262         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7263         if (!d_cpu) {
7264                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7265                 return;
7266         }
7267
7268         /* per cpu trace_pipe */
7269         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7270                                 tr, cpu, &tracing_pipe_fops);
7271
7272         /* per cpu trace */
7273         trace_create_cpu_file("trace", 0644, d_cpu,
7274                                 tr, cpu, &tracing_fops);
7275
7276         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7277                                 tr, cpu, &tracing_buffers_fops);
7278
7279         trace_create_cpu_file("stats", 0444, d_cpu,
7280                                 tr, cpu, &tracing_stats_fops);
7281
7282         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7283                                 tr, cpu, &tracing_entries_fops);
7284
7285 #ifdef CONFIG_TRACER_SNAPSHOT
7286         trace_create_cpu_file("snapshot", 0644, d_cpu,
7287                                 tr, cpu, &snapshot_fops);
7288
7289         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7290                                 tr, cpu, &snapshot_raw_fops);
7291 #endif
7292 }
7293
7294 #ifdef CONFIG_FTRACE_SELFTEST
7295 /* Let selftest have access to static functions in this file */
7296 #include "trace_selftest.c"
7297 #endif
7298
7299 static ssize_t
7300 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7301                         loff_t *ppos)
7302 {
7303         struct trace_option_dentry *topt = filp->private_data;
7304         char *buf;
7305
7306         if (topt->flags->val & topt->opt->bit)
7307                 buf = "1\n";
7308         else
7309                 buf = "0\n";
7310
7311         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7312 }
7313
7314 static ssize_t
7315 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7316                          loff_t *ppos)
7317 {
7318         struct trace_option_dentry *topt = filp->private_data;
7319         unsigned long val;
7320         int ret;
7321
7322         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7323         if (ret)
7324                 return ret;
7325
7326         if (val != 0 && val != 1)
7327                 return -EINVAL;
7328
7329         if (!!(topt->flags->val & topt->opt->bit) != val) {
7330                 mutex_lock(&trace_types_lock);
7331                 ret = __set_tracer_option(topt->tr, topt->flags,
7332                                           topt->opt, !val);
7333                 mutex_unlock(&trace_types_lock);
7334                 if (ret)
7335                         return ret;
7336         }
7337
7338         *ppos += cnt;
7339
7340         return cnt;
7341 }
7342
7343
7344 static const struct file_operations trace_options_fops = {
7345         .open = tracing_open_generic,
7346         .read = trace_options_read,
7347         .write = trace_options_write,
7348         .llseek = generic_file_llseek,
7349 };
7350
7351 /*
7352  * In order to pass in both the trace_array descriptor as well as the index
7353  * to the flag that the trace option file represents, the trace_array
7354  * has a character array of trace_flags_index[], which holds the index
7355  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7356  * The address of this character array is passed to the flag option file
7357  * read/write callbacks.
7358  *
7359  * In order to extract both the index and the trace_array descriptor,
7360  * get_tr_index() uses the following algorithm.
7361  *
7362  *   idx = *ptr;
7363  *
7364  * As the pointer itself contains the address of the index (remember
7365  * index[1] == 1).
7366  *
7367  * Then to get the trace_array descriptor, by subtracting that index
7368  * from the ptr, we get to the start of the index itself.
7369  *
7370  *   ptr - idx == &index[0]
7371  *
7372  * Then a simple container_of() from that pointer gets us to the
7373  * trace_array descriptor.
7374  */
7375 static void get_tr_index(void *data, struct trace_array **ptr,
7376                          unsigned int *pindex)
7377 {
7378         *pindex = *(unsigned char *)data;
7379
7380         *ptr = container_of(data - *pindex, struct trace_array,
7381                             trace_flags_index);
7382 }
7383
7384 static ssize_t
7385 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7386                         loff_t *ppos)
7387 {
7388         void *tr_index = filp->private_data;
7389         struct trace_array *tr;
7390         unsigned int index;
7391         char *buf;
7392
7393         get_tr_index(tr_index, &tr, &index);
7394
7395         if (tr->trace_flags & (1 << index))
7396                 buf = "1\n";
7397         else
7398                 buf = "0\n";
7399
7400         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7401 }
7402
7403 static ssize_t
7404 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7405                          loff_t *ppos)
7406 {
7407         void *tr_index = filp->private_data;
7408         struct trace_array *tr;
7409         unsigned int index;
7410         unsigned long val;
7411         int ret;
7412
7413         get_tr_index(tr_index, &tr, &index);
7414
7415         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7416         if (ret)
7417                 return ret;
7418
7419         if (val != 0 && val != 1)
7420                 return -EINVAL;
7421
7422         mutex_lock(&trace_types_lock);
7423         ret = set_tracer_flag(tr, 1 << index, val);
7424         mutex_unlock(&trace_types_lock);
7425
7426         if (ret < 0)
7427                 return ret;
7428
7429         *ppos += cnt;
7430
7431         return cnt;
7432 }
7433
7434 static const struct file_operations trace_options_core_fops = {
7435         .open = tracing_open_generic,
7436         .read = trace_options_core_read,
7437         .write = trace_options_core_write,
7438         .llseek = generic_file_llseek,
7439 };
7440
7441 struct dentry *trace_create_file(const char *name,
7442                                  umode_t mode,
7443                                  struct dentry *parent,
7444                                  void *data,
7445                                  const struct file_operations *fops)
7446 {
7447         struct dentry *ret;
7448
7449         ret = tracefs_create_file(name, mode, parent, data, fops);
7450         if (!ret)
7451                 pr_warn("Could not create tracefs '%s' entry\n", name);
7452
7453         return ret;
7454 }
7455
7456
7457 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7458 {
7459         struct dentry *d_tracer;
7460
7461         if (tr->options)
7462                 return tr->options;
7463
7464         d_tracer = tracing_get_dentry(tr);
7465         if (IS_ERR(d_tracer))
7466                 return NULL;
7467
7468         tr->options = tracefs_create_dir("options", d_tracer);
7469         if (!tr->options) {
7470                 pr_warn("Could not create tracefs directory 'options'\n");
7471                 return NULL;
7472         }
7473
7474         return tr->options;
7475 }
7476
7477 static void
7478 create_trace_option_file(struct trace_array *tr,
7479                          struct trace_option_dentry *topt,
7480                          struct tracer_flags *flags,
7481                          struct tracer_opt *opt)
7482 {
7483         struct dentry *t_options;
7484
7485         t_options = trace_options_init_dentry(tr);
7486         if (!t_options)
7487                 return;
7488
7489         topt->flags = flags;
7490         topt->opt = opt;
7491         topt->tr = tr;
7492
7493         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7494                                     &trace_options_fops);
7495
7496 }
7497
7498 static void
7499 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7500 {
7501         struct trace_option_dentry *topts;
7502         struct trace_options *tr_topts;
7503         struct tracer_flags *flags;
7504         struct tracer_opt *opts;
7505         int cnt;
7506         int i;
7507
7508         if (!tracer)
7509                 return;
7510
7511         flags = tracer->flags;
7512
7513         if (!flags || !flags->opts)
7514                 return;
7515
7516         /*
7517          * If this is an instance, only create flags for tracers
7518          * the instance may have.
7519          */
7520         if (!trace_ok_for_array(tracer, tr))
7521                 return;
7522
7523         for (i = 0; i < tr->nr_topts; i++) {
7524                 /* Make sure there's no duplicate flags. */
7525                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7526                         return;
7527         }
7528
7529         opts = flags->opts;
7530
7531         for (cnt = 0; opts[cnt].name; cnt++)
7532                 ;
7533
7534         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7535         if (!topts)
7536                 return;
7537
7538         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7539                             GFP_KERNEL);
7540         if (!tr_topts) {
7541                 kfree(topts);
7542                 return;
7543         }
7544
7545         tr->topts = tr_topts;
7546         tr->topts[tr->nr_topts].tracer = tracer;
7547         tr->topts[tr->nr_topts].topts = topts;
7548         tr->nr_topts++;
7549
7550         for (cnt = 0; opts[cnt].name; cnt++) {
7551                 create_trace_option_file(tr, &topts[cnt], flags,
7552                                          &opts[cnt]);
7553                 WARN_ONCE(topts[cnt].entry == NULL,
7554                           "Failed to create trace option: %s",
7555                           opts[cnt].name);
7556         }
7557 }
7558
7559 static struct dentry *
7560 create_trace_option_core_file(struct trace_array *tr,
7561                               const char *option, long index)
7562 {
7563         struct dentry *t_options;
7564
7565         t_options = trace_options_init_dentry(tr);
7566         if (!t_options)
7567                 return NULL;
7568
7569         return trace_create_file(option, 0644, t_options,
7570                                  (void *)&tr->trace_flags_index[index],
7571                                  &trace_options_core_fops);
7572 }
7573
7574 static void create_trace_options_dir(struct trace_array *tr)
7575 {
7576         struct dentry *t_options;
7577         bool top_level = tr == &global_trace;
7578         int i;
7579
7580         t_options = trace_options_init_dentry(tr);
7581         if (!t_options)
7582                 return;
7583
7584         for (i = 0; trace_options[i]; i++) {
7585                 if (top_level ||
7586                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7587                         create_trace_option_core_file(tr, trace_options[i], i);
7588         }
7589 }
7590
7591 static ssize_t
7592 rb_simple_read(struct file *filp, char __user *ubuf,
7593                size_t cnt, loff_t *ppos)
7594 {
7595         struct trace_array *tr = filp->private_data;
7596         char buf[64];
7597         int r;
7598
7599         r = tracer_tracing_is_on(tr);
7600         r = sprintf(buf, "%d\n", r);
7601
7602         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7603 }
7604
7605 static ssize_t
7606 rb_simple_write(struct file *filp, const char __user *ubuf,
7607                 size_t cnt, loff_t *ppos)
7608 {
7609         struct trace_array *tr = filp->private_data;
7610         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7611         unsigned long val;
7612         int ret;
7613
7614         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7615         if (ret)
7616                 return ret;
7617
7618         if (buffer) {
7619                 mutex_lock(&trace_types_lock);
7620                 if (val) {
7621                         tracer_tracing_on(tr);
7622                         if (tr->current_trace->start)
7623                                 tr->current_trace->start(tr);
7624                 } else {
7625                         tracer_tracing_off(tr);
7626                         if (tr->current_trace->stop)
7627                                 tr->current_trace->stop(tr);
7628                 }
7629                 mutex_unlock(&trace_types_lock);
7630         }
7631
7632         (*ppos)++;
7633
7634         return cnt;
7635 }
7636
7637 static const struct file_operations rb_simple_fops = {
7638         .open           = tracing_open_generic_tr,
7639         .read           = rb_simple_read,
7640         .write          = rb_simple_write,
7641         .release        = tracing_release_generic_tr,
7642         .llseek         = default_llseek,
7643 };
7644
7645 struct dentry *trace_instance_dir;
7646
7647 static void
7648 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7649
7650 static int
7651 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7652 {
7653         enum ring_buffer_flags rb_flags;
7654
7655         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7656
7657         buf->tr = tr;
7658
7659         buf->buffer = ring_buffer_alloc(size, rb_flags);
7660         if (!buf->buffer)
7661                 return -ENOMEM;
7662
7663         buf->data = alloc_percpu(struct trace_array_cpu);
7664         if (!buf->data) {
7665                 ring_buffer_free(buf->buffer);
7666                 buf->buffer = NULL;
7667                 return -ENOMEM;
7668         }
7669
7670         /* Allocate the first page for all buffers */
7671         set_buffer_entries(&tr->trace_buffer,
7672                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7673
7674         return 0;
7675 }
7676
7677 static int allocate_trace_buffers(struct trace_array *tr, int size)
7678 {
7679         int ret;
7680
7681         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7682         if (ret)
7683                 return ret;
7684
7685 #ifdef CONFIG_TRACER_MAX_TRACE
7686         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7687                                     allocate_snapshot ? size : 1);
7688         if (WARN_ON(ret)) {
7689                 ring_buffer_free(tr->trace_buffer.buffer);
7690                 tr->trace_buffer.buffer = NULL;
7691                 free_percpu(tr->trace_buffer.data);
7692                 tr->trace_buffer.data = NULL;
7693                 return -ENOMEM;
7694         }
7695         tr->allocated_snapshot = allocate_snapshot;
7696
7697         /*
7698          * Only the top level trace array gets its snapshot allocated
7699          * from the kernel command line.
7700          */
7701         allocate_snapshot = false;
7702 #endif
7703         return 0;
7704 }
7705
7706 static void free_trace_buffer(struct trace_buffer *buf)
7707 {
7708         if (buf->buffer) {
7709                 ring_buffer_free(buf->buffer);
7710                 buf->buffer = NULL;
7711                 free_percpu(buf->data);
7712                 buf->data = NULL;
7713         }
7714 }
7715
7716 static void free_trace_buffers(struct trace_array *tr)
7717 {
7718         if (!tr)
7719                 return;
7720
7721         free_trace_buffer(&tr->trace_buffer);
7722
7723 #ifdef CONFIG_TRACER_MAX_TRACE
7724         free_trace_buffer(&tr->max_buffer);
7725 #endif
7726 }
7727
7728 static void init_trace_flags_index(struct trace_array *tr)
7729 {
7730         int i;
7731
7732         /* Used by the trace options files */
7733         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7734                 tr->trace_flags_index[i] = i;
7735 }
7736
7737 static void __update_tracer_options(struct trace_array *tr)
7738 {
7739         struct tracer *t;
7740
7741         for (t = trace_types; t; t = t->next)
7742                 add_tracer_options(tr, t);
7743 }
7744
7745 static void update_tracer_options(struct trace_array *tr)
7746 {
7747         mutex_lock(&trace_types_lock);
7748         __update_tracer_options(tr);
7749         mutex_unlock(&trace_types_lock);
7750 }
7751
7752 static int instance_mkdir(const char *name)
7753 {
7754         struct trace_array *tr;
7755         int ret;
7756
7757         mutex_lock(&event_mutex);
7758         mutex_lock(&trace_types_lock);
7759
7760         ret = -EEXIST;
7761         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7762                 if (tr->name && strcmp(tr->name, name) == 0)
7763                         goto out_unlock;
7764         }
7765
7766         ret = -ENOMEM;
7767         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7768         if (!tr)
7769                 goto out_unlock;
7770
7771         tr->name = kstrdup(name, GFP_KERNEL);
7772         if (!tr->name)
7773                 goto out_free_tr;
7774
7775         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7776                 goto out_free_tr;
7777
7778         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7779
7780         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7781
7782         raw_spin_lock_init(&tr->start_lock);
7783
7784         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7785
7786         tr->current_trace = &nop_trace;
7787
7788         INIT_LIST_HEAD(&tr->systems);
7789         INIT_LIST_HEAD(&tr->events);
7790         INIT_LIST_HEAD(&tr->hist_vars);
7791
7792         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7793                 goto out_free_tr;
7794
7795         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7796         if (!tr->dir)
7797                 goto out_free_tr;
7798
7799         ret = event_trace_add_tracer(tr->dir, tr);
7800         if (ret) {
7801                 tracefs_remove_recursive(tr->dir);
7802                 goto out_free_tr;
7803         }
7804
7805         ftrace_init_trace_array(tr);
7806
7807         init_tracer_tracefs(tr, tr->dir);
7808         init_trace_flags_index(tr);
7809         __update_tracer_options(tr);
7810
7811         list_add(&tr->list, &ftrace_trace_arrays);
7812
7813         mutex_unlock(&trace_types_lock);
7814         mutex_unlock(&event_mutex);
7815
7816         return 0;
7817
7818  out_free_tr:
7819         free_trace_buffers(tr);
7820         free_cpumask_var(tr->tracing_cpumask);
7821         kfree(tr->name);
7822         kfree(tr);
7823
7824  out_unlock:
7825         mutex_unlock(&trace_types_lock);
7826         mutex_unlock(&event_mutex);
7827
7828         return ret;
7829
7830 }
7831
7832 static int instance_rmdir(const char *name)
7833 {
7834         struct trace_array *tr;
7835         int found = 0;
7836         int ret;
7837         int i;
7838
7839         mutex_lock(&event_mutex);
7840         mutex_lock(&trace_types_lock);
7841
7842         ret = -ENODEV;
7843         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7844                 if (tr->name && strcmp(tr->name, name) == 0) {
7845                         found = 1;
7846                         break;
7847                 }
7848         }
7849         if (!found)
7850                 goto out_unlock;
7851
7852         ret = -EBUSY;
7853         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7854                 goto out_unlock;
7855
7856         list_del(&tr->list);
7857
7858         /* Disable all the flags that were enabled coming in */
7859         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7860                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7861                         set_tracer_flag(tr, 1 << i, 0);
7862         }
7863
7864         tracing_set_nop(tr);
7865         clear_ftrace_function_probes(tr);
7866         event_trace_del_tracer(tr);
7867         ftrace_clear_pids(tr);
7868         ftrace_destroy_function_files(tr);
7869         tracefs_remove_recursive(tr->dir);
7870         free_trace_buffers(tr);
7871
7872         for (i = 0; i < tr->nr_topts; i++) {
7873                 kfree(tr->topts[i].topts);
7874         }
7875         kfree(tr->topts);
7876
7877         free_cpumask_var(tr->tracing_cpumask);
7878         kfree(tr->name);
7879         kfree(tr);
7880
7881         ret = 0;
7882
7883  out_unlock:
7884         mutex_unlock(&trace_types_lock);
7885         mutex_unlock(&event_mutex);
7886
7887         return ret;
7888 }
7889
7890 static __init void create_trace_instances(struct dentry *d_tracer)
7891 {
7892         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7893                                                          instance_mkdir,
7894                                                          instance_rmdir);
7895         if (WARN_ON(!trace_instance_dir))
7896                 return;
7897 }
7898
7899 static void
7900 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7901 {
7902         struct trace_event_file *file;
7903         int cpu;
7904
7905         trace_create_file("available_tracers", 0444, d_tracer,
7906                         tr, &show_traces_fops);
7907
7908         trace_create_file("current_tracer", 0644, d_tracer,
7909                         tr, &set_tracer_fops);
7910
7911         trace_create_file("tracing_cpumask", 0644, d_tracer,
7912                           tr, &tracing_cpumask_fops);
7913
7914         trace_create_file("trace_options", 0644, d_tracer,
7915                           tr, &tracing_iter_fops);
7916
7917         trace_create_file("trace", 0644, d_tracer,
7918                           tr, &tracing_fops);
7919
7920         trace_create_file("trace_pipe", 0444, d_tracer,
7921                           tr, &tracing_pipe_fops);
7922
7923         trace_create_file("buffer_size_kb", 0644, d_tracer,
7924                           tr, &tracing_entries_fops);
7925
7926         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7927                           tr, &tracing_total_entries_fops);
7928
7929         trace_create_file("free_buffer", 0200, d_tracer,
7930                           tr, &tracing_free_buffer_fops);
7931
7932         trace_create_file("trace_marker", 0220, d_tracer,
7933                           tr, &tracing_mark_fops);
7934
7935         file = __find_event_file(tr, "ftrace", "print");
7936         if (file && file->dir)
7937                 trace_create_file("trigger", 0644, file->dir, file,
7938                                   &event_trigger_fops);
7939         tr->trace_marker_file = file;
7940
7941         trace_create_file("trace_marker_raw", 0220, d_tracer,
7942                           tr, &tracing_mark_raw_fops);
7943
7944         trace_create_file("trace_clock", 0644, d_tracer, tr,
7945                           &trace_clock_fops);
7946
7947         trace_create_file("tracing_on", 0644, d_tracer,
7948                           tr, &rb_simple_fops);
7949
7950         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7951                           &trace_time_stamp_mode_fops);
7952
7953         create_trace_options_dir(tr);
7954
7955 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7956         trace_create_file("tracing_max_latency", 0644, d_tracer,
7957                         &tr->max_latency, &tracing_max_lat_fops);
7958 #endif
7959
7960         if (ftrace_create_function_files(tr, d_tracer))
7961                 WARN(1, "Could not allocate function filter files");
7962
7963 #ifdef CONFIG_TRACER_SNAPSHOT
7964         trace_create_file("snapshot", 0644, d_tracer,
7965                           tr, &snapshot_fops);
7966 #endif
7967
7968         for_each_tracing_cpu(cpu)
7969                 tracing_init_tracefs_percpu(tr, cpu);
7970
7971         ftrace_init_tracefs(tr, d_tracer);
7972 }
7973
7974 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7975 {
7976         struct vfsmount *mnt;
7977         struct file_system_type *type;
7978
7979         /*
7980          * To maintain backward compatibility for tools that mount
7981          * debugfs to get to the tracing facility, tracefs is automatically
7982          * mounted to the debugfs/tracing directory.
7983          */
7984         type = get_fs_type("tracefs");
7985         if (!type)
7986                 return NULL;
7987         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7988         put_filesystem(type);
7989         if (IS_ERR(mnt))
7990                 return NULL;
7991         mntget(mnt);
7992
7993         return mnt;
7994 }
7995
7996 /**
7997  * tracing_init_dentry - initialize top level trace array
7998  *
7999  * This is called when creating files or directories in the tracing
8000  * directory. It is called via fs_initcall() by any of the boot up code
8001  * and expects to return the dentry of the top level tracing directory.
8002  */
8003 struct dentry *tracing_init_dentry(void)
8004 {
8005         struct trace_array *tr = &global_trace;
8006
8007         /* The top level trace array uses  NULL as parent */
8008         if (tr->dir)
8009                 return NULL;
8010
8011         if (WARN_ON(!tracefs_initialized()) ||
8012                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8013                  WARN_ON(!debugfs_initialized())))
8014                 return ERR_PTR(-ENODEV);
8015
8016         /*
8017          * As there may still be users that expect the tracing
8018          * files to exist in debugfs/tracing, we must automount
8019          * the tracefs file system there, so older tools still
8020          * work with the newer kerenl.
8021          */
8022         tr->dir = debugfs_create_automount("tracing", NULL,
8023                                            trace_automount, NULL);
8024         if (!tr->dir) {
8025                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8026                 return ERR_PTR(-ENOMEM);
8027         }
8028
8029         return NULL;
8030 }
8031
8032 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8033 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8034
8035 static void __init trace_eval_init(void)
8036 {
8037         int len;
8038
8039         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8040         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8041 }
8042
8043 #ifdef CONFIG_MODULES
8044 static void trace_module_add_evals(struct module *mod)
8045 {
8046         if (!mod->num_trace_evals)
8047                 return;
8048
8049         /*
8050          * Modules with bad taint do not have events created, do
8051          * not bother with enums either.
8052          */
8053         if (trace_module_has_bad_taint(mod))
8054                 return;
8055
8056         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8057 }
8058
8059 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8060 static void trace_module_remove_evals(struct module *mod)
8061 {
8062         union trace_eval_map_item *map;
8063         union trace_eval_map_item **last = &trace_eval_maps;
8064
8065         if (!mod->num_trace_evals)
8066                 return;
8067
8068         mutex_lock(&trace_eval_mutex);
8069
8070         map = trace_eval_maps;
8071
8072         while (map) {
8073                 if (map->head.mod == mod)
8074                         break;
8075                 map = trace_eval_jmp_to_tail(map);
8076                 last = &map->tail.next;
8077                 map = map->tail.next;
8078         }
8079         if (!map)
8080                 goto out;
8081
8082         *last = trace_eval_jmp_to_tail(map)->tail.next;
8083         kfree(map);
8084  out:
8085         mutex_unlock(&trace_eval_mutex);
8086 }
8087 #else
8088 static inline void trace_module_remove_evals(struct module *mod) { }
8089 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8090
8091 static int trace_module_notify(struct notifier_block *self,
8092                                unsigned long val, void *data)
8093 {
8094         struct module *mod = data;
8095
8096         switch (val) {
8097         case MODULE_STATE_COMING:
8098                 trace_module_add_evals(mod);
8099                 break;
8100         case MODULE_STATE_GOING:
8101                 trace_module_remove_evals(mod);
8102                 break;
8103         }
8104
8105         return 0;
8106 }
8107
8108 static struct notifier_block trace_module_nb = {
8109         .notifier_call = trace_module_notify,
8110         .priority = 0,
8111 };
8112 #endif /* CONFIG_MODULES */
8113
8114 static __init int tracer_init_tracefs(void)
8115 {
8116         struct dentry *d_tracer;
8117
8118         trace_access_lock_init();
8119
8120         d_tracer = tracing_init_dentry();
8121         if (IS_ERR(d_tracer))
8122                 return 0;
8123
8124         event_trace_init();
8125
8126         init_tracer_tracefs(&global_trace, d_tracer);
8127         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8128
8129         trace_create_file("tracing_thresh", 0644, d_tracer,
8130                         &global_trace, &tracing_thresh_fops);
8131
8132         trace_create_file("README", 0444, d_tracer,
8133                         NULL, &tracing_readme_fops);
8134
8135         trace_create_file("saved_cmdlines", 0444, d_tracer,
8136                         NULL, &tracing_saved_cmdlines_fops);
8137
8138         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8139                           NULL, &tracing_saved_cmdlines_size_fops);
8140
8141         trace_create_file("saved_tgids", 0444, d_tracer,
8142                         NULL, &tracing_saved_tgids_fops);
8143
8144         trace_eval_init();
8145
8146         trace_create_eval_file(d_tracer);
8147
8148 #ifdef CONFIG_MODULES
8149         register_module_notifier(&trace_module_nb);
8150 #endif
8151
8152 #ifdef CONFIG_DYNAMIC_FTRACE
8153         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8154                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8155 #endif
8156
8157         create_trace_instances(d_tracer);
8158
8159         update_tracer_options(&global_trace);
8160
8161         return 0;
8162 }
8163
8164 static int trace_panic_handler(struct notifier_block *this,
8165                                unsigned long event, void *unused)
8166 {
8167         if (ftrace_dump_on_oops)
8168                 ftrace_dump(ftrace_dump_on_oops);
8169         return NOTIFY_OK;
8170 }
8171
8172 static struct notifier_block trace_panic_notifier = {
8173         .notifier_call  = trace_panic_handler,
8174         .next           = NULL,
8175         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8176 };
8177
8178 static int trace_die_handler(struct notifier_block *self,
8179                              unsigned long val,
8180                              void *data)
8181 {
8182         switch (val) {
8183         case DIE_OOPS:
8184                 if (ftrace_dump_on_oops)
8185                         ftrace_dump(ftrace_dump_on_oops);
8186                 break;
8187         default:
8188                 break;
8189         }
8190         return NOTIFY_OK;
8191 }
8192
8193 static struct notifier_block trace_die_notifier = {
8194         .notifier_call = trace_die_handler,
8195         .priority = 200
8196 };
8197
8198 /*
8199  * printk is set to max of 1024, we really don't need it that big.
8200  * Nothing should be printing 1000 characters anyway.
8201  */
8202 #define TRACE_MAX_PRINT         1000
8203
8204 /*
8205  * Define here KERN_TRACE so that we have one place to modify
8206  * it if we decide to change what log level the ftrace dump
8207  * should be at.
8208  */
8209 #define KERN_TRACE              KERN_EMERG
8210
8211 void
8212 trace_printk_seq(struct trace_seq *s)
8213 {
8214         /* Probably should print a warning here. */
8215         if (s->seq.len >= TRACE_MAX_PRINT)
8216                 s->seq.len = TRACE_MAX_PRINT;
8217
8218         /*
8219          * More paranoid code. Although the buffer size is set to
8220          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8221          * an extra layer of protection.
8222          */
8223         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8224                 s->seq.len = s->seq.size - 1;
8225
8226         /* should be zero ended, but we are paranoid. */
8227         s->buffer[s->seq.len] = 0;
8228
8229         printk(KERN_TRACE "%s", s->buffer);
8230
8231         trace_seq_init(s);
8232 }
8233
8234 void trace_init_global_iter(struct trace_iterator *iter)
8235 {
8236         iter->tr = &global_trace;
8237         iter->trace = iter->tr->current_trace;
8238         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8239         iter->trace_buffer = &global_trace.trace_buffer;
8240
8241         if (iter->trace && iter->trace->open)
8242                 iter->trace->open(iter);
8243
8244         /* Annotate start of buffers if we had overruns */
8245         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8246                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8247
8248         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8249         if (trace_clocks[iter->tr->clock_id].in_ns)
8250                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8251 }
8252
8253 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8254 {
8255         /* use static because iter can be a bit big for the stack */
8256         static struct trace_iterator iter;
8257         static atomic_t dump_running;
8258         struct trace_array *tr = &global_trace;
8259         unsigned int old_userobj;
8260         unsigned long flags;
8261         int cnt = 0, cpu;
8262
8263         /* Only allow one dump user at a time. */
8264         if (atomic_inc_return(&dump_running) != 1) {
8265                 atomic_dec(&dump_running);
8266                 return;
8267         }
8268
8269         /*
8270          * Always turn off tracing when we dump.
8271          * We don't need to show trace output of what happens
8272          * between multiple crashes.
8273          *
8274          * If the user does a sysrq-z, then they can re-enable
8275          * tracing with echo 1 > tracing_on.
8276          */
8277         tracing_off();
8278
8279         local_irq_save(flags);
8280
8281         /* Simulate the iterator */
8282         trace_init_global_iter(&iter);
8283
8284         for_each_tracing_cpu(cpu) {
8285                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8286         }
8287
8288         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8289
8290         /* don't look at user memory in panic mode */
8291         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8292
8293         switch (oops_dump_mode) {
8294         case DUMP_ALL:
8295                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8296                 break;
8297         case DUMP_ORIG:
8298                 iter.cpu_file = raw_smp_processor_id();
8299                 break;
8300         case DUMP_NONE:
8301                 goto out_enable;
8302         default:
8303                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8304                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8305         }
8306
8307         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8308
8309         /* Did function tracer already get disabled? */
8310         if (ftrace_is_dead()) {
8311                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8312                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8313         }
8314
8315         /*
8316          * We need to stop all tracing on all CPUS to read the
8317          * the next buffer. This is a bit expensive, but is
8318          * not done often. We fill all what we can read,
8319          * and then release the locks again.
8320          */
8321
8322         while (!trace_empty(&iter)) {
8323
8324                 if (!cnt)
8325                         printk(KERN_TRACE "---------------------------------\n");
8326
8327                 cnt++;
8328
8329                 /* reset all but tr, trace, and overruns */
8330                 memset(&iter.seq, 0,
8331                        sizeof(struct trace_iterator) -
8332                        offsetof(struct trace_iterator, seq));
8333                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8334                 iter.pos = -1;
8335
8336                 if (trace_find_next_entry_inc(&iter) != NULL) {
8337                         int ret;
8338
8339                         ret = print_trace_line(&iter);
8340                         if (ret != TRACE_TYPE_NO_CONSUME)
8341                                 trace_consume(&iter);
8342                 }
8343                 touch_nmi_watchdog();
8344
8345                 trace_printk_seq(&iter.seq);
8346         }
8347
8348         if (!cnt)
8349                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8350         else
8351                 printk(KERN_TRACE "---------------------------------\n");
8352
8353  out_enable:
8354         tr->trace_flags |= old_userobj;
8355
8356         for_each_tracing_cpu(cpu) {
8357                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8358         }
8359         atomic_dec(&dump_running);
8360         local_irq_restore(flags);
8361 }
8362 EXPORT_SYMBOL_GPL(ftrace_dump);
8363
8364 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8365 {
8366         char **argv;
8367         int argc, ret;
8368
8369         argc = 0;
8370         ret = 0;
8371         argv = argv_split(GFP_KERNEL, buf, &argc);
8372         if (!argv)
8373                 return -ENOMEM;
8374
8375         if (argc)
8376                 ret = createfn(argc, argv);
8377
8378         argv_free(argv);
8379
8380         return ret;
8381 }
8382
8383 #define WRITE_BUFSIZE  4096
8384
8385 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8386                                 size_t count, loff_t *ppos,
8387                                 int (*createfn)(int, char **))
8388 {
8389         char *kbuf, *buf, *tmp;
8390         int ret = 0;
8391         size_t done = 0;
8392         size_t size;
8393
8394         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8395         if (!kbuf)
8396                 return -ENOMEM;
8397
8398         while (done < count) {
8399                 size = count - done;
8400
8401                 if (size >= WRITE_BUFSIZE)
8402                         size = WRITE_BUFSIZE - 1;
8403
8404                 if (copy_from_user(kbuf, buffer + done, size)) {
8405                         ret = -EFAULT;
8406                         goto out;
8407                 }
8408                 kbuf[size] = '\0';
8409                 buf = kbuf;
8410                 do {
8411                         tmp = strchr(buf, '\n');
8412                         if (tmp) {
8413                                 *tmp = '\0';
8414                                 size = tmp - buf + 1;
8415                         } else {
8416                                 size = strlen(buf);
8417                                 if (done + size < count) {
8418                                         if (buf != kbuf)
8419                                                 break;
8420                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8421                                         pr_warn("Line length is too long: Should be less than %d\n",
8422                                                 WRITE_BUFSIZE - 2);
8423                                         ret = -EINVAL;
8424                                         goto out;
8425                                 }
8426                         }
8427                         done += size;
8428
8429                         /* Remove comments */
8430                         tmp = strchr(buf, '#');
8431
8432                         if (tmp)
8433                                 *tmp = '\0';
8434
8435                         ret = trace_run_command(buf, createfn);
8436                         if (ret)
8437                                 goto out;
8438                         buf += size;
8439
8440                 } while (done < count);
8441         }
8442         ret = done;
8443
8444 out:
8445         kfree(kbuf);
8446
8447         return ret;
8448 }
8449
8450 __init static int tracer_alloc_buffers(void)
8451 {
8452         int ring_buf_size;
8453         int ret = -ENOMEM;
8454
8455         /*
8456          * Make sure we don't accidently add more trace options
8457          * than we have bits for.
8458          */
8459         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8460
8461         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8462                 goto out;
8463
8464         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8465                 goto out_free_buffer_mask;
8466
8467         /* Only allocate trace_printk buffers if a trace_printk exists */
8468         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8469                 /* Must be called before global_trace.buffer is allocated */
8470                 trace_printk_init_buffers();
8471
8472         /* To save memory, keep the ring buffer size to its minimum */
8473         if (ring_buffer_expanded)
8474                 ring_buf_size = trace_buf_size;
8475         else
8476                 ring_buf_size = 1;
8477
8478         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8479         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8480
8481         raw_spin_lock_init(&global_trace.start_lock);
8482
8483         /*
8484          * The prepare callbacks allocates some memory for the ring buffer. We
8485          * don't free the buffer if the if the CPU goes down. If we were to free
8486          * the buffer, then the user would lose any trace that was in the
8487          * buffer. The memory will be removed once the "instance" is removed.
8488          */
8489         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8490                                       "trace/RB:preapre", trace_rb_cpu_prepare,
8491                                       NULL);
8492         if (ret < 0)
8493                 goto out_free_cpumask;
8494         /* Used for event triggers */
8495         ret = -ENOMEM;
8496         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8497         if (!temp_buffer)
8498                 goto out_rm_hp_state;
8499
8500         if (trace_create_savedcmd() < 0)
8501                 goto out_free_temp_buffer;
8502
8503         /* TODO: make the number of buffers hot pluggable with CPUS */
8504         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8505                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8506                 WARN_ON(1);
8507                 goto out_free_savedcmd;
8508         }
8509
8510         if (global_trace.buffer_disabled)
8511                 tracing_off();
8512
8513         if (trace_boot_clock) {
8514                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8515                 if (ret < 0)
8516                         pr_warn("Trace clock %s not defined, going back to default\n",
8517                                 trace_boot_clock);
8518         }
8519
8520         /*
8521          * register_tracer() might reference current_trace, so it
8522          * needs to be set before we register anything. This is
8523          * just a bootstrap of current_trace anyway.
8524          */
8525         global_trace.current_trace = &nop_trace;
8526
8527         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8528
8529         ftrace_init_global_array_ops(&global_trace);
8530
8531         init_trace_flags_index(&global_trace);
8532
8533         register_tracer(&nop_trace);
8534
8535         /* Function tracing may start here (via kernel command line) */
8536         init_function_trace();
8537
8538         /* All seems OK, enable tracing */
8539         tracing_disabled = 0;
8540
8541         atomic_notifier_chain_register(&panic_notifier_list,
8542                                        &trace_panic_notifier);
8543
8544         register_die_notifier(&trace_die_notifier);
8545
8546         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8547
8548         INIT_LIST_HEAD(&global_trace.systems);
8549         INIT_LIST_HEAD(&global_trace.events);
8550         INIT_LIST_HEAD(&global_trace.hist_vars);
8551         list_add(&global_trace.list, &ftrace_trace_arrays);
8552
8553         apply_trace_boot_options();
8554
8555         register_snapshot_cmd();
8556
8557         return 0;
8558
8559 out_free_savedcmd:
8560         free_saved_cmdlines_buffer(savedcmd);
8561 out_free_temp_buffer:
8562         ring_buffer_free(temp_buffer);
8563 out_rm_hp_state:
8564         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8565 out_free_cpumask:
8566         free_cpumask_var(global_trace.tracing_cpumask);
8567 out_free_buffer_mask:
8568         free_cpumask_var(tracing_buffer_mask);
8569 out:
8570         return ret;
8571 }
8572
8573 void __init early_trace_init(void)
8574 {
8575         if (tracepoint_printk) {
8576                 tracepoint_print_iter =
8577                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8578                 if (WARN_ON(!tracepoint_print_iter))
8579                         tracepoint_printk = 0;
8580                 else
8581                         static_key_enable(&tracepoint_printk_key.key);
8582         }
8583         tracer_alloc_buffers();
8584 }
8585
8586 void __init trace_init(void)
8587 {
8588         trace_event_init();
8589 }
8590
8591 __init static int clear_boot_tracer(void)
8592 {
8593         /*
8594          * The default tracer at boot buffer is an init section.
8595          * This function is called in lateinit. If we did not
8596          * find the boot tracer, then clear it out, to prevent
8597          * later registration from accessing the buffer that is
8598          * about to be freed.
8599          */
8600         if (!default_bootup_tracer)
8601                 return 0;
8602
8603         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8604                default_bootup_tracer);
8605         default_bootup_tracer = NULL;
8606
8607         return 0;
8608 }
8609
8610 fs_initcall(tracer_init_tracefs);
8611 late_initcall_sync(clear_boot_tracer);
8612
8613 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8614 __init static int tracing_set_default_clock(void)
8615 {
8616         /* sched_clock_stable() is determined in late_initcall */
8617         if (!trace_boot_clock && !sched_clock_stable()) {
8618                 printk(KERN_WARNING
8619                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8620                        "If you want to keep using the local clock, then add:\n"
8621                        "  \"trace_clock=local\"\n"
8622                        "on the kernel command line\n");
8623                 tracing_set_clock(&global_trace, "global");
8624         }
8625
8626         return 0;
8627 }
8628 late_initcall_sync(tracing_set_default_clock);
8629 #endif