[android-x86/kernel.git] / kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 and is set to zero when the initialization
93  * of the tracer is successful. That is the only place that clears
94  * it.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops
112  * Set 1 if you want to dump buffers of all CPUs
113  * Set 2 if you want to dump the buffer of the CPU that triggered oops
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * from "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
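/*
 * A layout sketch of the saved array described above (illustrative, not
 * part of the original source), for N maps saved in one go:
 *
 *   [0]      head  (head.length = N, head.mod = owning module, if any)
 *   [1..N]   map   (the saved trace_enum_map entries)
 *   [N+1]    tail  (tail.next -> next saved array, tail.end = NULL)
 */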
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a link list of pages that will store trace entries. The
264  * page descriptor of the pages in the memory is used to hold
265  * the link list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 __trace_event_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324         vfree(pid_list->pids);
325         kfree(pid_list);
326 }
327
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338         /*
339          * If pid_max changed after filtered_pids was created, we
340          * by default ignore all pids greater than the previous pid_max.
341          */
342         if (search_pid >= filtered_pids->pid_max)
343                 return false;
344
345         return test_bit(search_pid, filtered_pids->pids);
346 }
347
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360         /*
361          * Return false, because if filtered_pids does not exist,
362          * all pids are good to trace.
363          */
364         if (!filtered_pids)
365                 return false;
366
367         return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * When adding a task and @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383                                   struct task_struct *self,
384                                   struct task_struct *task)
385 {
386         if (!pid_list)
387                 return;
388
389         /* For forks, we only add if the forking task is listed */
390         if (self) {
391                 if (!trace_find_filtered_pid(pid_list, self->pid))
392                         return;
393         }
394
395         /* Sorry, but we don't support pid_max changing after setting */
396         if (task->pid >= pid_list->pid_max)
397                 return;
398
399         /* "self" is set for forks, and NULL for exits */
400         if (self)
401                 set_bit(task->pid, pid_list->pids);
402         else
403                 clear_bit(task->pid, pid_list->pids);
404 }
405
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420         unsigned long pid = (unsigned long)v;
421
422         (*pos)++;
423
424         /* pid already is +1 of the actual previous bit */
425         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426
427         /* Return pid + 1 to allow zero to be represented */
428         if (pid < pid_list->pid_max)
429                 return (void *)(pid + 1);
430
431         return NULL;
432 }
433
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447         unsigned long pid;
448         loff_t l = 0;
449
450         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451         if (pid >= pid_list->pid_max)
452                 return NULL;
453
454         /* Return pid + 1 so that zero can be the exit value */
455         for (pid++; pid && l < *pos;
456              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457                 ;
458         return (void *)pid;
459 }
460
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471         unsigned long pid = (unsigned long)v - 1;
472
473         seq_printf(m, "%lu\n", pid);
474         return 0;
475 }
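/*
 * A minimal sketch of how the helpers above are meant to back a seq_file
 * iterator (the wrapper names and the pid_list lookup are hypothetical,
 * not part of this file):
 *
 *   static void *p_start(struct seq_file *m, loff_t *pos)
 *   {
 *           return trace_pid_start(pid_list, pos);
 *   }
 *
 *   static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *   {
 *           return trace_pid_next(pid_list, v, pos);
 *   }
 *
 *   static const struct seq_operations example_pid_sops = {
 *           .start = p_start,
 *           .next  = p_next,
 *           .stop  = p_stop,
 *           .show  = trace_pid_show,
 *   };
 */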
476
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE            127
479
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481                     struct trace_pid_list **new_pid_list,
482                     const char __user *ubuf, size_t cnt)
483 {
484         struct trace_pid_list *pid_list;
485         struct trace_parser parser;
486         unsigned long val;
487         int nr_pids = 0;
488         ssize_t read = 0;
489         ssize_t ret = 0;
490         loff_t pos;
491         pid_t pid;
492
493         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494                 return -ENOMEM;
495
496         /*
497          * Always recreate a new array. The write is an all or nothing
498          * operation. Always create a new array when adding new pids by
499          * the user. If the operation fails, then the current list is
500          * not modified.
501          */
502         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503         if (!pid_list) {
504                 trace_parser_put(&parser);
505                 return -ENOMEM;
506         }
507
508         pid_list->pid_max = READ_ONCE(pid_max);
509
510         /* Only truncating will shrink pid_max */
511         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
512                 pid_list->pid_max = filtered_pids->pid_max;
513
514         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
515         if (!pid_list->pids) {
516                 trace_parser_put(&parser);
517                 kfree(pid_list);
518                 return -ENOMEM;
519         }
520
521         if (filtered_pids) {
522                 /* copy the current bits to the new max */
523                 for_each_set_bit(pid, filtered_pids->pids,
524                                  filtered_pids->pid_max) {
525                         set_bit(pid, pid_list->pids);
526                         nr_pids++;
527                 }
528         }
529
530         while (cnt > 0) {
531
532                 pos = 0;
533
534                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
535                 if (ret < 0 || !trace_parser_loaded(&parser))
536                         break;
537
538                 read += ret;
539                 ubuf += ret;
540                 cnt -= ret;
541
542                 parser.buffer[parser.idx] = 0;
543
544                 ret = -EINVAL;
545                 if (kstrtoul(parser.buffer, 0, &val))
546                         break;
547                 if (val >= pid_list->pid_max)
548                         break;
549
550                 pid = (pid_t)val;
551
552                 set_bit(pid, pid_list->pids);
553                 nr_pids++;
554
555                 trace_parser_clear(&parser);
556                 ret = 0;
557         }
558         trace_parser_put(&parser);
559
560         if (ret < 0) {
561                 trace_free_pid_list(pid_list);
562                 return ret;
563         }
564
565         if (!nr_pids) {
566                 /* Cleared the list of pids */
567                 trace_free_pid_list(pid_list);
568                 read = ret;
569                 pid_list = NULL;
570         }
571
572         *new_pid_list = pid_list;
573
574         return read;
575 }
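/*
 * Usage sketch for the write path above (illustrative; set_event_pid is
 * assumed to be one tracefs file that feeds user input into
 * trace_pid_write()):
 *
 *   echo "123 456" > set_event_pid
 *
 * A new pid_list is always built. If parsing fails, the current list is
 * left untouched; if no pids end up in the new list, *new_pid_list is set
 * to NULL and the filter is dropped.
 */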
576
577 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
578 {
579         u64 ts;
580
581         /* Early boot up does not have a buffer yet */
582         if (!buf->buffer)
583                 return trace_clock_local();
584
585         ts = ring_buffer_time_stamp(buf->buffer, cpu);
586         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
587
588         return ts;
589 }
590
591 cycle_t ftrace_now(int cpu)
592 {
593         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
594 }
595
596 /**
597  * tracing_is_enabled - Show if global_trace has been disabled
598  *
599  * Shows if the global trace has been enabled or not. It uses the
600  * mirror flag "buffer_disabled" so that it can be used in fast paths,
601  * such as by the irqsoff tracer. But it may be inaccurate due to races.
602  * If you need to know the accurate state, use tracing_is_on(), which is
603  * a little slower but accurate.
604  */
605 int tracing_is_enabled(void)
606 {
607         /*
608          * For quick access (irqsoff uses this in fast path), just
609          * return the mirror variable of the state of the ring buffer.
610          * It's a little racy, but we don't really care.
611          */
612         smp_rmb();
613         return !global_trace.buffer_disabled;
614 }
615
616 /*
617  * trace_buf_size is the size in bytes that is allocated
618  * for a buffer. Note, the number of bytes is always rounded
619  * to page size.
620  *
621  * This number is purposely set to a low number of 16384.
622  * If the dump on oops happens, it is much appreciated not to have
623  * to wait for all that output. Anyway, this is configurable at both
624  * boot time and run time.
625  */
626 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
627
628 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
629
630 /* trace_types holds a link list of available tracers. */
631 static struct tracer            *trace_types __read_mostly;
632
633 /*
634  * trace_types_lock is used to protect the trace_types list.
635  */
636 DEFINE_MUTEX(trace_types_lock);
637
638 /*
639  * serialize the access of the ring buffer
640  *
641  * The ring buffer serializes readers, but that is only low level protection.
642  * The validity of the events (which are returned by ring_buffer_peek() etc.)
643  * is not protected by the ring buffer.
644  *
645  * The content of events may become garbage if we allow other processes to
646  * consume these events concurrently:
647  *   A) the page of the consumed events may become a normal page
648  *      (not a reader page) in the ring buffer, and this page will be
649  *      rewritten by the events producer.
650  *   B) the page of the consumed events may become a page for splice_read,
651  *      and this page will be returned to the system.
652  *
653  * These primitives allow multiple processes to access different cpu ring
654  * buffers concurrently.
655  *
656  * These primitives don't distinguish read-only and read-consume access.
657  * Multiple read-only accesses are also serialized.
658  */
659
660 #ifdef CONFIG_SMP
661 static DECLARE_RWSEM(all_cpu_access_lock);
662 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
663
664 static inline void trace_access_lock(int cpu)
665 {
666         if (cpu == RING_BUFFER_ALL_CPUS) {
667                 /* gain it for accessing the whole ring buffer. */
668                 down_write(&all_cpu_access_lock);
669         } else {
670                 /* gain it for accessing a cpu ring buffer. */
671
672                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
673                 down_read(&all_cpu_access_lock);
674
675                 /* Secondly block other access to this @cpu ring buffer. */
676                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
677         }
678 }
679
680 static inline void trace_access_unlock(int cpu)
681 {
682         if (cpu == RING_BUFFER_ALL_CPUS) {
683                 up_write(&all_cpu_access_lock);
684         } else {
685                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
686                 up_read(&all_cpu_access_lock);
687         }
688 }
689
690 static inline void trace_access_lock_init(void)
691 {
692         int cpu;
693
694         for_each_possible_cpu(cpu)
695                 mutex_init(&per_cpu(cpu_access_lock, cpu));
696 }
697
698 #else
699
700 static DEFINE_MUTEX(access_lock);
701
702 static inline void trace_access_lock(int cpu)
703 {
704         (void)cpu;
705         mutex_lock(&access_lock);
706 }
707
708 static inline void trace_access_unlock(int cpu)
709 {
710         (void)cpu;
711         mutex_unlock(&access_lock);
712 }
713
714 static inline void trace_access_lock_init(void)
715 {
716 }
717
718 #endif
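/*
 * A minimal reader-side sketch of the scheme above (illustrative): a
 * per-cpu reader holds the rwsem shared plus that cpu's mutex, while a
 * RING_BUFFER_ALL_CPUS reader holds the rwsem exclusively.
 *
 *   trace_access_lock(cpu);
 *   ... consume events of that cpu (or of all cpus) ...
 *   trace_access_unlock(cpu);
 */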
719
720 #ifdef CONFIG_STACKTRACE
721 static void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                  unsigned long flags,
723                                  int skip, int pc, struct pt_regs *regs);
724 static inline void ftrace_trace_stack(struct trace_array *tr,
725                                       struct ring_buffer *buffer,
726                                       unsigned long flags,
727                                       int skip, int pc, struct pt_regs *regs);
728
729 #else
730 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
731                                         unsigned long flags,
732                                         int skip, int pc, struct pt_regs *regs)
733 {
734 }
735 static inline void ftrace_trace_stack(struct trace_array *tr,
736                                       struct ring_buffer *buffer,
737                                       unsigned long flags,
738                                       int skip, int pc, struct pt_regs *regs)
739 {
740 }
741
742 #endif
743
744 static void tracer_tracing_on(struct trace_array *tr)
745 {
746         if (tr->trace_buffer.buffer)
747                 ring_buffer_record_on(tr->trace_buffer.buffer);
748         /*
749          * This flag is looked at when buffers haven't been allocated
750          * yet, or by some tracers (like irqsoff), that just want to
751          * know if the ring buffer has been disabled, but it can handle
752          * races where it gets disabled while we still do a record.
753          * As the check is in the fast path of the tracers, it is more
754          * important to be fast than accurate.
755          */
756         tr->buffer_disabled = 0;
757         /* Make the flag seen by readers */
758         smp_wmb();
759 }
760
761 /**
762  * tracing_on - enable tracing buffers
763  *
764  * This function enables tracing buffers that may have been
765  * disabled with tracing_off.
766  */
767 void tracing_on(void)
768 {
769         tracer_tracing_on(&global_trace);
770 }
771 EXPORT_SYMBOL_GPL(tracing_on);
772
773 /**
774  * __trace_puts - write a constant string into the trace buffer.
775  * @ip:    The address of the caller
776  * @str:   The constant string to write
777  * @size:  The size of the string.
778  */
779 int __trace_puts(unsigned long ip, const char *str, int size)
780 {
781         struct ring_buffer_event *event;
782         struct ring_buffer *buffer;
783         struct print_entry *entry;
784         unsigned long irq_flags;
785         int alloc;
786         int pc;
787
788         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
789                 return 0;
790
791         pc = preempt_count();
792
793         if (unlikely(tracing_selftest_running || tracing_disabled))
794                 return 0;
795
796         alloc = sizeof(*entry) + size + 2; /* possible \n added */
797
798         local_save_flags(irq_flags);
799         buffer = global_trace.trace_buffer.buffer;
800         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
801                                           irq_flags, pc);
802         if (!event)
803                 return 0;
804
805         entry = ring_buffer_event_data(event);
806         entry->ip = ip;
807
808         memcpy(&entry->buf, str, size);
809
810         /* Add a newline if necessary */
811         if (entry->buf[size - 1] != '\n') {
812                 entry->buf[size] = '\n';
813                 entry->buf[size + 1] = '\0';
814         } else
815                 entry->buf[size] = '\0';
816
817         __buffer_unlock_commit(buffer, event);
818         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
819
820         return size;
821 }
822 EXPORT_SYMBOL_GPL(__trace_puts);
823
824 /**
825  * __trace_bputs - write the pointer to a constant string into trace buffer
826  * @ip:    The address of the caller
827  * @str:   The constant string to write to the buffer to
828  */
829 int __trace_bputs(unsigned long ip, const char *str)
830 {
831         struct ring_buffer_event *event;
832         struct ring_buffer *buffer;
833         struct bputs_entry *entry;
834         unsigned long irq_flags;
835         int size = sizeof(struct bputs_entry);
836         int pc;
837
838         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
839                 return 0;
840
841         pc = preempt_count();
842
843         if (unlikely(tracing_selftest_running || tracing_disabled))
844                 return 0;
845
846         local_save_flags(irq_flags);
847         buffer = global_trace.trace_buffer.buffer;
848         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
849                                           irq_flags, pc);
850         if (!event)
851                 return 0;
852
853         entry = ring_buffer_event_data(event);
854         entry->ip                       = ip;
855         entry->str                      = str;
856
857         __buffer_unlock_commit(buffer, event);
858         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
859
860         return 1;
861 }
862 EXPORT_SYMBOL_GPL(__trace_bputs);
863
864 #ifdef CONFIG_TRACER_SNAPSHOT
865 /**
866  * tracing_snapshot - take a snapshot of the current buffer.
867  *
868  * This causes a swap between the snapshot buffer and the current live
869  * tracing buffer. You can use this to take snapshots of the live
870  * trace when some condition is triggered, but continue to trace.
871  *
872  * Note, make sure to allocate the snapshot with either
873  * tracing_snapshot_alloc(), or by doing it manually
874  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
875  *
876  * If the snapshot buffer is not allocated, it will stop tracing.
877  * Basically making a permanent snapshot.
878  */
879 void tracing_snapshot(void)
880 {
881         struct trace_array *tr = &global_trace;
882         struct tracer *tracer = tr->current_trace;
883         unsigned long flags;
884
885         if (in_nmi()) {
886                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
887                 internal_trace_puts("*** snapshot is being ignored        ***\n");
888                 return;
889         }
890
891         if (!tr->allocated_snapshot) {
892                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
893                 internal_trace_puts("*** stopping trace here!   ***\n");
894                 tracing_off();
895                 return;
896         }
897
898         /* Note, snapshot can not be used when the tracer uses it */
899         if (tracer->use_max_tr) {
900                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
901                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
902                 return;
903         }
904
905         local_irq_save(flags);
906         update_max_tr(tr, current, smp_processor_id());
907         local_irq_restore(flags);
908 }
909 EXPORT_SYMBOL_GPL(tracing_snapshot);
910
911 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
912                                         struct trace_buffer *size_buf, int cpu_id);
913 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
914
915 static int alloc_snapshot(struct trace_array *tr)
916 {
917         int ret;
918
919         if (!tr->allocated_snapshot) {
920
921                 /* allocate spare buffer */
922                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
923                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
924                 if (ret < 0)
925                         return ret;
926
927                 tr->allocated_snapshot = true;
928         }
929
930         return 0;
931 }
932
933 static void free_snapshot(struct trace_array *tr)
934 {
935         /*
936          * We don't free the ring buffer; instead, we resize it because
937          * the max_tr ring buffer has some state (e.g. ring->clock) and
938          * we want to preserve it.
939          */
940         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
941         set_buffer_entries(&tr->max_buffer, 1);
942         tracing_reset_online_cpus(&tr->max_buffer);
943         tr->allocated_snapshot = false;
944 }
945
946 /**
947  * tracing_alloc_snapshot - allocate snapshot buffer.
948  *
949  * This only allocates the snapshot buffer if it isn't already
950  * allocated - it doesn't also take a snapshot.
951  *
952  * This is meant to be used in cases where the snapshot buffer needs
953  * to be set up for events that can't sleep but need to be able to
954  * trigger a snapshot.
955  */
956 int tracing_alloc_snapshot(void)
957 {
958         struct trace_array *tr = &global_trace;
959         int ret;
960
961         ret = alloc_snapshot(tr);
962         WARN_ON(ret < 0);
963
964         return ret;
965 }
966 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
967
968 /**
969  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
970  *
971  * This is similar to tracing_snapshot(), but it will allocate the
972  * snapshot buffer if it isn't already allocated. Use this only
973  * where it is safe to sleep, as the allocation may sleep.
974  *
975  * This causes a swap between the snapshot buffer and the current live
976  * tracing buffer. You can use this to take snapshots of the live
977  * trace when some condition is triggered, but continue to trace.
978  */
979 void tracing_snapshot_alloc(void)
980 {
981         int ret;
982
983         ret = tracing_alloc_snapshot();
984         if (ret < 0)
985                 return;
986
987         tracing_snapshot();
988 }
989 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
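/*
 * A minimal usage sketch of the snapshot API above (illustrative only):
 * allocate once from a context that may sleep, then trigger snapshots
 * from places that must not sleep.
 *
 *   if (tracing_alloc_snapshot() < 0)       (sleepable setup path)
 *           return;
 *   ...
 *   tracing_snapshot();                     (e.g. when a condition triggers)
 *
 * tracing_snapshot_alloc() combines both steps where sleeping is allowed.
 */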
990 #else
991 void tracing_snapshot(void)
992 {
993         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
994 }
995 EXPORT_SYMBOL_GPL(tracing_snapshot);
996 int tracing_alloc_snapshot(void)
997 {
998         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
999         return -ENODEV;
1000 }
1001 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1002 void tracing_snapshot_alloc(void)
1003 {
1004         /* Give warning */
1005         tracing_snapshot();
1006 }
1007 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1008 #endif /* CONFIG_TRACER_SNAPSHOT */
1009
1010 static void tracer_tracing_off(struct trace_array *tr)
1011 {
1012         if (tr->trace_buffer.buffer)
1013                 ring_buffer_record_off(tr->trace_buffer.buffer);
1014         /*
1015          * This flag is looked at when buffers haven't been allocated
1016          * yet, or by some tracers (like irqsoff), that just want to
1017          * know if the ring buffer has been disabled, but it can handle
1018          * races where it gets disabled while we still do a record.
1019          * As the check is in the fast path of the tracers, it is more
1020          * important to be fast than accurate.
1021          */
1022         tr->buffer_disabled = 1;
1023         /* Make the flag seen by readers */
1024         smp_wmb();
1025 }
1026
1027 /**
1028  * tracing_off - turn off tracing buffers
1029  *
1030  * This function stops the tracing buffers from recording data.
1031  * It does not disable any overhead the tracers themselves may
1032  * be causing. This function simply causes all recording to
1033  * the ring buffers to fail.
1034  */
1035 void tracing_off(void)
1036 {
1037         tracer_tracing_off(&global_trace);
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_off);
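/*
 * A minimal usage sketch (illustrative; "suspicious_condition" is a
 * placeholder): tracing_off() is typically called right after a problem
 * is detected, so the ring buffer keeps the events leading up to it.
 *
 *   if (suspicious_condition)
 *           tracing_off();
 *
 * Recording can be re-enabled later with tracing_on().
 */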
1040
1041 void disable_trace_on_warning(void)
1042 {
1043         if (__disable_trace_on_warning)
1044                 tracing_off();
1045 }
1046
1047 /**
1048  * tracer_tracing_is_on - show the real state of the ring buffer
1049  * @tr : the trace array whose ring buffer state is checked
1050  *
1051  * Shows the real state of the ring buffer: whether it is enabled or not.
1052  */
1053 int tracer_tracing_is_on(struct trace_array *tr)
1054 {
1055         if (tr->trace_buffer.buffer)
1056                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1057         return !tr->buffer_disabled;
1058 }
1059
1060 /**
1061  * tracing_is_on - show state of ring buffers enabled
1062  */
1063 int tracing_is_on(void)
1064 {
1065         return tracer_tracing_is_on(&global_trace);
1066 }
1067 EXPORT_SYMBOL_GPL(tracing_is_on);
1068
1069 static int __init set_buf_size(char *str)
1070 {
1071         unsigned long buf_size;
1072
1073         if (!str)
1074                 return 0;
1075         buf_size = memparse(str, &str);
1076         /* nr_entries can not be zero */
1077         if (buf_size == 0)
1078                 return 0;
1079         trace_buf_size = buf_size;
1080         return 1;
1081 }
1082 __setup("trace_buf_size=", set_buf_size);
1083
1084 static int __init set_tracing_thresh(char *str)
1085 {
1086         unsigned long threshold;
1087         int ret;
1088
1089         if (!str)
1090                 return 0;
1091         ret = kstrtoul(str, 0, &threshold);
1092         if (ret < 0)
1093                 return 0;
1094         tracing_thresh = threshold * 1000;
1095         return 1;
1096 }
1097 __setup("tracing_thresh=", set_tracing_thresh);
1098
1099 unsigned long nsecs_to_usecs(unsigned long nsecs)
1100 {
1101         return nsecs / 1000;
1102 }
1103
1104 /*
1105  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1106  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1107  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1108  * of strings in the order that the enums were defined.
1109  */
1110 #undef C
1111 #define C(a, b) b
1112
1113 /* These must match the bit positions in trace_iterator_flags */
1114 static const char *trace_options[] = {
1115         TRACE_FLAGS
1116         NULL
1117 };
1118
1119 static struct {
1120         u64 (*func)(void);
1121         const char *name;
1122         int in_ns;              /* is this clock in nanoseconds? */
1123 } trace_clocks[] = {
1124         { trace_clock_local,            "local",        1 },
1125         { trace_clock_global,           "global",       1 },
1126         { trace_clock_counter,          "counter",      0 },
1127         { trace_clock_jiffies,          "uptime",       0 },
1128         { trace_clock,                  "perf",         1 },
1129         { ktime_get_mono_fast_ns,       "mono",         1 },
1130         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1131         ARCH_TRACE_CLOCKS
1132 };
1133
1134 /*
1135  * trace_parser_get_init - gets the buffer for trace parser
1136  */
1137 int trace_parser_get_init(struct trace_parser *parser, int size)
1138 {
1139         memset(parser, 0, sizeof(*parser));
1140
1141         parser->buffer = kmalloc(size, GFP_KERNEL);
1142         if (!parser->buffer)
1143                 return 1;
1144
1145         parser->size = size;
1146         return 0;
1147 }
1148
1149 /*
1150  * trace_parser_put - frees the buffer for trace parser
1151  */
1152 void trace_parser_put(struct trace_parser *parser)
1153 {
1154         kfree(parser->buffer);
1155 }
1156
1157 /*
1158  * trace_get_user - reads the user input string separated by space
1159  * (matched by isspace(ch))
1160  *
1161  * For each string found the 'struct trace_parser' is updated,
1162  * and the function returns.
1163  *
1164  * Returns number of bytes read.
1165  *
1166  * See kernel/trace/trace.h for 'struct trace_parser' details.
1167  */
1168 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1169         size_t cnt, loff_t *ppos)
1170 {
1171         char ch;
1172         size_t read = 0;
1173         ssize_t ret;
1174
1175         if (!*ppos)
1176                 trace_parser_clear(parser);
1177
1178         ret = get_user(ch, ubuf++);
1179         if (ret)
1180                 goto out;
1181
1182         read++;
1183         cnt--;
1184
1185         /*
1186          * If the parser is not finished with the last write,
1187          * continue reading the user input without skipping spaces.
1188          */
1189         if (!parser->cont) {
1190                 /* skip white space */
1191                 while (cnt && isspace(ch)) {
1192                         ret = get_user(ch, ubuf++);
1193                         if (ret)
1194                                 goto out;
1195                         read++;
1196                         cnt--;
1197                 }
1198
1199                 /* only spaces were written */
1200                 if (isspace(ch)) {
1201                         *ppos += read;
1202                         ret = read;
1203                         goto out;
1204                 }
1205
1206                 parser->idx = 0;
1207         }
1208
1209         /* read the non-space input */
1210         while (cnt && !isspace(ch)) {
1211                 if (parser->idx < parser->size - 1)
1212                         parser->buffer[parser->idx++] = ch;
1213                 else {
1214                         ret = -EINVAL;
1215                         goto out;
1216                 }
1217                 ret = get_user(ch, ubuf++);
1218                 if (ret)
1219                         goto out;
1220                 read++;
1221                 cnt--;
1222         }
1223
1224         /* We either got finished input or we have to wait for another call. */
1225         if (isspace(ch)) {
1226                 parser->buffer[parser->idx] = 0;
1227                 parser->cont = false;
1228         } else if (parser->idx < parser->size - 1) {
1229                 parser->cont = true;
1230                 parser->buffer[parser->idx++] = ch;
1231         } else {
1232                 ret = -EINVAL;
1233                 goto out;
1234         }
1235
1236         *ppos += read;
1237         ret = read;
1238
1239 out:
1240         return ret;
1241 }
1242
1243 /* TODO add a seq_buf_to_buffer() */
1244 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1245 {
1246         int len;
1247
1248         if (trace_seq_used(s) <= s->seq.readpos)
1249                 return -EBUSY;
1250
1251         len = trace_seq_used(s) - s->seq.readpos;
1252         if (cnt > len)
1253                 cnt = len;
1254         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1255
1256         s->seq.readpos += cnt;
1257         return cnt;
1258 }
1259
1260 unsigned long __read_mostly     tracing_thresh;
1261
1262 #ifdef CONFIG_TRACER_MAX_TRACE
1263 /*
1264  * Copy the new maximum trace into the separate maximum-trace
1265  * structure. (this way the maximum trace is permanently saved,
1266  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1267  */
1268 static void
1269 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1270 {
1271         struct trace_buffer *trace_buf = &tr->trace_buffer;
1272         struct trace_buffer *max_buf = &tr->max_buffer;
1273         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1274         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1275
1276         max_buf->cpu = cpu;
1277         max_buf->time_start = data->preempt_timestamp;
1278
1279         max_data->saved_latency = tr->max_latency;
1280         max_data->critical_start = data->critical_start;
1281         max_data->critical_end = data->critical_end;
1282
1283         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1284         max_data->pid = tsk->pid;
1285         /*
1286          * If tsk == current, then use current_uid(), as that does not use
1287          * RCU. The irq tracer can be called out of RCU scope.
1288          */
1289         if (tsk == current)
1290                 max_data->uid = current_uid();
1291         else
1292                 max_data->uid = task_uid(tsk);
1293
1294         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1295         max_data->policy = tsk->policy;
1296         max_data->rt_priority = tsk->rt_priority;
1297
1298         /* record this task's comm */
1299         tracing_record_cmdline(tsk);
1300 }
1301
1302 /**
1303  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1304  * @tr: tracer
1305  * @tsk: the task with the latency
1306  * @cpu: The cpu that initiated the trace.
1307  *
1308  * Flip the buffers between the @tr and the max_tr and record information
1309  * about which task was the cause of this latency.
1310  */
1311 void
1312 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1313 {
1314         struct ring_buffer *buf;
1315
1316         if (tr->stop_count)
1317                 return;
1318
1319         WARN_ON_ONCE(!irqs_disabled());
1320
1321         if (!tr->allocated_snapshot) {
1322                 /* Only the nop tracer should hit this when disabling */
1323                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1324                 return;
1325         }
1326
1327         arch_spin_lock(&tr->max_lock);
1328
1329         /* Inherit the recordable setting from trace_buffer */
1330         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1331                 ring_buffer_record_on(tr->max_buffer.buffer);
1332         else
1333                 ring_buffer_record_off(tr->max_buffer.buffer);
1334
1335         buf = tr->trace_buffer.buffer;
1336         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1337         tr->max_buffer.buffer = buf;
1338
1339         __update_max_tr(tr, tsk, cpu);
1340         arch_spin_unlock(&tr->max_lock);
1341 }
1342
1343 /**
1344  * update_max_tr_single - only copy one trace over, and reset the rest
1345  * @tr: tracer
1346  * @tsk: task with the latency
1347  * @cpu: the cpu of the buffer to copy.
1348  *
1349  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1350  */
1351 void
1352 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         int ret;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360         if (!tr->allocated_snapshot) {
1361                 /* Only the nop tracer should hit this when disabling */
1362                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1363                 return;
1364         }
1365
1366         arch_spin_lock(&tr->max_lock);
1367
1368         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1369
1370         if (ret == -EBUSY) {
1371                 /*
1372                  * We failed to swap the buffer due to a commit taking
1373                  * place on this CPU. We fail to record, but we reset
1374                  * the max trace buffer (no one writes directly to it)
1375                  * and flag that it failed.
1376                  */
1377                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1378                         "Failed to swap buffers due to commit in progress\n");
1379         }
1380
1381         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1382
1383         __update_max_tr(tr, tsk, cpu);
1384         arch_spin_unlock(&tr->max_lock);
1385 }
1386 #endif /* CONFIG_TRACER_MAX_TRACE */
1387
1388 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1389 {
1390         /* Iterators are static, they should be filled or empty */
1391         if (trace_buffer_iter(iter, iter->cpu_file))
1392                 return 0;
1393
1394         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1395                                 full);
1396 }
1397
1398 #ifdef CONFIG_FTRACE_STARTUP_TEST
1399 static int run_tracer_selftest(struct tracer *type)
1400 {
1401         struct trace_array *tr = &global_trace;
1402         struct tracer *saved_tracer = tr->current_trace;
1403         int ret;
1404
1405         if (!type->selftest || tracing_selftest_disabled)
1406                 return 0;
1407
1408         /*
1409          * Run a selftest on this tracer.
1410          * Here we reset the trace buffer, and set the current
1411          * tracer to be this tracer. The tracer can then run some
1412          * internal tracing to verify that everything is in order.
1413          * If we fail, we do not register this tracer.
1414          */
1415         tracing_reset_online_cpus(&tr->trace_buffer);
1416
1417         tr->current_trace = type;
1418
1419 #ifdef CONFIG_TRACER_MAX_TRACE
1420         if (type->use_max_tr) {
1421                 /* If we expanded the buffers, make sure the max is expanded too */
1422                 if (ring_buffer_expanded)
1423                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1424                                            RING_BUFFER_ALL_CPUS);
1425                 tr->allocated_snapshot = true;
1426         }
1427 #endif
1428
1429         /* the test is responsible for initializing and enabling */
1430         pr_info("Testing tracer %s: ", type->name);
1431         ret = type->selftest(type, tr);
1432         /* the test is responsible for resetting too */
1433         tr->current_trace = saved_tracer;
1434         if (ret) {
1435                 printk(KERN_CONT "FAILED!\n");
1436                 /* Add the warning after printing 'FAILED' */
1437                 WARN_ON(1);
1438                 return -1;
1439         }
1440         /* Only reset on passing, to avoid touching corrupted buffers */
1441         tracing_reset_online_cpus(&tr->trace_buffer);
1442
1443 #ifdef CONFIG_TRACER_MAX_TRACE
1444         if (type->use_max_tr) {
1445                 tr->allocated_snapshot = false;
1446
1447                 /* Shrink the max buffer again */
1448                 if (ring_buffer_expanded)
1449                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1450                                            RING_BUFFER_ALL_CPUS);
1451         }
1452 #endif
1453
1454         printk(KERN_CONT "PASSED\n");
1455         return 0;
1456 }
1457 #else
1458 static inline int run_tracer_selftest(struct tracer *type)
1459 {
1460         return 0;
1461 }
1462 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1463
1464 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1465
1466 static void __init apply_trace_boot_options(void);
1467
1468 /**
1469  * register_tracer - register a tracer with the ftrace system.
1470  * @type: the plugin for the tracer
1471  *
1472  * Register a new plugin tracer.
1473  */
1474 int __init register_tracer(struct tracer *type)
1475 {
1476         struct tracer *t;
1477         int ret = 0;
1478
1479         if (!type->name) {
1480                 pr_info("Tracer must have a name\n");
1481                 return -1;
1482         }
1483
1484         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1485                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1486                 return -1;
1487         }
1488
1489         mutex_lock(&trace_types_lock);
1490
1491         tracing_selftest_running = true;
1492
1493         for (t = trace_types; t; t = t->next) {
1494                 if (strcmp(type->name, t->name) == 0) {
1495                         /* already found */
1496                         pr_info("Tracer %s already registered\n",
1497                                 type->name);
1498                         ret = -1;
1499                         goto out;
1500                 }
1501         }
1502
1503         if (!type->set_flag)
1504                 type->set_flag = &dummy_set_flag;
1505         if (!type->flags) {
1506                 /* allocate a dummy tracer_flags */
1507                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1508                 if (!type->flags) {
1509                         ret = -ENOMEM;
1510                         goto out;
1511                 }
1512                 type->flags->val = 0;
1513                 type->flags->opts = dummy_tracer_opt;
1514         } else
1515                 if (!type->flags->opts)
1516                         type->flags->opts = dummy_tracer_opt;
1517
1518         /* store the tracer for __set_tracer_option */
1519         type->flags->trace = type;
1520
1521         ret = run_tracer_selftest(type);
1522         if (ret < 0)
1523                 goto out;
1524
1525         type->next = trace_types;
1526         trace_types = type;
1527         add_tracer_options(&global_trace, type);
1528
1529  out:
1530         tracing_selftest_running = false;
1531         mutex_unlock(&trace_types_lock);
1532
1533         if (ret || !default_bootup_tracer)
1534                 goto out_unlock;
1535
1536         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1537                 goto out_unlock;
1538
1539         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1540         /* Do we want this tracer to start on bootup? */
1541         tracing_set_tracer(&global_trace, type->name);
1542         default_bootup_tracer = NULL;
1543
1544         apply_trace_boot_options();
1545
1546         /* disable other selftests, since this will break it. */
1547         tracing_selftest_disabled = true;
1548 #ifdef CONFIG_FTRACE_STARTUP_TEST
1549         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1550                type->name);
1551 #endif
1552
1553  out_unlock:
1554         return ret;
1555 }
1556
1557 void tracing_reset(struct trace_buffer *buf, int cpu)
1558 {
1559         struct ring_buffer *buffer = buf->buffer;
1560
1561         if (!buffer)
1562                 return;
1563
1564         ring_buffer_record_disable(buffer);
1565
1566         /* Make sure all commits have finished */
1567         synchronize_sched();
1568         ring_buffer_reset_cpu(buffer, cpu);
1569
1570         ring_buffer_record_enable(buffer);
1571 }
1572
1573 void tracing_reset_online_cpus(struct trace_buffer *buf)
1574 {
1575         struct ring_buffer *buffer = buf->buffer;
1576         int cpu;
1577
1578         if (!buffer)
1579                 return;
1580
1581         ring_buffer_record_disable(buffer);
1582
1583         /* Make sure all commits have finished */
1584         synchronize_sched();
1585
1586         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1587
1588         for_each_online_cpu(cpu)
1589                 ring_buffer_reset_cpu(buffer, cpu);
1590
1591         ring_buffer_record_enable(buffer);
1592 }
1593
1594 /* Must have trace_types_lock held */
1595 void tracing_reset_all_online_cpus(void)
1596 {
1597         struct trace_array *tr;
1598
1599         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1600                 tracing_reset_online_cpus(&tr->trace_buffer);
1601 #ifdef CONFIG_TRACER_MAX_TRACE
1602                 tracing_reset_online_cpus(&tr->max_buffer);
1603 #endif
1604         }
1605 }
1606
1607 #define SAVED_CMDLINES_DEFAULT 128
1608 #define NO_CMDLINE_MAP UINT_MAX
1609 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1610 struct saved_cmdlines_buffer {
1611         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1612         unsigned *map_cmdline_to_pid;
1613         unsigned cmdline_num;
1614         int cmdline_idx;
1615         char *saved_cmdlines;
1616 };
1617 static struct saved_cmdlines_buffer *savedcmd;
1618
1619 /* temporarily disable recording */
1620 static atomic_t trace_record_cmdline_disabled __read_mostly;
1621
1622 static inline char *get_saved_cmdlines(int idx)
1623 {
1624         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1625 }
1626
1627 static inline void set_cmdline(int idx, const char *cmdline)
1628 {
1629         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1630 }
1631
1632 static int allocate_cmdlines_buffer(unsigned int val,
1633                                     struct saved_cmdlines_buffer *s)
1634 {
1635         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1636                                         GFP_KERNEL);
1637         if (!s->map_cmdline_to_pid)
1638                 return -ENOMEM;
1639
1640         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1641         if (!s->saved_cmdlines) {
1642                 kfree(s->map_cmdline_to_pid);
1643                 return -ENOMEM;
1644         }
1645
1646         s->cmdline_idx = 0;
1647         s->cmdline_num = val;
1648         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1649                sizeof(s->map_pid_to_cmdline));
1650         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1651                val * sizeof(*s->map_cmdline_to_pid));
1652
1653         return 0;
1654 }
1655
1656 static int trace_create_savedcmd(void)
1657 {
1658         int ret;
1659
1660         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1661         if (!savedcmd)
1662                 return -ENOMEM;
1663
1664         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1665         if (ret < 0) {
1666                 kfree(savedcmd);
1667                 savedcmd = NULL;
1668                 return -ENOMEM;
1669         }
1670
1671         return 0;
1672 }
1673
1674 int is_tracing_stopped(void)
1675 {
1676         return global_trace.stop_count;
1677 }
1678
1679 /**
1680  * tracing_start - quick start of the tracer
1681  *
1682  * If tracing is enabled but was stopped by tracing_stop,
1683  * this will start the tracer back up.
1684  */
1685 void tracing_start(void)
1686 {
1687         struct ring_buffer *buffer;
1688         unsigned long flags;
1689
1690         if (tracing_disabled)
1691                 return;
1692
1693         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1694         if (--global_trace.stop_count) {
1695                 if (global_trace.stop_count < 0) {
1696                         /* Someone screwed up their debugging */
1697                         WARN_ON_ONCE(1);
1698                         global_trace.stop_count = 0;
1699                 }
1700                 goto out;
1701         }
1702
1703         /* Prevent the buffers from switching */
1704         arch_spin_lock(&global_trace.max_lock);
1705
1706         buffer = global_trace.trace_buffer.buffer;
1707         if (buffer)
1708                 ring_buffer_record_enable(buffer);
1709
1710 #ifdef CONFIG_TRACER_MAX_TRACE
1711         buffer = global_trace.max_buffer.buffer;
1712         if (buffer)
1713                 ring_buffer_record_enable(buffer);
1714 #endif
1715
1716         arch_spin_unlock(&global_trace.max_lock);
1717
1718  out:
1719         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1720 }
1721
1722 static void tracing_start_tr(struct trace_array *tr)
1723 {
1724         struct ring_buffer *buffer;
1725         unsigned long flags;
1726
1727         if (tracing_disabled)
1728                 return;
1729
1730         /* If global, we need to also start the max tracer */
1731         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1732                 return tracing_start();
1733
1734         raw_spin_lock_irqsave(&tr->start_lock, flags);
1735
1736         if (--tr->stop_count) {
1737                 if (tr->stop_count < 0) {
1738                         /* Someone screwed up their debugging */
1739                         WARN_ON_ONCE(1);
1740                         tr->stop_count = 0;
1741                 }
1742                 goto out;
1743         }
1744
1745         buffer = tr->trace_buffer.buffer;
1746         if (buffer)
1747                 ring_buffer_record_enable(buffer);
1748
1749  out:
1750         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1751 }
1752
1753 /**
1754  * tracing_stop - quick stop of the tracer
1755  *
1756  * Lightweight way to stop tracing. Use in conjunction with
1757  * tracing_start.
1758  */
1759 void tracing_stop(void)
1760 {
1761         struct ring_buffer *buffer;
1762         unsigned long flags;
1763
1764         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1765         if (global_trace.stop_count++)
1766                 goto out;
1767
1768         /* Prevent the buffers from switching */
1769         arch_spin_lock(&global_trace.max_lock);
1770
1771         buffer = global_trace.trace_buffer.buffer;
1772         if (buffer)
1773                 ring_buffer_record_disable(buffer);
1774
1775 #ifdef CONFIG_TRACER_MAX_TRACE
1776         buffer = global_trace.max_buffer.buffer;
1777         if (buffer)
1778                 ring_buffer_record_disable(buffer);
1779 #endif
1780
1781         arch_spin_unlock(&global_trace.max_lock);
1782
1783  out:
1784         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1785 }
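/*
 * Illustrative sketch only (not part of the original file): how a caller
 * might pair tracing_stop()/tracing_start() to freeze the ring buffers
 * while inspecting them. The function name below is hypothetical.
 */
#if 0
static void example_freeze_trace(void)
{
	/* Stop recording; nested calls are balanced via stop_count */
	tracing_stop();

	/* ... read or dump the trace buffers here ... */

	/* Resume recording once the matching start is reached */
	tracing_start();
}
#endif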
1786
1787 static void tracing_stop_tr(struct trace_array *tr)
1788 {
1789         struct ring_buffer *buffer;
1790         unsigned long flags;
1791
1792         /* If global, we need to also stop the max tracer */
1793         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1794                 return tracing_stop();
1795
1796         raw_spin_lock_irqsave(&tr->start_lock, flags);
1797         if (tr->stop_count++)
1798                 goto out;
1799
1800         buffer = tr->trace_buffer.buffer;
1801         if (buffer)
1802                 ring_buffer_record_disable(buffer);
1803
1804  out:
1805         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1806 }
1807
1808 void trace_stop_cmdline_recording(void);
1809
1810 static int trace_save_cmdline(struct task_struct *tsk)
1811 {
1812         unsigned pid, idx;
1813
1814         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1815                 return 0;
1816
1817         /*
1818          * It's not the end of the world if we don't get
1819          * the lock, but we also don't want to spin
1820          * nor do we want to disable interrupts,
1821          * so if we miss here, then better luck next time.
1822          */
1823         if (!arch_spin_trylock(&trace_cmdline_lock))
1824                 return 0;
1825
1826         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1827         if (idx == NO_CMDLINE_MAP) {
1828                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1829
1830                 /*
1831                  * Check whether the cmdline buffer at idx has a pid
1832                  * mapped. We are going to overwrite that entry so we
1833                  * need to clear the map_pid_to_cmdline. Otherwise we
1834                  * would read the new comm for the old pid.
1835                  */
1836                 pid = savedcmd->map_cmdline_to_pid[idx];
1837                 if (pid != NO_CMDLINE_MAP)
1838                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1839
1840                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1841                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1842
1843                 savedcmd->cmdline_idx = idx;
1844         }
1845
1846         set_cmdline(idx, tsk->comm);
1847
1848         arch_spin_unlock(&trace_cmdline_lock);
1849
1850         return 1;
1851 }
1852
1853 static void __trace_find_cmdline(int pid, char comm[])
1854 {
1855         unsigned map;
1856
1857         if (!pid) {
1858                 strcpy(comm, "<idle>");
1859                 return;
1860         }
1861
1862         if (WARN_ON_ONCE(pid < 0)) {
1863                 strcpy(comm, "<XXX>");
1864                 return;
1865         }
1866
1867         if (pid > PID_MAX_DEFAULT) {
1868                 strcpy(comm, "<...>");
1869                 return;
1870         }
1871
1872         map = savedcmd->map_pid_to_cmdline[pid];
1873         if (map != NO_CMDLINE_MAP)
1874                 strcpy(comm, get_saved_cmdlines(map));
1875         else
1876                 strcpy(comm, "<...>");
1877 }
1878
1879 void trace_find_cmdline(int pid, char comm[])
1880 {
1881         preempt_disable();
1882         arch_spin_lock(&trace_cmdline_lock);
1883
1884         __trace_find_cmdline(pid, comm);
1885
1886         arch_spin_unlock(&trace_cmdline_lock);
1887         preempt_enable();
1888 }
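/*
 * Illustrative sketch only (not part of the original file): resolving a
 * recorded pid back to a comm via the saved_cmdlines map described above.
 * The function name below is hypothetical; TASK_COMM_LEN matches the size
 * used by set_cmdline()/get_saved_cmdlines().
 */
#if 0
static void example_print_comm(int pid)
{
	char comm[TASK_COMM_LEN];

	/* Fills comm with the saved name, or "<...>" if the pid aged out */
	trace_find_cmdline(pid, comm);
	pr_info("pid %d last ran as %s\n", pid, comm);
}
#endif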
1889
1890 void tracing_record_cmdline(struct task_struct *tsk)
1891 {
1892         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1893                 return;
1894
1895         if (!__this_cpu_read(trace_cmdline_save))
1896                 return;
1897
1898         if (trace_save_cmdline(tsk))
1899                 __this_cpu_write(trace_cmdline_save, false);
1900 }
1901
1902 void
1903 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1904                              int pc)
1905 {
1906         struct task_struct *tsk = current;
1907
1908         entry->preempt_count            = pc & 0xff;
1909         entry->pid                      = (tsk) ? tsk->pid : 0;
1910         entry->flags =
1911 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1912                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1913 #else
1914                 TRACE_FLAG_IRQS_NOSUPPORT |
1915 #endif
1916                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1917                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1918                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1919                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1920                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1921 }
1922 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1923
1924 static __always_inline void
1925 trace_event_setup(struct ring_buffer_event *event,
1926                   int type, unsigned long flags, int pc)
1927 {
1928         struct trace_entry *ent = ring_buffer_event_data(event);
1929
1930         tracing_generic_entry_update(ent, flags, pc);
1931         ent->type = type;
1932 }
1933
1934 struct ring_buffer_event *
1935 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1936                           int type,
1937                           unsigned long len,
1938                           unsigned long flags, int pc)
1939 {
1940         struct ring_buffer_event *event;
1941
1942         event = ring_buffer_lock_reserve(buffer, len);
1943         if (event != NULL)
1944                 trace_event_setup(event, type, flags, pc);
1945
1946         return event;
1947 }
1948
1949 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1950 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1951 static int trace_buffered_event_ref;
1952
1953 /**
1954  * trace_buffered_event_enable - enable buffering events
1955  *
1956  * When events are being filtered, it is quicker to use a temporary
1957  * buffer to write the event data into if there's a likely chance
1958  * that it will not be committed. Discarding a reserved ring buffer
1959  * event is not as fast as committing one, and is much slower than
1960  * copying the data from the temporary buffer into a commit.
1961  *
1962  * When an event is to be filtered, allocate per cpu buffers to
1963  * write the event data into. If the event is filtered and discarded,
1964  * it is simply dropped; otherwise, the entire data is committed
1965  * in one shot.
1966  */
1967 void trace_buffered_event_enable(void)
1968 {
1969         struct ring_buffer_event *event;
1970         struct page *page;
1971         int cpu;
1972
1973         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1974
1975         if (trace_buffered_event_ref++)
1976                 return;
1977
1978         for_each_tracing_cpu(cpu) {
1979                 page = alloc_pages_node(cpu_to_node(cpu),
1980                                         GFP_KERNEL | __GFP_NORETRY, 0);
1981                 if (!page)
1982                         goto failed;
1983
1984                 event = page_address(page);
1985                 memset(event, 0, sizeof(*event));
1986
1987                 per_cpu(trace_buffered_event, cpu) = event;
1988
1989                 preempt_disable();
1990                 if (cpu == smp_processor_id() &&
1991                     this_cpu_read(trace_buffered_event) !=
1992                     per_cpu(trace_buffered_event, cpu))
1993                         WARN_ON_ONCE(1);
1994                 preempt_enable();
1995         }
1996
1997         return;
1998  failed:
1999         trace_buffered_event_disable();
2000 }
2001
2002 static void enable_trace_buffered_event(void *data)
2003 {
2004         /* Probably not needed, but do it anyway */
2005         smp_rmb();
2006         this_cpu_dec(trace_buffered_event_cnt);
2007 }
2008
2009 static void disable_trace_buffered_event(void *data)
2010 {
2011         this_cpu_inc(trace_buffered_event_cnt);
2012 }
2013
2014 /**
2015  * trace_buffered_event_disable - disable buffering events
2016  *
2017  * When a filter is removed, it is faster to not use the buffered
2018  * events, and to commit directly into the ring buffer. Free up
2019  * the temp buffers when there are no more users. This requires
2020  * special synchronization with current events.
2021  */
2022 void trace_buffered_event_disable(void)
2023 {
2024         int cpu;
2025
2026         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2027
2028         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2029                 return;
2030
2031         if (--trace_buffered_event_ref)
2032                 return;
2033
2034         preempt_disable();
2035         /* For each CPU, set the buffer as used. */
2036         smp_call_function_many(tracing_buffer_mask,
2037                                disable_trace_buffered_event, NULL, 1);
2038         preempt_enable();
2039
2040         /* Wait for all current users to finish */
2041         synchronize_sched();
2042
2043         for_each_tracing_cpu(cpu) {
2044                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2045                 per_cpu(trace_buffered_event, cpu) = NULL;
2046         }
2047         /*
2048          * Make sure trace_buffered_event is NULL before clearing
2049          * trace_buffered_event_cnt.
2050          */
2051         smp_wmb();
2052
2053         preempt_disable();
2054         /* Do the work on each cpu */
2055         smp_call_function_many(tracing_buffer_mask,
2056                                enable_trace_buffered_event, NULL, 1);
2057         preempt_enable();
2058 }
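/*
 * Illustrative sketch only (not part of the original file): the expected
 * enable/disable pairing. Both functions are reference counted and must be
 * called with event_mutex held, as the WARN_ON_ONCE() checks above enforce.
 * The function name below is hypothetical.
 */
#if 0
static void example_toggle_buffered_events(bool filtering)
{
	mutex_lock(&event_mutex);
	if (filtering)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}
#endif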
2059
2060 void
2061 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2062 {
2063         __this_cpu_write(trace_cmdline_save, true);
2064
2065         /* If this is the temp buffer, we need to commit fully */
2066         if (this_cpu_read(trace_buffered_event) == event) {
2067                 /* Length is in event->array[0] */
2068                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2069                 /* Release the temp buffer */
2070                 this_cpu_dec(trace_buffered_event_cnt);
2071         } else
2072                 ring_buffer_unlock_commit(buffer, event);
2073 }
2074
2075 static struct ring_buffer *temp_buffer;
2076
2077 struct ring_buffer_event *
2078 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2079                           struct trace_event_file *trace_file,
2080                           int type, unsigned long len,
2081                           unsigned long flags, int pc)
2082 {
2083         struct ring_buffer_event *entry;
2084         int val;
2085
2086         *current_rb = trace_file->tr->trace_buffer.buffer;
2087
2088         if ((trace_file->flags &
2089              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2090             (entry = this_cpu_read(trace_buffered_event))) {
2091                 /* Try to use the per cpu buffer first */
2092                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2093                 if (val == 1) {
2094                         trace_event_setup(entry, type, flags, pc);
2095                         entry->array[0] = len;
2096                         return entry;
2097                 }
2098                 this_cpu_dec(trace_buffered_event_cnt);
2099         }
2100
2101         entry = trace_buffer_lock_reserve(*current_rb,
2102                                          type, len, flags, pc);
2103         /*
2104          * If tracing is off, but we have triggers enabled
2105          * we still need to look at the event data. Use the temp_buffer
2106          * to store the trace event for the trigger to use. It's recursion
2107          * safe and will not be recorded anywhere.
2108          */
2109         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2110                 *current_rb = temp_buffer;
2111                 entry = trace_buffer_lock_reserve(*current_rb,
2112                                                   type, len, flags, pc);
2113         }
2114         return entry;
2115 }
2116 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
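/*
 * Illustrative sketch only (not part of the original file): the general
 * reserve/fill/commit pattern an event caller follows with the helper
 * above. __buffer_unlock_commit() transparently handles the case where the
 * reservation came from the per-cpu temp buffer. The function name and the
 * use of a bare struct trace_entry are hypothetical simplifications.
 */
#if 0
static void example_emit_event(struct trace_event_file *trace_file,
			       int type, unsigned long flags, int pc)
{
	struct ring_buffer *buffer;
	struct ring_buffer_event *event;
	struct trace_entry *entry;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file, type,
						sizeof(*entry), flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	/* ... fill in the event payload past the common trace_entry ... */

	__buffer_unlock_commit(buffer, event);
}
#endif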
2117
2118 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2119                                      struct ring_buffer *buffer,
2120                                      struct ring_buffer_event *event,
2121                                      unsigned long flags, int pc,
2122                                      struct pt_regs *regs)
2123 {
2124         __buffer_unlock_commit(buffer, event);
2125
2126         /*
2127          * If regs is not set, then skip the following callers:
2128          *   trace_buffer_unlock_commit_regs
2129          *   event_trigger_unlock_commit
2130          *   trace_event_buffer_commit
2131          *   trace_event_raw_event_sched_switch
2132          * Note, we can still get here via blktrace, wakeup tracer
2133          * and mmiotrace, but that's ok if they lose a function or
2134          * two. They are not that meaningful.
2135          */
2136         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2137         ftrace_trace_userstack(buffer, flags, pc);
2138 }
2139
2140 void
2141 trace_function(struct trace_array *tr,
2142                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2143                int pc)
2144 {
2145         struct trace_event_call *call = &event_function;
2146         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2147         struct ring_buffer_event *event;
2148         struct ftrace_entry *entry;
2149
2150         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2151                                           flags, pc);
2152         if (!event)
2153                 return;
2154         entry   = ring_buffer_event_data(event);
2155         entry->ip                       = ip;
2156         entry->parent_ip                = parent_ip;
2157
2158         if (!call_filter_check_discard(call, entry, buffer, event))
2159                 __buffer_unlock_commit(buffer, event);
2160 }
2161
2162 #ifdef CONFIG_STACKTRACE
2163
2164 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2165 struct ftrace_stack {
2166         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2167 };
2168
2169 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2170 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2171
2172 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2173                                  unsigned long flags,
2174                                  int skip, int pc, struct pt_regs *regs)
2175 {
2176         struct trace_event_call *call = &event_kernel_stack;
2177         struct ring_buffer_event *event;
2178         struct stack_entry *entry;
2179         struct stack_trace trace;
2180         int use_stack;
2181         int size = FTRACE_STACK_ENTRIES;
2182
2183         trace.nr_entries        = 0;
2184         trace.skip              = skip;
2185
2186          * Add two, for this function and the call to save_stack_trace().
2187          * Add two, for this function and the call to save_stack_trace()
2188          * If regs is set, then these functions will not be in the way.
2189          */
2190         if (!regs)
2191                 trace.skip += 2;
2192
2193         /*
2194          * Since events can happen in NMIs there's no safe way to
2195          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2196          * or NMI comes in, it will just have to use the default
2197          * FTRACE_STACK_SIZE.
2198          */
2199         preempt_disable_notrace();
2200
2201         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2202         /*
2203          * We don't need any atomic variables, just a barrier.
2204          * If an interrupt comes in, we don't care, because it would
2205          * have exited and put the counter back to what we want.
2206          * We just need a barrier to keep gcc from moving things
2207          * around.
2208          */
2209         barrier();
2210         if (use_stack == 1) {
2211                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2212                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2213
2214                 if (regs)
2215                         save_stack_trace_regs(regs, &trace);
2216                 else
2217                         save_stack_trace(&trace);
2218
2219                 if (trace.nr_entries > size)
2220                         size = trace.nr_entries;
2221         } else
2222                 /* From now on, use_stack is a boolean */
2223                 use_stack = 0;
2224
2225         size *= sizeof(unsigned long);
2226
2227         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2228                                           sizeof(*entry) + size, flags, pc);
2229         if (!event)
2230                 goto out;
2231         entry = ring_buffer_event_data(event);
2232
2233         memset(&entry->caller, 0, size);
2234
2235         if (use_stack)
2236                 memcpy(&entry->caller, trace.entries,
2237                        trace.nr_entries * sizeof(unsigned long));
2238         else {
2239                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2240                 trace.entries           = entry->caller;
2241                 if (regs)
2242                         save_stack_trace_regs(regs, &trace);
2243                 else
2244                         save_stack_trace(&trace);
2245         }
2246
2247         entry->size = trace.nr_entries;
2248
2249         if (!call_filter_check_discard(call, entry, buffer, event))
2250                 __buffer_unlock_commit(buffer, event);
2251
2252  out:
2253         /* Again, don't let gcc optimize things here */
2254         barrier();
2255         __this_cpu_dec(ftrace_stack_reserve);
2256         preempt_enable_notrace();
2257
2258 }
2259
2260 static inline void ftrace_trace_stack(struct trace_array *tr,
2261                                       struct ring_buffer *buffer,
2262                                       unsigned long flags,
2263                                       int skip, int pc, struct pt_regs *regs)
2264 {
2265         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2266                 return;
2267
2268         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2269 }
2270
2271 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2272                    int pc)
2273 {
2274         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2275 }
2276
2277 /**
2278  * trace_dump_stack - record a stack back trace in the trace buffer
2279  * @skip: Number of functions to skip (helper handlers)
2280  */
2281 void trace_dump_stack(int skip)
2282 {
2283         unsigned long flags;
2284
2285         if (tracing_disabled || tracing_selftest_running)
2286                 return;
2287
2288         local_save_flags(flags);
2289
2290         /*
2291          * Skip 3 more; that seems to get us to the caller of
2292          * this function.
2293          */
2294         skip += 3;
2295         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2296                              flags, skip, preempt_count(), NULL);
2297 }
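/*
 * Illustrative sketch only (not part of the original file): a typical
 * debugging call. Passing 0 for @skip records the back trace starting at
 * the immediate caller, thanks to the extra skip added above.
 */
#if 0
static void example_dump_here(void)
{
	/* Record the current kernel stack into the global trace buffer */
	trace_dump_stack(0);
}
#endif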
2298
2299 static DEFINE_PER_CPU(int, user_stack_count);
2300
2301 void
2302 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2303 {
2304         struct trace_event_call *call = &event_user_stack;
2305         struct ring_buffer_event *event;
2306         struct userstack_entry *entry;
2307         struct stack_trace trace;
2308
2309         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2310                 return;
2311
2312          * NMIs cannot handle page faults, even with fixups.
2313          * Saving the user stack can (and often does) fault.
2314          * The save user stack can (and often does) fault.
2315          */
2316         if (unlikely(in_nmi()))
2317                 return;
2318
2319         /*
2320          * prevent recursion, since the user stack tracing may
2321          * trigger other kernel events.
2322          */
2323         preempt_disable();
2324         if (__this_cpu_read(user_stack_count))
2325                 goto out;
2326
2327         __this_cpu_inc(user_stack_count);
2328
2329         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2330                                           sizeof(*entry), flags, pc);
2331         if (!event)
2332                 goto out_drop_count;
2333         entry   = ring_buffer_event_data(event);
2334
2335         entry->tgid             = current->tgid;
2336         memset(&entry->caller, 0, sizeof(entry->caller));
2337
2338         trace.nr_entries        = 0;
2339         trace.max_entries       = FTRACE_STACK_ENTRIES;
2340         trace.skip              = 0;
2341         trace.entries           = entry->caller;
2342
2343         save_stack_trace_user(&trace);
2344         if (!call_filter_check_discard(call, entry, buffer, event))
2345                 __buffer_unlock_commit(buffer, event);
2346
2347  out_drop_count:
2348         __this_cpu_dec(user_stack_count);
2349  out:
2350         preempt_enable();
2351 }
2352
2353 #ifdef UNUSED
2354 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2355 {
2356         ftrace_trace_userstack(tr, flags, preempt_count());
2357 }
2358 #endif /* UNUSED */
2359
2360 #endif /* CONFIG_STACKTRACE */
2361
2362 /* created for use with alloc_percpu */
2363 struct trace_buffer_struct {
2364         int nesting;
2365         char buffer[4][TRACE_BUF_SIZE];
2366 };
2367
2368 static struct trace_buffer_struct *trace_percpu_buffer;
2369
2370 /*
2371  * This allows for lockless recording.  If we're nested too deeply, then
2372  * this returns NULL.
2373  */
2374 static char *get_trace_buf(void)
2375 {
2376         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2377
2378         if (!buffer || buffer->nesting >= 4)
2379                 return NULL;
2380
2381         buffer->nesting++;
2382
2383         /* Interrupts must see nesting incremented before we use the buffer */
2384         barrier();
2385         return &buffer->buffer[buffer->nesting][0];
2386 }
2387
2388 static void put_trace_buf(void)
2389 {
2390         /* Don't let the decrement of nesting leak before this */
2391         barrier();
2392         this_cpu_dec(trace_percpu_buffer->nesting);
2393 }
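/*
 * Illustrative sketch only (not part of the original file): the acquire/
 * release pattern used by trace_vbprintk()/__trace_array_vprintk() below.
 * Preemption must be disabled so the per-cpu nesting counter stays on one
 * CPU. The function name is hypothetical.
 */
#if 0
static void example_use_trace_buf(void)
{
	char *tbuffer;

	preempt_disable_notrace();

	tbuffer = get_trace_buf();
	if (tbuffer) {
		/* ... format up to TRACE_BUF_SIZE bytes into tbuffer ... */
		put_trace_buf();
	}

	preempt_enable_notrace();
}
#endif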
2394
2395 static int alloc_percpu_trace_buffer(void)
2396 {
2397         struct trace_buffer_struct *buffers;
2398
2399         buffers = alloc_percpu(struct trace_buffer_struct);
2400         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2401                 return -ENOMEM;
2402
2403         trace_percpu_buffer = buffers;
2404         return 0;
2405 }
2406
2407 static int buffers_allocated;
2408
2409 void trace_printk_init_buffers(void)
2410 {
2411         if (buffers_allocated)
2412                 return;
2413
2414         if (alloc_percpu_trace_buffer())
2415                 return;
2416
2417         /* trace_printk() is for debug use only. Don't use it in production. */
2418
2419         pr_warn("\n");
2420         pr_warn("**********************************************************\n");
2421         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2422         pr_warn("**                                                      **\n");
2423         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2424         pr_warn("**                                                      **\n");
2425         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2426         pr_warn("** unsafe for production use.                           **\n");
2427         pr_warn("**                                                      **\n");
2428         pr_warn("** If you see this message and you are not debugging    **\n");
2429         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2430         pr_warn("**                                                      **\n");
2431         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2432         pr_warn("**********************************************************\n");
2433
2434         /* Expand the buffers to set size */
2435         tracing_update_buffers();
2436
2437         buffers_allocated = 1;
2438
2439         /*
2440          * trace_printk_init_buffers() can be called by modules.
2441          * If that happens, then we need to start cmdline recording
2442          * directly here. If the global_trace.buffer is already
2443          * allocated here, then this was called by module code.
2444          */
2445         if (global_trace.trace_buffer.buffer)
2446                 tracing_start_cmdline_record();
2447 }
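/*
 * Illustrative sketch only (not part of the original file): the kind of
 * call that pulls in trace_printk_init_buffers() and triggers the banner
 * above. trace_printk() is the macro from linux/kernel.h; the function
 * name below is hypothetical.
 */
#if 0
static void example_debug_print(int val)
{
	/* Writes into the ring buffer, not the console; debug use only */
	trace_printk("example value: %d\n", val);
}
#endif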
2448
2449 void trace_printk_start_comm(void)
2450 {
2451         /* Start tracing comms if trace printk is set */
2452         if (!buffers_allocated)
2453                 return;
2454         tracing_start_cmdline_record();
2455 }
2456
2457 static void trace_printk_start_stop_comm(int enabled)
2458 {
2459         if (!buffers_allocated)
2460                 return;
2461
2462         if (enabled)
2463                 tracing_start_cmdline_record();
2464         else
2465                 tracing_stop_cmdline_record();
2466 }
2467
2468 /**
2469  * trace_vbprintk - write binary msg to tracing buffer
2470  *
2471  */
2472 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2473 {
2474         struct trace_event_call *call = &event_bprint;
2475         struct ring_buffer_event *event;
2476         struct ring_buffer *buffer;
2477         struct trace_array *tr = &global_trace;
2478         struct bprint_entry *entry;
2479         unsigned long flags;
2480         char *tbuffer;
2481         int len = 0, size, pc;
2482
2483         if (unlikely(tracing_selftest_running || tracing_disabled))
2484                 return 0;
2485
2486         /* Don't pollute graph traces with trace_vprintk internals */
2487         pause_graph_tracing();
2488
2489         pc = preempt_count();
2490         preempt_disable_notrace();
2491
2492         tbuffer = get_trace_buf();
2493         if (!tbuffer) {
2494                 len = 0;
2495                 goto out_nobuffer;
2496         }
2497
2498         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2499
2500         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2501                 goto out;
2502
2503         local_save_flags(flags);
2504         size = sizeof(*entry) + sizeof(u32) * len;
2505         buffer = tr->trace_buffer.buffer;
2506         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2507                                           flags, pc);
2508         if (!event)
2509                 goto out;
2510         entry = ring_buffer_event_data(event);
2511         entry->ip                       = ip;
2512         entry->fmt                      = fmt;
2513
2514         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2515         if (!call_filter_check_discard(call, entry, buffer, event)) {
2516                 __buffer_unlock_commit(buffer, event);
2517                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2518         }
2519
2520 out:
2521         put_trace_buf();
2522
2523 out_nobuffer:
2524         preempt_enable_notrace();
2525         unpause_graph_tracing();
2526
2527         return len;
2528 }
2529 EXPORT_SYMBOL_GPL(trace_vbprintk);
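/*
 * Illustrative sketch only (not part of the original file): a varargs
 * wrapper around trace_vbprintk(), mirroring how trace_array_printk()
 * below wraps trace_array_vprintk(). The function name is hypothetical;
 * real users go through the trace_printk()/trace_bprintk() machinery.
 */
#if 0
static int example_bprintk(unsigned long ip, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = trace_vbprintk(ip, fmt, ap);
	va_end(ap);

	return ret;
}
#endif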
2530
2531 __printf(3, 0)
2532 static int
2533 __trace_array_vprintk(struct ring_buffer *buffer,
2534                       unsigned long ip, const char *fmt, va_list args)
2535 {
2536         struct trace_event_call *call = &event_print;
2537         struct ring_buffer_event *event;
2538         int len = 0, size, pc;
2539         struct print_entry *entry;
2540         unsigned long flags;
2541         char *tbuffer;
2542
2543         if (tracing_disabled || tracing_selftest_running)
2544                 return 0;
2545
2546         /* Don't pollute graph traces with trace_vprintk internals */
2547         pause_graph_tracing();
2548
2549         pc = preempt_count();
2550         preempt_disable_notrace();
2551
2553         tbuffer = get_trace_buf();
2554         if (!tbuffer) {
2555                 len = 0;
2556                 goto out_nobuffer;
2557         }
2558
2559         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2560
2561         local_save_flags(flags);
2562         size = sizeof(*entry) + len + 1;
2563         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2564                                           flags, pc);
2565         if (!event)
2566                 goto out;
2567         entry = ring_buffer_event_data(event);
2568         entry->ip = ip;
2569
2570         memcpy(&entry->buf, tbuffer, len + 1);
2571         if (!call_filter_check_discard(call, entry, buffer, event)) {
2572                 __buffer_unlock_commit(buffer, event);
2573                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2574         }
2575
2576 out:
2577         put_trace_buf();
2578
2579 out_nobuffer:
2580         preempt_enable_notrace();
2581         unpause_graph_tracing();
2582
2583         return len;
2584 }
2585
2586 __printf(3, 0)
2587 int trace_array_vprintk(struct trace_array *tr,
2588                         unsigned long ip, const char *fmt, va_list args)
2589 {
2590         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2591 }
2592
2593 __printf(3, 0)
2594 int trace_array_printk(struct trace_array *tr,
2595                        unsigned long ip, const char *fmt, ...)
2596 {
2597         int ret;
2598         va_list ap;
2599
2600         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2601                 return 0;
2602
2603         va_start(ap, fmt);
2604         ret = trace_array_vprintk(tr, ip, fmt, ap);
2605         va_end(ap);
2606         return ret;
2607 }
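/*
 * Illustrative sketch only (not part of the original file): writing into a
 * specific trace_array instance. _THIS_IP_ comes from linux/kernel.h and
 * records the caller's address as @ip. The function name is hypothetical.
 */
#if 0
static void example_instance_printk(struct trace_array *tr, int cpu)
{
	trace_array_printk(tr, _THIS_IP_, "cpu %d reached the example\n", cpu);
}
#endif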
2608
2609 __printf(3, 4)
2610 int trace_array_printk_buf(struct ring_buffer *buffer,
2611                            unsigned long ip, const char *fmt, ...)
2612 {
2613         int ret;
2614         va_list ap;
2615
2616         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2617                 return 0;
2618
2619         va_start(ap, fmt);
2620         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2621         va_end(ap);
2622         return ret;
2623 }
2624
2625 __printf(2, 0)
2626 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2627 {
2628         return trace_array_vprintk(&global_trace, ip, fmt, args);
2629 }
2630 EXPORT_SYMBOL_GPL(trace_vprintk);
2631
2632 static void trace_iterator_increment(struct trace_iterator *iter)
2633 {
2634         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2635
2636         iter->idx++;
2637         if (buf_iter)
2638                 ring_buffer_read(buf_iter, NULL);
2639 }
2640
2641 static struct trace_entry *
2642 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2643                 unsigned long *lost_events)
2644 {
2645         struct ring_buffer_event *event;
2646         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2647
2648         if (buf_iter)
2649                 event = ring_buffer_iter_peek(buf_iter, ts);
2650         else
2651                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2652                                          lost_events);
2653
2654         if (event) {
2655                 iter->ent_size = ring_buffer_event_length(event);
2656                 return ring_buffer_event_data(event);
2657         }
2658         iter->ent_size = 0;
2659         return NULL;
2660 }
2661
2662 static struct trace_entry *
2663 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2664                   unsigned long *missing_events, u64 *ent_ts)
2665 {
2666         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2667         struct trace_entry *ent, *next = NULL;
2668         unsigned long lost_events = 0, next_lost = 0;
2669         int cpu_file = iter->cpu_file;
2670         u64 next_ts = 0, ts;
2671         int next_cpu = -1;
2672         int next_size = 0;
2673         int cpu;
2674
2675         /*
2676          * If we are in a per_cpu trace file, don't bother iterating over
2677          * all CPUs; just peek at that one directly.
2678          */
2679         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2680                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2681                         return NULL;
2682                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2683                 if (ent_cpu)
2684                         *ent_cpu = cpu_file;
2685
2686                 return ent;
2687         }
2688
2689         for_each_tracing_cpu(cpu) {
2690
2691                 if (ring_buffer_empty_cpu(buffer, cpu))
2692                         continue;
2693
2694                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2695
2696                 /*
2697                  * Pick the entry with the smallest timestamp:
2698                  */
2699                 if (ent && (!next || ts < next_ts)) {
2700                         next = ent;
2701                         next_cpu = cpu;
2702                         next_ts = ts;
2703                         next_lost = lost_events;
2704                         next_size = iter->ent_size;
2705                 }
2706         }
2707
2708         iter->ent_size = next_size;
2709
2710         if (ent_cpu)
2711                 *ent_cpu = next_cpu;
2712
2713         if (ent_ts)
2714                 *ent_ts = next_ts;
2715
2716         if (missing_events)
2717                 *missing_events = next_lost;
2718
2719         return next;
2720 }
2721
2722 /* Find the next real entry, without updating the iterator itself */
2723 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2724                                           int *ent_cpu, u64 *ent_ts)
2725 {
2726         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2727 }
2728
2729 /* Find the next real entry, and increment the iterator to the next entry */
2730 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2731 {
2732         iter->ent = __find_next_entry(iter, &iter->cpu,
2733                                       &iter->lost_events, &iter->ts);
2734
2735         if (iter->ent)
2736                 trace_iterator_increment(iter);
2737
2738         return iter->ent ? iter : NULL;
2739 }
2740
2741 static void trace_consume(struct trace_iterator *iter)
2742 {
2743         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2744                             &iter->lost_events);
2745 }
2746
2747 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2748 {
2749         struct trace_iterator *iter = m->private;
2750         int i = (int)*pos;
2751         void *ent;
2752
2753         WARN_ON_ONCE(iter->leftover);
2754
2755         (*pos)++;
2756
2757         /* can't go backwards */
2758         if (iter->idx > i)
2759                 return NULL;
2760
2761         if (iter->idx < 0)
2762                 ent = trace_find_next_entry_inc(iter);
2763         else
2764                 ent = iter;
2765
2766         while (ent && iter->idx < i)
2767                 ent = trace_find_next_entry_inc(iter);
2768
2769         iter->pos = *pos;
2770
2771         return ent;
2772 }
2773
2774 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2775 {
2776         struct ring_buffer_event *event;
2777         struct ring_buffer_iter *buf_iter;
2778         unsigned long entries = 0;
2779         u64 ts;
2780
2781         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2782
2783         buf_iter = trace_buffer_iter(iter, cpu);
2784         if (!buf_iter)
2785                 return;
2786
2787         ring_buffer_iter_reset(buf_iter);
2788
2789         /*
2790          * With the max latency tracers it is possible that a reset
2791          * never took place on a cpu. This is evident when the
2792          * timestamp is before the start of the buffer.
2793          */
2794         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2795                 if (ts >= iter->trace_buffer->time_start)
2796                         break;
2797                 entries++;
2798                 ring_buffer_read(buf_iter, NULL);
2799         }
2800
2801         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2802 }
2803
2804 /*
2805  * The current tracer is copied to avoid global locking
2806  * all around.
2807  */
2808 static void *s_start(struct seq_file *m, loff_t *pos)
2809 {
2810         struct trace_iterator *iter = m->private;
2811         struct trace_array *tr = iter->tr;
2812         int cpu_file = iter->cpu_file;
2813         void *p = NULL;
2814         loff_t l = 0;
2815         int cpu;
2816
2817         /*
2818          * copy the tracer to avoid using a global lock all around.
2819          * iter->trace is a copy of current_trace, the pointer to the
2820          * name may be used instead of a strcmp(), as iter->trace->name
2821          * will point to the same string as current_trace->name.
2822          */
2823         mutex_lock(&trace_types_lock);
2824         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2825                 *iter->trace = *tr->current_trace;
2826         mutex_unlock(&trace_types_lock);
2827
2828 #ifdef CONFIG_TRACER_MAX_TRACE
2829         if (iter->snapshot && iter->trace->use_max_tr)
2830                 return ERR_PTR(-EBUSY);
2831 #endif
2832
2833         if (!iter->snapshot)
2834                 atomic_inc(&trace_record_cmdline_disabled);
2835
2836         if (*pos != iter->pos) {
2837                 iter->ent = NULL;
2838                 iter->cpu = 0;
2839                 iter->idx = -1;
2840
2841                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2842                         for_each_tracing_cpu(cpu)
2843                                 tracing_iter_reset(iter, cpu);
2844                 } else
2845                         tracing_iter_reset(iter, cpu_file);
2846
2847                 iter->leftover = 0;
2848                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2849                         ;
2850
2851         } else {
2852                 /*
2853                  * If we overflowed the seq_file before, then we want
2854                  * to just reuse the trace_seq buffer again.
2855                  */
2856                 if (iter->leftover)
2857                         p = iter;
2858                 else {
2859                         l = *pos - 1;
2860                         p = s_next(m, p, &l);
2861                 }
2862         }
2863
2864         trace_event_read_lock();
2865         trace_access_lock(cpu_file);
2866         return p;
2867 }
2868
2869 static void s_stop(struct seq_file *m, void *p)
2870 {
2871         struct trace_iterator *iter = m->private;
2872
2873 #ifdef CONFIG_TRACER_MAX_TRACE
2874         if (iter->snapshot && iter->trace->use_max_tr)
2875                 return;
2876 #endif
2877
2878         if (!iter->snapshot)
2879                 atomic_dec(&trace_record_cmdline_disabled);
2880
2881         trace_access_unlock(iter->cpu_file);
2882         trace_event_read_unlock();
2883 }
2884
2885 static void
2886 get_total_entries(struct trace_buffer *buf,
2887                   unsigned long *total, unsigned long *entries)
2888 {
2889         unsigned long count;
2890         int cpu;
2891
2892         *total = 0;
2893         *entries = 0;
2894
2895         for_each_tracing_cpu(cpu) {
2896                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2897                 /*
2898                  * If this buffer has skipped entries, then we hold all
2899                  * entries for the trace and we need to ignore the
2900                  * ones before the time stamp.
2901                  */
2902                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2903                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2904                         /* total is the same as the entries */
2905                         *total += count;
2906                 } else
2907                         *total += count +
2908                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2909                 *entries += count;
2910         }
2911 }
2912
2913 static void print_lat_help_header(struct seq_file *m)
2914 {
2915         seq_puts(m, "#                  _------=> CPU#            \n"
2916                     "#                 / _-----=> irqs-off        \n"
2917                     "#                | / _----=> need-resched    \n"
2918                     "#                || / _---=> hardirq/softirq \n"
2919                     "#                ||| / _--=> preempt-depth   \n"
2920                     "#                |||| /     delay            \n"
2921                     "#  cmd     pid   ||||| time  |   caller      \n"
2922                     "#     \\   /      |||||  \\    |   /         \n");
2923 }
2924
2925 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2926 {
2927         unsigned long total;
2928         unsigned long entries;
2929
2930         get_total_entries(buf, &total, &entries);
2931         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2932                    entries, total, num_online_cpus());
2933         seq_puts(m, "#\n");
2934 }
2935
2936 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2937 {
2938         print_event_info(buf, m);
2939         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2940                     "#              | |       |          |         |\n");
2941 }
2942
2943 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2944 {
2945         print_event_info(buf, m);
2946         seq_puts(m, "#                              _-----=> irqs-off\n"
2947                     "#                             / _----=> need-resched\n"
2948                     "#                            | / _---=> hardirq/softirq\n"
2949                     "#                            || / _--=> preempt-depth\n"
2950                     "#                            ||| /     delay\n"
2951                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2952                     "#              | |       |   ||||       |         |\n");
2953 }
2954
2955 void
2956 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2957 {
2958         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2959         struct trace_buffer *buf = iter->trace_buffer;
2960         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2961         struct tracer *type = iter->trace;
2962         unsigned long entries;
2963         unsigned long total;
2964         const char *name = type->name;
2967
2968         get_total_entries(buf, &total, &entries);
2969
2970         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2971                    name, UTS_RELEASE);
2972         seq_puts(m, "# -----------------------------------"
2973                  "---------------------------------\n");
2974         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2975                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2976                    nsecs_to_usecs(data->saved_latency),
2977                    entries,
2978                    total,
2979                    buf->cpu,
2980 #if defined(CONFIG_PREEMPT_NONE)
2981                    "server",
2982 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2983                    "desktop",
2984 #elif defined(CONFIG_PREEMPT)
2985                    "preempt",
2986 #else
2987                    "unknown",
2988 #endif
2989                    /* These are reserved for later use */
2990                    0, 0, 0, 0);
2991 #ifdef CONFIG_SMP
2992         seq_printf(m, " #P:%d)\n", num_online_cpus());
2993 #else
2994         seq_puts(m, ")\n");
2995 #endif
2996         seq_puts(m, "#    -----------------\n");
2997         seq_printf(m, "#    | task: %.16s-%d "
2998                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2999                    data->comm, data->pid,
3000                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3001                    data->policy, data->rt_priority);
3002         seq_puts(m, "#    -----------------\n");
3003
3004         if (data->critical_start) {
3005                 seq_puts(m, "#  => started at: ");
3006                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3007                 trace_print_seq(m, &iter->seq);
3008                 seq_puts(m, "\n#  => ended at:   ");
3009                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3010                 trace_print_seq(m, &iter->seq);
3011                 seq_puts(m, "\n#\n");
3012         }
3013
3014         seq_puts(m, "#\n");
3015 }
3016
3017 static void test_cpu_buff_start(struct trace_iterator *iter)
3018 {
3019         struct trace_seq *s = &iter->seq;
3020         struct trace_array *tr = iter->tr;
3021
3022         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3023                 return;
3024
3025         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3026                 return;
3027
3028         if (cpumask_available(iter->started) &&
3029             cpumask_test_cpu(iter->cpu, iter->started))
3030                 return;
3031
3032         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3033                 return;
3034
3035         if (cpumask_available(iter->started))
3036                 cpumask_set_cpu(iter->cpu, iter->started);
3037
3038         /* Don't print started cpu buffer for the first entry of the trace */
3039         if (iter->idx > 1)
3040                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3041                                 iter->cpu);
3042 }
3043
3044 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3045 {
3046         struct trace_array *tr = iter->tr;
3047         struct trace_seq *s = &iter->seq;
3048         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3049         struct trace_entry *entry;
3050         struct trace_event *event;
3051
3052         entry = iter->ent;
3053
3054         test_cpu_buff_start(iter);
3055
3056         event = ftrace_find_event(entry->type);
3057
3058         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3059                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3060                         trace_print_lat_context(iter);
3061                 else
3062                         trace_print_context(iter);
3063         }
3064
3065         if (trace_seq_has_overflowed(s))
3066                 return TRACE_TYPE_PARTIAL_LINE;
3067
3068         if (event)
3069                 return event->funcs->trace(iter, sym_flags, event);
3070
3071         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3072
3073         return trace_handle_return(s);
3074 }
3075
3076 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3077 {
3078         struct trace_array *tr = iter->tr;
3079         struct trace_seq *s = &iter->seq;
3080         struct trace_entry *entry;
3081         struct trace_event *event;
3082
3083         entry = iter->ent;
3084
3085         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3086                 trace_seq_printf(s, "%d %d %llu ",
3087                                  entry->pid, iter->cpu, iter->ts);
3088
3089         if (trace_seq_has_overflowed(s))
3090                 return TRACE_TYPE_PARTIAL_LINE;
3091
3092         event = ftrace_find_event(entry->type);
3093         if (event)
3094                 return event->funcs->raw(iter, 0, event);
3095
3096         trace_seq_printf(s, "%d ?\n", entry->type);
3097
3098         return trace_handle_return(s);
3099 }
3100
3101 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3102 {
3103         struct trace_array *tr = iter->tr;
3104         struct trace_seq *s = &iter->seq;
3105         unsigned char newline = '\n';
3106         struct trace_entry *entry;
3107         struct trace_event *event;
3108
3109         entry = iter->ent;
3110
3111         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3112                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3113                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3114                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3115                 if (trace_seq_has_overflowed(s))
3116                         return TRACE_TYPE_PARTIAL_LINE;
3117         }
3118
3119         event = ftrace_find_event(entry->type);
3120         if (event) {
3121                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3122                 if (ret != TRACE_TYPE_HANDLED)
3123                         return ret;
3124         }
3125
3126         SEQ_PUT_FIELD(s, newline);
3127
3128         return trace_handle_return(s);
3129 }
3130
3131 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3132 {
3133         struct trace_array *tr = iter->tr;
3134         struct trace_seq *s = &iter->seq;
3135         struct trace_entry *entry;
3136         struct trace_event *event;
3137
3138         entry = iter->ent;
3139
3140         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3141                 SEQ_PUT_FIELD(s, entry->pid);
3142                 SEQ_PUT_FIELD(s, iter->cpu);
3143                 SEQ_PUT_FIELD(s, iter->ts);
3144                 if (trace_seq_has_overflowed(s))
3145                         return TRACE_TYPE_PARTIAL_LINE;
3146         }
3147
3148         event = ftrace_find_event(entry->type);
3149         return event ? event->funcs->binary(iter, 0, event) :
3150                 TRACE_TYPE_HANDLED;
3151 }
3152
3153 int trace_empty(struct trace_iterator *iter)
3154 {
3155         struct ring_buffer_iter *buf_iter;
3156         int cpu;
3157
3158         /* If we are looking at one CPU buffer, only check that one */
3159         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3160                 cpu = iter->cpu_file;
3161                 buf_iter = trace_buffer_iter(iter, cpu);
3162                 if (buf_iter) {
3163                         if (!ring_buffer_iter_empty(buf_iter))
3164                                 return 0;
3165                 } else {
3166                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3167                                 return 0;
3168                 }
3169                 return 1;
3170         }
3171
3172         for_each_tracing_cpu(cpu) {
3173                 buf_iter = trace_buffer_iter(iter, cpu);
3174                 if (buf_iter) {
3175                         if (!ring_buffer_iter_empty(buf_iter))
3176                                 return 0;
3177                 } else {
3178                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3179                                 return 0;
3180                 }
3181         }
3182
3183         return 1;
3184 }
3185
3186 /*  Called with trace_event_read_lock() held. */
3187 enum print_line_t print_trace_line(struct trace_iterator *iter)
3188 {
3189         struct trace_array *tr = iter->tr;
3190         unsigned long trace_flags = tr->trace_flags;
3191         enum print_line_t ret;
3192
3193         if (iter->lost_events) {
3194                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3195                                  iter->cpu, iter->lost_events);
3196                 if (trace_seq_has_overflowed(&iter->seq))
3197                         return TRACE_TYPE_PARTIAL_LINE;
3198         }
3199
3200         if (iter->trace && iter->trace->print_line) {
3201                 ret = iter->trace->print_line(iter);
3202                 if (ret != TRACE_TYPE_UNHANDLED)
3203                         return ret;
3204         }
3205
3206         if (iter->ent->type == TRACE_BPUTS &&
3207                         trace_flags & TRACE_ITER_PRINTK &&
3208                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3209                 return trace_print_bputs_msg_only(iter);
3210
3211         if (iter->ent->type == TRACE_BPRINT &&
3212                         trace_flags & TRACE_ITER_PRINTK &&
3213                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3214                 return trace_print_bprintk_msg_only(iter);
3215
3216         if (iter->ent->type == TRACE_PRINT &&
3217                         trace_flags & TRACE_ITER_PRINTK &&
3218                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3219                 return trace_print_printk_msg_only(iter);
3220
3221         if (trace_flags & TRACE_ITER_BIN)
3222                 return print_bin_fmt(iter);
3223
3224         if (trace_flags & TRACE_ITER_HEX)
3225                 return print_hex_fmt(iter);
3226
3227         if (trace_flags & TRACE_ITER_RAW)
3228                 return print_raw_fmt(iter);
3229
3230         return print_trace_fmt(iter);
3231 }
3232
3233 void trace_latency_header(struct seq_file *m)
3234 {
3235         struct trace_iterator *iter = m->private;
3236         struct trace_array *tr = iter->tr;
3237
3238         /* print nothing if the buffers are empty */
3239         if (trace_empty(iter))
3240                 return;
3241
3242         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3243                 print_trace_header(m, iter);
3244
3245         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3246                 print_lat_help_header(m);
3247 }
3248
3249 void trace_default_header(struct seq_file *m)
3250 {
3251         struct trace_iterator *iter = m->private;
3252         struct trace_array *tr = iter->tr;
3253         unsigned long trace_flags = tr->trace_flags;
3254
3255         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3256                 return;
3257
3258         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3259                 /* print nothing if the buffers are empty */
3260                 if (trace_empty(iter))
3261                         return;
3262                 print_trace_header(m, iter);
3263                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3264                         print_lat_help_header(m);
3265         } else {
3266                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3267                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3268                                 print_func_help_header_irq(iter->trace_buffer, m);
3269                         else
3270                                 print_func_help_header(iter->trace_buffer, m);
3271                 }
3272         }
3273 }
3274
3275 static void test_ftrace_alive(struct seq_file *m)
3276 {
3277         if (!ftrace_is_dead())
3278                 return;
3279         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3280                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3281 }
3282
3283 #ifdef CONFIG_TRACER_MAX_TRACE
3284 static void show_snapshot_main_help(struct seq_file *m)
3285 {
3286         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3287                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3288                     "#                      Takes a snapshot of the main buffer.\n"
3289                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3290                     "#                      (Doesn't have to be '2'; works with any number that\n"
3291                     "#                       is not a '0' or '1')\n");
3292 }
3293
3294 static void show_snapshot_percpu_help(struct seq_file *m)
3295 {
3296         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3297 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3298         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3299                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3300 #else
3301         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3302                     "#                     Must use main snapshot file to allocate.\n");
3303 #endif
3304         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3305                     "#                      (Doesn't have to be '2'; works with any number that\n"
3306                     "#                       is not a '0' or '1')\n");
3307 }
3308
3309 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3310 {
3311         if (iter->tr->allocated_snapshot)
3312                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3313         else
3314                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3315
3316         seq_puts(m, "# Snapshot commands:\n");
3317         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3318                 show_snapshot_main_help(m);
3319         else
3320                 show_snapshot_percpu_help(m);
3321 }
3322 #else
3323 /* Should never be called */
3324 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3325 #endif
3326
3327 static int s_show(struct seq_file *m, void *v)
3328 {
3329         struct trace_iterator *iter = v;
3330         int ret;
3331
3332         if (iter->ent == NULL) {
3333                 if (iter->tr) {
3334                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3335                         seq_puts(m, "#\n");
3336                         test_ftrace_alive(m);
3337                 }
3338                 if (iter->snapshot && trace_empty(iter))
3339                         print_snapshot_help(m, iter);
3340                 else if (iter->trace && iter->trace->print_header)
3341                         iter->trace->print_header(m);
3342                 else
3343                         trace_default_header(m);
3344
3345         } else if (iter->leftover) {
3346                 /*
3347                  * If we filled the seq_file buffer earlier, we
3348                  * want to just show it now.
3349                  */
3350                 ret = trace_print_seq(m, &iter->seq);
3351
3352                 /* ret should this time be zero, but you never know */
3353                 iter->leftover = ret;
3354
3355         } else {
3356                 print_trace_line(iter);
3357                 ret = trace_print_seq(m, &iter->seq);
3358                 /*
3359                  * If we overflow the seq_file buffer, then it will
3360                  * ask us for this data again at start up.
3361                  * Use that instead.
3362                  *  ret is 0 if seq_file write succeeded.
3363                  *        -1 otherwise.
3364                  */
3365                 iter->leftover = ret;
3366         }
3367
3368         return 0;
3369 }
3370
3371 /*
3372  * Should be used after trace_array_get(); trace_types_lock
3373  * ensures that i_cdev was already initialized.
3374  */
3375 static inline int tracing_get_cpu(struct inode *inode)
3376 {
3377         if (inode->i_cdev) /* See trace_create_cpu_file() */
3378                 return (long)inode->i_cdev - 1;
3379         return RING_BUFFER_ALL_CPUS;
3380 }
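
/*
 * Illustration of the i_cdev encoding decoded above (an illustrative
 * sketch; the per-cpu file creation helper is defined elsewhere): the
 * per-cpu files are expected to store "cpu + 1" in i_cdev when they are
 * created, roughly
 *
 *	d_inode(dentry)->i_cdev = (void *)(cpu + 1);
 *
 * so a NULL i_cdev still means RING_BUFFER_ALL_CPUS for the top-level
 * files, and tracing_get_cpu() simply undoes the +1.
 */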
3381
3382 static const struct seq_operations tracer_seq_ops = {
3383         .start          = s_start,
3384         .next           = s_next,
3385         .stop           = s_stop,
3386         .show           = s_show,
3387 };
3388
3389 static struct trace_iterator *
3390 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3391 {
3392         struct trace_array *tr = inode->i_private;
3393         struct trace_iterator *iter;
3394         int cpu;
3395
3396         if (tracing_disabled)
3397                 return ERR_PTR(-ENODEV);
3398
3399         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3400         if (!iter)
3401                 return ERR_PTR(-ENOMEM);
3402
3403         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3404                                     GFP_KERNEL);
3405         if (!iter->buffer_iter)
3406                 goto release;
3407
3408         /*
3409          * We make a copy of the current tracer to avoid concurrent
3410          * changes on it while we are reading.
3411          */
3412         mutex_lock(&trace_types_lock);
3413         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3414         if (!iter->trace)
3415                 goto fail;
3416
3417         *iter->trace = *tr->current_trace;
3418
3419         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3420                 goto fail;
3421
3422         iter->tr = tr;
3423
3424 #ifdef CONFIG_TRACER_MAX_TRACE
3425         /* Currently only the top directory has a snapshot */
3426         if (tr->current_trace->print_max || snapshot)
3427                 iter->trace_buffer = &tr->max_buffer;
3428         else
3429 #endif
3430                 iter->trace_buffer = &tr->trace_buffer;
3431         iter->snapshot = snapshot;
3432         iter->pos = -1;
3433         iter->cpu_file = tracing_get_cpu(inode);
3434         mutex_init(&iter->mutex);
3435
3436         /* Notify the tracer early; before we stop tracing. */
3437         if (iter->trace && iter->trace->open)
3438                 iter->trace->open(iter);
3439
3440         /* Annotate start of buffers if we had overruns */
3441         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3442                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3443
3444         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3445         if (trace_clocks[tr->clock_id].in_ns)
3446                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3447
3448         /* stop the trace while dumping if we are not opening "snapshot" */
3449         if (!iter->snapshot)
3450                 tracing_stop_tr(tr);
3451
3452         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3453                 for_each_tracing_cpu(cpu) {
3454                         iter->buffer_iter[cpu] =
3455                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3456                                                          cpu, GFP_KERNEL);
3457                 }
3458                 ring_buffer_read_prepare_sync();
3459                 for_each_tracing_cpu(cpu) {
3460                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3461                         tracing_iter_reset(iter, cpu);
3462                 }
3463         } else {
3464                 cpu = iter->cpu_file;
3465                 iter->buffer_iter[cpu] =
3466                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3467                                                  cpu, GFP_KERNEL);
3468                 ring_buffer_read_prepare_sync();
3469                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3470                 tracing_iter_reset(iter, cpu);
3471         }
3472
3473         mutex_unlock(&trace_types_lock);
3474
3475         return iter;
3476
3477  fail:
3478         mutex_unlock(&trace_types_lock);
3479         kfree(iter->trace);
3480         kfree(iter->buffer_iter);
3481 release:
3482         seq_release_private(inode, file);
3483         return ERR_PTR(-ENOMEM);
3484 }
3485
3486 int tracing_open_generic(struct inode *inode, struct file *filp)
3487 {
3488         if (tracing_disabled)
3489                 return -ENODEV;
3490
3491         filp->private_data = inode->i_private;
3492         return 0;
3493 }
3494
3495 bool tracing_is_disabled(void)
3496 {
3497         return tracing_disabled ? true : false;
3498 }
3499
3500 /*
3501  * Open and update trace_array ref count.
3502  * Must have the current trace_array passed to it.
3503  */
3504 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3505 {
3506         struct trace_array *tr = inode->i_private;
3507
3508         if (tracing_disabled)
3509                 return -ENODEV;
3510
3511         if (trace_array_get(tr) < 0)
3512                 return -ENODEV;
3513
3514         filp->private_data = inode->i_private;
3515
3516         return 0;
3517 }
3518
3519 static int tracing_release(struct inode *inode, struct file *file)
3520 {
3521         struct trace_array *tr = inode->i_private;
3522         struct seq_file *m = file->private_data;
3523         struct trace_iterator *iter;
3524         int cpu;
3525
3526         if (!(file->f_mode & FMODE_READ)) {
3527                 trace_array_put(tr);
3528                 return 0;
3529         }
3530
3531         /* Writes do not use seq_file */
3532         iter = m->private;
3533         mutex_lock(&trace_types_lock);
3534
3535         for_each_tracing_cpu(cpu) {
3536                 if (iter->buffer_iter[cpu])
3537                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3538         }
3539
3540         if (iter->trace && iter->trace->close)
3541                 iter->trace->close(iter);
3542
3543         if (!iter->snapshot)
3544                 /* reenable tracing if it was previously enabled */
3545                 tracing_start_tr(tr);
3546
3547         __trace_array_put(tr);
3548
3549         mutex_unlock(&trace_types_lock);
3550
3551         mutex_destroy(&iter->mutex);
3552         free_cpumask_var(iter->started);
3553         kfree(iter->trace);
3554         kfree(iter->buffer_iter);
3555         seq_release_private(inode, file);
3556
3557         return 0;
3558 }
3559
3560 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3561 {
3562         struct trace_array *tr = inode->i_private;
3563
3564         trace_array_put(tr);
3565         return 0;
3566 }
3567
3568 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3569 {
3570         struct trace_array *tr = inode->i_private;
3571
3572         trace_array_put(tr);
3573
3574         return single_release(inode, file);
3575 }
3576
3577 static int tracing_open(struct inode *inode, struct file *file)
3578 {
3579         struct trace_array *tr = inode->i_private;
3580         struct trace_iterator *iter;
3581         int ret = 0;
3582
3583         if (trace_array_get(tr) < 0)
3584                 return -ENODEV;
3585
3586         /* If this file was open for write, then erase contents */
3587         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3588                 int cpu = tracing_get_cpu(inode);
3589                 struct trace_buffer *trace_buf = &tr->trace_buffer;
3590
3591 #ifdef CONFIG_TRACER_MAX_TRACE
3592                 if (tr->current_trace->print_max)
3593                         trace_buf = &tr->max_buffer;
3594 #endif
3595
3596                 if (cpu == RING_BUFFER_ALL_CPUS)
3597                         tracing_reset_online_cpus(trace_buf);
3598                 else
3599                         tracing_reset(trace_buf, cpu);
3600         }
3601
3602         if (file->f_mode & FMODE_READ) {
3603                 iter = __tracing_open(inode, file, false);
3604                 if (IS_ERR(iter))
3605                         ret = PTR_ERR(iter);
3606                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3607                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3608         }
3609
3610         if (ret < 0)
3611                 trace_array_put(tr);
3612
3613         return ret;
3614 }
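
/*
 * Usage sketch for the O_TRUNC handling above (paths illustrative):
 * opening the trace file for write with truncation clears the buffer,
 * e.g. from user space:
 *
 *	echo > trace			# clear all per-cpu buffers
 *	echo > per_cpu/cpu0/trace	# clear only CPU 0
 */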
3615
3616 /*
3617  * Some tracers are not suitable for instance buffers.
3618  * A tracer is always available for the global array (toplevel)
3619  * or if it explicitly states that it is.
3620  */
3621 static bool
3622 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3623 {
3624         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3625 }
3626
3627 /* Find the next tracer that this trace array may use */
3628 static struct tracer *
3629 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3630 {
3631         while (t && !trace_ok_for_array(t, tr))
3632                 t = t->next;
3633
3634         return t;
3635 }
3636
3637 static void *
3638 t_next(struct seq_file *m, void *v, loff_t *pos)
3639 {
3640         struct trace_array *tr = m->private;
3641         struct tracer *t = v;
3642
3643         (*pos)++;
3644
3645         if (t)
3646                 t = get_tracer_for_array(tr, t->next);
3647
3648         return t;
3649 }
3650
3651 static void *t_start(struct seq_file *m, loff_t *pos)
3652 {
3653         struct trace_array *tr = m->private;
3654         struct tracer *t;
3655         loff_t l = 0;
3656
3657         mutex_lock(&trace_types_lock);
3658
3659         t = get_tracer_for_array(tr, trace_types);
3660         for (; t && l < *pos; t = t_next(m, t, &l))
3661                         ;
3662
3663         return t;
3664 }
3665
3666 static void t_stop(struct seq_file *m, void *p)
3667 {
3668         mutex_unlock(&trace_types_lock);
3669 }
3670
3671 static int t_show(struct seq_file *m, void *v)
3672 {
3673         struct tracer *t = v;
3674
3675         if (!t)
3676                 return 0;
3677
3678         seq_puts(m, t->name);
3679         if (t->next)
3680                 seq_putc(m, ' ');
3681         else
3682                 seq_putc(m, '\n');
3683
3684         return 0;
3685 }
3686
3687 static const struct seq_operations show_traces_seq_ops = {
3688         .start          = t_start,
3689         .next           = t_next,
3690         .stop           = t_stop,
3691         .show           = t_show,
3692 };
3693
3694 static int show_traces_open(struct inode *inode, struct file *file)
3695 {
3696         struct trace_array *tr = inode->i_private;
3697         struct seq_file *m;
3698         int ret;
3699
3700         if (tracing_disabled)
3701                 return -ENODEV;
3702
3703         ret = seq_open(file, &show_traces_seq_ops);
3704         if (ret)
3705                 return ret;
3706
3707         m = file->private_data;
3708         m->private = tr;
3709
3710         return 0;
3711 }
3712
3713 static ssize_t
3714 tracing_write_stub(struct file *filp, const char __user *ubuf,
3715                    size_t count, loff_t *ppos)
3716 {
3717         return count;
3718 }
3719
3720 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3721 {
3722         int ret;
3723
3724         if (file->f_mode & FMODE_READ)
3725                 ret = seq_lseek(file, offset, whence);
3726         else
3727                 file->f_pos = ret = 0;
3728
3729         return ret;
3730 }
3731
3732 static const struct file_operations tracing_fops = {
3733         .open           = tracing_open,
3734         .read           = seq_read,
3735         .write          = tracing_write_stub,
3736         .llseek         = tracing_lseek,
3737         .release        = tracing_release,
3738 };
3739
3740 static const struct file_operations show_traces_fops = {
3741         .open           = show_traces_open,
3742         .read           = seq_read,
3743         .release        = seq_release,
3744         .llseek         = seq_lseek,
3745 };
3746
3747 static ssize_t
3748 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3749                      size_t count, loff_t *ppos)
3750 {
3751         struct trace_array *tr = file_inode(filp)->i_private;
3752         char *mask_str;
3753         int len;
3754
3755         len = snprintf(NULL, 0, "%*pb\n",
3756                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
3757         mask_str = kmalloc(len, GFP_KERNEL);
3758         if (!mask_str)
3759                 return -ENOMEM;
3760
3761         len = snprintf(mask_str, len, "%*pb\n",
3762                        cpumask_pr_args(tr->tracing_cpumask));
3763         if (len >= count) {
3764                 count = -EINVAL;
3765                 goto out_err;
3766         }
3767         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3768
3769 out_err:
3770         kfree(mask_str);
3771
3772         return count;
3773 }
3774
3775 static ssize_t
3776 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3777                       size_t count, loff_t *ppos)
3778 {
3779         struct trace_array *tr = file_inode(filp)->i_private;
3780         cpumask_var_t tracing_cpumask_new;
3781         int err, cpu;
3782
3783         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3784                 return -ENOMEM;
3785
3786         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3787         if (err)
3788                 goto err_unlock;
3789
3790         local_irq_disable();
3791         arch_spin_lock(&tr->max_lock);
3792         for_each_tracing_cpu(cpu) {
3793                 /*
3794                  * Increase/decrease the disabled counter if we are
3795                  * about to flip a bit in the cpumask:
3796                  */
3797                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3798                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3799                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3800                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3801                 }
3802                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3803                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3804                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3805                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3806                 }
3807         }
3808         arch_spin_unlock(&tr->max_lock);
3809         local_irq_enable();
3810
3811         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3812         free_cpumask_var(tracing_cpumask_new);
3813
3814         return count;
3815
3816 err_unlock:
3817         free_cpumask_var(tracing_cpumask_new);
3818
3819         return err;
3820 }
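
/*
 * Usage sketch for the cpumask handling above (illustrative commands):
 * the mask is written as a hex cpumask string, e.g.
 *
 *	echo 3 > tracing_cpumask	# trace only CPUs 0 and 1
 *	cat tracing_cpumask		# read the current mask back
 *
 * CPUs cleared from the mask get recording disabled on their ring
 * buffers by the loop above.
 */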
3821
3822 static const struct file_operations tracing_cpumask_fops = {
3823         .open           = tracing_open_generic_tr,
3824         .read           = tracing_cpumask_read,
3825         .write          = tracing_cpumask_write,
3826         .release        = tracing_release_generic_tr,
3827         .llseek         = generic_file_llseek,
3828 };
3829
3830 static int tracing_trace_options_show(struct seq_file *m, void *v)
3831 {
3832         struct tracer_opt *trace_opts;
3833         struct trace_array *tr = m->private;
3834         u32 tracer_flags;
3835         int i;
3836
3837         mutex_lock(&trace_types_lock);
3838         tracer_flags = tr->current_trace->flags->val;
3839         trace_opts = tr->current_trace->flags->opts;
3840
3841         for (i = 0; trace_options[i]; i++) {
3842                 if (tr->trace_flags & (1 << i))
3843                         seq_printf(m, "%s\n", trace_options[i]);
3844                 else
3845                         seq_printf(m, "no%s\n", trace_options[i]);
3846         }
3847
3848         for (i = 0; trace_opts[i].name; i++) {
3849                 if (tracer_flags & trace_opts[i].bit)
3850                         seq_printf(m, "%s\n", trace_opts[i].name);
3851                 else
3852                         seq_printf(m, "no%s\n", trace_opts[i].name);
3853         }
3854         mutex_unlock(&trace_types_lock);
3855
3856         return 0;
3857 }
3858
3859 static int __set_tracer_option(struct trace_array *tr,
3860                                struct tracer_flags *tracer_flags,
3861                                struct tracer_opt *opts, int neg)
3862 {
3863         struct tracer *trace = tracer_flags->trace;
3864         int ret;
3865
3866         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3867         if (ret)
3868                 return ret;
3869
3870         if (neg)
3871                 tracer_flags->val &= ~opts->bit;
3872         else
3873                 tracer_flags->val |= opts->bit;
3874         return 0;
3875 }
3876
3877 /* Try to assign a tracer specific option */
3878 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3879 {
3880         struct tracer *trace = tr->current_trace;
3881         struct tracer_flags *tracer_flags = trace->flags;
3882         struct tracer_opt *opts = NULL;
3883         int i;
3884
3885         for (i = 0; tracer_flags->opts[i].name; i++) {
3886                 opts = &tracer_flags->opts[i];
3887
3888                 if (strcmp(cmp, opts->name) == 0)
3889                         return __set_tracer_option(tr, trace->flags, opts, neg);
3890         }
3891
3892         return -EINVAL;
3893 }
3894
3895 /* Some tracers require overwrite to stay enabled */
3896 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3897 {
3898         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3899                 return -1;
3900
3901         return 0;
3902 }
3903
3904 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3905 {
3906         /* do nothing if flag is already set */
3907         if (!!(tr->trace_flags & mask) == !!enabled)
3908                 return 0;
3909
3910         /* Give the tracer a chance to approve the change */
3911         if (tr->current_trace->flag_changed)
3912                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3913                         return -EINVAL;
3914
3915         if (enabled)
3916                 tr->trace_flags |= mask;
3917         else
3918                 tr->trace_flags &= ~mask;
3919
3920         if (mask == TRACE_ITER_RECORD_CMD)
3921                 trace_event_enable_cmd_record(enabled);
3922
3923         if (mask == TRACE_ITER_EVENT_FORK)
3924                 trace_event_follow_fork(tr, enabled);
3925
3926         if (mask == TRACE_ITER_OVERWRITE) {
3927                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3928 #ifdef CONFIG_TRACER_MAX_TRACE
3929                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3930 #endif
3931         }
3932
3933         if (mask == TRACE_ITER_PRINTK) {
3934                 trace_printk_start_stop_comm(enabled);
3935                 trace_printk_control(enabled);
3936         }
3937
3938         return 0;
3939 }
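
/*
 * Usage sketch (option names illustrative): the flags handled here are
 * toggled through the trace_options file or the per-option files that
 * create_trace_option_files() sets up, e.g.
 *
 *	echo noprint-parent > trace_options
 *	echo 1 > options/overwrite
 *
 * Both paths eventually reach set_tracer_flag().
 */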
3940
3941 static int trace_set_options(struct trace_array *tr, char *option)
3942 {
3943         char *cmp;
3944         int neg = 0;
3945         int ret = -ENODEV;
3946         int i;
3947         size_t orig_len = strlen(option);
3948
3949         cmp = strstrip(option);
3950
3951         if (strncmp(cmp, "no", 2) == 0) {
3952                 neg = 1;
3953                 cmp += 2;
3954         }
3955
3956         mutex_lock(&trace_types_lock);
3957
3958         for (i = 0; trace_options[i]; i++) {
3959                 if (strcmp(cmp, trace_options[i]) == 0) {
3960                         ret = set_tracer_flag(tr, 1 << i, !neg);
3961                         break;
3962                 }
3963         }
3964
3965         /* If no option could be set, test the specific tracer options */
3966         if (!trace_options[i])
3967                 ret = set_tracer_option(tr, cmp, neg);
3968
3969         mutex_unlock(&trace_types_lock);
3970
3971         /*
3972          * If the first trailing whitespace is replaced with '\0' by strstrip,
3973          * turn it back into a space.
3974          */
3975         if (orig_len > strlen(option))
3976                 option[strlen(option)] = ' ';
3977
3978         return ret;
3979 }
3980
3981 static void __init apply_trace_boot_options(void)
3982 {
3983         char *buf = trace_boot_options_buf;
3984         char *option;
3985
3986         while (true) {
3987                 option = strsep(&buf, ",");
3988
3989                 if (!option)
3990                         break;
3991
3992                 if (*option)
3993                         trace_set_options(&global_trace, option);
3994
3995                 /* Put back the comma to allow this to be called again */
3996                 if (buf)
3997                         *(buf - 1) = ',';
3998         }
3999 }
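
/*
 * Illustrative boot-time usage: trace_boot_options_buf is filled from
 * the "trace_options=" kernel command line parameter, so something like
 *
 *	trace_options=sym-addr,nooverwrite
 *
 * is split on the commas here and each token is fed to
 * trace_set_options() for the global trace array.
 */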
4000
4001 static ssize_t
4002 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4003                         size_t cnt, loff_t *ppos)
4004 {
4005         struct seq_file *m = filp->private_data;
4006         struct trace_array *tr = m->private;
4007         char buf[64];
4008         int ret;
4009
4010         if (cnt >= sizeof(buf))
4011                 return -EINVAL;
4012
4013         if (copy_from_user(buf, ubuf, cnt))
4014                 return -EFAULT;
4015
4016         buf[cnt] = 0;
4017
4018         ret = trace_set_options(tr, buf);
4019         if (ret < 0)
4020                 return ret;
4021
4022         *ppos += cnt;
4023
4024         return cnt;
4025 }
4026
4027 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4028 {
4029         struct trace_array *tr = inode->i_private;
4030         int ret;
4031
4032         if (tracing_disabled)
4033                 return -ENODEV;
4034
4035         if (trace_array_get(tr) < 0)
4036                 return -ENODEV;
4037
4038         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4039         if (ret < 0)
4040                 trace_array_put(tr);
4041
4042         return ret;
4043 }
4044
4045 static const struct file_operations tracing_iter_fops = {
4046         .open           = tracing_trace_options_open,
4047         .read           = seq_read,
4048         .llseek         = seq_lseek,
4049         .release        = tracing_single_release_tr,
4050         .write          = tracing_trace_options_write,
4051 };
4052
4053 static const char readme_msg[] =
4054         "tracing mini-HOWTO:\n\n"
4055         "# echo 0 > tracing_on : quick way to disable tracing\n"
4056         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4057         " Important files:\n"
4058         "  trace\t\t\t- The static contents of the buffer\n"
4059         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4060         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4061         "  current_tracer\t- Set or display the tracer currently in use\n"
4062         "  available_tracers\t- list of configured tracers for current_tracer\n"
4063         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4064         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4065         "  trace_clock\t\t- change the clock used to order events\n"
4066         "       local:   Per cpu clock but may not be synced across CPUs\n"
4067         "      global:   Synced across CPUs but slows tracing down.\n"
4068         "     counter:   Not a clock, but just an increment\n"
4069         "      uptime:   Jiffy counter from time of boot\n"
4070         "        perf:   Same clock that perf events use\n"
4071 #ifdef CONFIG_X86_64
4072         "     x86-tsc:   TSC cycle counter\n"
4073 #endif
4074         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4075         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4076         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4077         "\t\t\t  Remove sub-buffer with rmdir\n"
4078         "  trace_options\t\t- Set format or modify how tracing happens\n"
4079         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4080         "\t\t\t  option name\n"
4081         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
4082 #ifdef CONFIG_DYNAMIC_FTRACE
4083         "\n  available_filter_functions - list of functions that can be filtered on\n"
4084         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4085         "\t\t\t  functions\n"
4086         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4087         "\t     modules: Can select a group via module\n"
4088         "\t      Format: :mod:<module-name>\n"
4089         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4090         "\t    triggers: a command to perform when function is hit\n"
4091         "\t      Format: <function>:<trigger>[:count]\n"
4092         "\t     trigger: traceon, traceoff\n"
4093         "\t\t      enable_event:<system>:<event>\n"
4094         "\t\t      disable_event:<system>:<event>\n"
4095 #ifdef CONFIG_STACKTRACE
4096         "\t\t      stacktrace\n"
4097 #endif
4098 #ifdef CONFIG_TRACER_SNAPSHOT
4099         "\t\t      snapshot\n"
4100 #endif
4101         "\t\t      dump\n"
4102         "\t\t      cpudump\n"
4103         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4104         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4105         "\t     The first one will disable tracing every time do_fault is hit\n"
4106         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4107         "\t       The first time do_trap is hit and it disables tracing, the\n"
4108         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4109         "\t       the counter will not decrement. It only decrements when the\n"
4110         "\t       trigger did work\n"
4111         "\t     To remove trigger without count:\n"
4112         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4113         "\t     To remove trigger with a count:\n"
4114         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4115         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4116         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4117         "\t    modules: Can select a group via module command :mod:\n"
4118         "\t    Does not accept triggers\n"
4119 #endif /* CONFIG_DYNAMIC_FTRACE */
4120 #ifdef CONFIG_FUNCTION_TRACER
4121         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4122         "\t\t    (function)\n"
4123 #endif
4124 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4125         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4126         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4127         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4128 #endif
4129 #ifdef CONFIG_TRACER_SNAPSHOT
4130         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4131         "\t\t\t  snapshot buffer. Read the contents for more\n"
4132         "\t\t\t  information\n"
4133 #endif
4134 #ifdef CONFIG_STACK_TRACER
4135         "  stack_trace\t\t- Shows the max stack trace when active\n"
4136         "  stack_max_size\t- Shows current max stack size that was traced\n"
4137         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4138         "\t\t\t  new trace)\n"
4139 #ifdef CONFIG_DYNAMIC_FTRACE
4140         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4141         "\t\t\t  traces\n"
4142 #endif
4143 #endif /* CONFIG_STACK_TRACER */
4144 #ifdef CONFIG_KPROBE_EVENT
4145         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4146         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4147 #endif
4148 #ifdef CONFIG_UPROBE_EVENT
4149         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4150         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4151 #endif
4152 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4153         "\t  accepts: event-definitions (one definition per line)\n"
4154         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4155         "\t           -:[<group>/]<event>\n"
4156 #ifdef CONFIG_KPROBE_EVENT
4157         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4158 #endif
4159 #ifdef CONFIG_UPROBE_EVENT
4160         "\t    place: <path>:<offset>\n"
4161 #endif
4162         "\t     args: <name>=fetcharg[:type]\n"
4163         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4164         "\t           $stack<index>, $stack, $retval, $comm\n"
4165         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4166         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4167 #endif
4168         "  events/\t\t- Directory containing all trace event subsystems:\n"
4169         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4170         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4171         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4172         "\t\t\t  events\n"
4173         "      filter\t\t- If set, only events passing filter are traced\n"
4174         "  events/<system>/<event>/\t- Directory containing control files for\n"
4175         "\t\t\t  <event>:\n"
4176         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4177         "      filter\t\t- If set, only events passing filter are traced\n"
4178         "      trigger\t\t- If set, a command to perform when event is hit\n"
4179         "\t    Format: <trigger>[:count][if <filter>]\n"
4180         "\t   trigger: traceon, traceoff\n"
4181         "\t            enable_event:<system>:<event>\n"
4182         "\t            disable_event:<system>:<event>\n"
4183 #ifdef CONFIG_HIST_TRIGGERS
4184         "\t            enable_hist:<system>:<event>\n"
4185         "\t            disable_hist:<system>:<event>\n"
4186 #endif
4187 #ifdef CONFIG_STACKTRACE
4188         "\t\t    stacktrace\n"
4189 #endif
4190 #ifdef CONFIG_TRACER_SNAPSHOT
4191         "\t\t    snapshot\n"
4192 #endif
4193 #ifdef CONFIG_HIST_TRIGGERS
4194         "\t\t    hist (see below)\n"
4195 #endif
4196         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4197         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4198         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4199         "\t                  events/block/block_unplug/trigger\n"
4200         "\t   The first disables tracing every time block_unplug is hit.\n"
4201         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4202         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4203         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4204         "\t   Like function triggers, the counter is only decremented if it\n"
4205         "\t    enabled or disabled tracing.\n"
4206         "\t   To remove a trigger without a count:\n"
4207         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4208         "\t   To remove a trigger with a count:\n"
4209         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4210         "\t   Filters can be ignored when removing a trigger.\n"
4211 #ifdef CONFIG_HIST_TRIGGERS
4212         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4213         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4214         "\t            [:values=<field1[,field2,...]>]\n"
4215         "\t            [:sort=<field1[,field2,...]>]\n"
4216         "\t            [:size=#entries]\n"
4217         "\t            [:pause][:continue][:clear]\n"
4218         "\t            [:name=histname1]\n"
4219         "\t            [if <filter>]\n\n"
4220         "\t    When a matching event is hit, an entry is added to a hash\n"
4221         "\t    table using the key(s) and value(s) named, and the value of a\n"
4222         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4223         "\t    correspond to fields in the event's format description.  Keys\n"
4224         "\t    can be any field, or the special string 'stacktrace'.\n"
4225         "\t    Compound keys consisting of up to two fields can be specified\n"
4226         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4227         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4228         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4229         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4230         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4231         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4232         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4233         "\t    its histogram data will be shared with other triggers of the\n"
4234         "\t    same name, and trigger hits will update this common data.\n\n"
4235         "\t    Reading the 'hist' file for the event will dump the hash\n"
4236         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4237         "\t    triggers attached to an event, there will be a table for each\n"
4238         "\t    trigger in the output.  The table displayed for a named\n"
4239         "\t    trigger will be the same as any other instance having the\n"
4240         "\t    same name.  The default format used to display a given field\n"
4241         "\t    can be modified by appending any of the following modifiers\n"
4242         "\t    to the field name, as applicable:\n\n"
4243         "\t            .hex        display a number as a hex value\n"
4244         "\t            .sym        display an address as a symbol\n"
4245         "\t            .sym-offset display an address as a symbol and offset\n"
4246         "\t            .execname   display a common_pid as a program name\n"
4247         "\t            .syscall    display a syscall id as a syscall name\n"
4248         "\t            .log2       display log2 value rather than raw number\n\n"
4249         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4250         "\t    trigger or to start a hist trigger but not log any events\n"
4251         "\t    until told to do so.  'continue' can be used to start or\n"
4252         "\t    restart a paused hist trigger.\n\n"
4253         "\t    The 'clear' parameter will clear the contents of a running\n"
4254         "\t    hist trigger and leave its current paused/active state\n"
4255         "\t    unchanged.\n\n"
4256         "\t    The enable_hist and disable_hist triggers can be used to\n"
4257         "\t    have one event conditionally start and stop another event's\n"
4258         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4259         "\t    the enable_event and disable_event triggers.\n"
4260 #endif
4261 ;
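
/*
 * Worked example for the hist trigger syntax documented above (event and
 * field names are illustrative, taken from the common kmem:kmalloc
 * event):
 *
 *	echo 'hist:keys=call_site:values=bytes_req:sort=bytes_req.descending' \
 *		> events/kmem/kmalloc/trigger
 *	cat events/kmem/kmalloc/hist
 */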
4262
4263 static ssize_t
4264 tracing_readme_read(struct file *filp, char __user *ubuf,
4265                        size_t cnt, loff_t *ppos)
4266 {
4267         return simple_read_from_buffer(ubuf, cnt, ppos,
4268                                         readme_msg, strlen(readme_msg));
4269 }
4270
4271 static const struct file_operations tracing_readme_fops = {
4272         .open           = tracing_open_generic,
4273         .read           = tracing_readme_read,
4274         .llseek         = generic_file_llseek,
4275 };
4276
4277 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4278 {
4279         unsigned int *ptr = v;
4280
4281         if (*pos || m->count)
4282                 ptr++;
4283
4284         (*pos)++;
4285
4286         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4287              ptr++) {
4288                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4289                         continue;
4290
4291                 return ptr;
4292         }
4293
4294         return NULL;
4295 }
4296
4297 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4298 {
4299         void *v;
4300         loff_t l = 0;
4301
4302         preempt_disable();
4303         arch_spin_lock(&trace_cmdline_lock);
4304
4305         v = &savedcmd->map_cmdline_to_pid[0];
4306         while (l <= *pos) {
4307                 v = saved_cmdlines_next(m, v, &l);
4308                 if (!v)
4309                         return NULL;
4310         }
4311
4312         return v;
4313 }
4314
4315 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4316 {
4317         arch_spin_unlock(&trace_cmdline_lock);
4318         preempt_enable();
4319 }
4320
4321 static int saved_cmdlines_show(struct seq_file *m, void *v)
4322 {
4323         char buf[TASK_COMM_LEN];
4324         unsigned int *pid = v;
4325
4326         __trace_find_cmdline(*pid, buf);
4327         seq_printf(m, "%d %s\n", *pid, buf);
4328         return 0;
4329 }
4330
4331 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4332         .start          = saved_cmdlines_start,
4333         .next           = saved_cmdlines_next,
4334         .stop           = saved_cmdlines_stop,
4335         .show           = saved_cmdlines_show,
4336 };
4337
4338 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4339 {
4340         if (tracing_disabled)
4341                 return -ENODEV;
4342
4343         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4344 }
4345
4346 static const struct file_operations tracing_saved_cmdlines_fops = {
4347         .open           = tracing_saved_cmdlines_open,
4348         .read           = seq_read,
4349         .llseek         = seq_lseek,
4350         .release        = seq_release,
4351 };
4352
4353 static ssize_t
4354 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4355                                  size_t cnt, loff_t *ppos)
4356 {
4357         char buf[64];
4358         int r;
4359
4360         arch_spin_lock(&trace_cmdline_lock);
4361         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4362         arch_spin_unlock(&trace_cmdline_lock);
4363
4364         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4365 }
4366
4367 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4368 {
4369         kfree(s->saved_cmdlines);
4370         kfree(s->map_cmdline_to_pid);
4371         kfree(s);
4372 }
4373
4374 static int tracing_resize_saved_cmdlines(unsigned int val)
4375 {
4376         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4377
4378         s = kmalloc(sizeof(*s), GFP_KERNEL);
4379         if (!s)
4380                 return -ENOMEM;
4381
4382         if (allocate_cmdlines_buffer(val, s) < 0) {
4383                 kfree(s);
4384                 return -ENOMEM;
4385         }
4386
4387         arch_spin_lock(&trace_cmdline_lock);
4388         savedcmd_temp = savedcmd;
4389         savedcmd = s;
4390         arch_spin_unlock(&trace_cmdline_lock);
4391         free_saved_cmdlines_buffer(savedcmd_temp);
4392
4393         return 0;
4394 }
4395
4396 static ssize_t
4397 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4398                                   size_t cnt, loff_t *ppos)
4399 {
4400         unsigned long val;
4401         int ret;
4402
4403         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4404         if (ret)
4405                 return ret;
4406
4407         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
4408         if (!val || val > PID_MAX_DEFAULT)
4409                 return -EINVAL;
4410
4411         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4412         if (ret < 0)
4413                 return ret;
4414
4415         *ppos += cnt;
4416
4417         return cnt;
4418 }
4419
4420 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4421         .open           = tracing_open_generic,
4422         .read           = tracing_saved_cmdlines_size_read,
4423         .write          = tracing_saved_cmdlines_size_write,
4424 };
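
/*
 * Usage sketch for the saved_cmdlines_size file handled above: the value
 * must be between 1 and PID_MAX_DEFAULT (see the write handler), e.g.
 *
 *	echo 1024 > saved_cmdlines_size		# track up to 1024 comm/pid pairs
 *	cat saved_cmdlines			# dump the current pid -> comm map
 */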
4425
4426 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4427 static union trace_enum_map_item *
4428 update_enum_map(union trace_enum_map_item *ptr)
4429 {
4430         if (!ptr->map.enum_string) {
4431                 if (ptr->tail.next) {
4432                         ptr = ptr->tail.next;
4433                         /* Set ptr to the next real item (skip head) */
4434                         ptr++;
4435                 } else
4436                         return NULL;
4437         }
4438         return ptr;
4439 }
4440
4441 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4442 {
4443         union trace_enum_map_item *ptr = v;
4444
4445         /*
4446          * Paranoid! If ptr points to end, we don't want to increment past it.
4447          * This really should never happen.
4448          */
4449         ptr = update_enum_map(ptr);
4450         if (WARN_ON_ONCE(!ptr))
4451                 return NULL;
4452
4453         ptr++;
4454
4455         (*pos)++;
4456
4457         ptr = update_enum_map(ptr);
4458
4459         return ptr;
4460 }
4461
4462 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4463 {
4464         union trace_enum_map_item *v;
4465         loff_t l = 0;
4466
4467         mutex_lock(&trace_enum_mutex);
4468
4469         v = trace_enum_maps;
4470         if (v)
4471                 v++;
4472
4473         while (v && l < *pos) {
4474                 v = enum_map_next(m, v, &l);
4475         }
4476
4477         return v;
4478 }
4479
4480 static void enum_map_stop(struct seq_file *m, void *v)
4481 {
4482         mutex_unlock(&trace_enum_mutex);
4483 }
4484
4485 static int enum_map_show(struct seq_file *m, void *v)
4486 {
4487         union trace_enum_map_item *ptr = v;
4488
4489         seq_printf(m, "%s %ld (%s)\n",
4490                    ptr->map.enum_string, ptr->map.enum_value,
4491                    ptr->map.system);
4492
4493         return 0;
4494 }
4495
4496 static const struct seq_operations tracing_enum_map_seq_ops = {
4497         .start          = enum_map_start,
4498         .next           = enum_map_next,
4499         .stop           = enum_map_stop,
4500         .show           = enum_map_show,
4501 };
4502
4503 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4504 {
4505         if (tracing_disabled)
4506                 return -ENODEV;
4507
4508         return seq_open(filp, &tracing_enum_map_seq_ops);
4509 }
4510
4511 static const struct file_operations tracing_enum_map_fops = {
4512         .open           = tracing_enum_map_open,
4513         .read           = seq_read,
4514         .llseek         = seq_lseek,
4515         .release        = seq_release,
4516 };
4517
4518 static inline union trace_enum_map_item *
4519 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4520 {
4521         /* Return tail of array given the head */
4522         return ptr + ptr->head.length + 1;
4523 }
4524
4525 static void
4526 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4527                            int len)
4528 {
4529         struct trace_enum_map **stop;
4530         struct trace_enum_map **map;
4531         union trace_enum_map_item *map_array;
4532         union trace_enum_map_item *ptr;
4533
4534         stop = start + len;
4535
4536         /*
4537          * The trace_enum_maps contains the map plus a head and tail item,
4538          * where the head holds the module and length of array, and the
4539          * tail holds a pointer to the next list.
4540          */
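	/*
	 * Resulting layout (the head and tail are the two extra items
	 * allocated below):
	 *
	 *	[ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
	 *
	 * trace_enum_jmp_to_tail() skips "length + 1" items to reach the
	 * tail, and the tail's ->next chains the next module's array.
	 */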
4541         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4542         if (!map_array) {
4543                 pr_warn("Unable to allocate trace enum mapping\n");
4544                 return;
4545         }
4546
4547         mutex_lock(&trace_enum_mutex);
4548
4549         if (!trace_enum_maps)
4550                 trace_enum_maps = map_array;
4551         else {
4552                 ptr = trace_enum_maps;
4553                 for (;;) {
4554                         ptr = trace_enum_jmp_to_tail(ptr);
4555                         if (!ptr->tail.next)
4556                                 break;
4557                         ptr = ptr->tail.next;
4558
4559                 }
4560                 ptr->tail.next = map_array;
4561         }
4562         map_array->head.mod = mod;
4563         map_array->head.length = len;
4564         map_array++;
4565
4566         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4567                 map_array->map = **map;
4568                 map_array++;
4569         }
4570         memset(map_array, 0, sizeof(*map_array));
4571
4572         mutex_unlock(&trace_enum_mutex);
4573 }
4574
4575 static void trace_create_enum_file(struct dentry *d_tracer)
4576 {
4577         trace_create_file("enum_map", 0444, d_tracer,
4578                           NULL, &tracing_enum_map_fops);
4579 }
4580
4581 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4582 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4583 static inline void trace_insert_enum_map_file(struct module *mod,
4584                               struct trace_enum_map **start, int len) { }
4585 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4586
4587 static void trace_insert_enum_map(struct module *mod,
4588                                   struct trace_enum_map **start, int len)
4589 {
4590         struct trace_enum_map **map;
4591
4592         if (len <= 0)
4593                 return;
4594
4595         map = start;
4596
4597         trace_event_enum_update(map, len);
4598
4599         trace_insert_enum_map_file(mod, start, len);
4600 }
4601
4602 static ssize_t
4603 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4604                        size_t cnt, loff_t *ppos)
4605 {
4606         struct trace_array *tr = filp->private_data;
4607         char buf[MAX_TRACER_SIZE+2];
4608         int r;
4609
4610         mutex_lock(&trace_types_lock);
4611         r = sprintf(buf, "%s\n", tr->current_trace->name);
4612         mutex_unlock(&trace_types_lock);
4613
4614         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4615 }
4616
4617 int tracer_init(struct tracer *t, struct trace_array *tr)
4618 {
4619         tracing_reset_online_cpus(&tr->trace_buffer);
4620         return t->init(tr);
4621 }
4622
4623 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4624 {
4625         int cpu;
4626
4627         for_each_tracing_cpu(cpu)
4628                 per_cpu_ptr(buf->data, cpu)->entries = val;
4629 }
4630
4631 #ifdef CONFIG_TRACER_MAX_TRACE
4632 /* resize @trace_buf's entries to the size of @size_buf's entries */
4633 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4634                                         struct trace_buffer *size_buf, int cpu_id)
4635 {
4636         int cpu, ret = 0;
4637
4638         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4639                 for_each_tracing_cpu(cpu) {
4640                         ret = ring_buffer_resize(trace_buf->buffer,
4641                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4642                         if (ret < 0)
4643                                 break;
4644                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4645                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4646                 }
4647         } else {
4648                 ret = ring_buffer_resize(trace_buf->buffer,
4649                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4650                 if (ret == 0)
4651                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4652                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4653         }
4654
4655         return ret;
4656 }
4657 #endif /* CONFIG_TRACER_MAX_TRACE */
4658
4659 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4660                                         unsigned long size, int cpu)
4661 {
4662         int ret;
4663
4664         /*
4665          * If kernel or user changes the size of the ring buffer
4666          * we use the size that was given, and we can forget about
4667          * expanding it later.
4668          */
4669         ring_buffer_expanded = true;
4670
4671         /* May be called before buffers are initialized */
4672         if (!tr->trace_buffer.buffer)
4673                 return 0;
4674
4675         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4676         if (ret < 0)
4677                 return ret;
4678
4679 #ifdef CONFIG_TRACER_MAX_TRACE
4680         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4681             !tr->current_trace->use_max_tr)
4682                 goto out;
4683
4684         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4685         if (ret < 0) {
4686                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4687                                                      &tr->trace_buffer, cpu);
4688                 if (r < 0) {
4689                         /*
4690                          * AARGH! We are left with a max buffer of a
4691                          * different size!
4692                          * The max buffer is our "snapshot" buffer.
4693                          * When a tracer needs a snapshot (one of the
4694                          * latency tracers), it swaps the max buffer
4695                          * with the saved snapshot. We succeeded in
4696                          * updating the size of the main buffer, but
4697                          * failed to update the size of the max buffer.
4698                          * And when we tried to reset the main buffer to
4699                          * the original size, we failed there too. This
4700                          * is very unlikely to happen, but if it does,
4701                          * warn and kill all tracing.
4702                          */
4703                         WARN_ON(1);
4704                         tracing_disabled = 1;
4705                 }
4706                 return ret;
4707         }
4708
4709         if (cpu == RING_BUFFER_ALL_CPUS)
4710                 set_buffer_entries(&tr->max_buffer, size);
4711         else
4712                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4713
4714  out:
4715 #endif /* CONFIG_TRACER_MAX_TRACE */
4716
4717         if (cpu == RING_BUFFER_ALL_CPUS)
4718                 set_buffer_entries(&tr->trace_buffer, size);
4719         else
4720                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4721
4722         return ret;
4723 }
4724
4725 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4726                                           unsigned long size, int cpu_id)
4727 {
4728         int ret = size;
4729
4730         mutex_lock(&trace_types_lock);
4731
4732         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4733                 /* make sure this cpu is enabled in the mask */
4734                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4735                         ret = -EINVAL;
4736                         goto out;
4737                 }
4738         }
4739
4740         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4741         if (ret < 0)
4742                 ret = -ENOMEM;
4743
4744 out:
4745         mutex_unlock(&trace_types_lock);
4746
4747         return ret;
4748 }
4749
4750
4751 /**
4752  * tracing_update_buffers - used by tracing facility to expand ring buffers
4753  *
4754  * To save memory when tracing is never used on a system that has it
4755  * configured in, the ring buffers start at a minimum size. Once a
4756  * user starts to use the tracing facility, they are grown to their
4757  * default size.
4758  *
4759  * This function is to be called when a tracer is about to be used.
4760  */
4761 int tracing_update_buffers(void)
4762 {
4763         int ret = 0;
4764
4765         mutex_lock(&trace_types_lock);
4766         if (!ring_buffer_expanded)
4767                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4768                                                 RING_BUFFER_ALL_CPUS);
4769         mutex_unlock(&trace_types_lock);
4770
4771         return ret;
4772 }
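/*
 * Illustrative usage sketch (call site assumed, not taken from this
 * file): code that is about to enable tracing expands the buffers
 * first and bails out on failure:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	... enable the tracer or event ...
 */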
4773
4774 struct trace_option_dentry;
4775
4776 static void
4777 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4778
4779 /*
4780  * Used to clear out the tracer before deletion of an instance.
4781  * Must have trace_types_lock held.
4782  */
4783 static void tracing_set_nop(struct trace_array *tr)
4784 {
4785         if (tr->current_trace == &nop_trace)
4786                 return;
4787
4788         tr->current_trace->enabled--;
4789
4790         if (tr->current_trace->reset)
4791                 tr->current_trace->reset(tr);
4792
4793         tr->current_trace = &nop_trace;
4794 }
4795
4796 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4797 {
4798         /* Only enable if the directory has been created already. */
4799         if (!tr->dir)
4800                 return;
4801
4802         create_trace_option_files(tr, t);
4803 }
4804
4805 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4806 {
4807         struct tracer *t;
4808 #ifdef CONFIG_TRACER_MAX_TRACE
4809         bool had_max_tr;
4810 #endif
4811         int ret = 0;
4812
4813         mutex_lock(&trace_types_lock);
4814
4815         if (!ring_buffer_expanded) {
4816                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4817                                                 RING_BUFFER_ALL_CPUS);
4818                 if (ret < 0)
4819                         goto out;
4820                 ret = 0;
4821         }
4822
4823         for (t = trace_types; t; t = t->next) {
4824                 if (strcmp(t->name, buf) == 0)
4825                         break;
4826         }
4827         if (!t) {
4828                 ret = -EINVAL;
4829                 goto out;
4830         }
4831         if (t == tr->current_trace)
4832                 goto out;
4833
4834         /* Some tracers are only allowed for the top level buffer */
4835         if (!trace_ok_for_array(t, tr)) {
4836                 ret = -EINVAL;
4837                 goto out;
4838         }
4839
4840         /* If trace pipe files are being read, we can't change the tracer */
4841         if (tr->current_trace->ref) {
4842                 ret = -EBUSY;
4843                 goto out;
4844         }
4845
4846         trace_branch_disable();
4847
4848         tr->current_trace->enabled--;
4849
4850         if (tr->current_trace->reset)
4851                 tr->current_trace->reset(tr);
4852
4853         /* Current trace needs to be nop_trace before synchronize_sched */
4854         tr->current_trace = &nop_trace;
4855
4856 #ifdef CONFIG_TRACER_MAX_TRACE
4857         had_max_tr = tr->allocated_snapshot;
4858
4859         if (had_max_tr && !t->use_max_tr) {
4860                 /*
4861                  * We need to make sure that update_max_tr() sees that
4862                  * current_trace changed to nop_trace, to keep it from
4863                  * swapping the buffers after we resize them.
4864                  * update_max_tr() is called with interrupts disabled,
4865                  * so a synchronize_sched() is sufficient.
4866                  */
4867                 synchronize_sched();
4868                 free_snapshot(tr);
4869         }
4870 #endif
4871
4872 #ifdef CONFIG_TRACER_MAX_TRACE
4873         if (t->use_max_tr && !had_max_tr) {
4874                 ret = alloc_snapshot(tr);
4875                 if (ret < 0)
4876                         goto out;
4877         }
4878 #endif
4879
4880         if (t->init) {
4881                 ret = tracer_init(t, tr);
4882                 if (ret)
4883                         goto out;
4884         }
4885
4886         tr->current_trace = t;
4887         tr->current_trace->enabled++;
4888         trace_branch_enable(tr);
4889  out:
4890         mutex_unlock(&trace_types_lock);
4891
4892         return ret;
4893 }
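/*
 * Illustrative usage sketch (assuming tracefs is mounted at
 * /sys/kernel/tracing): tracing_set_trace_write() below feeds this
 * function, so switching tracers from userspace looks like:
 *
 *	# cat available_tracers
 *	# echo function > current_tracer
 *	# echo nop > current_tracer
 *
 * The write fails with -EBUSY while trace_pipe readers hold a
 * reference, matching the tr->current_trace->ref check above.
 */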
4894
4895 static ssize_t
4896 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4897                         size_t cnt, loff_t *ppos)
4898 {
4899         struct trace_array *tr = filp->private_data;
4900         char buf[MAX_TRACER_SIZE+1];
4901         int i;
4902         size_t ret;
4903         int err;
4904
4905         ret = cnt;
4906
4907         if (cnt > MAX_TRACER_SIZE)
4908                 cnt = MAX_TRACER_SIZE;
4909
4910         if (copy_from_user(buf, ubuf, cnt))
4911                 return -EFAULT;
4912
4913         buf[cnt] = 0;
4914
4915         /* strip trailing whitespace. */
4916         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4917                 buf[i] = 0;
4918
4919         err = tracing_set_tracer(tr, buf);
4920         if (err)
4921                 return err;
4922
4923         *ppos += ret;
4924
4925         return ret;
4926 }
4927
4928 static ssize_t
4929 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4930                    size_t cnt, loff_t *ppos)
4931 {
4932         char buf[64];
4933         int r;
4934
4935         r = snprintf(buf, sizeof(buf), "%ld\n",
4936                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4937         if (r > sizeof(buf))
4938                 r = sizeof(buf);
4939         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4940 }
4941
4942 static ssize_t
4943 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4944                     size_t cnt, loff_t *ppos)
4945 {
4946         unsigned long val;
4947         int ret;
4948
4949         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4950         if (ret)
4951                 return ret;
4952
4953         *ptr = val * 1000;
4954
4955         return cnt;
4956 }
4957
4958 static ssize_t
4959 tracing_thresh_read(struct file *filp, char __user *ubuf,
4960                     size_t cnt, loff_t *ppos)
4961 {
4962         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4963 }
4964
4965 static ssize_t
4966 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4967                      size_t cnt, loff_t *ppos)
4968 {
4969         struct trace_array *tr = filp->private_data;
4970         int ret;
4971
4972         mutex_lock(&trace_types_lock);
4973         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4974         if (ret < 0)
4975                 goto out;
4976
4977         if (tr->current_trace->update_thresh) {
4978                 ret = tr->current_trace->update_thresh(tr);
4979                 if (ret < 0)
4980                         goto out;
4981         }
4982
4983         ret = cnt;
4984 out:
4985         mutex_unlock(&trace_types_lock);
4986
4987         return ret;
4988 }
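/*
 * Illustrative sketch (values assumed): tracing_thresh is written in
 * microseconds and stored in nanoseconds by tracing_nsecs_write(), so
 * from userspace:
 *
 *	# echo 100 > tracing_thresh	(only record latencies > 100 us)
 *	# cat tracing_thresh
 *	100
 *
 * Tracers that care are notified through ->update_thresh() above.
 */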
4989
4990 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
4991
4992 static ssize_t
4993 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4994                      size_t cnt, loff_t *ppos)
4995 {
4996         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4997 }
4998
4999 static ssize_t
5000 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5001                       size_t cnt, loff_t *ppos)
5002 {
5003         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5004 }
5005
5006 #endif
5007
5008 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5009 {
5010         struct trace_array *tr = inode->i_private;
5011         struct trace_iterator *iter;
5012         int ret = 0;
5013
5014         if (tracing_disabled)
5015                 return -ENODEV;
5016
5017         if (trace_array_get(tr) < 0)
5018                 return -ENODEV;
5019
5020         mutex_lock(&trace_types_lock);
5021
5022         /* create a buffer to store the information to pass to userspace */
5023         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5024         if (!iter) {
5025                 ret = -ENOMEM;
5026                 __trace_array_put(tr);
5027                 goto out;
5028         }
5029
5030         trace_seq_init(&iter->seq);
5031         iter->trace = tr->current_trace;
5032
5033         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5034                 ret = -ENOMEM;
5035                 goto fail;
5036         }
5037
5038         /* trace pipe does not show start of buffer */
5039         cpumask_setall(iter->started);
5040
5041         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5042                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5043
5044         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5045         if (trace_clocks[tr->clock_id].in_ns)
5046                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5047
5048         iter->tr = tr;
5049         iter->trace_buffer = &tr->trace_buffer;
5050         iter->cpu_file = tracing_get_cpu(inode);
5051         mutex_init(&iter->mutex);
5052         filp->private_data = iter;
5053
5054         if (iter->trace->pipe_open)
5055                 iter->trace->pipe_open(iter);
5056
5057         nonseekable_open(inode, filp);
5058
5059         tr->current_trace->ref++;
5060 out:
5061         mutex_unlock(&trace_types_lock);
5062         return ret;
5063
5064 fail:
5065         kfree(iter);
5066         __trace_array_put(tr);
5067         mutex_unlock(&trace_types_lock);
5068         return ret;
5069 }
5070
5071 static int tracing_release_pipe(struct inode *inode, struct file *file)
5072 {
5073         struct trace_iterator *iter = file->private_data;
5074         struct trace_array *tr = inode->i_private;
5075
5076         mutex_lock(&trace_types_lock);
5077
5078         tr->current_trace->ref--;
5079
5080         if (iter->trace->pipe_close)
5081                 iter->trace->pipe_close(iter);
5082
5083         mutex_unlock(&trace_types_lock);
5084
5085         free_cpumask_var(iter->started);
5086         mutex_destroy(&iter->mutex);
5087         kfree(iter);
5088
5089         trace_array_put(tr);
5090
5091         return 0;
5092 }
5093
5094 static unsigned int
5095 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5096 {
5097         struct trace_array *tr = iter->tr;
5098
5099         /* Iterators are static; they should be either filled or empty */
5100         if (trace_buffer_iter(iter, iter->cpu_file))
5101                 return POLLIN | POLLRDNORM;
5102
5103         if (tr->trace_flags & TRACE_ITER_BLOCK)
5104                 /*
5105                  * Always select as readable when in blocking mode
5106                  */
5107                 return POLLIN | POLLRDNORM;
5108         else
5109                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5110                                              filp, poll_table);
5111 }
5112
5113 static unsigned int
5114 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5115 {
5116         struct trace_iterator *iter = filp->private_data;
5117
5118         return trace_poll(iter, filp, poll_table);
5119 }
5120
5121 /* Must be called with iter->mutex held. */
5122 static int tracing_wait_pipe(struct file *filp)
5123 {
5124         struct trace_iterator *iter = filp->private_data;
5125         int ret;
5126
5127         while (trace_empty(iter)) {
5128
5129                 if ((filp->f_flags & O_NONBLOCK)) {
5130                         return -EAGAIN;
5131                 }
5132
5133                 /*
5134                  * We block until we have read something and tracing is
5135                  * disabled. If tracing is disabled but we have not yet
5136                  * read anything, we keep blocking. This allows a user to
5137                  * cat this file and then enable tracing. But after we have
5138                  * read something, we give EOF when tracing is disabled again.
5139                  *
5140                  * iter->pos will be 0 if we haven't read anything.
5141                  */
5142                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5143                         break;
5144
5145                 mutex_unlock(&iter->mutex);
5146
5147                 ret = wait_on_pipe(iter, false);
5148
5149                 mutex_lock(&iter->mutex);
5150
5151                 if (ret)
5152                         return ret;
5153         }
5154
5155         return 1;
5156 }
5157
5158 /*
5159  * Consumer reader.
5160  */
5161 static ssize_t
5162 tracing_read_pipe(struct file *filp, char __user *ubuf,
5163                   size_t cnt, loff_t *ppos)
5164 {
5165         struct trace_iterator *iter = filp->private_data;
5166         ssize_t sret;
5167
5168         /*
5169          * Avoid more than one consumer on a single file descriptor.
5170          * This is just a matter of trace coherency; the ring buffer itself
5171          * is protected.
5172          */
5173         mutex_lock(&iter->mutex);
5174
5175         /* return any leftover data */
5176         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5177         if (sret != -EBUSY)
5178                 goto out;
5179
5180         trace_seq_init(&iter->seq);
5181
5182         if (iter->trace->read) {
5183                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5184                 if (sret)
5185                         goto out;
5186         }
5187
5188 waitagain:
5189         sret = tracing_wait_pipe(filp);
5190         if (sret <= 0)
5191                 goto out;
5192
5193         /* stop when tracing is finished */
5194         if (trace_empty(iter)) {
5195                 sret = 0;
5196                 goto out;
5197         }
5198
5199         if (cnt >= PAGE_SIZE)
5200                 cnt = PAGE_SIZE - 1;
5201
5202         /* reset all but tr, trace, and overruns */
5203         memset(&iter->seq, 0,
5204                sizeof(struct trace_iterator) -
5205                offsetof(struct trace_iterator, seq));
5206         cpumask_clear(iter->started);
5207         iter->pos = -1;
5208
5209         trace_event_read_lock();
5210         trace_access_lock(iter->cpu_file);
5211         while (trace_find_next_entry_inc(iter) != NULL) {
5212                 enum print_line_t ret;
5213                 int save_len = iter->seq.seq.len;
5214
5215                 ret = print_trace_line(iter);
5216                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5217                         /* don't print partial lines */
5218                         iter->seq.seq.len = save_len;
5219                         break;
5220                 }
5221                 if (ret != TRACE_TYPE_NO_CONSUME)
5222                         trace_consume(iter);
5223
5224                 if (trace_seq_used(&iter->seq) >= cnt)
5225                         break;
5226
5227                 /*
5228                  * Setting the full flag means we reached the trace_seq buffer
5229                  * size and should have left via the partial-line condition above.
5230                  * One of the trace_seq_* functions is not being used properly.
5231                  */
5232                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5233                           iter->ent->type);
5234         }
5235         trace_access_unlock(iter->cpu_file);
5236         trace_event_read_unlock();
5237
5238         /* Now copy what we have to the user */
5239         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5240         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5241                 trace_seq_init(&iter->seq);
5242
5243         /*
5244          * If there was nothing to send to user, in spite of consuming trace
5245          * entries, go back to wait for more entries.
5246          */
5247         if (sret == -EBUSY)
5248                 goto waitagain;
5249
5250 out:
5251         mutex_unlock(&iter->mutex);
5252
5253         return sret;
5254 }
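/*
 * Illustrative sketch (path assumed): trace_pipe is the consuming
 * reader implemented above; every read removes the entries it returns
 * and blocks while the buffer is empty:
 *
 *	# cat trace_pipe > /tmp/trace.txt &
 *	# echo 1 > tracing_on
 *
 * Readers that open with O_NONBLOCK get -EAGAIN instead of blocking,
 * as handled in tracing_wait_pipe().
 */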
5255
5256 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5257                                      unsigned int idx)
5258 {
5259         __free_page(spd->pages[idx]);
5260 }
5261
5262 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5263         .can_merge              = 0,
5264         .confirm                = generic_pipe_buf_confirm,
5265         .release                = generic_pipe_buf_release,
5266         .steal                  = generic_pipe_buf_steal,
5267         .get                    = generic_pipe_buf_get,
5268 };
5269
5270 static size_t
5271 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5272 {
5273         size_t count;
5274         int save_len;
5275         int ret;
5276
5277         /* Seq buffer is page-sized, exactly what we need. */
5278         for (;;) {
5279                 save_len = iter->seq.seq.len;
5280                 ret = print_trace_line(iter);
5281
5282                 if (trace_seq_has_overflowed(&iter->seq)) {
5283                         iter->seq.seq.len = save_len;
5284                         break;
5285                 }
5286
5287                 /*
5288                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5289                  * should only be returned if iter->seq overflowed. But
5290                  * check it anyway to be safe.
5291                  */
5292                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5293                         iter->seq.seq.len = save_len;
5294                         break;
5295                 }
5296
5297                 count = trace_seq_used(&iter->seq) - save_len;
5298                 if (rem < count) {
5299                         rem = 0;
5300                         iter->seq.seq.len = save_len;
5301                         break;
5302                 }
5303
5304                 if (ret != TRACE_TYPE_NO_CONSUME)
5305                         trace_consume(iter);
5306                 rem -= count;
5307                 if (!trace_find_next_entry_inc(iter))   {
5308                         rem = 0;
5309                         iter->ent = NULL;
5310                         break;
5311                 }
5312         }
5313
5314         return rem;
5315 }
5316
5317 static ssize_t tracing_splice_read_pipe(struct file *filp,
5318                                         loff_t *ppos,
5319                                         struct pipe_inode_info *pipe,
5320                                         size_t len,
5321                                         unsigned int flags)
5322 {
5323         struct page *pages_def[PIPE_DEF_BUFFERS];
5324         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5325         struct trace_iterator *iter = filp->private_data;
5326         struct splice_pipe_desc spd = {
5327                 .pages          = pages_def,
5328                 .partial        = partial_def,
5329                 .nr_pages       = 0, /* This gets updated below. */
5330                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5331                 .flags          = flags,
5332                 .ops            = &tracing_pipe_buf_ops,
5333                 .spd_release    = tracing_spd_release_pipe,
5334         };
5335         ssize_t ret;
5336         size_t rem;
5337         unsigned int i;
5338
5339         if (splice_grow_spd(pipe, &spd))
5340                 return -ENOMEM;
5341
5342         mutex_lock(&iter->mutex);
5343
5344         if (iter->trace->splice_read) {
5345                 ret = iter->trace->splice_read(iter, filp,
5346                                                ppos, pipe, len, flags);
5347                 if (ret)
5348                         goto out_err;
5349         }
5350
5351         ret = tracing_wait_pipe(filp);
5352         if (ret <= 0)
5353                 goto out_err;
5354
5355         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5356                 ret = -EFAULT;
5357                 goto out_err;
5358         }
5359
5360         trace_event_read_lock();
5361         trace_access_lock(iter->cpu_file);
5362
5363         /* Fill as many pages as possible. */
5364         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5365                 spd.pages[i] = alloc_page(GFP_KERNEL);
5366                 if (!spd.pages[i])
5367                         break;
5368
5369                 rem = tracing_fill_pipe_page(rem, iter);
5370
5371                 /* Copy the data into the page, so we can start over. */
5372                 ret = trace_seq_to_buffer(&iter->seq,
5373                                           page_address(spd.pages[i]),
5374                                           trace_seq_used(&iter->seq));
5375                 if (ret < 0) {
5376                         __free_page(spd.pages[i]);
5377                         break;
5378                 }
5379                 spd.partial[i].offset = 0;
5380                 spd.partial[i].len = trace_seq_used(&iter->seq);
5381
5382                 trace_seq_init(&iter->seq);
5383         }
5384
5385         trace_access_unlock(iter->cpu_file);
5386         trace_event_read_unlock();
5387         mutex_unlock(&iter->mutex);
5388
5389         spd.nr_pages = i;
5390
5391         if (i)
5392                 ret = splice_to_pipe(pipe, &spd);
5393         else
5394                 ret = 0;
5395 out:
5396         splice_shrink_spd(&spd);
5397         return ret;
5398
5399 out_err:
5400         mutex_unlock(&iter->mutex);
5401         goto out;
5402 }
5403
5404 static ssize_t
5405 tracing_entries_read(struct file *filp, char __user *ubuf,
5406                      size_t cnt, loff_t *ppos)
5407 {
5408         struct inode *inode = file_inode(filp);
5409         struct trace_array *tr = inode->i_private;
5410         int cpu = tracing_get_cpu(inode);
5411         char buf[64];
5412         int r = 0;
5413         ssize_t ret;
5414
5415         mutex_lock(&trace_types_lock);
5416
5417         if (cpu == RING_BUFFER_ALL_CPUS) {
5418                 int cpu, buf_size_same;
5419                 unsigned long size;
5420
5421                 size = 0;
5422                 buf_size_same = 1;
5423                 /* check if all cpu sizes are same */
5424                 for_each_tracing_cpu(cpu) {
5425                         /* fill in the size from first enabled cpu */
5426                         if (size == 0)
5427                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5428                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5429                                 buf_size_same = 0;
5430                                 break;
5431                         }
5432                 }
5433
5434                 if (buf_size_same) {
5435                         if (!ring_buffer_expanded)
5436                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5437                                             size >> 10,
5438                                             trace_buf_size >> 10);
5439                         else
5440                                 r = sprintf(buf, "%lu\n", size >> 10);
5441                 } else
5442                         r = sprintf(buf, "X\n");
5443         } else
5444                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5445
5446         mutex_unlock(&trace_types_lock);
5447
5448         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5449         return ret;
5450 }
5451
5452 static ssize_t
5453 tracing_entries_write(struct file *filp, const char __user *ubuf,
5454                       size_t cnt, loff_t *ppos)
5455 {
5456         struct inode *inode = file_inode(filp);
5457         struct trace_array *tr = inode->i_private;
5458         unsigned long val;
5459         int ret;
5460
5461         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5462         if (ret)
5463                 return ret;
5464
5465         /* must have at least 1 entry */
5466         if (!val)
5467                 return -EINVAL;
5468
5469         /* value is in KB */
5470         val <<= 10;
5471         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5472         if (ret < 0)
5473                 return ret;
5474
5475         *ppos += cnt;
5476
5477         return cnt;
5478 }
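/*
 * Illustrative sketch (paths assumed): buffer_size_kb takes a per-CPU
 * size in KB, and the per_cpu variants resize a single CPU's buffer:
 *
 *	# echo 4096 > buffer_size_kb			(all CPUs)
 *	# echo 1024 > per_cpu/cpu1/buffer_size_kb	(cpu1 only)
 *	# cat buffer_size_kb
 *	X
 *
 * "X" is printed when the per-cpu sizes differ, as implemented in
 * tracing_entries_read() above.
 */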
5479
5480 static ssize_t
5481 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5482                                 size_t cnt, loff_t *ppos)
5483 {
5484         struct trace_array *tr = filp->private_data;
5485         char buf[64];
5486         int r, cpu;
5487         unsigned long size = 0, expanded_size = 0;
5488
5489         mutex_lock(&trace_types_lock);
5490         for_each_tracing_cpu(cpu) {
5491                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5492                 if (!ring_buffer_expanded)
5493                         expanded_size += trace_buf_size >> 10;
5494         }
5495         if (ring_buffer_expanded)
5496                 r = sprintf(buf, "%lu\n", size);
5497         else
5498                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5499         mutex_unlock(&trace_types_lock);
5500
5501         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5502 }
5503
5504 static ssize_t
5505 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5506                           size_t cnt, loff_t *ppos)
5507 {
5508         /*
5509          * There is no need to read what the user has written; this function
5510          * just makes sure that there is no error when "echo" is used.
5511          */
5512
5513         *ppos += cnt;
5514
5515         return cnt;
5516 }
5517
5518 static int
5519 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5520 {
5521         struct trace_array *tr = inode->i_private;
5522
5523         /* disable tracing? */
5524         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5525                 tracer_tracing_off(tr);
5526         /* resize the ring buffer to 0 */
5527         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5528
5529         trace_array_put(tr);
5530
5531         return 0;
5532 }
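/*
 * Illustrative sketch (path and option name assumed): a write followed
 * by close of free_buffer shrinks the ring buffer to zero, and with the
 * stop-on-free option set it turns tracing off first:
 *
 *	# echo 1 > options/disable_on_free
 *	# echo > free_buffer
 *
 * This follows the TRACE_ITER_STOP_ON_FREE handling above.
 */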
5533
5534 static ssize_t
5535 tracing_mark_write(struct file *filp, const char __user *ubuf,
5536                                         size_t cnt, loff_t *fpos)
5537 {
5538         unsigned long addr = (unsigned long)ubuf;
5539         struct trace_array *tr = filp->private_data;
5540         struct ring_buffer_event *event;
5541         struct ring_buffer *buffer;
5542         struct print_entry *entry;
5543         unsigned long irq_flags;
5544         struct page *pages[2];
5545         void *map_page[2];
5546         int nr_pages = 1;
5547         ssize_t written;
5548         int offset;
5549         int size;
5550         int len;
5551         int ret;
5552         int i;
5553
5554         if (tracing_disabled)
5555                 return -EINVAL;
5556
5557         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5558                 return -EINVAL;
5559
5560         if (cnt > TRACE_BUF_SIZE)
5561                 cnt = TRACE_BUF_SIZE;
5562
5563         /*
5564          * Userspace is injecting traces into the kernel trace buffer.
5565          * We want to be as non-intrusive as possible.
5566          * To do so, we do not want to allocate any special buffers
5567          * or take any locks, but instead write the userspace data
5568          * straight into the ring buffer.
5569          *
5570          * First we need to pin the userspace buffer into memory.
5571          * Most likely it already is resident, because the task just
5572          * referenced it, but there is no guarantee. By using
5573          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we
5574          * can access the pages directly and write the data straight
5575          * into the ring buffer.
5576          */
5577         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5578
5579         /* check if we cross pages */
5580         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5581                 nr_pages = 2;
5582
5583         offset = addr & (PAGE_SIZE - 1);
5584         addr &= PAGE_MASK;
5585
5586         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5587         if (ret < nr_pages) {
5588                 while (--ret >= 0)
5589                         put_page(pages[ret]);
5590                 written = -EFAULT;
5591                 goto out;
5592         }
5593
5594         for (i = 0; i < nr_pages; i++)
5595                 map_page[i] = kmap_atomic(pages[i]);
5596
5597         local_save_flags(irq_flags);
5598         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5599         buffer = tr->trace_buffer.buffer;
5600         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5601                                           irq_flags, preempt_count());
5602         if (!event) {
5603                 /* Ring buffer disabled, return as if not open for write */
5604                 written = -EBADF;
5605                 goto out_unlock;
5606         }
5607
5608         entry = ring_buffer_event_data(event);
5609         entry->ip = _THIS_IP_;
5610
5611         if (nr_pages == 2) {
5612                 len = PAGE_SIZE - offset;
5613                 memcpy(&entry->buf, map_page[0] + offset, len);
5614                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5615         } else
5616                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5617
5618         if (entry->buf[cnt - 1] != '\n') {
5619                 entry->buf[cnt] = '\n';
5620                 entry->buf[cnt + 1] = '\0';
5621         } else
5622                 entry->buf[cnt] = '\0';
5623
5624         __buffer_unlock_commit(buffer, event);
5625
5626         written = cnt;
5627
5628         *fpos += written;
5629
5630  out_unlock:
5631         for (i = nr_pages - 1; i >= 0; i--) {
5632                 kunmap_atomic(map_page[i]);
5633                 put_page(pages[i]);
5634         }
5635  out:
5636         return written;
5637 }
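/*
 * Illustrative userspace sketch (paths assumed) showing how data
 * reaches tracing_mark_write() above through the trace_marker file:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "hello from userspace\n", 21);
 *		close(fd);
 *		return 0;
 *	}
 *
 * The message appears in the trace as a print entry stamped with
 * _THIS_IP_.
 */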
5638
5639 static int tracing_clock_show(struct seq_file *m, void *v)
5640 {
5641         struct trace_array *tr = m->private;
5642         int i;
5643
5644         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5645                 seq_printf(m,
5646                         "%s%s%s%s", i ? " " : "",
5647                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5648                         i == tr->clock_id ? "]" : "");
5649         seq_putc(m, '\n');
5650
5651         return 0;
5652 }
5653
5654 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5655 {
5656         int i;
5657
5658         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5659                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5660                         break;
5661         }
5662         if (i == ARRAY_SIZE(trace_clocks))
5663                 return -EINVAL;
5664
5665         mutex_lock(&trace_types_lock);
5666
5667         tr->clock_id = i;
5668
5669         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5670
5671         /*
5672          * New clock may not be consistent with the previous clock.
5673          * Reset the buffer so that it doesn't have incomparable timestamps.
5674          */
5675         tracing_reset_online_cpus(&tr->trace_buffer);
5676
5677 #ifdef CONFIG_TRACER_MAX_TRACE
5678         if (tr->max_buffer.buffer)
5679                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5680         tracing_reset_online_cpus(&tr->max_buffer);
5681 #endif
5682
5683         mutex_unlock(&trace_types_lock);
5684
5685         return 0;
5686 }
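/*
 * Illustrative sketch (paths assumed): trace_clock lists the available
 * clocks with the current one in brackets; writing a name switches the
 * clock and resets the buffers as done above:
 *
 *	# cat trace_clock
 *	[local] global counter ...
 *	# echo global > trace_clock
 *
 * The exact list depends on trace_clocks[] in this build.
 */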
5687
5688 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5689                                    size_t cnt, loff_t *fpos)
5690 {
5691         struct seq_file *m = filp->private_data;
5692         struct trace_array *tr = m->private;
5693         char buf[64];
5694         const char *clockstr;
5695         int ret;
5696
5697         if (cnt >= sizeof(buf))
5698                 return -EINVAL;
5699
5700         if (copy_from_user(buf, ubuf, cnt))
5701                 return -EFAULT;
5702
5703         buf[cnt] = 0;
5704
5705         clockstr = strstrip(buf);
5706
5707         ret = tracing_set_clock(tr, clockstr);
5708         if (ret)
5709                 return ret;
5710
5711         *fpos += cnt;
5712
5713         return cnt;
5714 }
5715
5716 static int tracing_clock_open(struct inode *inode, struct file *file)
5717 {
5718         struct trace_array *tr = inode->i_private;
5719         int ret;
5720
5721         if (tracing_disabled)
5722                 return -ENODEV;
5723
5724         if (trace_array_get(tr))
5725                 return -ENODEV;
5726
5727         ret = single_open(file, tracing_clock_show, inode->i_private);
5728         if (ret < 0)
5729                 trace_array_put(tr);
5730
5731         return ret;
5732 }
5733
5734 struct ftrace_buffer_info {
5735         struct trace_iterator   iter;
5736         void                    *spare;
5737         unsigned int            read;
5738 };
5739
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5742 {
5743         struct trace_array *tr = inode->i_private;
5744         struct trace_iterator *iter;
5745         struct seq_file *m;
5746         int ret = 0;
5747
5748         if (trace_array_get(tr) < 0)
5749                 return -ENODEV;
5750
5751         if (file->f_mode & FMODE_READ) {
5752                 iter = __tracing_open(inode, file, true);
5753                 if (IS_ERR(iter))
5754                         ret = PTR_ERR(iter);
5755         } else {
5756                 /* Writes still need the seq_file to hold the private data */
5757                 ret = -ENOMEM;
5758                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5759                 if (!m)
5760                         goto out;
5761                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5762                 if (!iter) {
5763                         kfree(m);
5764                         goto out;
5765                 }
5766                 ret = 0;
5767
5768                 iter->tr = tr;
5769                 iter->trace_buffer = &tr->max_buffer;
5770                 iter->cpu_file = tracing_get_cpu(inode);
5771                 m->private = iter;
5772                 file->private_data = m;
5773         }
5774 out:
5775         if (ret < 0)
5776                 trace_array_put(tr);
5777
5778         return ret;
5779 }
5780
5781 static ssize_t
5782 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5783                        loff_t *ppos)
5784 {
5785         struct seq_file *m = filp->private_data;
5786         struct trace_iterator *iter = m->private;
5787         struct trace_array *tr = iter->tr;
5788         unsigned long val;
5789         int ret;
5790
5791         ret = tracing_update_buffers();
5792         if (ret < 0)
5793                 return ret;
5794
5795         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5796         if (ret)
5797                 return ret;
5798
5799         mutex_lock(&trace_types_lock);
5800
5801         if (tr->current_trace->use_max_tr) {
5802                 ret = -EBUSY;
5803                 goto out;
5804         }
5805
5806         switch (val) {
5807         case 0:
5808                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5809                         ret = -EINVAL;
5810                         break;
5811                 }
5812                 if (tr->allocated_snapshot)
5813                         free_snapshot(tr);
5814                 break;
5815         case 1:
5816 /* Only allow per-cpu swap if the ring buffer supports it */
5817 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5818                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5819                         ret = -EINVAL;
5820                         break;
5821                 }
5822 #endif
5823                 if (!tr->allocated_snapshot) {
5824                         ret = alloc_snapshot(tr);
5825                         if (ret < 0)
5826                                 break;
5827                 }
5828                 local_irq_disable();
5829                 /* Now, we're going to swap */
5830                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5831                         update_max_tr(tr, current, smp_processor_id());
5832                 else
5833                         update_max_tr_single(tr, current, iter->cpu_file);
5834                 local_irq_enable();
5835                 break;
5836         default:
5837                 if (tr->allocated_snapshot) {
5838                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5839                                 tracing_reset_online_cpus(&tr->max_buffer);
5840                         else
5841                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5842                 }
5843                 break;
5844         }
5845
5846         if (ret >= 0) {
5847                 *ppos += cnt;
5848                 ret = cnt;
5849         }
5850 out:
5851         mutex_unlock(&trace_types_lock);
5852         return ret;
5853 }
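/*
 * Illustrative sketch (path assumed), following the switch above:
 *
 *	# echo 1 > snapshot	(allocate if needed and take a snapshot)
 *	# cat snapshot		(read the snapshot/max buffer)
 *	# echo 2 > snapshot	(clear the snapshot contents)
 *	# echo 0 > snapshot	(free the snapshot buffer)
 *
 * Writes return -EBUSY while the current tracer itself uses the max
 * buffer, matching the use_max_tr check above.
 */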
5854
5855 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5856 {
5857         struct seq_file *m = file->private_data;
5858         int ret;
5859
5860         ret = tracing_release(inode, file);
5861
5862         if (file->f_mode & FMODE_READ)
5863                 return ret;
5864
5865         /* If write only, the seq_file is just a stub */
5866         if (m)
5867                 kfree(m->private);
5868         kfree(m);
5869
5870         return 0;
5871 }
5872
5873 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5874 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5875                                     size_t count, loff_t *ppos);
5876 static int tracing_buffers_release(struct inode *inode, struct file *file);
5877 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5878                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5879
5880 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5881 {
5882         struct ftrace_buffer_info *info;
5883         int ret;
5884
5885         ret = tracing_buffers_open(inode, filp);
5886         if (ret < 0)
5887                 return ret;
5888
5889         info = filp->private_data;
5890
5891         if (info->iter.trace->use_max_tr) {
5892                 tracing_buffers_release(inode, filp);
5893                 return -EBUSY;
5894         }
5895
5896         info->iter.snapshot = true;
5897         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5898
5899         return ret;
5900 }
5901
5902 #endif /* CONFIG_TRACER_SNAPSHOT */
5903
5904
5905 static const struct file_operations tracing_thresh_fops = {
5906         .open           = tracing_open_generic,
5907         .read           = tracing_thresh_read,
5908         .write          = tracing_thresh_write,
5909         .llseek         = generic_file_llseek,
5910 };
5911
5912 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5913 static const struct file_operations tracing_max_lat_fops = {
5914         .open           = tracing_open_generic,
5915         .read           = tracing_max_lat_read,
5916         .write          = tracing_max_lat_write,
5917         .llseek         = generic_file_llseek,
5918 };
5919 #endif
5920
5921 static const struct file_operations set_tracer_fops = {
5922         .open           = tracing_open_generic,
5923         .read           = tracing_set_trace_read,
5924         .write          = tracing_set_trace_write,
5925         .llseek         = generic_file_llseek,
5926 };
5927
5928 static const struct file_operations tracing_pipe_fops = {
5929         .open           = tracing_open_pipe,
5930         .poll           = tracing_poll_pipe,
5931         .read           = tracing_read_pipe,
5932         .splice_read    = tracing_splice_read_pipe,
5933         .release        = tracing_release_pipe,
5934         .llseek         = no_llseek,
5935 };
5936
5937 static const struct file_operations tracing_entries_fops = {
5938         .open           = tracing_open_generic_tr,
5939         .read           = tracing_entries_read,
5940         .write          = tracing_entries_write,
5941         .llseek         = generic_file_llseek,
5942         .release        = tracing_release_generic_tr,
5943 };
5944
5945 static const struct file_operations tracing_total_entries_fops = {
5946         .open           = tracing_open_generic_tr,
5947         .read           = tracing_total_entries_read,
5948         .llseek         = generic_file_llseek,
5949         .release        = tracing_release_generic_tr,
5950 };
5951
5952 static const struct file_operations tracing_free_buffer_fops = {
5953         .open           = tracing_open_generic_tr,
5954         .write          = tracing_free_buffer_write,
5955         .release        = tracing_free_buffer_release,
5956 };
5957
5958 static const struct file_operations tracing_mark_fops = {
5959         .open           = tracing_open_generic_tr,
5960         .write          = tracing_mark_write,
5961         .llseek         = generic_file_llseek,
5962         .release        = tracing_release_generic_tr,
5963 };
5964
5965 static const struct file_operations trace_clock_fops = {
5966         .open           = tracing_clock_open,
5967         .read           = seq_read,
5968         .llseek         = seq_lseek,
5969         .release        = tracing_single_release_tr,
5970         .write          = tracing_clock_write,
5971 };
5972
5973 #ifdef CONFIG_TRACER_SNAPSHOT
5974 static const struct file_operations snapshot_fops = {
5975         .open           = tracing_snapshot_open,
5976         .read           = seq_read,
5977         .write          = tracing_snapshot_write,
5978         .llseek         = tracing_lseek,
5979         .release        = tracing_snapshot_release,
5980 };
5981
5982 static const struct file_operations snapshot_raw_fops = {
5983         .open           = snapshot_raw_open,
5984         .read           = tracing_buffers_read,
5985         .release        = tracing_buffers_release,
5986         .splice_read    = tracing_buffers_splice_read,
5987         .llseek         = no_llseek,
5988 };
5989
5990 #endif /* CONFIG_TRACER_SNAPSHOT */
5991
5992 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5993 {
5994         struct trace_array *tr = inode->i_private;
5995         struct ftrace_buffer_info *info;
5996         int ret;
5997
5998         if (tracing_disabled)
5999                 return -ENODEV;
6000
6001         if (trace_array_get(tr) < 0)
6002                 return -ENODEV;
6003
6004         info = kzalloc(sizeof(*info), GFP_KERNEL);
6005         if (!info) {
6006                 trace_array_put(tr);
6007                 return -ENOMEM;
6008         }
6009
6010         mutex_lock(&trace_types_lock);
6011
6012         info->iter.tr           = tr;
6013         info->iter.cpu_file     = tracing_get_cpu(inode);
6014         info->iter.trace        = tr->current_trace;
6015         info->iter.trace_buffer = &tr->trace_buffer;
6016         info->spare             = NULL;
6017         /* Force reading ring buffer for first read */
6018         info->read              = (unsigned int)-1;
6019
6020         filp->private_data = info;
6021
6022         tr->current_trace->ref++;
6023
6024         mutex_unlock(&trace_types_lock);
6025
6026         ret = nonseekable_open(inode, filp);
6027         if (ret < 0)
6028                 trace_array_put(tr);
6029
6030         return ret;
6031 }
6032
6033 static unsigned int
6034 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6035 {
6036         struct ftrace_buffer_info *info = filp->private_data;
6037         struct trace_iterator *iter = &info->iter;
6038
6039         return trace_poll(iter, filp, poll_table);
6040 }
6041
6042 static ssize_t
6043 tracing_buffers_read(struct file *filp, char __user *ubuf,
6044                      size_t count, loff_t *ppos)
6045 {
6046         struct ftrace_buffer_info *info = filp->private_data;
6047         struct trace_iterator *iter = &info->iter;
6048         ssize_t ret;
6049         ssize_t size;
6050
6051         if (!count)
6052                 return 0;
6053
6054 #ifdef CONFIG_TRACER_MAX_TRACE
6055         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6056                 return -EBUSY;
6057 #endif
6058
6059         if (!info->spare)
6060                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6061                                                           iter->cpu_file);
6062         if (!info->spare)
6063                 return -ENOMEM;
6064
6065         /* Do we have previous read data to read? */
6066         if (info->read < PAGE_SIZE)
6067                 goto read;
6068
6069  again:
6070         trace_access_lock(iter->cpu_file);
6071         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6072                                     &info->spare,
6073                                     count,
6074                                     iter->cpu_file, 0);
6075         trace_access_unlock(iter->cpu_file);
6076
6077         if (ret < 0) {
6078                 if (trace_empty(iter)) {
6079                         if ((filp->f_flags & O_NONBLOCK))
6080                                 return -EAGAIN;
6081
6082                         ret = wait_on_pipe(iter, false);
6083                         if (ret)
6084                                 return ret;
6085
6086                         goto again;
6087                 }
6088                 return 0;
6089         }
6090
6091         info->read = 0;
6092  read:
6093         size = PAGE_SIZE - info->read;
6094         if (size > count)
6095                 size = count;
6096
6097         ret = copy_to_user(ubuf, info->spare + info->read, size);
6098         if (ret == size)
6099                 return -EFAULT;
6100
6101         size -= ret;
6102
6103         *ppos += size;
6104         info->read += size;
6105
6106         return size;
6107 }
6108
6109 static int tracing_buffers_release(struct inode *inode, struct file *file)
6110 {
6111         struct ftrace_buffer_info *info = file->private_data;
6112         struct trace_iterator *iter = &info->iter;
6113
6114         mutex_lock(&trace_types_lock);
6115
6116         iter->tr->current_trace->ref--;
6117
6118         __trace_array_put(iter->tr);
6119
6120         if (info->spare)
6121                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6122         kfree(info);
6123
6124         mutex_unlock(&trace_types_lock);
6125
6126         return 0;
6127 }
6128
6129 struct buffer_ref {
6130         struct ring_buffer      *buffer;
6131         void                    *page;
6132         int                     ref;
6133 };
6134
6135 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6136                                     struct pipe_buffer *buf)
6137 {
6138         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6139
6140         if (--ref->ref)
6141                 return;
6142
6143         ring_buffer_free_read_page(ref->buffer, ref->page);
6144         kfree(ref);
6145         buf->private = 0;
6146 }
6147
6148 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6149                                 struct pipe_buffer *buf)
6150 {
6151         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6152
6153         if (ref->ref > INT_MAX/2)
6154                 return false;
6155
6156         ref->ref++;
6157         return true;
6158 }
6159
6160 /* Pipe buffer operations for a buffer. */
6161 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6162         .can_merge              = 0,
6163         .confirm                = generic_pipe_buf_confirm,
6164         .release                = buffer_pipe_buf_release,
6165         .steal                  = generic_pipe_buf_steal,
6166         .get                    = buffer_pipe_buf_get,
6167 };
6168
6169 /*
6170  * Callback from splice_to_pipe(), used to release the pages left in
6171  * the spd if we errored out while filling the pipe.
6172  */
6173 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6174 {
6175         struct buffer_ref *ref =
6176                 (struct buffer_ref *)spd->partial[i].private;
6177
6178         if (--ref->ref)
6179                 return;
6180
6181         ring_buffer_free_read_page(ref->buffer, ref->page);
6182         kfree(ref);
6183         spd->partial[i].private = 0;
6184 }
6185
6186 static ssize_t
6187 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6188                             struct pipe_inode_info *pipe, size_t len,
6189                             unsigned int flags)
6190 {
6191         struct ftrace_buffer_info *info = file->private_data;
6192         struct trace_iterator *iter = &info->iter;
6193         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6194         struct page *pages_def[PIPE_DEF_BUFFERS];
6195         struct splice_pipe_desc spd = {
6196                 .pages          = pages_def,
6197                 .partial        = partial_def,
6198                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6199                 .flags          = flags,
6200                 .ops            = &buffer_pipe_buf_ops,
6201                 .spd_release    = buffer_spd_release,
6202         };
6203         struct buffer_ref *ref;
6204         int entries, i;
6205         ssize_t ret = 0;
6206
6207 #ifdef CONFIG_TRACER_MAX_TRACE
6208         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6209                 return -EBUSY;
6210 #endif
6211
6212         if (*ppos & (PAGE_SIZE - 1))
6213                 return -EINVAL;
6214
6215         if (len & (PAGE_SIZE - 1)) {
6216                 if (len < PAGE_SIZE)
6217                         return -EINVAL;
6218                 len &= PAGE_MASK;
6219         }
6220
6221         if (splice_grow_spd(pipe, &spd))
6222                 return -ENOMEM;
6223
6224  again:
6225         trace_access_lock(iter->cpu_file);
6226         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6227
6228         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6229                 struct page *page;
6230                 int r;
6231
6232                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6233                 if (!ref) {
6234                         ret = -ENOMEM;
6235                         break;
6236                 }
6237
6238                 ref->ref = 1;
6239                 ref->buffer = iter->trace_buffer->buffer;
6240                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6241                 if (!ref->page) {
6242                         ret = -ENOMEM;
6243                         kfree(ref);
6244                         break;
6245                 }
6246
6247                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6248                                           len, iter->cpu_file, 1);
6249                 if (r < 0) {
6250                         ring_buffer_free_read_page(ref->buffer, ref->page);
6251                         kfree(ref);
6252                         break;
6253                 }
6254
6255                 page = virt_to_page(ref->page);
6256
6257                 spd.pages[i] = page;
6258                 spd.partial[i].len = PAGE_SIZE;
6259                 spd.partial[i].offset = 0;
6260                 spd.partial[i].private = (unsigned long)ref;
6261                 spd.nr_pages++;
6262                 *ppos += PAGE_SIZE;
6263
6264                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6265         }
6266
6267         trace_access_unlock(iter->cpu_file);
6268         spd.nr_pages = i;
6269
6270         /* did we read anything? */
6271         if (!spd.nr_pages) {
6272                 if (ret)
6273                         goto out;
6274
6275                 ret = -EAGAIN;
6276                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6277                         goto out;
6278
6279                 ret = wait_on_pipe(iter, true);
6280                 if (ret)
6281                         goto out;
6282
6283                 goto again;
6284         }
6285
6286         ret = splice_to_pipe(pipe, &spd);
6287 out:
6288         splice_shrink_spd(&spd);
6289
6290         return ret;
6291 }
6292
6293 static const struct file_operations tracing_buffers_fops = {
6294         .open           = tracing_buffers_open,
6295         .read           = tracing_buffers_read,
6296         .poll           = tracing_buffers_poll,
6297         .release        = tracing_buffers_release,
6298         .splice_read    = tracing_buffers_splice_read,
6299         .llseek         = no_llseek,
6300 };
6301
6302 static ssize_t
6303 tracing_stats_read(struct file *filp, char __user *ubuf,
6304                    size_t count, loff_t *ppos)
6305 {
6306         struct inode *inode = file_inode(filp);
6307         struct trace_array *tr = inode->i_private;
6308         struct trace_buffer *trace_buf = &tr->trace_buffer;
6309         int cpu = tracing_get_cpu(inode);
6310         struct trace_seq *s;
6311         unsigned long cnt;
6312         unsigned long long t;
6313         unsigned long usec_rem;
6314
6315         s = kmalloc(sizeof(*s), GFP_KERNEL);
6316         if (!s)
6317                 return -ENOMEM;
6318
6319         trace_seq_init(s);
6320
6321         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6322         trace_seq_printf(s, "entries: %ld\n", cnt);
6323
6324         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6325         trace_seq_printf(s, "overrun: %ld\n", cnt);
6326
6327         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6328         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6329
6330         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6331         trace_seq_printf(s, "bytes: %ld\n", cnt);
6332
6333         if (trace_clocks[tr->clock_id].in_ns) {
6334                 /* local or global for trace_clock */
6335                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6336                 usec_rem = do_div(t, USEC_PER_SEC);
6337                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6338                                                                 t, usec_rem);
6339
6340                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6341                 usec_rem = do_div(t, USEC_PER_SEC);
6342                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6343         } else {
6344                 /* counter or tsc mode for trace_clock */
6345                 trace_seq_printf(s, "oldest event ts: %llu\n",
6346                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6347
6348                 trace_seq_printf(s, "now ts: %llu\n",
6349                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6350         }
6351
6352         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6353         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6354
6355         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6356         trace_seq_printf(s, "read events: %ld\n", cnt);
6357
6358         count = simple_read_from_buffer(ubuf, count, ppos,
6359                                         s->buffer, trace_seq_used(s));
6360
6361         kfree(s);
6362
6363         return count;
6364 }
6365
6366 static const struct file_operations tracing_stats_fops = {
6367         .open           = tracing_open_generic_tr,
6368         .read           = tracing_stats_read,
6369         .llseek         = generic_file_llseek,
6370         .release        = tracing_release_generic_tr,
6371 };
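/*
 * tracing_stats_read() above backs the per_cpu/cpuN/stats file created in
 * tracing_init_tracefs_percpu() below.  Reading it produces output of the
 * following shape (all values illustrative):
 *
 *	# cat /sys/kernel/tracing/per_cpu/cpu0/stats
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 51200
 *	oldest event ts:  1370.000000
 *	now ts:  1372.345678
 *	dropped events: 0
 *	read events: 512
 */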
6372
6373 #ifdef CONFIG_DYNAMIC_FTRACE
6374
6375 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6376 {
6377         return 0;
6378 }
6379
6380 static ssize_t
6381 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6382                   size_t cnt, loff_t *ppos)
6383 {
6384         static char ftrace_dyn_info_buffer[1024];
6385         static DEFINE_MUTEX(dyn_info_mutex);
6386         unsigned long *p = filp->private_data;
6387         char *buf = ftrace_dyn_info_buffer;
6388         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6389         int r;
6390
6391         mutex_lock(&dyn_info_mutex);
6392         r = sprintf(buf, "%ld ", *p);
6393
6394         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6395         buf[r++] = '\n';
6396
6397         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6398
6399         mutex_unlock(&dyn_info_mutex);
6400
6401         return r;
6402 }
6403
6404 static const struct file_operations tracing_dyn_info_fops = {
6405         .open           = tracing_open_generic,
6406         .read           = tracing_read_dyn_info,
6407         .llseek         = generic_file_llseek,
6408 };
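/*
 * tracing_read_dyn_info() backs the dyn_ftrace_total_info file created in
 * tracer_init_tracefs() below, which is handed &ftrace_update_tot_cnt as
 * its data.  Reading it reports that counter (roughly, how many functions
 * have been patched for dynamic tracing; value illustrative):
 *
 *	# cat /sys/kernel/tracing/dyn_ftrace_total_info
 *	45678
 */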
6409 #endif /* CONFIG_DYNAMIC_FTRACE */
6410
6411 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6412 static void
6413 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6414 {
6415         tracing_snapshot();
6416 }
6417
6418 static void
6419 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6420 {
6421         unsigned long *count = (long *)data;
6422
6423         if (!*count)
6424                 return;
6425
6426         if (*count != -1)
6427                 (*count)--;
6428
6429         tracing_snapshot();
6430 }
6431
6432 static int
6433 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6434                       struct ftrace_probe_ops *ops, void *data)
6435 {
6436         long count = (long)data;
6437
6438         seq_printf(m, "%ps:", (void *)ip);
6439
6440         seq_puts(m, "snapshot");
6441
6442         if (count == -1)
6443                 seq_puts(m, ":unlimited\n");
6444         else
6445                 seq_printf(m, ":count=%ld\n", count);
6446
6447         return 0;
6448 }
6449
6450 static struct ftrace_probe_ops snapshot_probe_ops = {
6451         .func                   = ftrace_snapshot,
6452         .print                  = ftrace_snapshot_print,
6453 };
6454
6455 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6456         .func                   = ftrace_count_snapshot,
6457         .print                  = ftrace_snapshot_print,
6458 };
6459
6460 static int
6461 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6462                                char *glob, char *cmd, char *param, int enable)
6463 {
6464         struct ftrace_probe_ops *ops;
6465         void *count = (void *)-1;
6466         char *number;
6467         int ret;
6468
6469         /* hash funcs only work with set_ftrace_filter */
6470         if (!enable)
6471                 return -EINVAL;
6472
6473         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6474
6475         if (glob[0] == '!') {
6476                 unregister_ftrace_function_probe_func(glob+1, ops);
6477                 return 0;
6478         }
6479
6480         if (!param)
6481                 goto out_reg;
6482
6483         number = strsep(&param, ":");
6484
6485         if (!strlen(number))
6486                 goto out_reg;
6487
6488         /*
6489          * We use the callback data field (which is a pointer)
6490          * as our counter.
6491          */
6492         ret = kstrtoul(number, 0, (unsigned long *)&count);
6493         if (ret)
6494                 return ret;
6495
6496  out_reg:
6497         ret = alloc_snapshot(&global_trace);
6498         if (ret < 0)
6499                 goto out;
6500
6501         ret = register_ftrace_function_probe(glob, ops, count);
6502
6503  out:
6504         return ret < 0 ? ret : 0;
6505 }
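/*
 * Illustrative usage of the "snapshot" command parsed above (the function
 * name and count are examples only); it is written to set_ftrace_filter at
 * the top of the tracefs mount:
 *
 *	# echo 'do_page_fault:snapshot' > set_ftrace_filter
 *	# echo 'do_page_fault:snapshot:5' > set_ftrace_filter
 *	# echo '!do_page_fault:snapshot' > set_ftrace_filter
 *
 * With no count a snapshot is taken on every hit, a count of N limits it
 * to the first N hits, and the '!' form removes the probe again.
 */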
6506
6507 static struct ftrace_func_command ftrace_snapshot_cmd = {
6508         .name                   = "snapshot",
6509         .func                   = ftrace_trace_snapshot_callback,
6510 };
6511
6512 static __init int register_snapshot_cmd(void)
6513 {
6514         return register_ftrace_command(&ftrace_snapshot_cmd);
6515 }
6516 #else
6517 static inline __init int register_snapshot_cmd(void) { return 0; }
6518 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6519
6520 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6521 {
6522         if (WARN_ON(!tr->dir))
6523                 return ERR_PTR(-ENODEV);
6524
6525         /* Top directory uses NULL as the parent */
6526         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6527                 return NULL;
6528
6529         /* All sub buffers have a descriptor */
6530         return tr->dir;
6531 }
6532
6533 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6534 {
6535         struct dentry *d_tracer;
6536
6537         if (tr->percpu_dir)
6538                 return tr->percpu_dir;
6539
6540         d_tracer = tracing_get_dentry(tr);
6541         if (IS_ERR(d_tracer))
6542                 return NULL;
6543
6544         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6545
6546         WARN_ONCE(!tr->percpu_dir,
6547                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6548
6549         return tr->percpu_dir;
6550 }
6551
6552 static struct dentry *
6553 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6554                       void *data, long cpu, const struct file_operations *fops)
6555 {
6556         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6557
6558         if (ret) /* See tracing_get_cpu() */
6559                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6560         return ret;
6561 }
6562
6563 static void
6564 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6565 {
6566         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6567         struct dentry *d_cpu;
6568         char cpu_dir[30]; /* 30 characters should be more than enough */
6569
6570         if (!d_percpu)
6571                 return;
6572
6573         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6574         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6575         if (!d_cpu) {
6576                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6577                 return;
6578         }
6579
6580         /* per cpu trace_pipe */
6581         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6582                                 tr, cpu, &tracing_pipe_fops);
6583
6584         /* per cpu trace */
6585         trace_create_cpu_file("trace", 0644, d_cpu,
6586                                 tr, cpu, &tracing_fops);
6587
6588         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6589                                 tr, cpu, &tracing_buffers_fops);
6590
6591         trace_create_cpu_file("stats", 0444, d_cpu,
6592                                 tr, cpu, &tracing_stats_fops);
6593
6594         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6595                                 tr, cpu, &tracing_entries_fops);
6596
6597 #ifdef CONFIG_TRACER_SNAPSHOT
6598         trace_create_cpu_file("snapshot", 0644, d_cpu,
6599                                 tr, cpu, &snapshot_fops);
6600
6601         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6602                                 tr, cpu, &snapshot_raw_fops);
6603 #endif
6604 }
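/*
 * The calls above give every CPU its own directory.  With
 * CONFIG_TRACER_SNAPSHOT enabled the resulting layout looks like this
 * (cpu number illustrative):
 *
 *	per_cpu/cpu0/{trace,trace_pipe,trace_pipe_raw,stats,buffer_size_kb,
 *		      snapshot,snapshot_raw}
 */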
6605
6606 #ifdef CONFIG_FTRACE_SELFTEST
6607 /* Let selftest have access to static functions in this file */
6608 #include "trace_selftest.c"
6609 #endif
6610
6611 static ssize_t
6612 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6613                         loff_t *ppos)
6614 {
6615         struct trace_option_dentry *topt = filp->private_data;
6616         char *buf;
6617
6618         if (topt->flags->val & topt->opt->bit)
6619                 buf = "1\n";
6620         else
6621                 buf = "0\n";
6622
6623         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6624 }
6625
6626 static ssize_t
6627 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6628                          loff_t *ppos)
6629 {
6630         struct trace_option_dentry *topt = filp->private_data;
6631         unsigned long val;
6632         int ret;
6633
6634         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6635         if (ret)
6636                 return ret;
6637
6638         if (val != 0 && val != 1)
6639                 return -EINVAL;
6640
6641         if (!!(topt->flags->val & topt->opt->bit) != val) {
6642                 mutex_lock(&trace_types_lock);
6643                 ret = __set_tracer_option(topt->tr, topt->flags,
6644                                           topt->opt, !val);
6645                 mutex_unlock(&trace_types_lock);
6646                 if (ret)
6647                         return ret;
6648         }
6649
6650         *ppos += cnt;
6651
6652         return cnt;
6653 }
6654
6655
6656 static const struct file_operations trace_options_fops = {
6657         .open = tracing_open_generic,
6658         .read = trace_options_read,
6659         .write = trace_options_write,
6660         .llseek = generic_file_llseek,
6661 };
6662
6663 /*
6664  * In order to pass in both the trace_array descriptor as well as the index
6665  * to the flag that the trace option file represents, the trace_array
6666  * has a character array of trace_flags_index[], which holds the index
6667  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6668  * The address of this character array is passed to the flag option file
6669  * read/write callbacks.
6670  *
6671  * In order to extract both the index and the trace_array descriptor,
6672  * get_tr_index() uses the following algorithm.
6673  *
6674  *   idx = *ptr;
6675  *
6676  * Since each array entry holds its own index (remember index[1] == 1),
6677  * dereferencing the pointer yields the index directly.
6678  *
6679  * Then, to get the trace_array descriptor, subtract that index from
6680  * the pointer to reach the start of the index array itself:
6681  *
6682  *   ptr - idx == &index[0]
6683  *
6684  * Then a simple container_of() from that pointer gets us to the
6685  * trace_array descriptor.
6686  */
6687 static void get_tr_index(void *data, struct trace_array **ptr,
6688                          unsigned int *pindex)
6689 {
6690         *pindex = *(unsigned char *)data;
6691
6692         *ptr = container_of(data - *pindex, struct trace_array,
6693                             trace_flags_index);
6694 }
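/*
 * A small worked example of the arithmetic above (my_tr is a hypothetical
 * trace_array pointer, used for illustration only):
 *
 *	void *data = &my_tr->trace_flags_index[3];	// stored as file data
 *	unsigned int idx = *(unsigned char *)data;	// == 3, the bit index
 *	void *base = data - idx;		// == &my_tr->trace_flags_index[0]
 *	struct trace_array *tr =
 *		container_of(base, struct trace_array, trace_flags_index);
 *	// tr == my_tr, and (1 << idx) selects the flag bit
 */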
6695
6696 static ssize_t
6697 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6698                         loff_t *ppos)
6699 {
6700         void *tr_index = filp->private_data;
6701         struct trace_array *tr;
6702         unsigned int index;
6703         char *buf;
6704
6705         get_tr_index(tr_index, &tr, &index);
6706
6707         if (tr->trace_flags & (1 << index))
6708                 buf = "1\n";
6709         else
6710                 buf = "0\n";
6711
6712         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6713 }
6714
6715 static ssize_t
6716 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6717                          loff_t *ppos)
6718 {
6719         void *tr_index = filp->private_data;
6720         struct trace_array *tr;
6721         unsigned int index;
6722         unsigned long val;
6723         int ret;
6724
6725         get_tr_index(tr_index, &tr, &index);
6726
6727         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6728         if (ret)
6729                 return ret;
6730
6731         if (val != 0 && val != 1)
6732                 return -EINVAL;
6733
6734         mutex_lock(&trace_types_lock);
6735         ret = set_tracer_flag(tr, 1 << index, val);
6736         mutex_unlock(&trace_types_lock);
6737
6738         if (ret < 0)
6739                 return ret;
6740
6741         *ppos += cnt;
6742
6743         return cnt;
6744 }
6745
6746 static const struct file_operations trace_options_core_fops = {
6747         .open = tracing_open_generic,
6748         .read = trace_options_core_read,
6749         .write = trace_options_core_write,
6750         .llseek = generic_file_llseek,
6751 };
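/*
 * trace_options_core_fops (core flags) and trace_options_fops above
 * (tracer-specific flags) both back the files in the per-array options/
 * directory.  Illustrative usage (the flag name is an example only):
 *
 *	# cat /sys/kernel/tracing/options/stacktrace
 *	0
 *	# echo 1 > /sys/kernel/tracing/options/stacktrace
 */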
6752
6753 struct dentry *trace_create_file(const char *name,
6754                                  umode_t mode,
6755                                  struct dentry *parent,
6756                                  void *data,
6757                                  const struct file_operations *fops)
6758 {
6759         struct dentry *ret;
6760
6761         ret = tracefs_create_file(name, mode, parent, data, fops);
6762         if (!ret)
6763                 pr_warn("Could not create tracefs '%s' entry\n", name);
6764
6765         return ret;
6766 }
6767
6768
6769 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6770 {
6771         struct dentry *d_tracer;
6772
6773         if (tr->options)
6774                 return tr->options;
6775
6776         d_tracer = tracing_get_dentry(tr);
6777         if (IS_ERR(d_tracer))
6778                 return NULL;
6779
6780         tr->options = tracefs_create_dir("options", d_tracer);
6781         if (!tr->options) {
6782                 pr_warn("Could not create tracefs directory 'options'\n");
6783                 return NULL;
6784         }
6785
6786         return tr->options;
6787 }
6788
6789 static void
6790 create_trace_option_file(struct trace_array *tr,
6791                          struct trace_option_dentry *topt,
6792                          struct tracer_flags *flags,
6793                          struct tracer_opt *opt)
6794 {
6795         struct dentry *t_options;
6796
6797         t_options = trace_options_init_dentry(tr);
6798         if (!t_options)
6799                 return;
6800
6801         topt->flags = flags;
6802         topt->opt = opt;
6803         topt->tr = tr;
6804
6805         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6806                                     &trace_options_fops);
6807
6808 }
6809
6810 static void
6811 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6812 {
6813         struct trace_option_dentry *topts;
6814         struct trace_options *tr_topts;
6815         struct tracer_flags *flags;
6816         struct tracer_opt *opts;
6817         int cnt;
6818         int i;
6819
6820         if (!tracer)
6821                 return;
6822
6823         flags = tracer->flags;
6824
6825         if (!flags || !flags->opts)
6826                 return;
6827
6828         /*
6829          * If this is an instance, only create flags for tracers
6830          * the instance may have.
6831          */
6832         if (!trace_ok_for_array(tracer, tr))
6833                 return;
6834
6835         for (i = 0; i < tr->nr_topts; i++) {
6836                 /* Make sure there are no duplicate flags. */
6837                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6838                         return;
6839         }
6840
6841         opts = flags->opts;
6842
6843         for (cnt = 0; opts[cnt].name; cnt++)
6844                 ;
6845
6846         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6847         if (!topts)
6848                 return;
6849
6850         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6851                             GFP_KERNEL);
6852         if (!tr_topts) {
6853                 kfree(topts);
6854                 return;
6855         }
6856
6857         tr->topts = tr_topts;
6858         tr->topts[tr->nr_topts].tracer = tracer;
6859         tr->topts[tr->nr_topts].topts = topts;
6860         tr->nr_topts++;
6861
6862         for (cnt = 0; opts[cnt].name; cnt++) {
6863                 create_trace_option_file(tr, &topts[cnt], flags,
6864                                          &opts[cnt]);
6865                 WARN_ONCE(topts[cnt].entry == NULL,
6866                           "Failed to create trace option: %s",
6867                           opts[cnt].name);
6868         }
6869 }
6870
6871 static struct dentry *
6872 create_trace_option_core_file(struct trace_array *tr,
6873                               const char *option, long index)
6874 {
6875         struct dentry *t_options;
6876
6877         t_options = trace_options_init_dentry(tr);
6878         if (!t_options)
6879                 return NULL;
6880
6881         return trace_create_file(option, 0644, t_options,
6882                                  (void *)&tr->trace_flags_index[index],
6883                                  &trace_options_core_fops);
6884 }
6885
6886 static void create_trace_options_dir(struct trace_array *tr)
6887 {
6888         struct dentry *t_options;
6889         bool top_level = tr == &global_trace;
6890         int i;
6891
6892         t_options = trace_options_init_dentry(tr);
6893         if (!t_options)
6894                 return;
6895
6896         for (i = 0; trace_options[i]; i++) {
6897                 if (top_level ||
6898                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6899                         create_trace_option_core_file(tr, trace_options[i], i);
6900         }
6901 }
6902
6903 static ssize_t
6904 rb_simple_read(struct file *filp, char __user *ubuf,
6905                size_t cnt, loff_t *ppos)
6906 {
6907         struct trace_array *tr = filp->private_data;
6908         char buf[64];
6909         int r;
6910
6911         r = tracer_tracing_is_on(tr);
6912         r = sprintf(buf, "%d\n", r);
6913
6914         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6915 }
6916
6917 static ssize_t
6918 rb_simple_write(struct file *filp, const char __user *ubuf,
6919                 size_t cnt, loff_t *ppos)
6920 {
6921         struct trace_array *tr = filp->private_data;
6922         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6923         unsigned long val;
6924         int ret;
6925
6926         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6927         if (ret)
6928                 return ret;
6929
6930         if (buffer) {
6931                 mutex_lock(&trace_types_lock);
6932                 if (!!val == tracer_tracing_is_on(tr)) {
6933                         val = 0; /* do nothing */
6934                 } else if (val) {
6935                         tracer_tracing_on(tr);
6936                         if (tr->current_trace->start)
6937                                 tr->current_trace->start(tr);
6938                 } else {
6939                         tracer_tracing_off(tr);
6940                         if (tr->current_trace->stop)
6941                                 tr->current_trace->stop(tr);
6942                 }
6943                 mutex_unlock(&trace_types_lock);
6944         }
6945
6946         (*ppos)++;
6947
6948         return cnt;
6949 }
6950
6951 static const struct file_operations rb_simple_fops = {
6952         .open           = tracing_open_generic_tr,
6953         .read           = rb_simple_read,
6954         .write          = rb_simple_write,
6955         .release        = tracing_release_generic_tr,
6956         .llseek         = default_llseek,
6957 };
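/*
 * rb_simple_fops backs the tracing_on file created for each trace array in
 * init_tracer_tracefs() below.  Illustrative usage (paths assume the
 * default tracefs mount point):
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on	(stop recording)
 *	# echo 1 > /sys/kernel/tracing/tracing_on	(resume recording)
 *	# cat /sys/kernel/tracing/tracing_on
 *	1
 */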
6958
6959 struct dentry *trace_instance_dir;
6960
6961 static void
6962 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6963
6964 static int
6965 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6966 {
6967         enum ring_buffer_flags rb_flags;
6968
6969         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6970
6971         buf->tr = tr;
6972
6973         buf->buffer = ring_buffer_alloc(size, rb_flags);
6974         if (!buf->buffer)
6975                 return -ENOMEM;
6976
6977         buf->data = alloc_percpu(struct trace_array_cpu);
6978         if (!buf->data) {
6979                 ring_buffer_free(buf->buffer);
6980                 buf->buffer = NULL;
6981                 return -ENOMEM;
6982         }
6983
6984         /* Allocate the first page for all buffers */
6985         set_buffer_entries(&tr->trace_buffer,
6986                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6987
6988         return 0;
6989 }
6990
6991 static int allocate_trace_buffers(struct trace_array *tr, int size)
6992 {
6993         int ret;
6994
6995         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6996         if (ret)
6997                 return ret;
6998
6999 #ifdef CONFIG_TRACER_MAX_TRACE
7000         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7001                                     allocate_snapshot ? size : 1);
7002         if (WARN_ON(ret)) {
7003                 ring_buffer_free(tr->trace_buffer.buffer);
7004                 tr->trace_buffer.buffer = NULL;
7005                 free_percpu(tr->trace_buffer.data);
7006                 tr->trace_buffer.data = NULL;
7007                 return -ENOMEM;
7008         }
7009         tr->allocated_snapshot = allocate_snapshot;
7010
7011         /*
7012          * Only the top level trace array gets its snapshot allocated
7013          * from the kernel command line.
7014          */
7015         allocate_snapshot = false;
7016 #endif
7017         return 0;
7018 }
7019
7020 static void free_trace_buffer(struct trace_buffer *buf)
7021 {
7022         if (buf->buffer) {
7023                 ring_buffer_free(buf->buffer);
7024                 buf->buffer = NULL;
7025                 free_percpu(buf->data);
7026                 buf->data = NULL;
7027         }
7028 }
7029
7030 static void free_trace_buffers(struct trace_array *tr)
7031 {
7032         if (!tr)
7033                 return;
7034
7035         free_trace_buffer(&tr->trace_buffer);
7036
7037 #ifdef CONFIG_TRACER_MAX_TRACE
7038         free_trace_buffer(&tr->max_buffer);
7039 #endif
7040 }
7041
7042 static void init_trace_flags_index(struct trace_array *tr)
7043 {
7044         int i;
7045
7046         /* Used by the trace options files */
7047         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7048                 tr->trace_flags_index[i] = i;
7049 }
7050
7051 static void __update_tracer_options(struct trace_array *tr)
7052 {
7053         struct tracer *t;
7054
7055         for (t = trace_types; t; t = t->next)
7056                 add_tracer_options(tr, t);
7057 }
7058
7059 static void update_tracer_options(struct trace_array *tr)
7060 {
7061         mutex_lock(&trace_types_lock);
7062         __update_tracer_options(tr);
7063         mutex_unlock(&trace_types_lock);
7064 }
7065
7066 static int instance_mkdir(const char *name)
7067 {
7068         struct trace_array *tr;
7069         int ret;
7070
7071         mutex_lock(&trace_types_lock);
7072
7073         ret = -EEXIST;
7074         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7075                 if (tr->name && strcmp(tr->name, name) == 0)
7076                         goto out_unlock;
7077         }
7078
7079         ret = -ENOMEM;
7080         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7081         if (!tr)
7082                 goto out_unlock;
7083
7084         tr->name = kstrdup(name, GFP_KERNEL);
7085         if (!tr->name)
7086                 goto out_free_tr;
7087
7088         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7089                 goto out_free_tr;
7090
7091         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7092
7093         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7094
7095         raw_spin_lock_init(&tr->start_lock);
7096
7097         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7098
7099         tr->current_trace = &nop_trace;
7100
7101         INIT_LIST_HEAD(&tr->systems);
7102         INIT_LIST_HEAD(&tr->events);
7103
7104         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7105                 goto out_free_tr;
7106
7107         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7108         if (!tr->dir)
7109                 goto out_free_tr;
7110
7111         ret = event_trace_add_tracer(tr->dir, tr);
7112         if (ret) {
7113                 tracefs_remove_recursive(tr->dir);
7114                 goto out_free_tr;
7115         }
7116
7117         init_tracer_tracefs(tr, tr->dir);
7118         init_trace_flags_index(tr);
7119         __update_tracer_options(tr);
7120
7121         list_add(&tr->list, &ftrace_trace_arrays);
7122
7123         mutex_unlock(&trace_types_lock);
7124
7125         return 0;
7126
7127  out_free_tr:
7128         free_trace_buffers(tr);
7129         free_cpumask_var(tr->tracing_cpumask);
7130         kfree(tr->name);
7131         kfree(tr);
7132
7133  out_unlock:
7134         mutex_unlock(&trace_types_lock);
7135
7136         return ret;
7137
7138 }
7139
7140 static int instance_rmdir(const char *name)
7141 {
7142         struct trace_array *tr;
7143         int found = 0;
7144         int ret;
7145         int i;
7146
7147         mutex_lock(&trace_types_lock);
7148
7149         ret = -ENODEV;
7150         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7151                 if (tr->name && strcmp(tr->name, name) == 0) {
7152                         found = 1;
7153                         break;
7154                 }
7155         }
7156         if (!found)
7157                 goto out_unlock;
7158
7159         ret = -EBUSY;
7160         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7161                 goto out_unlock;
7162
7163         list_del(&tr->list);
7164
7165         /* Disable all the flags that were enabled coming in */
7166         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7167                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7168                         set_tracer_flag(tr, 1 << i, 0);
7169         }
7170
7171         tracing_set_nop(tr);
7172         event_trace_del_tracer(tr);
7173         ftrace_clear_pids(tr);
7174         ftrace_destroy_function_files(tr);
7175         tracefs_remove_recursive(tr->dir);
7176         free_trace_buffers(tr);
7177
7178         for (i = 0; i < tr->nr_topts; i++) {
7179                 kfree(tr->topts[i].topts);
7180         }
7181         kfree(tr->topts);
7182
7183         free_cpumask_var(tr->tracing_cpumask);
7184         kfree(tr->name);
7185         kfree(tr);
7186
7187         ret = 0;
7188
7189  out_unlock:
7190         mutex_unlock(&trace_types_lock);
7191
7192         return ret;
7193 }
7194
7195 static __init void create_trace_instances(struct dentry *d_tracer)
7196 {
7197         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7198                                                          instance_mkdir,
7199                                                          instance_rmdir);
7200         if (WARN_ON(!trace_instance_dir))
7201                 return;
7202 }
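/*
 * With the instances directory wired to instance_mkdir()/instance_rmdir()
 * above, new trace arrays can be created and removed from userspace with
 * plain mkdir/rmdir.  Illustrative usage (instance name is an example):
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# echo 1 > /sys/kernel/tracing/instances/foo/events/sched/enable
 *	# rmdir /sys/kernel/tracing/instances/foo
 */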
7203
7204 static void
7205 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7206 {
7207         int cpu;
7208
7209         trace_create_file("available_tracers", 0444, d_tracer,
7210                         tr, &show_traces_fops);
7211
7212         trace_create_file("current_tracer", 0644, d_tracer,
7213                         tr, &set_tracer_fops);
7214
7215         trace_create_file("tracing_cpumask", 0644, d_tracer,
7216                           tr, &tracing_cpumask_fops);
7217
7218         trace_create_file("trace_options", 0644, d_tracer,
7219                           tr, &tracing_iter_fops);
7220
7221         trace_create_file("trace", 0644, d_tracer,
7222                           tr, &tracing_fops);
7223
7224         trace_create_file("trace_pipe", 0444, d_tracer,
7225                           tr, &tracing_pipe_fops);
7226
7227         trace_create_file("buffer_size_kb", 0644, d_tracer,
7228                           tr, &tracing_entries_fops);
7229
7230         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7231                           tr, &tracing_total_entries_fops);
7232
7233         trace_create_file("free_buffer", 0200, d_tracer,
7234                           tr, &tracing_free_buffer_fops);
7235
7236         trace_create_file("trace_marker", 0220, d_tracer,
7237                           tr, &tracing_mark_fops);
7238
7239         trace_create_file("trace_clock", 0644, d_tracer, tr,
7240                           &trace_clock_fops);
7241
7242         trace_create_file("tracing_on", 0644, d_tracer,
7243                           tr, &rb_simple_fops);
7244
7245         create_trace_options_dir(tr);
7246
7247 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7248         trace_create_file("tracing_max_latency", 0644, d_tracer,
7249                         &tr->max_latency, &tracing_max_lat_fops);
7250 #endif
7251
7252         if (ftrace_create_function_files(tr, d_tracer))
7253                 WARN(1, "Could not allocate function filter files");
7254
7255 #ifdef CONFIG_TRACER_SNAPSHOT
7256         trace_create_file("snapshot", 0644, d_tracer,
7257                           tr, &snapshot_fops);
7258 #endif
7259
7260         for_each_tracing_cpu(cpu)
7261                 tracing_init_tracefs_percpu(tr, cpu);
7262
7263         ftrace_init_tracefs(tr, d_tracer);
7264 }
7265
7266 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7267 {
7268         struct vfsmount *mnt;
7269         struct file_system_type *type;
7270
7271         /*
7272          * To maintain backward compatibility for tools that mount
7273          * debugfs to get to the tracing facility, tracefs is automatically
7274          * mounted to the debugfs/tracing directory.
7275          */
7276         type = get_fs_type("tracefs");
7277         if (!type)
7278                 return NULL;
7279         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7280         put_filesystem(type);
7281         if (IS_ERR(mnt))
7282                 return NULL;
7283         mntget(mnt);
7284
7285         return mnt;
7286 }
7287
7288 /**
7289  * tracing_init_dentry - initialize top level trace array
7290  *
7291  * This is called when creating files or directories in the tracing
7292  * directory. It is called via fs_initcall() from any of the boot-up code
7293  * and expects to return the dentry of the top level tracing directory.
7294  */
7295 struct dentry *tracing_init_dentry(void)
7296 {
7297         struct trace_array *tr = &global_trace;
7298
7299         /* The top level trace array uses  NULL as parent */
7300         if (tr->dir)
7301                 return NULL;
7302
7303         if (WARN_ON(!tracefs_initialized()) ||
7304                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7305                  WARN_ON(!debugfs_initialized())))
7306                 return ERR_PTR(-ENODEV);
7307
7308         /*
7309          * As there may still be users that expect the tracing
7310          * files to exist in debugfs/tracing, we must automount
7311          * the tracefs file system there, so older tools still
7312          * work with the newer kernel.
7313          */
7314         tr->dir = debugfs_create_automount("tracing", NULL,
7315                                            trace_automount, NULL);
7316         if (!tr->dir) {
7317                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7318                 return ERR_PTR(-ENOMEM);
7319         }
7320
7321         return NULL;
7322 }
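/*
 * With the automount above in place, the same tracefs files are reachable
 * both at the tracefs mount point and under debugfs (paths assume the
 * usual mount points and are illustrative):
 *
 *	# mount -t tracefs nodev /sys/kernel/tracing
 *	# ls /sys/kernel/tracing/
 *	# ls /sys/kernel/debug/tracing/		(tracefs automounted here)
 */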
7323
7324 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7325 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7326
7327 static void __init trace_enum_init(void)
7328 {
7329         int len;
7330
7331         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7332         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7333 }
7334
7335 #ifdef CONFIG_MODULES
7336 static void trace_module_add_enums(struct module *mod)
7337 {
7338         if (!mod->num_trace_enums)
7339                 return;
7340
7341         /*
7342          * Modules with bad taint do not have events created, do
7343          * not bother with enums either.
7344          */
7345         if (trace_module_has_bad_taint(mod))
7346                 return;
7347
7348         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7349 }
7350
7351 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7352 static void trace_module_remove_enums(struct module *mod)
7353 {
7354         union trace_enum_map_item *map;
7355         union trace_enum_map_item **last = &trace_enum_maps;
7356
7357         if (!mod->num_trace_enums)
7358                 return;
7359
7360         mutex_lock(&trace_enum_mutex);
7361
7362         map = trace_enum_maps;
7363
7364         while (map) {
7365                 if (map->head.mod == mod)
7366                         break;
7367                 map = trace_enum_jmp_to_tail(map);
7368                 last = &map->tail.next;
7369                 map = map->tail.next;
7370         }
7371         if (!map)
7372                 goto out;
7373
7374         *last = trace_enum_jmp_to_tail(map)->tail.next;
7375         kfree(map);
7376  out:
7377         mutex_unlock(&trace_enum_mutex);
7378 }
7379 #else
7380 static inline void trace_module_remove_enums(struct module *mod) { }
7381 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7382
7383 static int trace_module_notify(struct notifier_block *self,
7384                                unsigned long val, void *data)
7385 {
7386         struct module *mod = data;
7387
7388         switch (val) {
7389         case MODULE_STATE_COMING:
7390                 trace_module_add_enums(mod);
7391                 break;
7392         case MODULE_STATE_GOING:
7393                 trace_module_remove_enums(mod);
7394                 break;
7395         }
7396
7397         return 0;
7398 }
7399
7400 static struct notifier_block trace_module_nb = {
7401         .notifier_call = trace_module_notify,
7402         .priority = 0,
7403 };
7404 #endif /* CONFIG_MODULES */
7405
7406 static __init int tracer_init_tracefs(void)
7407 {
7408         struct dentry *d_tracer;
7409
7410         trace_access_lock_init();
7411
7412         d_tracer = tracing_init_dentry();
7413         if (IS_ERR(d_tracer))
7414                 return 0;
7415
7416         init_tracer_tracefs(&global_trace, d_tracer);
7417         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7418
7419         trace_create_file("tracing_thresh", 0644, d_tracer,
7420                         &global_trace, &tracing_thresh_fops);
7421
7422         trace_create_file("README", 0444, d_tracer,
7423                         NULL, &tracing_readme_fops);
7424
7425         trace_create_file("saved_cmdlines", 0444, d_tracer,
7426                         NULL, &tracing_saved_cmdlines_fops);
7427
7428         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7429                           NULL, &tracing_saved_cmdlines_size_fops);
7430
7431         trace_enum_init();
7432
7433         trace_create_enum_file(d_tracer);
7434
7435 #ifdef CONFIG_MODULES
7436         register_module_notifier(&trace_module_nb);
7437 #endif
7438
7439 #ifdef CONFIG_DYNAMIC_FTRACE
7440         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7441                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7442 #endif
7443
7444         create_trace_instances(d_tracer);
7445
7446         update_tracer_options(&global_trace);
7447
7448         return 0;
7449 }
7450
7451 static int trace_panic_handler(struct notifier_block *this,
7452                                unsigned long event, void *unused)
7453 {
7454         if (ftrace_dump_on_oops)
7455                 ftrace_dump(ftrace_dump_on_oops);
7456         return NOTIFY_OK;
7457 }
7458
7459 static struct notifier_block trace_panic_notifier = {
7460         .notifier_call  = trace_panic_handler,
7461         .next           = NULL,
7462         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7463 };
7464
7465 static int trace_die_handler(struct notifier_block *self,
7466                              unsigned long val,
7467                              void *data)
7468 {
7469         switch (val) {
7470         case DIE_OOPS:
7471                 if (ftrace_dump_on_oops)
7472                         ftrace_dump(ftrace_dump_on_oops);
7473                 break;
7474         default:
7475                 break;
7476         }
7477         return NOTIFY_OK;
7478 }
7479
7480 static struct notifier_block trace_die_notifier = {
7481         .notifier_call = trace_die_handler,
7482         .priority = 200
7483 };
7484
7485 /*
7486  * printk is set to a max of 1024; we really don't need it that big.
7487  * Nothing should be printing 1000 characters anyway.
7488  */
7489 #define TRACE_MAX_PRINT         1000
7490
7491 /*
7492  * Define here KERN_TRACE so that we have one place to modify
7493  * it if we decide to change what log level the ftrace dump
7494  * should be at.
7495  */
7496 #define KERN_TRACE              KERN_EMERG
7497
7498 void
7499 trace_printk_seq(struct trace_seq *s)
7500 {
7501         /* Probably should print a warning here. */
7502         if (s->seq.len >= TRACE_MAX_PRINT)
7503                 s->seq.len = TRACE_MAX_PRINT;
7504
7505         /*
7506          * More paranoid code. Although the buffer size is set to
7507          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7508          * an extra layer of protection.
7509          */
7510         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7511                 s->seq.len = s->seq.size - 1;
7512
7513         /* should be zero terminated, but we are paranoid. */
7514         s->buffer[s->seq.len] = 0;
7515
7516         printk(KERN_TRACE "%s", s->buffer);
7517
7518         trace_seq_init(s);
7519 }
7520
7521 void trace_init_global_iter(struct trace_iterator *iter)
7522 {
7523         iter->tr = &global_trace;
7524         iter->trace = iter->tr->current_trace;
7525         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7526         iter->trace_buffer = &global_trace.trace_buffer;
7527
7528         if (iter->trace && iter->trace->open)
7529                 iter->trace->open(iter);
7530
7531         /* Annotate start of buffers if we had overruns */
7532         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7533                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7534
7535         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7536         if (trace_clocks[iter->tr->clock_id].in_ns)
7537                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7538 }
7539
7540 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7541 {
7542         /* use static because iter can be a bit big for the stack */
7543         static struct trace_iterator iter;
7544         static atomic_t dump_running;
7545         struct trace_array *tr = &global_trace;
7546         unsigned int old_userobj;
7547         unsigned long flags;
7548         int cnt = 0, cpu;
7549
7550         /* Only allow one dump user at a time. */
7551         if (atomic_inc_return(&dump_running) != 1) {
7552                 atomic_dec(&dump_running);
7553                 return;
7554         }
7555
7556         /*
7557          * Always turn off tracing when we dump.
7558          * We don't need to show trace output of what happens
7559          * between multiple crashes.
7560          *
7561          * If the user does a sysrq-z, then they can re-enable
7562          * tracing with echo 1 > tracing_on.
7563          */
7564         tracing_off();
7565
7566         local_irq_save(flags);
7567
7568         /* Simulate the iterator */
7569         trace_init_global_iter(&iter);
7570
7571         for_each_tracing_cpu(cpu) {
7572                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7573         }
7574
7575         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7576
7577         /* don't look at user memory in panic mode */
7578         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7579
7580         switch (oops_dump_mode) {
7581         case DUMP_ALL:
7582                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7583                 break;
7584         case DUMP_ORIG:
7585                 iter.cpu_file = raw_smp_processor_id();
7586                 break;
7587         case DUMP_NONE:
7588                 goto out_enable;
7589         default:
7590                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7591                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7592         }
7593
7594         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7595
7596         /* Did function tracer already get disabled? */
7597         if (ftrace_is_dead()) {
7598                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7599                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7600         }
7601
7602         /*
7603          * We need to stop all tracing on all CPUs to read
7604          * the next buffer. This is a bit expensive, but is
7605          * not done often. We read all that we can,
7606          * and then release the locks again.
7607          */
7608
7609         while (!trace_empty(&iter)) {
7610
7611                 if (!cnt)
7612                         printk(KERN_TRACE "---------------------------------\n");
7613
7614                 cnt++;
7615
7616                 /* reset all but tr, trace, and overruns */
7617                 memset(&iter.seq, 0,
7618                        sizeof(struct trace_iterator) -
7619                        offsetof(struct trace_iterator, seq));
7620                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7621                 iter.pos = -1;
7622
7623                 if (trace_find_next_entry_inc(&iter) != NULL) {
7624                         int ret;
7625
7626                         ret = print_trace_line(&iter);
7627                         if (ret != TRACE_TYPE_NO_CONSUME)
7628                                 trace_consume(&iter);
7629                 }
7630                 touch_nmi_watchdog();
7631
7632                 trace_printk_seq(&iter.seq);
7633         }
7634
7635         if (!cnt)
7636                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7637         else
7638                 printk(KERN_TRACE "---------------------------------\n");
7639
7640  out_enable:
7641         tr->trace_flags |= old_userobj;
7642
7643         for_each_tracing_cpu(cpu) {
7644                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7645         }
7646         atomic_dec(&dump_running);
7647         local_irq_restore(flags);
7648 }
7649 EXPORT_SYMBOL_GPL(ftrace_dump);
7650
7651 __init static int tracer_alloc_buffers(void)
7652 {
7653         int ring_buf_size;
7654         int ret = -ENOMEM;
7655
7656         /*
7657          * Make sure we don't accidentally add more trace options
7658          * than we have bits for.
7659          */
7660         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7661
7662         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7663                 goto out;
7664
7665         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7666                 goto out_free_buffer_mask;
7667
7668         /* Only allocate trace_printk buffers if a trace_printk exists */
7669         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7670                 /* Must be called before global_trace.buffer is allocated */
7671                 trace_printk_init_buffers();
7672
7673         /* To save memory, keep the ring buffer size to its minimum */
7674         if (ring_buffer_expanded)
7675                 ring_buf_size = trace_buf_size;
7676         else
7677                 ring_buf_size = 1;
7678
7679         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7680         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7681
7682         raw_spin_lock_init(&global_trace.start_lock);
7683
7684         /* Used for event triggers */
7685         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7686         if (!temp_buffer)
7687                 goto out_free_cpumask;
7688
7689         if (trace_create_savedcmd() < 0)
7690                 goto out_free_temp_buffer;
7691
7692         /* TODO: make the number of buffers hot pluggable with CPUs */
7693         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7694                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7695                 WARN_ON(1);
7696                 goto out_free_savedcmd;
7697         }
7698
7699         if (global_trace.buffer_disabled)
7700                 tracing_off();
7701
7702         if (trace_boot_clock) {
7703                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7704                 if (ret < 0)
7705                         pr_warn("Trace clock %s not defined, going back to default\n",
7706                                 trace_boot_clock);
7707         }
7708
7709         /*
7710          * register_tracer() might reference current_trace, so it
7711          * needs to be set before we register anything. This is
7712          * just a bootstrap of current_trace anyway.
7713          */
7714         global_trace.current_trace = &nop_trace;
7715
7716         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7717
7718         ftrace_init_global_array_ops(&global_trace);
7719
7720         init_trace_flags_index(&global_trace);
7721
7722         register_tracer(&nop_trace);
7723
7724         /* All seems OK, enable tracing */
7725         tracing_disabled = 0;
7726
7727         atomic_notifier_chain_register(&panic_notifier_list,
7728                                        &trace_panic_notifier);
7729
7730         register_die_notifier(&trace_die_notifier);
7731
7732         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7733
7734         INIT_LIST_HEAD(&global_trace.systems);
7735         INIT_LIST_HEAD(&global_trace.events);
7736         list_add(&global_trace.list, &ftrace_trace_arrays);
7737
7738         apply_trace_boot_options();
7739
7740         register_snapshot_cmd();
7741
7742         return 0;
7743
7744 out_free_savedcmd:
7745         free_saved_cmdlines_buffer(savedcmd);
7746 out_free_temp_buffer:
7747         ring_buffer_free(temp_buffer);
7748 out_free_cpumask:
7749         free_cpumask_var(global_trace.tracing_cpumask);
7750 out_free_buffer_mask:
7751         free_cpumask_var(tracing_buffer_mask);
7752 out:
7753         return ret;
7754 }
7755
7756 void __init trace_init(void)
7757 {
7758         if (tracepoint_printk) {
7759                 tracepoint_print_iter =
7760                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7761                 if (WARN_ON(!tracepoint_print_iter))
7762                         tracepoint_printk = 0;
7763         }
7764         tracer_alloc_buffers();
7765         trace_event_init();
7766 }
7767
7768 __init static int clear_boot_tracer(void)
7769 {
7770         /*
7771          * The buffer holding the default bootup tracer's name is in an
7772          * init section. This function is called at late_initcall time.
7773          * If the boot tracer was never registered, clear the pointer
7774          * now so that a later registration does not access init memory
7775          * that is about to be freed.
7776          */
7777         if (!default_bootup_tracer)
7778                 return 0;
7779
7780         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7781                default_bootup_tracer);
7782         default_bootup_tracer = NULL;
7783
7784         return 0;
7785 }
7786
7787 fs_initcall(tracer_init_tracefs);
7788 late_initcall_sync(clear_boot_tracer);