1 // SPDX-License-Identifier: GPL-2.0
3 * ring buffer based function tracer
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
55 #include "trace_output.h"
58 * On boot up, the ring buffer is set to the minimum size, so that
59 * we do not waste memory on systems that are not using tracing.
61 bool ring_buffer_expanded;
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
65 * We need to change this state when a selftest is running.
 * A selftest will look into the ring-buffer to count the
 * entries inserted during the selftest, although some concurrent
 * insertions into the ring-buffer, such as trace_printk, could occur
 * at the same time, giving false positive or negative results.
71 static bool __read_mostly tracing_selftest_running;
 * If boot-time tracing (including tracers/events set up via the kernel
 * cmdline) is running, we do not want to run SELFTEST.
77 bool __read_mostly tracing_selftest_disabled;
79 void __init disable_tracing_selftest(const char *reason)
81 if (!tracing_selftest_disabled) {
82 tracing_selftest_disabled = true;
83 pr_info("Ftrace startup test is disabled due to %s\n", reason);
87 #define tracing_selftest_running 0
88 #define tracing_selftest_disabled 0
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
109 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
116 * Kill all tracing for good (never come back).
117 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
121 static int tracing_disabled = 1;
123 cpumask_var_t __read_mostly tracing_buffer_mask;
126 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
128 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129 * is set, then ftrace_dump is called. This will output the contents
130 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops
137 * Set 1 if you want to dump buffers of all CPUs
138 * Set 2 if you want to dump the buffer of the CPU that triggered oops
141 enum ftrace_dump_mode ftrace_dump_on_oops;
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
150 unsigned long length;
153 union trace_eval_map_item;
155 struct trace_eval_map_tail {
157 * "end" is first and points to NULL as it must be different
158 * than "mod" or "eval_string"
160 union trace_eval_map_item *next;
161 const char *end; /* points to NULL */
164 static DEFINE_MUTEX(trace_eval_mutex);
167 * The trace_eval_maps are saved in an array with two extra elements,
168 * one at the beginning, and one at the end. The beginning item contains
169 * the count of the saved maps (head.length), and the module they
170 * belong to if not built in (head.mod). The ending item contains a
171 * pointer to the next array of saved eval_map items.
173 union trace_eval_map_item {
174 struct trace_eval_map map;
175 struct trace_eval_map_head head;
176 struct trace_eval_map_tail tail;
179 static union trace_eval_map_item *trace_eval_maps;
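/*
 * Illustrative sketch (not part of the original file): with the layout
 * described above, an array that saves N eval maps for one module looks
 * like:
 *
 *	item[0].head	.length = N, .mod = owning module (if not built in)
 *	item[1..N].map	the saved trace_eval_map entries themselves
 *	item[N+1].tail	.next = pointer to the next such array (or NULL)
 *
 * so walking every saved map means stepping over N map entries and then
 * following tail.next to the next block.
 */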
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184 struct trace_buffer *buffer,
185 unsigned int trace_ctx);
187 #define MAX_TRACER_SIZE 100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
200 static int __init set_cmdline_ftrace(char *str)
202 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203 default_bootup_tracer = bootup_tracer_buf;
204 /* We are using ftrace early, expand it */
205 ring_buffer_expanded = true;
208 __setup("ftrace=", set_cmdline_ftrace);
210 static int __init set_ftrace_dump_on_oops(char *str)
212 if (*str++ != '=' || !*str || !strcmp("1", str)) {
213 ftrace_dump_on_oops = DUMP_ALL;
217 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218 ftrace_dump_on_oops = DUMP_ORIG;
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
226 static int __init stop_trace_on_warning(char *str)
228 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229 __disable_trace_on_warning = 1;
232 __setup("traceoff_on_warning", stop_trace_on_warning);
234 static int __init boot_alloc_snapshot(char *str)
236 char *slot = boot_snapshot_info + boot_snapshot_index;
237 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
242 if (strlen(str) >= left)
245 ret = snprintf(slot, left, "%s\t", str);
246 boot_snapshot_index += ret;
248 allocate_snapshot = true;
249 /* We also need the main ring buffer expanded */
250 ring_buffer_expanded = true;
254 __setup("alloc_snapshot", boot_alloc_snapshot);
257 static int __init boot_snapshot(char *str)
259 snapshot_at_boot = true;
260 boot_alloc_snapshot(str);
263 __setup("ftrace_boot_snapshot", boot_snapshot);
266 static int __init boot_instance(char *str)
268 char *slot = boot_instance_info + boot_instance_index;
269 int left = sizeof(boot_instance_info) - boot_instance_index;
272 if (strlen(str) >= left)
275 ret = snprintf(slot, left, "%s\t", str);
276 boot_instance_index += ret;
280 __setup("trace_instance=", boot_instance);
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
285 static int __init set_trace_boot_options(char *str)
287 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
290 __setup("trace_options=", set_trace_boot_options);
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
295 static int __init set_trace_boot_clock(char *str)
297 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298 trace_boot_clock = trace_boot_clock_buf;
301 __setup("trace_clock=", set_trace_boot_clock);
303 static int __init set_tracepoint_printk(char *str)
305 /* Ignore the "tp_printk_stop_on_boot" param */
309 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310 tracepoint_printk = 1;
313 __setup("tp_printk", set_tracepoint_printk);
315 static int __init set_tracepoint_printk_stop(char *str)
317 tracepoint_printk_stop_on_boot = true;
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
322 unsigned long long ns2usecs(u64 nsec)
330 trace_process_export(struct trace_export *export,
331 struct ring_buffer_event *event, int flag)
333 struct trace_entry *entry;
334 unsigned int size = 0;
336 if (export->flags & flag) {
337 entry = ring_buffer_event_data(event);
338 size = ring_buffer_event_length(event);
339 export->write(export, entry, size);
343 static DEFINE_MUTEX(ftrace_export_lock);
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
351 static inline void ftrace_exports_enable(struct trace_export *export)
353 if (export->flags & TRACE_EXPORT_FUNCTION)
354 static_branch_inc(&trace_function_exports_enabled);
356 if (export->flags & TRACE_EXPORT_EVENT)
357 static_branch_inc(&trace_event_exports_enabled);
359 if (export->flags & TRACE_EXPORT_MARKER)
360 static_branch_inc(&trace_marker_exports_enabled);
363 static inline void ftrace_exports_disable(struct trace_export *export)
365 if (export->flags & TRACE_EXPORT_FUNCTION)
366 static_branch_dec(&trace_function_exports_enabled);
368 if (export->flags & TRACE_EXPORT_EVENT)
369 static_branch_dec(&trace_event_exports_enabled);
371 if (export->flags & TRACE_EXPORT_MARKER)
372 static_branch_dec(&trace_marker_exports_enabled);
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
377 struct trace_export *export;
379 preempt_disable_notrace();
381 export = rcu_dereference_raw_check(ftrace_exports_list);
383 trace_process_export(export, event, flag);
384 export = rcu_dereference_raw_check(export->next);
387 preempt_enable_notrace();
391 add_trace_export(struct trace_export **list, struct trace_export *export)
393 rcu_assign_pointer(export->next, *list);
 * We are adding export to the list but another
 * CPU might be walking that list. We need to make sure
 * the export->next pointer is valid before another CPU sees
 * the export pointer included in the list.
400 rcu_assign_pointer(*list, export);
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
406 struct trace_export **p;
408 for (p = list; *p != NULL; p = &(*p)->next)
415 rcu_assign_pointer(*p, (*p)->next);
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
423 ftrace_exports_enable(export);
425 add_trace_export(list, export);
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
433 ret = rm_trace_export(list, export);
434 ftrace_exports_disable(export);
439 int register_ftrace_export(struct trace_export *export)
441 if (WARN_ON_ONCE(!export->write))
444 mutex_lock(&ftrace_export_lock);
446 add_ftrace_export(&ftrace_exports_list, export);
448 mutex_unlock(&ftrace_export_lock);
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
454 int unregister_ftrace_export(struct trace_export *export)
458 mutex_lock(&ftrace_export_lock);
460 ret = rm_ftrace_export(&ftrace_exports_list, export);
462 mutex_unlock(&ftrace_export_lock);
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
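/*
 * Illustrative sketch (not part of the original file): a minimal consumer
 * of the export API above. A module fills in a struct trace_export with a
 * write() callback and the classes of data it wants (function, event or
 * marker), then registers it. The callback gets the raw entry and its
 * size, exactly as trace_process_export() passes them. The names
 * my_export_write/my_export are made up for the example, and the write()
 * prototype is assumed to match the call in trace_process_export():
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... copy the entry to a device, network buffer, etc. ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */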
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS \
470 (FUNCTION_DEFAULT_FLAGS | \
471 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
472 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
473 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
474 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
479 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
486 * The global_trace is the descriptor that holds the top-level tracing
487 * buffers for the live tracing.
489 static struct trace_array global_trace = {
490 .trace_flags = TRACE_DEFAULT_FLAGS,
493 LIST_HEAD(ftrace_trace_arrays);
495 int trace_array_get(struct trace_array *this_tr)
497 struct trace_array *tr;
500 mutex_lock(&trace_types_lock);
501 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
508 mutex_unlock(&trace_types_lock);
513 static void __trace_array_put(struct trace_array *this_tr)
515 WARN_ON(!this_tr->ref);
520 * trace_array_put - Decrement the reference counter for this trace array.
521 * @this_tr : pointer to the trace array
523 * NOTE: Use this when we no longer need the trace array returned by
524 * trace_array_get_by_name(). This ensures the trace array can be later
528 void trace_array_put(struct trace_array *this_tr)
533 mutex_lock(&trace_types_lock);
534 __trace_array_put(this_tr);
535 mutex_unlock(&trace_types_lock);
537 EXPORT_SYMBOL_GPL(trace_array_put);
539 int tracing_check_open_get_tr(struct trace_array *tr)
543 ret = security_locked_down(LOCKDOWN_TRACEFS);
547 if (tracing_disabled)
550 if (tr && trace_array_get(tr) < 0)
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557 struct trace_buffer *buffer,
558 struct ring_buffer_event *event)
560 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561 !filter_match_preds(call->filter, rec)) {
562 __trace_event_discard_commit(buffer, event);
570 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571 * @filtered_pids: The list of pids to check
572 * @search_pid: The PID to find in @filtered_pids
574 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 return trace_pid_list_is_set(filtered_pids, search_pid);
583 * trace_ignore_this_task - should a task be ignored for tracing
584 * @filtered_pids: The list of pids to check
585 * @filtered_no_pids: The list of pids not to be traced
586 * @task: The task that should be ignored if not filtered
588 * Checks if @task should be traced or not from @filtered_pids.
589 * Returns true if @task should *NOT* be traced.
590 * Returns false if @task should be traced.
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594 struct trace_pid_list *filtered_no_pids,
595 struct task_struct *task)
598 * If filtered_no_pids is not empty, and the task's pid is listed
599 * in filtered_no_pids, then return true.
600 * Otherwise, if filtered_pids is empty, that means we can
601 * trace all tasks. If it has content, then only trace pids
602 * within filtered_pids.
605 return (filtered_pids &&
606 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
608 trace_find_filtered_pid(filtered_no_pids, task->pid));
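/*
 * Illustrative worked example (not part of the original file): with
 * filtered_pids = {100} and filtered_no_pids = {200}, a task with pid 200
 * is ignored (it hits the no-pid list), pid 100 is traced, and every other
 * pid is ignored because filtered_pids is non-empty and does not contain it.
 */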
612 * trace_filter_add_remove_task - Add or remove a task from a pid_list
613 * @pid_list: The list to modify
614 * @self: The current task for fork or NULL for exit
615 * @task: The task to add or remove
617 * If adding a task, if @self is defined, the task is only added if @self
618 * is also included in @pid_list. This happens on fork and tasks should
619 * only be added when the parent is listed. If @self is NULL, then the
 * @task pid will be removed from the list, which would happen on exit
 * of a task.
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624 struct task_struct *self,
625 struct task_struct *task)
630 /* For forks, we only add if the forking task is listed */
632 if (!trace_find_filtered_pid(pid_list, self->pid))
636 /* "self" is set for forks, and NULL for exits */
638 trace_pid_list_set(pid_list, task->pid);
640 trace_pid_list_clear(pid_list, task->pid);
644 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645 * @pid_list: The pid list to show
646 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
647 * @pos: The position of the file
649 * This is used by the seq_file "next" operation to iterate the pids
650 * listed in a trace_pid_list structure.
652 * Returns the pid+1 as we want to display pid of zero, but NULL would
653 * stop the iteration.
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 long pid = (unsigned long)v;
662 /* pid already is +1 of the actual previous bit */
663 if (trace_pid_list_next(pid_list, pid, &next) < 0)
668 /* Return pid + 1 to allow zero to be represented */
669 return (void *)(pid + 1);
673 * trace_pid_start - Used for seq_file to start reading pid lists
674 * @pid_list: The pid list to show
675 * @pos: The position of the file
677 * This is used by seq_file "start" operation to start the iteration
680 * Returns the pid+1 as we want to display pid of zero, but NULL would
681 * stop the iteration.
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
689 if (trace_pid_list_first(pid_list, &first) < 0)
694 /* Return pid + 1 so that zero can be the exit value */
695 for (pid++; pid && l < *pos;
696 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
702 * trace_pid_show - show the current pid in seq_file processing
703 * @m: The seq_file structure to write into
704 * @v: A void pointer of the pid (+1) value to display
 * Can be directly used by seq_file operations to display the current
 * pid value.
709 int trace_pid_show(struct seq_file *m, void *v)
711 unsigned long pid = (unsigned long)v - 1;
713 seq_printf(m, "%lu\n", pid);
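/*
 * Illustrative worked example (not part of the original file): if the
 * pid_list contains pids 0 and 25, trace_pid_start()/trace_pid_next()
 * hand the seq_file the cookies (void *)1 and (void *)26, and
 * trace_pid_show() subtracts one again to print "0" and "25". The +1
 * offset only exists so that pid 0 does not look like the NULL that
 * terminates the iteration.
 */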
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE 127
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721 struct trace_pid_list **new_pid_list,
722 const char __user *ubuf, size_t cnt)
724 struct trace_pid_list *pid_list;
725 struct trace_parser parser;
733 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
737 * Always recreate a new array. The write is an all or nothing
738 * operation. Always create a new array when adding new pids by
 * the user. If the operation fails, then the current list is
 * used.
742 pid_list = trace_pid_list_alloc();
744 trace_parser_put(&parser);
749 /* copy the current bits to the new max */
750 ret = trace_pid_list_first(filtered_pids, &pid);
752 trace_pid_list_set(pid_list, pid);
753 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
763 ret = trace_get_user(&parser, ubuf, cnt, &pos);
771 if (!trace_parser_loaded(&parser))
775 if (kstrtoul(parser.buffer, 0, &val))
780 if (trace_pid_list_set(pid_list, pid) < 0) {
786 trace_parser_clear(&parser);
789 trace_parser_put(&parser);
792 trace_pid_list_free(pid_list);
797 /* Cleared the list of pids */
798 trace_pid_list_free(pid_list);
802 *new_pid_list = pid_list;
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
811 /* Early boot up does not have a buffer yet */
813 return trace_clock_local();
815 ts = ring_buffer_time_stamp(buf->buffer);
816 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
821 u64 ftrace_now(int cpu)
823 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
827 * tracing_is_enabled - Show if global_trace has been enabled
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" so it can be used in fast paths such as
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on(), which is a little
 * slower but accurate.
835 int tracing_is_enabled(void)
838 * For quick access (irqsoff uses this in fast path), just
839 * return the mirror variable of the state of the ring buffer.
840 * It's a little racy, but we don't really care.
843 return !global_trace.buffer_disabled;
 * trace_buf_size is the size in bytes that is allocated
 * for a trace buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it is much appreciated
 * not to have to wait for all that output. Anyway, this can be
 * boot-time and run-time configurable.
856 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
858 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer *trace_types __read_mostly;
864 * trace_types_lock is used to protect the trace_types list.
866 DEFINE_MUTEX(trace_types_lock);
 * serialize the access of the ring buffer
 *
 * The ring buffer serializes readers, but it is low level protection.
 * The validity of the events (which are returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 * A) the page of the consumed events may become a normal page
 * (not a reader page) in the ring buffer, and this page will be rewritten
 * by the event producer.
 * B) The page of the consumed events may become a page for splice_read,
 * and this page will be returned to the system.
 *
 * These primitives allow multi-process access to different cpu ring buffers
 * concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
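 *
 * Illustrative usage sketch (not part of the original comment): a reader
 * of one cpu buffer takes the per-cpu lock, a reader of all buffers takes
 * the global write side, matching the rules above:
 *
 *	trace_access_lock(cpu);
 *	...consume events from that cpu buffer (or from all buffers when
 *	   cpu == RING_BUFFER_ALL_CPUS)...
 *	trace_access_unlock(cpu);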
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894 static inline void trace_access_lock(int cpu)
896 if (cpu == RING_BUFFER_ALL_CPUS) {
897 /* gain it for accessing the whole ring buffer. */
898 down_write(&all_cpu_access_lock);
900 /* gain it for accessing a cpu ring buffer. */
902 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903 down_read(&all_cpu_access_lock);
905 /* Secondly block other access to this @cpu ring buffer. */
906 mutex_lock(&per_cpu(cpu_access_lock, cpu));
910 static inline void trace_access_unlock(int cpu)
912 if (cpu == RING_BUFFER_ALL_CPUS) {
913 up_write(&all_cpu_access_lock);
915 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916 up_read(&all_cpu_access_lock);
920 static inline void trace_access_lock_init(void)
924 for_each_possible_cpu(cpu)
925 mutex_init(&per_cpu(cpu_access_lock, cpu));
930 static DEFINE_MUTEX(access_lock);
932 static inline void trace_access_lock(int cpu)
935 mutex_lock(&access_lock);
938 static inline void trace_access_unlock(int cpu)
941 mutex_unlock(&access_lock);
944 static inline void trace_access_lock_init(void)
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952 unsigned int trace_ctx,
953 int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955 struct trace_buffer *buffer,
956 unsigned int trace_ctx,
957 int skip, struct pt_regs *regs);
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961 unsigned int trace_ctx,
962 int skip, struct pt_regs *regs)
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966 struct trace_buffer *buffer,
967 unsigned long trace_ctx,
968 int skip, struct pt_regs *regs)
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976 int type, unsigned int trace_ctx)
978 struct trace_entry *ent = ring_buffer_event_data(event);
980 tracing_generic_entry_update(ent, type, trace_ctx);
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
987 unsigned int trace_ctx)
989 struct ring_buffer_event *event;
991 event = ring_buffer_lock_reserve(buffer, len);
993 trace_event_setup(event, type, trace_ctx);
998 void tracer_tracing_on(struct trace_array *tr)
1000 if (tr->array_buffer.buffer)
1001 ring_buffer_record_on(tr->array_buffer.buffer);
 * This flag is looked at when buffers haven't been allocated
 * yet, or by some tracers (like irqsoff) that just want to
 * know if the ring buffer has been disabled, but can handle
 * races where it gets disabled while we still do a record.
 * As the check is in the fast path of the tracers, it is more
 * important to be fast than accurate.
1010 tr->buffer_disabled = 0;
1011 /* Make the flag seen by readers */
1016 * tracing_on - enable tracing buffers
1018 * This function enables tracing buffers that may have been
1019 * disabled with tracing_off.
1021 void tracing_on(void)
1023 tracer_tracing_on(&global_trace);
1025 EXPORT_SYMBOL_GPL(tracing_on);
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 __this_cpu_write(trace_taskinfo_save, true);
1033 /* If this is the temp buffer, we need to commit fully */
1034 if (this_cpu_read(trace_buffered_event) == event) {
1035 /* Length is in event->array[0] */
1036 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037 /* Release the temp buffer */
1038 this_cpu_dec(trace_buffered_event_cnt);
1039 /* ring_buffer_unlock_commit() enables preemption */
1040 preempt_enable_notrace();
1042 ring_buffer_unlock_commit(buffer);
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046 const char *str, int size)
1048 struct ring_buffer_event *event;
1049 struct trace_buffer *buffer;
1050 struct print_entry *entry;
1051 unsigned int trace_ctx;
1054 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1057 if (unlikely(tracing_selftest_running || tracing_disabled))
1060 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1062 trace_ctx = tracing_gen_ctx();
1063 buffer = tr->array_buffer.buffer;
1064 ring_buffer_nest_start(buffer);
1065 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1072 entry = ring_buffer_event_data(event);
1075 memcpy(&entry->buf, str, size);
1077 /* Add a newline if necessary */
1078 if (entry->buf[size - 1] != '\n') {
1079 entry->buf[size] = '\n';
1080 entry->buf[size + 1] = '\0';
1082 entry->buf[size] = '\0';
1084 __buffer_unlock_commit(buffer, event);
1085 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1087 ring_buffer_nest_end(buffer);
1090 EXPORT_SYMBOL_GPL(__trace_array_puts);
1093 * __trace_puts - write a constant string into the trace buffer.
1094 * @ip: The address of the caller
1095 * @str: The constant string to write
1096 * @size: The size of the string.
1098 int __trace_puts(unsigned long ip, const char *str, int size)
1100 return __trace_array_puts(&global_trace, ip, str, size);
1102 EXPORT_SYMBOL_GPL(__trace_puts);
1105 * __trace_bputs - write the pointer to a constant string into trace buffer
1106 * @ip: The address of the caller
 * @str: The constant string to write to the buffer
1109 int __trace_bputs(unsigned long ip, const char *str)
1111 struct ring_buffer_event *event;
1112 struct trace_buffer *buffer;
1113 struct bputs_entry *entry;
1114 unsigned int trace_ctx;
1115 int size = sizeof(struct bputs_entry);
1118 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1121 if (unlikely(tracing_selftest_running || tracing_disabled))
1124 trace_ctx = tracing_gen_ctx();
1125 buffer = global_trace.array_buffer.buffer;
1127 ring_buffer_nest_start(buffer);
1128 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133 entry = ring_buffer_event_data(event);
1137 __buffer_unlock_commit(buffer, event);
1138 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142 ring_buffer_nest_end(buffer);
1145 EXPORT_SYMBOL_GPL(__trace_bputs);
1147 #ifdef CONFIG_TRACER_SNAPSHOT
1148 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1151 struct tracer *tracer = tr->current_trace;
1152 unsigned long flags;
1155 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1156 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1160 if (!tr->allocated_snapshot) {
1161 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1162 trace_array_puts(tr, "*** stopping trace here! ***\n");
1163 tracer_tracing_off(tr);
1167 /* Note, snapshot can not be used when the tracer uses it */
1168 if (tracer->use_max_tr) {
1169 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1170 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174 local_irq_save(flags);
1175 update_max_tr(tr, current, smp_processor_id(), cond_data);
1176 local_irq_restore(flags);
1179 void tracing_snapshot_instance(struct trace_array *tr)
1181 tracing_snapshot_instance_cond(tr, NULL);
1185 * tracing_snapshot - take a snapshot of the current buffer.
1187 * This causes a swap between the snapshot buffer and the current live
1188 * tracing buffer. You can use this to take snapshots of the live
1189 * trace when some condition is triggered, but continue to trace.
 * Note, make sure to allocate the snapshot either with
 * tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, this will stop tracing,
 * basically making a permanent snapshot.
1198 void tracing_snapshot(void)
1200 struct trace_array *tr = &global_trace;
1202 tracing_snapshot_instance(tr);
1204 EXPORT_SYMBOL_GPL(tracing_snapshot);
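/*
 * Illustrative sketch (not part of the original file): typical in-kernel
 * usage is to allocate the snapshot buffer once from a context that can
 * sleep, then trigger the swap wherever the interesting condition fires.
 * The condition itself is made up for the example:
 *
 *	tracing_alloc_snapshot();		// during init, may sleep
 *	...
 *	if (hit_the_interesting_case)
 *		tracing_snapshot();		// swap the live buffer away
 *
 * The saved data is then read back through /sys/kernel/tracing/snapshot.
 */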
1207 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1208 * @tr: The tracing instance to snapshot
1209 * @cond_data: The data to be tested conditionally, and possibly saved
1211 * This is the same as tracing_snapshot() except that the snapshot is
1212 * conditional - the snapshot will only happen if the
1213 * cond_snapshot.update() implementation receiving the cond_data
1214 * returns true, which means that the trace array's cond_snapshot
1215 * update() operation used the cond_data to determine whether the
1216 * snapshot should be taken, and if it was, presumably saved it along
1217 * with the snapshot.
1219 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1221 tracing_snapshot_instance_cond(tr, cond_data);
1223 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1226 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1227 * @tr: The tracing instance
1229 * When the user enables a conditional snapshot using
1230 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1231 * with the snapshot. This accessor is used to retrieve it.
1233 * Should not be called from cond_snapshot.update(), since it takes
1234 * the tr->max_lock lock, which the code calling
1235 * cond_snapshot.update() has already done.
1237 * Returns the cond_data associated with the trace array's snapshot.
1239 void *tracing_cond_snapshot_data(struct trace_array *tr)
1241 void *cond_data = NULL;
1243 local_irq_disable();
1244 arch_spin_lock(&tr->max_lock);
1246 if (tr->cond_snapshot)
1247 cond_data = tr->cond_snapshot->cond_data;
1249 arch_spin_unlock(&tr->max_lock);
1254 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1256 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1257 struct array_buffer *size_buf, int cpu_id);
1258 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1260 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 if (!tr->allocated_snapshot) {
1266 /* allocate spare buffer */
1267 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1268 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272 tr->allocated_snapshot = true;
1278 static void free_snapshot(struct trace_array *tr)
 * We don't free the ring buffer; instead, we resize it because
 * the max_tr ring buffer has some state (e.g. ring->clock) and
 * we want to preserve it.
1285 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1286 set_buffer_entries(&tr->max_buffer, 1);
1287 tracing_reset_online_cpus(&tr->max_buffer);
1288 tr->allocated_snapshot = false;
1292 * tracing_alloc_snapshot - allocate snapshot buffer.
1294 * This only allocates the snapshot buffer if it isn't already
1295 * allocated - it doesn't also take a snapshot.
1297 * This is meant to be used in cases where the snapshot buffer needs
1298 * to be set up for events that can't sleep but need to be able to
1299 * trigger a snapshot.
1301 int tracing_alloc_snapshot(void)
1303 struct trace_array *tr = &global_trace;
1306 ret = tracing_alloc_snapshot_instance(tr);
1311 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1314 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1316 * This is similar to tracing_snapshot(), but it will allocate the
1317 * snapshot buffer if it isn't already allocated. Use this only
1318 * where it is safe to sleep, as the allocation may sleep.
1320 * This causes a swap between the snapshot buffer and the current live
1321 * tracing buffer. You can use this to take snapshots of the live
1322 * trace when some condition is triggered, but continue to trace.
1324 void tracing_snapshot_alloc(void)
1328 ret = tracing_alloc_snapshot();
1334 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1337 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1338 * @tr: The tracing instance
1339 * @cond_data: User data to associate with the snapshot
1340 * @update: Implementation of the cond_snapshot update function
1342 * Check whether the conditional snapshot for the given instance has
1343 * already been enabled, or if the current tracer is already using a
1344 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1345 * save the cond_data and update function inside.
1347 * Returns 0 if successful, error otherwise.
1349 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1350 cond_update_fn_t update)
1352 struct cond_snapshot *cond_snapshot;
1355 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359 cond_snapshot->cond_data = cond_data;
1360 cond_snapshot->update = update;
1362 mutex_lock(&trace_types_lock);
1364 ret = tracing_alloc_snapshot_instance(tr);
1368 if (tr->current_trace->use_max_tr) {
1374 * The cond_snapshot can only change to NULL without the
1375 * trace_types_lock. We don't care if we race with it going
1376 * to NULL, but we want to make sure that it's not set to
1377 * something other than NULL when we get here, which we can
1378 * do safely with only holding the trace_types_lock and not
1379 * having to take the max_lock.
1381 if (tr->cond_snapshot) {
1386 local_irq_disable();
1387 arch_spin_lock(&tr->max_lock);
1388 tr->cond_snapshot = cond_snapshot;
1389 arch_spin_unlock(&tr->max_lock);
1392 mutex_unlock(&trace_types_lock);
1397 mutex_unlock(&trace_types_lock);
1398 kfree(cond_snapshot);
1401 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
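/*
 * Illustrative sketch (not part of the original file): a user of the
 * conditional snapshot API registers an update() callback that decides,
 * from the cond_data passed at snapshot time, whether the swap should
 * really happen. struct my_state, my_update() and the threshold test are
 * all made up for the example:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->value > s->threshold;	// true: take the snapshot
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &state);	// snapshots only if my_update() agrees
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */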
1404 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1405 * @tr: The tracing instance
1407 * Check whether the conditional snapshot for the given instance is
1408 * enabled; if so, free the cond_snapshot associated with it,
1409 * otherwise return -EINVAL.
1411 * Returns 0 if successful, error otherwise.
1413 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 local_irq_disable();
1418 arch_spin_lock(&tr->max_lock);
1420 if (!tr->cond_snapshot)
1423 kfree(tr->cond_snapshot);
1424 tr->cond_snapshot = NULL;
1427 arch_spin_unlock(&tr->max_lock);
1432 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1434 void tracing_snapshot(void)
1436 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1438 EXPORT_SYMBOL_GPL(tracing_snapshot);
1439 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1441 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1443 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1444 int tracing_alloc_snapshot(void)
1446 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1449 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1450 void tracing_snapshot_alloc(void)
1455 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1456 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1461 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1466 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1471 #define free_snapshot(tr) do { } while (0)
1472 #endif /* CONFIG_TRACER_SNAPSHOT */
1474 void tracer_tracing_off(struct trace_array *tr)
1476 if (tr->array_buffer.buffer)
1477 ring_buffer_record_off(tr->array_buffer.buffer);
 * This flag is looked at when buffers haven't been allocated
 * yet, or by some tracers (like irqsoff) that just want to
 * know if the ring buffer has been disabled, but can handle
 * races where it gets disabled while we still do a record.
 * As the check is in the fast path of the tracers, it is more
 * important to be fast than accurate.
1486 tr->buffer_disabled = 1;
1487 /* Make the flag seen by readers */
1492 * tracing_off - turn off tracing buffers
1494 * This function stops the tracing buffers from recording data.
1495 * It does not disable any overhead the tracers themselves may
1496 * be causing. This function simply causes all recording to
1497 * the ring buffers to fail.
1499 void tracing_off(void)
1501 tracer_tracing_off(&global_trace);
1503 EXPORT_SYMBOL_GPL(tracing_off);
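/*
 * Illustrative sketch (not part of the original file): a common debugging
 * pattern is to stop the ring buffer the moment a problem is detected, so
 * that the trace leading up to it is preserved instead of being
 * overwritten. The condition is made up for the example:
 *
 *	if (looks_corrupted(obj)) {
 *		trace_printk("corruption detected\n");
 *		tracing_off();
 *	}
 *
 * Recording can be re-enabled later with tracing_on() or by writing 1 to
 * /sys/kernel/tracing/tracing_on.
 */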
1505 void disable_trace_on_warning(void)
1507 if (__disable_trace_on_warning) {
1508 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1509 "Disabling tracing due to warning\n");
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows the real state of the ring buffer: whether it is enabled or not.
1520 bool tracer_tracing_is_on(struct trace_array *tr)
1522 if (tr->array_buffer.buffer)
1523 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1524 return !tr->buffer_disabled;
1528 * tracing_is_on - show state of ring buffers enabled
1530 int tracing_is_on(void)
1532 return tracer_tracing_is_on(&global_trace);
1534 EXPORT_SYMBOL_GPL(tracing_is_on);
1536 static int __init set_buf_size(char *str)
1538 unsigned long buf_size;
1542 buf_size = memparse(str, &str);
1544 * nr_entries can not be zero and the startup
1545 * tests require some buffer space. Therefore
1546 * ensure we have at least 4096 bytes of buffer.
1548 trace_buf_size = max(4096UL, buf_size);
1551 __setup("trace_buf_size=", set_buf_size);
1553 static int __init set_tracing_thresh(char *str)
1555 unsigned long threshold;
1560 ret = kstrtoul(str, 0, &threshold);
1563 tracing_thresh = threshold * 1000;
1566 __setup("tracing_thresh=", set_tracing_thresh);
1568 unsigned long nsecs_to_usecs(unsigned long nsecs)
1570 return nsecs / 1000;
1574 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1575 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1576 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1577 * of strings in the order that the evals (enum) were defined.
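 *
 * Illustrative sketch (not part of the original comment), with a made-up
 * two-entry list standing in for the real TRACE_FLAGS in trace.h:
 *
 *	#define TRACE_FLAGS				\
 *		C(PRINT_PARENT,	"print-parent"),	\
 *		C(PRINTK,	"trace_printk"),
 *
 * trace.h expands this once to generate the TRACE_ITER_* bit definitions,
 * and the "#define C(a, b) b" expansion here turns the very same list into
 * the array of option-name strings below, which is what keeps the two in
 * sync.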
1582 /* These must match the bit positions in trace_iterator_flags */
1583 static const char *trace_options[] = {
1591 int in_ns; /* is this clock in nanoseconds? */
1592 } trace_clocks[] = {
1593 { trace_clock_local, "local", 1 },
1594 { trace_clock_global, "global", 1 },
1595 { trace_clock_counter, "counter", 0 },
1596 { trace_clock_jiffies, "uptime", 0 },
1597 { trace_clock, "perf", 1 },
1598 { ktime_get_mono_fast_ns, "mono", 1 },
1599 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1600 { ktime_get_boot_fast_ns, "boot", 1 },
1601 { ktime_get_tai_fast_ns, "tai", 1 },
1605 bool trace_clock_in_ns(struct trace_array *tr)
1607 if (trace_clocks[tr->clock_id].in_ns)
1614 * trace_parser_get_init - gets the buffer for trace parser
1616 int trace_parser_get_init(struct trace_parser *parser, int size)
1618 memset(parser, 0, sizeof(*parser));
1620 parser->buffer = kmalloc(size, GFP_KERNEL);
1621 if (!parser->buffer)
1624 parser->size = size;
1629 * trace_parser_put - frees the buffer for trace parser
1631 void trace_parser_put(struct trace_parser *parser)
1633 kfree(parser->buffer);
1634 parser->buffer = NULL;
1638 * trace_get_user - reads the user input string separated by space
1639 * (matched by isspace(ch))
1641 * For each string found the 'struct trace_parser' is updated,
1642 * and the function returns.
1644 * Returns number of bytes read.
1646 * See kernel/trace/trace.h for 'struct trace_parser' details.
1648 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1649 size_t cnt, loff_t *ppos)
1656 trace_parser_clear(parser);
1658 ret = get_user(ch, ubuf++);
1666 * The parser is not finished with the last write,
1667 * continue reading the user input without skipping spaces.
1669 if (!parser->cont) {
1670 /* skip white space */
1671 while (cnt && isspace(ch)) {
1672 ret = get_user(ch, ubuf++);
1681 /* only spaces were written */
1682 if (isspace(ch) || !ch) {
1689 /* read the non-space input */
1690 while (cnt && !isspace(ch) && ch) {
1691 if (parser->idx < parser->size - 1)
1692 parser->buffer[parser->idx++] = ch;
1697 ret = get_user(ch, ubuf++);
1704 /* We either got finished input or we have to wait for another call. */
1705 if (isspace(ch) || !ch) {
1706 parser->buffer[parser->idx] = 0;
1707 parser->cont = false;
1708 } else if (parser->idx < parser->size - 1) {
1709 parser->cont = true;
1710 parser->buffer[parser->idx++] = ch;
1711 /* Make sure the parsed string always terminates with '\0'. */
1712 parser->buffer[parser->idx] = 0;
1725 /* TODO add a seq_buf_to_buffer() */
1726 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 if (trace_seq_used(s) <= s->seq.readpos)
1733 len = trace_seq_used(s) - s->seq.readpos;
1736 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1738 s->seq.readpos += cnt;
1742 unsigned long __read_mostly tracing_thresh;
1744 #ifdef CONFIG_TRACER_MAX_TRACE
1745 static const struct file_operations tracing_max_lat_fops;
1747 #ifdef LATENCY_FS_NOTIFY
1749 static struct workqueue_struct *fsnotify_wq;
1751 static void latency_fsnotify_workfn(struct work_struct *work)
1753 struct trace_array *tr = container_of(work, struct trace_array,
1755 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1758 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1760 struct trace_array *tr = container_of(iwork, struct trace_array,
1762 queue_work(fsnotify_wq, &tr->fsnotify_work);
1765 static void trace_create_maxlat_file(struct trace_array *tr,
1766 struct dentry *d_tracer)
1768 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1769 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1770 tr->d_max_latency = trace_create_file("tracing_max_latency",
1772 d_tracer, &tr->max_latency,
1773 &tracing_max_lat_fops);
1776 __init static int latency_fsnotify_init(void)
1778 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1779 WQ_UNBOUND | WQ_HIGHPRI, 0);
1781 pr_err("Unable to allocate tr_max_lat_wq\n");
1787 late_initcall_sync(latency_fsnotify_init);
1789 void latency_fsnotify(struct trace_array *tr)
1794 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1795 * possible that we are called from __schedule() or do_idle(), which
1796 * could cause a deadlock.
1798 irq_work_queue(&tr->fsnotify_irqwork);
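/*
 * Illustrative sketch (not part of the original file): the notification is
 * therefore deferred twice, so nothing that might sleep runs in scheduler
 * or idle context:
 *
 *	latency_fsnotify()			// may run from __schedule()/do_idle()
 *	  -> irq_work_queue()			// defers to hard irq context
 *	    -> latency_fsnotify_workfn_irq()	// only calls queue_work()
 *	      -> latency_fsnotify_workfn()	// process context, fsnotify_inode()
 */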
1801 #else /* !LATENCY_FS_NOTIFY */
1803 #define trace_create_maxlat_file(tr, d_tracer) \
1804 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1805 d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1810 * Copy the new maximum trace into the separate maximum-trace
1811 * structure. (this way the maximum trace is permanently saved,
1812 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1815 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1817 struct array_buffer *trace_buf = &tr->array_buffer;
1818 struct array_buffer *max_buf = &tr->max_buffer;
1819 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1820 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1823 max_buf->time_start = data->preempt_timestamp;
1825 max_data->saved_latency = tr->max_latency;
1826 max_data->critical_start = data->critical_start;
1827 max_data->critical_end = data->critical_end;
1829 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1830 max_data->pid = tsk->pid;
1832 * If tsk == current, then use current_uid(), as that does not use
1833 * RCU. The irq tracer can be called out of RCU scope.
1836 max_data->uid = current_uid();
1838 max_data->uid = task_uid(tsk);
1840 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1841 max_data->policy = tsk->policy;
1842 max_data->rt_priority = tsk->rt_priority;
/* record this task's comm */
1845 tracing_record_cmdline(tsk);
1846 latency_fsnotify(tr);
1850 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1852 * @tsk: the task with the latency
1853 * @cpu: The cpu that initiated the trace.
1854 * @cond_data: User data associated with a conditional snapshot
1856 * Flip the buffers between the @tr and the max_tr and record information
1857 * about which task was the cause of this latency.
1860 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1866 WARN_ON_ONCE(!irqs_disabled());
1868 if (!tr->allocated_snapshot) {
1869 /* Only the nop tracer should hit this when disabling */
1870 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874 arch_spin_lock(&tr->max_lock);
1876 /* Inherit the recordable setting from array_buffer */
1877 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1878 ring_buffer_record_on(tr->max_buffer.buffer);
1880 ring_buffer_record_off(tr->max_buffer.buffer);
1882 #ifdef CONFIG_TRACER_SNAPSHOT
1883 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1884 arch_spin_unlock(&tr->max_lock);
1888 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1890 __update_max_tr(tr, tsk, cpu);
1892 arch_spin_unlock(&tr->max_lock);
1896 * update_max_tr_single - only copy one trace over, and reset the rest
1898 * @tsk: task with the latency
1899 * @cpu: the cpu of the buffer to copy.
1901 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1904 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1911 WARN_ON_ONCE(!irqs_disabled());
1912 if (!tr->allocated_snapshot) {
1913 /* Only the nop tracer should hit this when disabling */
1914 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1918 arch_spin_lock(&tr->max_lock);
1920 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1922 if (ret == -EBUSY) {
1924 * We failed to swap the buffer due to a commit taking
1925 * place on this CPU. We fail to record, but we reset
1926 * the max trace buffer (no one writes directly to it)
1927 * and flag that it failed.
1929 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1930 "Failed to swap buffers due to commit in progress\n");
1933 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1935 __update_max_tr(tr, tsk, cpu);
1936 arch_spin_unlock(&tr->max_lock);
1939 #endif /* CONFIG_TRACER_MAX_TRACE */
1941 static int wait_on_pipe(struct trace_iterator *iter, int full)
1943 /* Iterators are static, they should be filled or empty */
1944 if (trace_buffer_iter(iter, iter->cpu_file))
1947 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1951 #ifdef CONFIG_FTRACE_STARTUP_TEST
1952 static bool selftests_can_run;
1954 struct trace_selftests {
1955 struct list_head list;
1956 struct tracer *type;
1959 static LIST_HEAD(postponed_selftests);
1961 static int save_selftest(struct tracer *type)
1963 struct trace_selftests *selftest;
1965 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1969 selftest->type = type;
1970 list_add(&selftest->list, &postponed_selftests);
1974 static int run_tracer_selftest(struct tracer *type)
1976 struct trace_array *tr = &global_trace;
1977 struct tracer *saved_tracer = tr->current_trace;
1980 if (!type->selftest || tracing_selftest_disabled)
1984 * If a tracer registers early in boot up (before scheduling is
1985 * initialized and such), then do not run its selftests yet.
1986 * Instead, run it a little later in the boot process.
1988 if (!selftests_can_run)
1989 return save_selftest(type);
1991 if (!tracing_is_on()) {
1992 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1998 * Run a selftest on this tracer.
1999 * Here we reset the trace buffer, and set the current
2000 * tracer to be this tracer. The tracer can then run some
2001 * internal tracing to verify that everything is in order.
2002 * If we fail, we do not register this tracer.
2004 tracing_reset_online_cpus(&tr->array_buffer);
2006 tr->current_trace = type;
2008 #ifdef CONFIG_TRACER_MAX_TRACE
2009 if (type->use_max_tr) {
2010 /* If we expanded the buffers, make sure the max is expanded too */
2011 if (ring_buffer_expanded)
2012 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2013 RING_BUFFER_ALL_CPUS);
2014 tr->allocated_snapshot = true;
2018 /* the test is responsible for initializing and enabling */
2019 pr_info("Testing tracer %s: ", type->name);
2020 ret = type->selftest(type, tr);
2021 /* the test is responsible for resetting too */
2022 tr->current_trace = saved_tracer;
2024 printk(KERN_CONT "FAILED!\n");
2025 /* Add the warning after printing 'FAILED' */
2029 /* Only reset on passing, to avoid touching corrupted buffers */
2030 tracing_reset_online_cpus(&tr->array_buffer);
2032 #ifdef CONFIG_TRACER_MAX_TRACE
2033 if (type->use_max_tr) {
2034 tr->allocated_snapshot = false;
2036 /* Shrink the max buffer again */
2037 if (ring_buffer_expanded)
2038 ring_buffer_resize(tr->max_buffer.buffer, 1,
2039 RING_BUFFER_ALL_CPUS);
2043 printk(KERN_CONT "PASSED\n");
2047 static int do_run_tracer_selftest(struct tracer *type)
2052 * Tests can take a long time, especially if they are run one after the
2053 * other, as does happen during bootup when all the tracers are
2054 * registered. This could cause the soft lockup watchdog to trigger.
2058 tracing_selftest_running = true;
2059 ret = run_tracer_selftest(type);
2060 tracing_selftest_running = false;
2065 static __init int init_trace_selftests(void)
2067 struct trace_selftests *p, *n;
2068 struct tracer *t, **last;
2071 selftests_can_run = true;
2073 mutex_lock(&trace_types_lock);
2075 if (list_empty(&postponed_selftests))
2078 pr_info("Running postponed tracer tests:\n");
2080 tracing_selftest_running = true;
2081 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
/* This loop can take minutes when sanitizers are enabled, so
 * let's make sure we allow RCU processing.
2086 ret = run_tracer_selftest(p->type);
2087 /* If the test fails, then warn and remove from available_tracers */
2089 WARN(1, "tracer: %s failed selftest, disabling\n",
2091 last = &trace_types;
2092 for (t = trace_types; t; t = t->next) {
2103 tracing_selftest_running = false;
2106 mutex_unlock(&trace_types_lock);
2110 core_initcall(init_trace_selftests);
2112 static inline int run_tracer_selftest(struct tracer *type)
2116 static inline int do_run_tracer_selftest(struct tracer *type)
2120 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2122 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2124 static void __init apply_trace_boot_options(void);
2127 * register_tracer - register a tracer with the ftrace system.
2128 * @type: the plugin for the tracer
2130 * Register a new plugin tracer.
2132 int __init register_tracer(struct tracer *type)
2138 pr_info("Tracer must have a name\n");
2142 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2143 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2147 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2148 pr_warn("Can not register tracer %s due to lockdown\n",
2153 mutex_lock(&trace_types_lock);
2155 for (t = trace_types; t; t = t->next) {
2156 if (strcmp(type->name, t->name) == 0) {
2158 pr_info("Tracer %s already registered\n",
2165 if (!type->set_flag)
2166 type->set_flag = &dummy_set_flag;
/* allocate a dummy tracer_flags */
2169 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2174 type->flags->val = 0;
2175 type->flags->opts = dummy_tracer_opt;
2177 if (!type->flags->opts)
2178 type->flags->opts = dummy_tracer_opt;
2180 /* store the tracer for __set_tracer_option */
2181 type->flags->trace = type;
2183 ret = do_run_tracer_selftest(type);
2187 type->next = trace_types;
2189 add_tracer_options(&global_trace, type);
2192 mutex_unlock(&trace_types_lock);
2194 if (ret || !default_bootup_tracer)
2197 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2200 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2201 /* Do we want this tracer to start on bootup? */
2202 tracing_set_tracer(&global_trace, type->name);
2203 default_bootup_tracer = NULL;
2205 apply_trace_boot_options();
2207 /* disable other selftests, since this will break it. */
2208 disable_tracing_selftest("running a tracer");
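/*
 * Illustrative sketch (not part of the original file): the smallest useful
 * tracer plugin only needs a name plus the usual init/reset callbacks of
 * struct tracer (assumed here), and is handed to register_tracer() from an
 * __init path. The names my_tracer_* are made up for the example:
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	register_tracer(&my_tracer);
 *
 * After this, "mytracer" shows up in available_tracers and can be selected
 * through current_tracer.
 */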
2214 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2216 struct trace_buffer *buffer = buf->buffer;
2221 ring_buffer_record_disable(buffer);
2223 /* Make sure all commits have finished */
2225 ring_buffer_reset_cpu(buffer, cpu);
2227 ring_buffer_record_enable(buffer);
2230 void tracing_reset_online_cpus(struct array_buffer *buf)
2232 struct trace_buffer *buffer = buf->buffer;
2237 ring_buffer_record_disable(buffer);
2239 /* Make sure all commits have finished */
2242 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2244 ring_buffer_reset_online_cpus(buffer);
2246 ring_buffer_record_enable(buffer);
2249 /* Must have trace_types_lock held */
2250 void tracing_reset_all_online_cpus_unlocked(void)
2252 struct trace_array *tr;
2254 lockdep_assert_held(&trace_types_lock);
2256 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2257 if (!tr->clear_trace)
2259 tr->clear_trace = false;
2260 tracing_reset_online_cpus(&tr->array_buffer);
2261 #ifdef CONFIG_TRACER_MAX_TRACE
2262 tracing_reset_online_cpus(&tr->max_buffer);
2267 void tracing_reset_all_online_cpus(void)
2269 mutex_lock(&trace_types_lock);
2270 tracing_reset_all_online_cpus_unlocked();
2271 mutex_unlock(&trace_types_lock);
2275 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2276 * is the tgid last observed corresponding to pid=i.
2278 static int *tgid_map;
2280 /* The maximum valid index into tgid_map. */
2281 static size_t tgid_map_max;
2283 #define SAVED_CMDLINES_DEFAULT 128
2284 #define NO_CMDLINE_MAP UINT_MAX
2286 * Preemption must be disabled before acquiring trace_cmdline_lock.
 * The various trace_arrays' max_lock must be acquired in a context
 * where interrupts are disabled.
2290 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2291 struct saved_cmdlines_buffer {
2292 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2293 unsigned *map_cmdline_to_pid;
2294 unsigned cmdline_num;
2296 char *saved_cmdlines;
2298 static struct saved_cmdlines_buffer *savedcmd;
2300 static inline char *get_saved_cmdlines(int idx)
2302 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2305 static inline void set_cmdline(int idx, const char *cmdline)
2307 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2310 static int allocate_cmdlines_buffer(unsigned int val,
2311 struct saved_cmdlines_buffer *s)
2313 s->map_cmdline_to_pid = kmalloc_array(val,
2314 sizeof(*s->map_cmdline_to_pid),
2316 if (!s->map_cmdline_to_pid)
2319 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2320 if (!s->saved_cmdlines) {
2321 kfree(s->map_cmdline_to_pid);
2326 s->cmdline_num = val;
2327 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2328 sizeof(s->map_pid_to_cmdline));
2329 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2330 val * sizeof(*s->map_cmdline_to_pid));
2335 static int trace_create_savedcmd(void)
2339 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2343 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2353 int is_tracing_stopped(void)
2355 return global_trace.stop_count;
2359 * tracing_start - quick start of the tracer
2361 * If tracing is enabled but was stopped by tracing_stop,
2362 * this will start the tracer back up.
2364 void tracing_start(void)
2366 struct trace_buffer *buffer;
2367 unsigned long flags;
2369 if (tracing_disabled)
2372 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2373 if (--global_trace.stop_count) {
2374 if (global_trace.stop_count < 0) {
2375 /* Someone screwed up their debugging */
2377 global_trace.stop_count = 0;
2382 /* Prevent the buffers from switching */
2383 arch_spin_lock(&global_trace.max_lock);
2385 buffer = global_trace.array_buffer.buffer;
2387 ring_buffer_record_enable(buffer);
2389 #ifdef CONFIG_TRACER_MAX_TRACE
2390 buffer = global_trace.max_buffer.buffer;
2392 ring_buffer_record_enable(buffer);
2395 arch_spin_unlock(&global_trace.max_lock);
2398 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2401 static void tracing_start_tr(struct trace_array *tr)
2403 struct trace_buffer *buffer;
2404 unsigned long flags;
2406 if (tracing_disabled)
2409 /* If global, we need to also start the max tracer */
2410 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2411 return tracing_start();
2413 raw_spin_lock_irqsave(&tr->start_lock, flags);
2415 if (--tr->stop_count) {
2416 if (tr->stop_count < 0) {
2417 /* Someone screwed up their debugging */
2424 buffer = tr->array_buffer.buffer;
2426 ring_buffer_record_enable(buffer);
2429 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2433 * tracing_stop - quick stop of the tracer
2435 * Light weight way to stop tracing. Use in conjunction with
2438 void tracing_stop(void)
2440 struct trace_buffer *buffer;
2441 unsigned long flags;
2443 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2444 if (global_trace.stop_count++)
2447 /* Prevent the buffers from switching */
2448 arch_spin_lock(&global_trace.max_lock);
2450 buffer = global_trace.array_buffer.buffer;
2452 ring_buffer_record_disable(buffer);
2454 #ifdef CONFIG_TRACER_MAX_TRACE
2455 buffer = global_trace.max_buffer.buffer;
2457 ring_buffer_record_disable(buffer);
2460 arch_spin_unlock(&global_trace.max_lock);
2463 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2466 static void tracing_stop_tr(struct trace_array *tr)
2468 struct trace_buffer *buffer;
2469 unsigned long flags;
2471 /* If global, we need to also stop the max tracer */
2472 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2473 return tracing_stop();
2475 raw_spin_lock_irqsave(&tr->start_lock, flags);
2476 if (tr->stop_count++)
2479 buffer = tr->array_buffer.buffer;
2481 ring_buffer_record_disable(buffer);
2484 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2487 static int trace_save_cmdline(struct task_struct *tsk)
2491 /* treat recording of idle task as a success */
2495 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2498 * It's not the end of the world if we don't get
2499 * the lock, but we also don't want to spin
2500 * nor do we want to disable interrupts,
2501 * so if we miss here, then better luck next time.
2503 * This is called within the scheduler and wakeup paths, so interrupts
2504 * had better be disabled and the run queue lock had better be held.
2506 lockdep_assert_preemption_disabled();
2507 if (!arch_spin_trylock(&trace_cmdline_lock))
2510 idx = savedcmd->map_pid_to_cmdline[tpid];
2511 if (idx == NO_CMDLINE_MAP) {
2512 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2514 savedcmd->map_pid_to_cmdline[tpid] = idx;
2515 savedcmd->cmdline_idx = idx;
2518 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2519 set_cmdline(idx, tsk->comm);
2521 arch_spin_unlock(&trace_cmdline_lock);
2526 static void __trace_find_cmdline(int pid, char comm[])
2532 strcpy(comm, "<idle>");
2536 if (WARN_ON_ONCE(pid < 0)) {
2537 strcpy(comm, "<XXX>");
2541 tpid = pid & (PID_MAX_DEFAULT - 1);
2542 map = savedcmd->map_pid_to_cmdline[tpid];
2543 if (map != NO_CMDLINE_MAP) {
2544 tpid = savedcmd->map_cmdline_to_pid[map];
2546 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2550 strcpy(comm, "<...>");
2553 void trace_find_cmdline(int pid, char comm[])
2556 arch_spin_lock(&trace_cmdline_lock);
2558 __trace_find_cmdline(pid, comm);
2560 arch_spin_unlock(&trace_cmdline_lock);
2564 static int *trace_find_tgid_ptr(int pid)
2567 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2568 * if we observe a non-NULL tgid_map then we also observe the correct
2571 int *map = smp_load_acquire(&tgid_map);
2573 if (unlikely(!map || pid > tgid_map_max))
2579 int trace_find_tgid(int pid)
2581 int *ptr = trace_find_tgid_ptr(pid);
2583 return ptr ? *ptr : 0;
2586 static int trace_save_tgid(struct task_struct *tsk)
2590 /* treat recording of idle task as a success */
2594 ptr = trace_find_tgid_ptr(tsk->pid);
2602 static bool tracing_record_taskinfo_skip(int flags)
2604 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2606 if (!__this_cpu_read(trace_taskinfo_save))
2612 * tracing_record_taskinfo - record the task info of a task
2614 * @task: task to record
2615 * @flags: TRACE_RECORD_CMDLINE for recording comm
2616 * TRACE_RECORD_TGID for recording tgid
2618 void tracing_record_taskinfo(struct task_struct *task, int flags)
2622 if (tracing_record_taskinfo_skip(flags))
2626 * Record as much task information as possible. If some fail, continue
2627 * to try to record the others.
2629 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2630 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2632 /* If recording any information failed, retry again soon. */
2636 __this_cpu_write(trace_taskinfo_save, false);
2640 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2642 * @prev: previous task during sched_switch
2643 * @next: next task during sched_switch
2644 * @flags: TRACE_RECORD_CMDLINE for recording comm
2645 * TRACE_RECORD_TGID for recording tgid
2647 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2648 struct task_struct *next, int flags)
2652 if (tracing_record_taskinfo_skip(flags))
2656 * Record as much task information as possible. If some fail, continue
2657 * to try to record the others.
2659 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2660 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2661 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2662 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2664 /* If recording any information failed, retry again soon. */
2668 __this_cpu_write(trace_taskinfo_save, false);
2671 /* Helpers to record a specific task information */
2672 void tracing_record_cmdline(struct task_struct *task)
2674 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2677 void tracing_record_tgid(struct task_struct *task)
2679 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2683 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2684 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2685 * simplifies those functions and keeps them in sync.
2687 enum print_line_t trace_handle_return(struct trace_seq *s)
2689 return trace_seq_has_overflowed(s) ?
2690 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2692 EXPORT_SYMBOL_GPL(trace_handle_return);
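/*
 * Illustrative sketch (editor's note, not part of the original file):
 * a typical event output callback ends with trace_handle_return() so an
 * overflowed trace_seq is reported as a partial line. Names such as
 * "foo_trace_output" below are hypothetical:
 *
 *	static enum print_line_t foo_trace_output(struct trace_iterator *iter,
 *						  int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo: ...\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */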
2694 static unsigned short migration_disable_value(void)
2696 #if defined(CONFIG_SMP)
2697 return current->migration_disabled;
2703 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2705 unsigned int trace_flags = irqs_status;
2708 pc = preempt_count();
2711 trace_flags |= TRACE_FLAG_NMI;
2712 if (pc & HARDIRQ_MASK)
2713 trace_flags |= TRACE_FLAG_HARDIRQ;
2714 if (in_serving_softirq())
2715 trace_flags |= TRACE_FLAG_SOFTIRQ;
2716 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2717 trace_flags |= TRACE_FLAG_BH_OFF;
2719 if (tif_need_resched())
2720 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2721 if (test_preempt_need_resched())
2722 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2723 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2724 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
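	/*
	 * Editor's note (illustrative reading of the return statement above):
	 * the packed trace_ctx word is laid out roughly as
	 *
	 *	bits  0- 3: preempt_count() depth, clamped to 0xf
	 *	bits  4- 7: migrate-disable depth, clamped to 0xf
	 *	bits 16+  : the TRACE_FLAG_* bits collected above
	 */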
2727 struct ring_buffer_event *
2728 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2731 unsigned int trace_ctx)
2733 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2736 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2737 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2738 static int trace_buffered_event_ref;
2741 * trace_buffered_event_enable - enable buffering events
2743 * When events are being filtered, it is quicker to use a temporary
2744 * buffer to write the event data into if there's a likely chance
2745 * that it will not be committed. The discard of the ring buffer
2746 * is not as fast as committing, and is much slower than copying
2749 * When an event is to be filtered, allocate per cpu buffers to
2750 * write the event data into, and if the event is filtered and discarded
2751 * it is simply dropped, otherwise, the entire data is to be committed
2754 void trace_buffered_event_enable(void)
2756 struct ring_buffer_event *event;
2760 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2762 if (trace_buffered_event_ref++)
2765 for_each_tracing_cpu(cpu) {
2766 page = alloc_pages_node(cpu_to_node(cpu),
2767 GFP_KERNEL | __GFP_NORETRY, 0);
2771 event = page_address(page);
2772 memset(event, 0, sizeof(*event));
2774 per_cpu(trace_buffered_event, cpu) = event;
2777 if (cpu == smp_processor_id() &&
2778 __this_cpu_read(trace_buffered_event) !=
2779 per_cpu(trace_buffered_event, cpu))
2786 trace_buffered_event_disable();
2789 static void enable_trace_buffered_event(void *data)
2791 /* Probably not needed, but do it anyway */
2793 this_cpu_dec(trace_buffered_event_cnt);
2796 static void disable_trace_buffered_event(void *data)
2798 this_cpu_inc(trace_buffered_event_cnt);
2802 * trace_buffered_event_disable - disable buffering events
2804 * When a filter is removed, it is faster to not use the buffered
2805 * events, and to commit directly into the ring buffer. Free up
2806 * the temp buffers when there are no more users. This requires
2807 * special synchronization with current events.
2809 void trace_buffered_event_disable(void)
2813 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2815 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2818 if (--trace_buffered_event_ref)
2822 /* For each CPU, set the buffer as used. */
2823 smp_call_function_many(tracing_buffer_mask,
2824 disable_trace_buffered_event, NULL, 1);
2827 /* Wait for all current users to finish */
2830 for_each_tracing_cpu(cpu) {
2831 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2832 per_cpu(trace_buffered_event, cpu) = NULL;
2835 * Make sure trace_buffered_event is NULL before clearing
2836 * trace_buffered_event_cnt.
2841 /* Do the work on each cpu */
2842 smp_call_function_many(tracing_buffer_mask,
2843 enable_trace_buffered_event, NULL, 1);
2847 static struct trace_buffer *temp_buffer;
2849 struct ring_buffer_event *
2850 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2851 struct trace_event_file *trace_file,
2852 int type, unsigned long len,
2853 unsigned int trace_ctx)
2855 struct ring_buffer_event *entry;
2856 struct trace_array *tr = trace_file->tr;
2859 *current_rb = tr->array_buffer.buffer;
2861 if (!tr->no_filter_buffering_ref &&
2862 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2863 preempt_disable_notrace();
2865 * Filtering is on, so try to use the per cpu buffer first.
2866 * This buffer will simulate a ring_buffer_event,
2867 * where the type_len is zero and the array[0] will
2868 * hold the full length.
2869 * (see include/linux/ring_buffer.h for details on
2870 * how the ring_buffer_event is structured).
2872 * Using a temp buffer during filtering and copying it
2873 * on a matched filter is quicker than writing directly
2874 * into the ring buffer and then discarding it when
2875 * it doesn't match. That is because the discard
2876 * requires several atomic operations to get right.
2877 * Copying on match and doing nothing on a failed match
2878 * is still quicker than no copy on match, but having
2879 * to discard out of the ring buffer on a failed match.
2881 if ((entry = __this_cpu_read(trace_buffered_event))) {
2882 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2884 val = this_cpu_inc_return(trace_buffered_event_cnt);
2887 * Preemption is disabled, but interrupts and NMIs
2888 * can still come in now. If that happens after
2889 * the above increment, then it will have to go
2890 * back to the old method of allocating the event
2891 * on the ring buffer, and if the filter fails, it
2892 * will have to call ring_buffer_discard_commit()
2895 * Need to also check the unlikely case that the
2896 * length is bigger than the temp buffer size.
2897 * If that happens, then the reserve is pretty much
2898 * guaranteed to fail, as the ring buffer currently
2899 * only allows events less than a page. But that may
2900 * change in the future, so let the ring buffer reserve
2901 * handle the failure in that case.
2903 if (val == 1 && likely(len <= max_len)) {
2904 trace_event_setup(entry, type, trace_ctx);
2905 entry->array[0] = len;
2906 /* Return with preemption disabled */
2909 this_cpu_dec(trace_buffered_event_cnt);
2911 /* __trace_buffer_lock_reserve() disables preemption */
2912 preempt_enable_notrace();
2915 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2918 * If tracing is off, but we have triggers enabled,
2919 * we still need to look at the event data. Use the temp_buffer
2920 * to store the trace event for the trigger to use. It is recursion
2921 * safe and will not be recorded anywhere.
2923 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2924 *current_rb = temp_buffer;
2925 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2930 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
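/*
 * Illustrative sketch (editor's note): code generated by TRACE_EVENT()
 * pairs this reserve with trace_event_buffer_commit() below, roughly:
 * reserve an entry, fill in the event fields, then commit (or let the
 * trigger/discard test drop it). This is a hedged description of the
 * calling convention, not a verbatim copy of the generated code:
 *
 *	entry = trace_event_buffer_lock_reserve(&buffer, trace_file,
 *						 type, len, trace_ctx);
 *	if (entry) {
 *		// ... fill in the event-specific fields ...
 *		// ... then commit via trace_event_buffer_commit() ...
 *	}
 */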
2932 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2933 static DEFINE_MUTEX(tracepoint_printk_mutex);
2935 static void output_printk(struct trace_event_buffer *fbuffer)
2937 struct trace_event_call *event_call;
2938 struct trace_event_file *file;
2939 struct trace_event *event;
2940 unsigned long flags;
2941 struct trace_iterator *iter = tracepoint_print_iter;
2943 /* We should never get here if iter is NULL */
2944 if (WARN_ON_ONCE(!iter))
2947 event_call = fbuffer->trace_file->event_call;
2948 if (!event_call || !event_call->event.funcs ||
2949 !event_call->event.funcs->trace)
2952 file = fbuffer->trace_file;
2953 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2954 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2955 !filter_match_preds(file->filter, fbuffer->entry)))
2958 event = &fbuffer->trace_file->event_call->event;
2960 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2961 trace_seq_init(&iter->seq);
2962 iter->ent = fbuffer->entry;
2963 event_call->event.funcs->trace(iter, 0, event);
2964 trace_seq_putc(&iter->seq, 0);
2965 printk("%s", iter->seq.buffer);
2967 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2970 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2971 void *buffer, size_t *lenp,
2974 int save_tracepoint_printk;
2977 mutex_lock(&tracepoint_printk_mutex);
2978 save_tracepoint_printk = tracepoint_printk;
2980 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2983 * This will force exiting early, as tracepoint_printk
2984 * is always zero when tracepoint_print_iter is not allocated
2986 if (!tracepoint_print_iter)
2987 tracepoint_printk = 0;
2989 if (save_tracepoint_printk == tracepoint_printk)
2992 if (tracepoint_printk)
2993 static_key_enable(&tracepoint_printk_key.key);
2995 static_key_disable(&tracepoint_printk_key.key);
2998 mutex_unlock(&tracepoint_printk_mutex);
3003 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3005 enum event_trigger_type tt = ETT_NONE;
3006 struct trace_event_file *file = fbuffer->trace_file;
3008 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3009 fbuffer->entry, &tt))
3012 if (static_key_false(&tracepoint_printk_key.key))
3013 output_printk(fbuffer);
3015 if (static_branch_unlikely(&trace_event_exports_enabled))
3016 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3018 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3019 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3023 event_triggers_post_call(file, tt);
3026 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3031 * trace_buffer_unlock_commit_regs()
3032 * trace_event_buffer_commit()
3033 * trace_event_raw_event_xxx()
3035 # define STACK_SKIP 3
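/*
 * Editor's note (illustrative): with STACK_SKIP == 3, the three internal
 * frames listed above are dropped, so the recorded stack is expected to
 * start at the caller of trace_event_raw_event_xxx(), i.e. the tracepoint
 * site (on non-ORC unwinders, where the skip count is honoured).
 */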
3037 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3038 struct trace_buffer *buffer,
3039 struct ring_buffer_event *event,
3040 unsigned int trace_ctx,
3041 struct pt_regs *regs)
3043 __buffer_unlock_commit(buffer, event);
3046 * If regs is not set, then skip the necessary functions.
3047 * Note, we can still get here via blktrace, wakeup tracer
3048 * and mmiotrace, but that's ok if they lose a function or
3049 * two. They are not that meaningful.
3051 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3052 ftrace_trace_userstack(tr, buffer, trace_ctx);
3056 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3059 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3060 struct ring_buffer_event *event)
3062 __buffer_unlock_commit(buffer, event);
3066 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3067 parent_ip, unsigned int trace_ctx)
3069 struct trace_event_call *call = &event_function;
3070 struct trace_buffer *buffer = tr->array_buffer.buffer;
3071 struct ring_buffer_event *event;
3072 struct ftrace_entry *entry;
3074 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3078 entry = ring_buffer_event_data(event);
3080 entry->parent_ip = parent_ip;
3082 if (!call_filter_check_discard(call, entry, buffer, event)) {
3083 if (static_branch_unlikely(&trace_function_exports_enabled))
3084 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3085 __buffer_unlock_commit(buffer, event);
3089 #ifdef CONFIG_STACKTRACE
3091 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3092 #define FTRACE_KSTACK_NESTING 4
3094 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3096 struct ftrace_stack {
3097 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3101 struct ftrace_stacks {
3102 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3105 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3106 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3108 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3109 unsigned int trace_ctx,
3110 int skip, struct pt_regs *regs)
3112 struct trace_event_call *call = &event_kernel_stack;
3113 struct ring_buffer_event *event;
3114 unsigned int size, nr_entries;
3115 struct ftrace_stack *fstack;
3116 struct stack_entry *entry;
3120 * Add one, for this function and the call to save_stack_trace().
3121 * If regs is set, then these functions will not be in the way.
3123 #ifndef CONFIG_UNWINDER_ORC
3128 preempt_disable_notrace();
3130 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3132 /* This should never happen. If it does, yell once and skip */
3133 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3137 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3138 * interrupt will either see the value pre increment or post
3139 * increment. If the interrupt happens pre increment it will have
3140 * restored the counter when it returns. We just need a barrier to
3141 * keep gcc from moving things around.
3145 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3146 size = ARRAY_SIZE(fstack->calls);
3149 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3152 nr_entries = stack_trace_save(fstack->calls, size, skip);
3155 size = nr_entries * sizeof(unsigned long);
3156 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3157 (sizeof(*entry) - sizeof(entry->caller)) + size,
3161 entry = ring_buffer_event_data(event);
3163 memcpy(&entry->caller, fstack->calls, size);
3164 entry->size = nr_entries;
3166 if (!call_filter_check_discard(call, entry, buffer, event))
3167 __buffer_unlock_commit(buffer, event);
3170 /* Again, don't let gcc optimize things here */
3172 __this_cpu_dec(ftrace_stack_reserve);
3173 preempt_enable_notrace();
3177 static inline void ftrace_trace_stack(struct trace_array *tr,
3178 struct trace_buffer *buffer,
3179 unsigned int trace_ctx,
3180 int skip, struct pt_regs *regs)
3182 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3185 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3188 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3191 struct trace_buffer *buffer = tr->array_buffer.buffer;
3193 if (rcu_is_watching()) {
3194 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3198 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3202 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3203 * but if the above rcu_is_watching() failed, then the NMI
3204 * triggered someplace critical, and ct_irq_enter() should
3205 * not be called from NMI.
3207 if (unlikely(in_nmi()))
3210 ct_irq_enter_irqson();
3211 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3212 ct_irq_exit_irqson();
3216 * trace_dump_stack - record a stack back trace in the trace buffer
3217 * @skip: Number of functions to skip (helper handlers)
3219 void trace_dump_stack(int skip)
3221 if (tracing_disabled || tracing_selftest_running)
3224 #ifndef CONFIG_UNWINDER_ORC
3225 /* Skip 1 to skip this function. */
3228 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3229 tracing_gen_ctx(), skip, NULL);
3231 EXPORT_SYMBOL_GPL(trace_dump_stack);
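/*
 * Illustrative usage (editor's note): debugging code can sprinkle
 * trace_dump_stack(0) at points of interest to record how a path was
 * reached, e.g.:
 *
 *	if (unexpected_condition)	// hypothetical condition
 *		trace_dump_stack(0);
 */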
3233 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3234 static DEFINE_PER_CPU(int, user_stack_count);
3237 ftrace_trace_userstack(struct trace_array *tr,
3238 struct trace_buffer *buffer, unsigned int trace_ctx)
3240 struct trace_event_call *call = &event_user_stack;
3241 struct ring_buffer_event *event;
3242 struct userstack_entry *entry;
3244 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3248 * NMIs can not handle page faults, even with fixups.
3249 * Saving the user stack can (and often does) fault.
3251 if (unlikely(in_nmi()))
3255 * prevent recursion, since the user stack tracing may
3256 * trigger other kernel events.
3259 if (__this_cpu_read(user_stack_count))
3262 __this_cpu_inc(user_stack_count);
3264 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3265 sizeof(*entry), trace_ctx);
3267 goto out_drop_count;
3268 entry = ring_buffer_event_data(event);
3270 entry->tgid = current->tgid;
3271 memset(&entry->caller, 0, sizeof(entry->caller));
3273 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3274 if (!call_filter_check_discard(call, entry, buffer, event))
3275 __buffer_unlock_commit(buffer, event);
3278 __this_cpu_dec(user_stack_count);
3282 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3283 static void ftrace_trace_userstack(struct trace_array *tr,
3284 struct trace_buffer *buffer,
3285 unsigned int trace_ctx)
3288 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3290 #endif /* CONFIG_STACKTRACE */
3293 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3294 unsigned long long delta)
3296 entry->bottom_delta_ts = delta & U32_MAX;
3297 entry->top_delta_ts = (delta >> 32);
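	/*
	 * Editor's note (illustrative): the 64-bit delta is split into two
	 * 32-bit halves above; a reader reconstructs it as
	 *
	 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
	 */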
3300 void trace_last_func_repeats(struct trace_array *tr,
3301 struct trace_func_repeats *last_info,
3302 unsigned int trace_ctx)
3304 struct trace_buffer *buffer = tr->array_buffer.buffer;
3305 struct func_repeats_entry *entry;
3306 struct ring_buffer_event *event;
3309 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3310 sizeof(*entry), trace_ctx);
3314 delta = ring_buffer_event_time_stamp(buffer, event) -
3315 last_info->ts_last_call;
3317 entry = ring_buffer_event_data(event);
3318 entry->ip = last_info->ip;
3319 entry->parent_ip = last_info->parent_ip;
3320 entry->count = last_info->count;
3321 func_repeats_set_delta_ts(entry, delta);
3323 __buffer_unlock_commit(buffer, event);
3326 /* created for use with alloc_percpu */
3327 struct trace_buffer_struct {
3329 char buffer[4][TRACE_BUF_SIZE];
3332 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3335 * This allows for lockless recording. If we're nested too deeply, then
3336 * this returns NULL.
3338 static char *get_trace_buf(void)
3340 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3342 if (!trace_percpu_buffer || buffer->nesting >= 4)
3347 /* Interrupts must see nesting incremented before we use the buffer */
3349 return &buffer->buffer[buffer->nesting - 1][0];
3352 static void put_trace_buf(void)
3354 /* Don't let the decrement of nesting leak before this */
3356 this_cpu_dec(trace_percpu_buffer->nesting);
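/*
 * Illustrative usage (editor's note): callers bracket their use of the
 * per-cpu printk buffer with these helpers while preemption is disabled,
 * as trace_vbprintk() below does:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// ... format into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */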
3359 static int alloc_percpu_trace_buffer(void)
3361 struct trace_buffer_struct __percpu *buffers;
3363 if (trace_percpu_buffer)
3366 buffers = alloc_percpu(struct trace_buffer_struct);
3367 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3370 trace_percpu_buffer = buffers;
3374 static int buffers_allocated;
3376 void trace_printk_init_buffers(void)
3378 if (buffers_allocated)
3381 if (alloc_percpu_trace_buffer())
3384 /* trace_printk() is for debug use only. Don't use it in production. */
3387 pr_warn("**********************************************************\n");
3388 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3390 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3392 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3393 pr_warn("** unsafe for production use. **\n");
3395 pr_warn("** If you see this message and you are not debugging **\n");
3396 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3398 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3399 pr_warn("**********************************************************\n");
3401 /* Expand the buffers to set size */
3402 tracing_update_buffers();
3404 buffers_allocated = 1;
3407 * trace_printk_init_buffers() can be called by modules.
3408 * If that happens, then we need to start cmdline recording
3409 * directly here. If the global_trace.buffer is already
3410 * allocated here, then this was called by module code.
3412 if (global_trace.array_buffer.buffer)
3413 tracing_start_cmdline_record();
3415 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3417 void trace_printk_start_comm(void)
3419 /* Start tracing comms if trace printk is set */
3420 if (!buffers_allocated)
3422 tracing_start_cmdline_record();
3425 static void trace_printk_start_stop_comm(int enabled)
3427 if (!buffers_allocated)
3431 tracing_start_cmdline_record();
3433 tracing_stop_cmdline_record();
3437 * trace_vbprintk - write binary msg to tracing buffer
3438 * @ip: The address of the caller
3439 * @fmt: The string format to write to the buffer
3440 * @args: Arguments for @fmt
3442 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3444 struct trace_event_call *call = &event_bprint;
3445 struct ring_buffer_event *event;
3446 struct trace_buffer *buffer;
3447 struct trace_array *tr = &global_trace;
3448 struct bprint_entry *entry;
3449 unsigned int trace_ctx;
3453 if (unlikely(tracing_selftest_running || tracing_disabled))
3456 /* Don't pollute graph traces with trace_vprintk internals */
3457 pause_graph_tracing();
3459 trace_ctx = tracing_gen_ctx();
3460 preempt_disable_notrace();
3462 tbuffer = get_trace_buf();
3468 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3470 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3473 size = sizeof(*entry) + sizeof(u32) * len;
3474 buffer = tr->array_buffer.buffer;
3475 ring_buffer_nest_start(buffer);
3476 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3480 entry = ring_buffer_event_data(event);
3484 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3485 if (!call_filter_check_discard(call, entry, buffer, event)) {
3486 __buffer_unlock_commit(buffer, event);
3487 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3491 ring_buffer_nest_end(buffer);
3496 preempt_enable_notrace();
3497 unpause_graph_tracing();
3501 EXPORT_SYMBOL_GPL(trace_vbprintk);
3505 __trace_array_vprintk(struct trace_buffer *buffer,
3506 unsigned long ip, const char *fmt, va_list args)
3508 struct trace_event_call *call = &event_print;
3509 struct ring_buffer_event *event;
3511 struct print_entry *entry;
3512 unsigned int trace_ctx;
3515 if (tracing_disabled || tracing_selftest_running)
3518 /* Don't pollute graph traces with trace_vprintk internals */
3519 pause_graph_tracing();
3521 trace_ctx = tracing_gen_ctx();
3522 preempt_disable_notrace();
3525 tbuffer = get_trace_buf();
3531 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3533 size = sizeof(*entry) + len + 1;
3534 ring_buffer_nest_start(buffer);
3535 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3539 entry = ring_buffer_event_data(event);
3542 memcpy(&entry->buf, tbuffer, len + 1);
3543 if (!call_filter_check_discard(call, entry, buffer, event)) {
3544 __buffer_unlock_commit(buffer, event);
3545 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3549 ring_buffer_nest_end(buffer);
3553 preempt_enable_notrace();
3554 unpause_graph_tracing();
3560 int trace_array_vprintk(struct trace_array *tr,
3561 unsigned long ip, const char *fmt, va_list args)
3563 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3567 * trace_array_printk - Print a message to a specific instance
3568 * @tr: The instance trace_array descriptor
3569 * @ip: The instruction pointer that this is called from.
3570 * @fmt: The format to print (printf format)
3572 * If a subsystem sets up its own instance, it has the right to
3573 * printk strings into its tracing instance buffer using this
3574 * function. Note, this function will not write into the top level
3575 * buffer (use trace_printk() for that), as the top level
3576 * buffer should only contain events that can be individually disabled.
3577 * trace_printk() is only for debugging a kernel, and should never
3578 * be incorporated into normal use.
3580 * trace_array_printk() can be used, as it will not add noise to the
3581 * top level tracing buffer.
3583 * Note, trace_array_init_printk() must be called on @tr before this
3587 int trace_array_printk(struct trace_array *tr,
3588 unsigned long ip, const char *fmt, ...)
3596 /* This is only allowed for created instances */
3597 if (tr == &global_trace)
3600 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3604 ret = trace_array_vprintk(tr, ip, fmt, ap);
3608 EXPORT_SYMBOL_GPL(trace_array_printk);
3611 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3612 * @tr: The trace array to initialize the buffers for
3614 * As trace_array_printk() only writes into instances, they are OK to
3615 * have in the kernel (unlike trace_printk()). This needs to be called
3616 * before trace_array_printk() can be used on a trace_array.
3618 int trace_array_init_printk(struct trace_array *tr)
3623 /* This is only allowed for created instances */
3624 if (tr == &global_trace)
3627 return alloc_percpu_trace_buffer();
3629 EXPORT_SYMBOL_GPL(trace_array_init_printk);
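/*
 * Illustrative sketch (editor's note): a module that owns a trace instance
 * could use the pair roughly as follows ("my_tr" is a hypothetical
 * trace_array pointer already obtained by the module):
 *
 *	if (!trace_array_init_printk(my_tr))
 *		trace_array_printk(my_tr, _THIS_IP_, "state=%d\n", state);
 *
 * This is a hedged usage sketch, not code from this file.
 */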
3632 int trace_array_printk_buf(struct trace_buffer *buffer,
3633 unsigned long ip, const char *fmt, ...)
3638 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3642 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3648 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3650 return trace_array_vprintk(&global_trace, ip, fmt, args);
3652 EXPORT_SYMBOL_GPL(trace_vprintk);
3654 static void trace_iterator_increment(struct trace_iterator *iter)
3656 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3660 ring_buffer_iter_advance(buf_iter);
3663 static struct trace_entry *
3664 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3665 unsigned long *lost_events)
3667 struct ring_buffer_event *event;
3668 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3671 event = ring_buffer_iter_peek(buf_iter, ts);
3673 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3674 (unsigned long)-1 : 0;
3676 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3681 iter->ent_size = ring_buffer_event_length(event);
3682 return ring_buffer_event_data(event);
3688 static struct trace_entry *
3689 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3690 unsigned long *missing_events, u64 *ent_ts)
3692 struct trace_buffer *buffer = iter->array_buffer->buffer;
3693 struct trace_entry *ent, *next = NULL;
3694 unsigned long lost_events = 0, next_lost = 0;
3695 int cpu_file = iter->cpu_file;
3696 u64 next_ts = 0, ts;
3702 * If we are in a per_cpu trace file, don't bother iterating over
3703 * all the cpus; peek at the requested cpu directly.
3705 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3706 if (ring_buffer_empty_cpu(buffer, cpu_file))
3708 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3710 *ent_cpu = cpu_file;
3715 for_each_tracing_cpu(cpu) {
3717 if (ring_buffer_empty_cpu(buffer, cpu))
3720 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3723 * Pick the entry with the smallest timestamp:
3725 if (ent && (!next || ts < next_ts)) {
3729 next_lost = lost_events;
3730 next_size = iter->ent_size;
3734 iter->ent_size = next_size;
3737 *ent_cpu = next_cpu;
3743 *missing_events = next_lost;
3748 #define STATIC_FMT_BUF_SIZE 128
3749 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3751 char *trace_iter_expand_format(struct trace_iterator *iter)
3756 * iter->tr is NULL when used with tp_printk, which makes
3757 * this get called where it is not safe to call krealloc().
3759 if (!iter->tr || iter->fmt == static_fmt_buf)
3762 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3765 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3772 /* Returns true if the string is safe to dereference from an event */
3773 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3776 unsigned long addr = (unsigned long)str;
3777 struct trace_event *trace_event;
3778 struct trace_event_call *event;
3780 /* Ignore strings with no length */
3784 /* OK if part of the event data */
3785 if ((addr >= (unsigned long)iter->ent) &&
3786 (addr < (unsigned long)iter->ent + iter->ent_size))
3789 /* OK if part of the temp seq buffer */
3790 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3791 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3794 /* Core rodata can not be freed */
3795 if (is_kernel_rodata(addr))
3798 if (trace_is_tracepoint_string(str))
3802 * Now this could be a module event, referencing core module
3803 * data, which is OK.
3808 trace_event = ftrace_find_event(iter->ent->type);
3812 event = container_of(trace_event, struct trace_event_call, event);
3813 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3816 /* Would rather have rodata, but this will suffice */
3817 if (within_module_core(addr, event->module))
3823 static const char *show_buffer(struct trace_seq *s)
3825 struct seq_buf *seq = &s->seq;
3827 seq_buf_terminate(seq);
3832 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3834 static int test_can_verify_check(const char *fmt, ...)
3841 * The verifier depends on vsnprintf() modifying the va_list
3842 * passed to it, where it is sent as a reference. Some architectures
3843 * (like x86_32) pass it by value, which means that vsnprintf()
3844 * does not modify the va_list passed to it, and the verifier
3845 * would then need to be able to understand all the values that
3846 * vsnprintf can use. If it is passed by value, then the verifier
3850 vsnprintf(buf, 16, "%d", ap);
3851 ret = va_arg(ap, int);
3857 static void test_can_verify(void)
3859 if (!test_can_verify_check("%d %d", 0, 1)) {
3860 pr_info("trace event string verifier disabled\n");
3861 static_branch_inc(&trace_no_verify);
3866 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3867 * @iter: The iterator that holds the seq buffer and the event being printed
3868 * @fmt: The format used to print the event
3869 * @ap: The va_list holding the data to print from @fmt.
3871 * This writes the data into the @iter->seq buffer using the data from
3872 * @fmt and @ap. If the format has a %s, then the source of the string
3873 * is examined to make sure it is safe to print, otherwise it will
3874 * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3877 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3880 const char *p = fmt;
3884 if (WARN_ON_ONCE(!fmt))
3887 if (static_branch_unlikely(&trace_no_verify))
3890 /* Don't bother checking when doing a ftrace_dump() */
3891 if (iter->fmt == static_fmt_buf)
3900 /* We only care about %s and variants */
3901 for (i = 0; p[i]; i++) {
3902 if (i + 1 >= iter->fmt_size) {
3904 * If we can't expand the copy buffer,
3907 if (!trace_iter_expand_format(iter))
3911 if (p[i] == '\\' && p[i+1]) {
3916 /* Need to test cases like %08.*s */
3917 for (j = 1; p[i+j]; j++) {
3918 if (isdigit(p[i+j]) ||
3921 if (p[i+j] == '*') {
3933 /* If no %s found then just print normally */
3937 /* Copy up to the %s, and print that */
3938 strncpy(iter->fmt, p, i);
3939 iter->fmt[i] = '\0';
3940 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3943 * If iter->seq is full, the above call no longer guarantees
3944 * that ap is in sync with fmt processing, and further calls
3945 * to va_arg() can return wrong positional arguments.
3947 * Ensure that ap is no longer used in this case.
3949 if (iter->seq.full) {
3955 len = va_arg(ap, int);
3957 /* The ap now points to the string data of the %s */
3958 str = va_arg(ap, const char *);
3961 * If you hit this warning, it is likely that the
3962 * trace event in question used %s on a string that
3963 * was saved at the time of the event, but may not be
3964 * around when the trace is read. Use __string(),
3965 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3966 * instead. See samples/trace_events/trace-events-sample.h
3969 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3970 "fmt: '%s' current_buffer: '%s'",
3971 fmt, show_buffer(&iter->seq))) {
3974 /* Try to safely read the string */
3976 if (len + 1 > iter->fmt_size)
3977 len = iter->fmt_size - 1;
3980 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3984 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3988 trace_seq_printf(&iter->seq, "(0x%px)", str);
3990 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3992 str = "[UNSAFE-MEMORY]";
3993 strcpy(iter->fmt, "%s");
3995 strncpy(iter->fmt, p + i, j + 1);
3996 iter->fmt[j+1] = '\0';
3999 trace_seq_printf(&iter->seq, iter->fmt, len, str);
4001 trace_seq_printf(&iter->seq, iter->fmt, str);
4007 trace_seq_vprintf(&iter->seq, p, ap);
4010 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4012 const char *p, *new_fmt;
4015 if (WARN_ON_ONCE(!fmt))
4018 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4022 new_fmt = q = iter->fmt;
4024 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4025 if (!trace_iter_expand_format(iter))
4028 q += iter->fmt - new_fmt;
4029 new_fmt = iter->fmt;
4034 /* Replace %p with %px */
4038 } else if (p[0] == 'p' && !isalnum(p[1])) {
4049 #define STATIC_TEMP_BUF_SIZE 128
4050 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4052 /* Find the next real entry, without updating the iterator itself */
4053 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4054 int *ent_cpu, u64 *ent_ts)
4056 /* __find_next_entry will reset ent_size */
4057 int ent_size = iter->ent_size;
4058 struct trace_entry *entry;
4061 * If called from ftrace_dump(), then the iter->temp buffer
4062 * will be the static_temp_buf and not created from kmalloc.
4063 * If the entry size is greater than the buffer, we can
4064 * not save it. Just return NULL in that case. This is only
4065 * used to add markers when two consecutive events' time
4066 * stamps have a large delta. See trace_print_lat_context()
4068 if (iter->temp == static_temp_buf &&
4069 STATIC_TEMP_BUF_SIZE < ent_size)
4073 * The __find_next_entry() may call peek_next_entry(), which may
4074 * call ring_buffer_peek() that may make the contents of iter->ent
4075 * undefined. Need to copy iter->ent now.
4077 if (iter->ent && iter->ent != iter->temp) {
4078 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4079 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4081 temp = kmalloc(iter->ent_size, GFP_KERNEL);
4086 iter->temp_size = iter->ent_size;
4088 memcpy(iter->temp, iter->ent, iter->ent_size);
4089 iter->ent = iter->temp;
4091 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4092 /* Put back the original ent_size */
4093 iter->ent_size = ent_size;
4098 /* Find the next real entry, and increment the iterator to the next entry */
4099 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4101 iter->ent = __find_next_entry(iter, &iter->cpu,
4102 &iter->lost_events, &iter->ts);
4105 trace_iterator_increment(iter);
4107 return iter->ent ? iter : NULL;
4110 static void trace_consume(struct trace_iterator *iter)
4112 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4113 &iter->lost_events);
4116 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4118 struct trace_iterator *iter = m->private;
4122 WARN_ON_ONCE(iter->leftover);
4126 /* can't go backwards */
4131 ent = trace_find_next_entry_inc(iter);
4135 while (ent && iter->idx < i)
4136 ent = trace_find_next_entry_inc(iter);
4143 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4145 struct ring_buffer_iter *buf_iter;
4146 unsigned long entries = 0;
4149 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4151 buf_iter = trace_buffer_iter(iter, cpu);
4155 ring_buffer_iter_reset(buf_iter);
4158 * We could have the case with the max latency tracers
4159 * that a reset never took place on a cpu. This is evident
4160 * by the timestamp being before the start of the buffer.
4162 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4163 if (ts >= iter->array_buffer->time_start)
4166 ring_buffer_iter_advance(buf_iter);
4169 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4173 * The current tracer is copied to avoid global locking
4176 static void *s_start(struct seq_file *m, loff_t *pos)
4178 struct trace_iterator *iter = m->private;
4179 struct trace_array *tr = iter->tr;
4180 int cpu_file = iter->cpu_file;
4186 * copy the tracer to avoid using a global lock all around.
4187 * iter->trace is a copy of current_trace, the pointer to the
4188 * name may be used instead of a strcmp(), as iter->trace->name
4189 * will point to the same string as current_trace->name.
4191 mutex_lock(&trace_types_lock);
4192 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4193 *iter->trace = *tr->current_trace;
4194 mutex_unlock(&trace_types_lock);
4196 #ifdef CONFIG_TRACER_MAX_TRACE
4197 if (iter->snapshot && iter->trace->use_max_tr)
4198 return ERR_PTR(-EBUSY);
4201 if (*pos != iter->pos) {
4206 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4207 for_each_tracing_cpu(cpu)
4208 tracing_iter_reset(iter, cpu);
4210 tracing_iter_reset(iter, cpu_file);
4213 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4218 * If we overflowed the seq_file before, then we want
4219 * to just reuse the trace_seq buffer again.
4225 p = s_next(m, p, &l);
4229 trace_event_read_lock();
4230 trace_access_lock(cpu_file);
4234 static void s_stop(struct seq_file *m, void *p)
4236 struct trace_iterator *iter = m->private;
4238 #ifdef CONFIG_TRACER_MAX_TRACE
4239 if (iter->snapshot && iter->trace->use_max_tr)
4243 trace_access_unlock(iter->cpu_file);
4244 trace_event_read_unlock();
4248 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4249 unsigned long *entries, int cpu)
4251 unsigned long count;
4253 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4255 * If this buffer has skipped entries, then we hold all
4256 * entries for the trace and we need to ignore the
4257 * ones before the time stamp.
4259 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4260 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4261 /* total is the same as the entries */
4265 ring_buffer_overrun_cpu(buf->buffer, cpu);
4270 get_total_entries(struct array_buffer *buf,
4271 unsigned long *total, unsigned long *entries)
4279 for_each_tracing_cpu(cpu) {
4280 get_total_entries_cpu(buf, &t, &e, cpu);
4286 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4288 unsigned long total, entries;
4293 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4298 unsigned long trace_total_entries(struct trace_array *tr)
4300 unsigned long total, entries;
4305 get_total_entries(&tr->array_buffer, &total, &entries);
4310 static void print_lat_help_header(struct seq_file *m)
4312 seq_puts(m, "# _------=> CPU# \n"
4313 "# / _-----=> irqs-off/BH-disabled\n"
4314 "# | / _----=> need-resched \n"
4315 "# || / _---=> hardirq/softirq \n"
4316 "# ||| / _--=> preempt-depth \n"
4317 "# |||| / _-=> migrate-disable \n"
4318 "# ||||| / delay \n"
4319 "# cmd pid |||||| time | caller \n"
4320 "# \\ / |||||| \\ | / \n");
4323 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4325 unsigned long total;
4326 unsigned long entries;
4328 get_total_entries(buf, &total, &entries);
4329 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4330 entries, total, num_online_cpus());
4334 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4337 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4339 print_event_info(buf, m);
4341 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4342 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4345 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4348 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4349 static const char space[] = " ";
4350 int prec = tgid ? 12 : 2;
4352 print_event_info(buf, m);
4354 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4355 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4356 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4357 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4358 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4359 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4360 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4361 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4365 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4367 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4368 struct array_buffer *buf = iter->array_buffer;
4369 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4370 struct tracer *type = iter->trace;
4371 unsigned long entries;
4372 unsigned long total;
4373 const char *name = type->name;
4375 get_total_entries(buf, &total, &entries);
4377 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4379 seq_puts(m, "# -----------------------------------"
4380 "---------------------------------\n");
4381 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4382 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4383 nsecs_to_usecs(data->saved_latency),
4387 preempt_model_none() ? "server" :
4388 preempt_model_voluntary() ? "desktop" :
4389 preempt_model_full() ? "preempt" :
4390 preempt_model_rt() ? "preempt_rt" :
4392 /* These are reserved for later use */
4395 seq_printf(m, " #P:%d)\n", num_online_cpus());
4399 seq_puts(m, "# -----------------\n");
4400 seq_printf(m, "# | task: %.16s-%d "
4401 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4402 data->comm, data->pid,
4403 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4404 data->policy, data->rt_priority);
4405 seq_puts(m, "# -----------------\n");
4407 if (data->critical_start) {
4408 seq_puts(m, "# => started at: ");
4409 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4410 trace_print_seq(m, &iter->seq);
4411 seq_puts(m, "\n# => ended at: ");
4412 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4413 trace_print_seq(m, &iter->seq);
4414 seq_puts(m, "\n#\n");
4420 static void test_cpu_buff_start(struct trace_iterator *iter)
4422 struct trace_seq *s = &iter->seq;
4423 struct trace_array *tr = iter->tr;
4425 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4428 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4431 if (cpumask_available(iter->started) &&
4432 cpumask_test_cpu(iter->cpu, iter->started))
4435 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4438 if (cpumask_available(iter->started))
4439 cpumask_set_cpu(iter->cpu, iter->started);
4441 /* Don't print started cpu buffer for the first entry of the trace */
4443 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4447 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4449 struct trace_array *tr = iter->tr;
4450 struct trace_seq *s = &iter->seq;
4451 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4452 struct trace_entry *entry;
4453 struct trace_event *event;
4457 test_cpu_buff_start(iter);
4459 event = ftrace_find_event(entry->type);
4461 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4462 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4463 trace_print_lat_context(iter);
4465 trace_print_context(iter);
4468 if (trace_seq_has_overflowed(s))
4469 return TRACE_TYPE_PARTIAL_LINE;
4472 if (tr->trace_flags & TRACE_ITER_FIELDS)
4473 return print_event_fields(iter, event);
4474 return event->funcs->trace(iter, sym_flags, event);
4477 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4479 return trace_handle_return(s);
4482 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4484 struct trace_array *tr = iter->tr;
4485 struct trace_seq *s = &iter->seq;
4486 struct trace_entry *entry;
4487 struct trace_event *event;
4491 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4492 trace_seq_printf(s, "%d %d %llu ",
4493 entry->pid, iter->cpu, iter->ts);
4495 if (trace_seq_has_overflowed(s))
4496 return TRACE_TYPE_PARTIAL_LINE;
4498 event = ftrace_find_event(entry->type);
4500 return event->funcs->raw(iter, 0, event);
4502 trace_seq_printf(s, "%d ?\n", entry->type);
4504 return trace_handle_return(s);
4507 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4509 struct trace_array *tr = iter->tr;
4510 struct trace_seq *s = &iter->seq;
4511 unsigned char newline = '\n';
4512 struct trace_entry *entry;
4513 struct trace_event *event;
4517 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4518 SEQ_PUT_HEX_FIELD(s, entry->pid);
4519 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4520 SEQ_PUT_HEX_FIELD(s, iter->ts);
4521 if (trace_seq_has_overflowed(s))
4522 return TRACE_TYPE_PARTIAL_LINE;
4525 event = ftrace_find_event(entry->type);
4527 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4528 if (ret != TRACE_TYPE_HANDLED)
4532 SEQ_PUT_FIELD(s, newline);
4534 return trace_handle_return(s);
4537 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4539 struct trace_array *tr = iter->tr;
4540 struct trace_seq *s = &iter->seq;
4541 struct trace_entry *entry;
4542 struct trace_event *event;
4546 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4547 SEQ_PUT_FIELD(s, entry->pid);
4548 SEQ_PUT_FIELD(s, iter->cpu);
4549 SEQ_PUT_FIELD(s, iter->ts);
4550 if (trace_seq_has_overflowed(s))
4551 return TRACE_TYPE_PARTIAL_LINE;
4554 event = ftrace_find_event(entry->type);
4555 return event ? event->funcs->binary(iter, 0, event) :
4559 int trace_empty(struct trace_iterator *iter)
4561 struct ring_buffer_iter *buf_iter;
4564 /* If we are looking at one CPU buffer, only check that one */
4565 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4566 cpu = iter->cpu_file;
4567 buf_iter = trace_buffer_iter(iter, cpu);
4569 if (!ring_buffer_iter_empty(buf_iter))
4572 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4578 for_each_tracing_cpu(cpu) {
4579 buf_iter = trace_buffer_iter(iter, cpu);
4581 if (!ring_buffer_iter_empty(buf_iter))
4584 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4592 /* Called with trace_event_read_lock() held. */
4593 enum print_line_t print_trace_line(struct trace_iterator *iter)
4595 struct trace_array *tr = iter->tr;
4596 unsigned long trace_flags = tr->trace_flags;
4597 enum print_line_t ret;
4599 if (iter->lost_events) {
4600 if (iter->lost_events == (unsigned long)-1)
4601 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4604 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4605 iter->cpu, iter->lost_events);
4606 if (trace_seq_has_overflowed(&iter->seq))
4607 return TRACE_TYPE_PARTIAL_LINE;
4610 if (iter->trace && iter->trace->print_line) {
4611 ret = iter->trace->print_line(iter);
4612 if (ret != TRACE_TYPE_UNHANDLED)
4616 if (iter->ent->type == TRACE_BPUTS &&
4617 trace_flags & TRACE_ITER_PRINTK &&
4618 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4619 return trace_print_bputs_msg_only(iter);
4621 if (iter->ent->type == TRACE_BPRINT &&
4622 trace_flags & TRACE_ITER_PRINTK &&
4623 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4624 return trace_print_bprintk_msg_only(iter);
4626 if (iter->ent->type == TRACE_PRINT &&
4627 trace_flags & TRACE_ITER_PRINTK &&
4628 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4629 return trace_print_printk_msg_only(iter);
4631 if (trace_flags & TRACE_ITER_BIN)
4632 return print_bin_fmt(iter);
4634 if (trace_flags & TRACE_ITER_HEX)
4635 return print_hex_fmt(iter);
4637 if (trace_flags & TRACE_ITER_RAW)
4638 return print_raw_fmt(iter);
4640 return print_trace_fmt(iter);
4643 void trace_latency_header(struct seq_file *m)
4645 struct trace_iterator *iter = m->private;
4646 struct trace_array *tr = iter->tr;
4648 /* print nothing if the buffers are empty */
4649 if (trace_empty(iter))
4652 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4653 print_trace_header(m, iter);
4655 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4656 print_lat_help_header(m);
4659 void trace_default_header(struct seq_file *m)
4661 struct trace_iterator *iter = m->private;
4662 struct trace_array *tr = iter->tr;
4663 unsigned long trace_flags = tr->trace_flags;
4665 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4668 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4669 /* print nothing if the buffers are empty */
4670 if (trace_empty(iter))
4672 print_trace_header(m, iter);
4673 if (!(trace_flags & TRACE_ITER_VERBOSE))
4674 print_lat_help_header(m);
4676 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4677 if (trace_flags & TRACE_ITER_IRQ_INFO)
4678 print_func_help_header_irq(iter->array_buffer,
4681 print_func_help_header(iter->array_buffer, m,
4687 static void test_ftrace_alive(struct seq_file *m)
4689 if (!ftrace_is_dead())
4691 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4692 "# MAY BE MISSING FUNCTION EVENTS\n");
4695 #ifdef CONFIG_TRACER_MAX_TRACE
4696 static void show_snapshot_main_help(struct seq_file *m)
4698 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4699 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4700 "# Takes a snapshot of the main buffer.\n"
4701 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4702 "# (Doesn't have to be '2' works with any number that\n"
4703 "# is not a '0' or '1')\n");
4706 static void show_snapshot_percpu_help(struct seq_file *m)
4708 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4709 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4710 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4711 "# Takes a snapshot of the main buffer for this cpu.\n");
4713 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4714 "# Must use main snapshot file to allocate.\n");
4716 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4717 "# (Doesn't have to be '2' works with any number that\n"
4718 "# is not a '0' or '1')\n");
4721 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4723 if (iter->tr->allocated_snapshot)
4724 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4726 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4728 seq_puts(m, "# Snapshot commands:\n");
4729 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4730 show_snapshot_main_help(m);
4732 show_snapshot_percpu_help(m);
4735 /* Should never be called */
4736 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4739 static int s_show(struct seq_file *m, void *v)
4741 struct trace_iterator *iter = v;
4744 if (iter->ent == NULL) {
4746 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4748 test_ftrace_alive(m);
4750 if (iter->snapshot && trace_empty(iter))
4751 print_snapshot_help(m, iter);
4752 else if (iter->trace && iter->trace->print_header)
4753 iter->trace->print_header(m);
4755 trace_default_header(m);
4757 } else if (iter->leftover) {
4759 * If we filled the seq_file buffer earlier, we
4760 * want to just show it now.
4762 ret = trace_print_seq(m, &iter->seq);
4764 /* ret should this time be zero, but you never know */
4765 iter->leftover = ret;
4768 print_trace_line(iter);
4769 ret = trace_print_seq(m, &iter->seq);
4771 * If we overflow the seq_file buffer, then it will
4772 * ask us for this data again at start up.
4774 * ret is 0 if seq_file write succeeded.
4777 iter->leftover = ret;
4784 * Should be used after trace_array_get(), trace_types_lock
4785 * ensures that i_cdev was already initialized.
4787 static inline int tracing_get_cpu(struct inode *inode)
4789 if (inode->i_cdev) /* See trace_create_cpu_file() */
4790 return (long)inode->i_cdev - 1;
4791 return RING_BUFFER_ALL_CPUS;
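/*
 * Note: trace_create_cpu_file() stores (cpu + 1) in i_cdev, so a NULL
 * i_cdev (any file that is not per-cpu) maps to RING_BUFFER_ALL_CPUS,
 * and the "- 1" above recovers the real cpu for per-cpu files.
 */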
4794 static const struct seq_operations tracer_seq_ops = {
4801 static struct trace_iterator *
4802 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4804 struct trace_array *tr = inode->i_private;
4805 struct trace_iterator *iter;
4808 if (tracing_disabled)
4809 return ERR_PTR(-ENODEV);
4811 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4813 return ERR_PTR(-ENOMEM);
4815 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4817 if (!iter->buffer_iter)
4821 * trace_find_next_entry() may need to save off iter->ent.
4822 * It will place it into the iter->temp buffer. As most
4823 * events are less than 128, allocate a buffer of that size.
4824 * If one is greater, then trace_find_next_entry() will
4825 * allocate a new buffer to adjust for the bigger iter->ent.
4826 * It's not critical if it fails to get allocated here.
4828 iter->temp = kmalloc(128, GFP_KERNEL);
4830 iter->temp_size = 128;
4833 * trace_event_printf() may need to modify given format
4834 * string to replace %p with %px so that it shows the real address
4835 * instead of a hash value. However, that is only for the event
4836 * tracing; other tracers may not need it. Defer the allocation
4837 * until it is needed.
4843 * We make a copy of the current tracer to avoid concurrent
4844 * changes on it while we are reading.
4846 mutex_lock(&trace_types_lock);
4847 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4851 *iter->trace = *tr->current_trace;
4853 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4858 #ifdef CONFIG_TRACER_MAX_TRACE
4859 /* Currently only the top directory has a snapshot */
4860 if (tr->current_trace->print_max || snapshot)
4861 iter->array_buffer = &tr->max_buffer;
4864 iter->array_buffer = &tr->array_buffer;
4865 iter->snapshot = snapshot;
4867 iter->cpu_file = tracing_get_cpu(inode);
4868 mutex_init(&iter->mutex);
4870 /* Notify the tracer early; before we stop tracing. */
4871 if (iter->trace->open)
4872 iter->trace->open(iter);
4874 /* Annotate start of buffers if we had overruns */
4875 if (ring_buffer_overruns(iter->array_buffer->buffer))
4876 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4878 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4879 if (trace_clocks[tr->clock_id].in_ns)
4880 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4883 * If pause-on-trace is enabled, then stop the trace while
4884 * dumping, unless this is the "snapshot" file
4886 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4887 tracing_stop_tr(tr);
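/*
 * Reading is set up in two passes: ring_buffer_read_prepare() creates an
 * iterator per cpu, a single ring_buffer_read_prepare_sync() then does the
 * needed synchronization once for all of them (rather than once per cpu),
 * and ring_buffer_read_start() begins the actual iteration.
 */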
4889 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4890 for_each_tracing_cpu(cpu) {
4891 iter->buffer_iter[cpu] =
4892 ring_buffer_read_prepare(iter->array_buffer->buffer,
4895 ring_buffer_read_prepare_sync();
4896 for_each_tracing_cpu(cpu) {
4897 ring_buffer_read_start(iter->buffer_iter[cpu]);
4898 tracing_iter_reset(iter, cpu);
4901 cpu = iter->cpu_file;
4902 iter->buffer_iter[cpu] =
4903 ring_buffer_read_prepare(iter->array_buffer->buffer,
4905 ring_buffer_read_prepare_sync();
4906 ring_buffer_read_start(iter->buffer_iter[cpu]);
4907 tracing_iter_reset(iter, cpu);
4910 mutex_unlock(&trace_types_lock);
4915 mutex_unlock(&trace_types_lock);
4918 kfree(iter->buffer_iter);
4920 seq_release_private(inode, file);
4921 return ERR_PTR(-ENOMEM);
4924 int tracing_open_generic(struct inode *inode, struct file *filp)
4928 ret = tracing_check_open_get_tr(NULL);
4932 filp->private_data = inode->i_private;
4936 bool tracing_is_disabled(void)
4938 return tracing_disabled ? true : false;
4942 * Open and update trace_array ref count.
4943 * Must have the current trace_array passed to it.
4945 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4947 struct trace_array *tr = inode->i_private;
4950 ret = tracing_check_open_get_tr(tr);
4954 filp->private_data = inode->i_private;
4959 static int tracing_mark_open(struct inode *inode, struct file *filp)
4961 stream_open(inode, filp);
4962 return tracing_open_generic_tr(inode, filp);
4965 static int tracing_release(struct inode *inode, struct file *file)
4967 struct trace_array *tr = inode->i_private;
4968 struct seq_file *m = file->private_data;
4969 struct trace_iterator *iter;
4972 if (!(file->f_mode & FMODE_READ)) {
4973 trace_array_put(tr);
4977 /* Writes do not use seq_file */
4979 mutex_lock(&trace_types_lock);
4981 for_each_tracing_cpu(cpu) {
4982 if (iter->buffer_iter[cpu])
4983 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4986 if (iter->trace && iter->trace->close)
4987 iter->trace->close(iter);
4989 if (!iter->snapshot && tr->stop_count)
4990 /* reenable tracing if it was previously enabled */
4991 tracing_start_tr(tr);
4993 __trace_array_put(tr);
4995 mutex_unlock(&trace_types_lock);
4997 mutex_destroy(&iter->mutex);
4998 free_cpumask_var(iter->started);
5002 kfree(iter->buffer_iter);
5003 seq_release_private(inode, file);
5008 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5010 struct trace_array *tr = inode->i_private;
5012 trace_array_put(tr);
5016 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5018 struct trace_array *tr = inode->i_private;
5020 trace_array_put(tr);
5022 return single_release(inode, file);
5025 static int tracing_open(struct inode *inode, struct file *file)
5027 struct trace_array *tr = inode->i_private;
5028 struct trace_iterator *iter;
5031 ret = tracing_check_open_get_tr(tr);
5035 /* If this file was open for write, then erase contents */
5036 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5037 int cpu = tracing_get_cpu(inode);
5038 struct array_buffer *trace_buf = &tr->array_buffer;
5040 #ifdef CONFIG_TRACER_MAX_TRACE
5041 if (tr->current_trace->print_max)
5042 trace_buf = &tr->max_buffer;
5045 if (cpu == RING_BUFFER_ALL_CPUS)
5046 tracing_reset_online_cpus(trace_buf);
5048 tracing_reset_cpu(trace_buf, cpu);
5051 if (file->f_mode & FMODE_READ) {
5052 iter = __tracing_open(inode, file, false);
5054 ret = PTR_ERR(iter);
5055 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5056 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5060 trace_array_put(tr);
5066 * Some tracers are not suitable for instance buffers.
5067 * A tracer is always available for the global array (toplevel)
5068 * or if it explicitly states that it is.
5071 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5073 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5076 /* Find the next tracer that this trace array may use */
5077 static struct tracer *
5078 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5080 while (t && !trace_ok_for_array(t, tr))
5087 t_next(struct seq_file *m, void *v, loff_t *pos)
5089 struct trace_array *tr = m->private;
5090 struct tracer *t = v;
5095 t = get_tracer_for_array(tr, t->next);
5100 static void *t_start(struct seq_file *m, loff_t *pos)
5102 struct trace_array *tr = m->private;
5106 mutex_lock(&trace_types_lock);
5108 t = get_tracer_for_array(tr, trace_types);
5109 for (; t && l < *pos; t = t_next(m, t, &l))
5115 static void t_stop(struct seq_file *m, void *p)
5117 mutex_unlock(&trace_types_lock);
5120 static int t_show(struct seq_file *m, void *v)
5122 struct tracer *t = v;
5127 seq_puts(m, t->name);
5136 static const struct seq_operations show_traces_seq_ops = {
5143 static int show_traces_open(struct inode *inode, struct file *file)
5145 struct trace_array *tr = inode->i_private;
5149 ret = tracing_check_open_get_tr(tr);
5153 ret = seq_open(file, &show_traces_seq_ops);
5155 trace_array_put(tr);
5159 m = file->private_data;
5165 static int show_traces_release(struct inode *inode, struct file *file)
5167 struct trace_array *tr = inode->i_private;
5169 trace_array_put(tr);
5170 return seq_release(inode, file);
5174 tracing_write_stub(struct file *filp, const char __user *ubuf,
5175 size_t count, loff_t *ppos)
5180 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5184 if (file->f_mode & FMODE_READ)
5185 ret = seq_lseek(file, offset, whence);
5187 file->f_pos = ret = 0;
5192 static const struct file_operations tracing_fops = {
5193 .open = tracing_open,
5195 .read_iter = seq_read_iter,
5196 .splice_read = generic_file_splice_read,
5197 .write = tracing_write_stub,
5198 .llseek = tracing_lseek,
5199 .release = tracing_release,
5202 static const struct file_operations show_traces_fops = {
5203 .open = show_traces_open,
5205 .llseek = seq_lseek,
5206 .release = show_traces_release,
5210 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5211 size_t count, loff_t *ppos)
5213 struct trace_array *tr = file_inode(filp)->i_private;
5217 len = snprintf(NULL, 0, "%*pb\n",
5218 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5219 mask_str = kmalloc(len, GFP_KERNEL);
5223 len = snprintf(mask_str, len, "%*pb\n",
5224 cpumask_pr_args(tr->tracing_cpumask));
5229 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5237 int tracing_set_cpumask(struct trace_array *tr,
5238 cpumask_var_t tracing_cpumask_new)
5245 local_irq_disable();
5246 arch_spin_lock(&tr->max_lock);
5247 for_each_tracing_cpu(cpu) {
5249 * Increase/decrease the disabled counter if we are
5250 * about to flip a bit in the cpumask:
5252 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5253 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5254 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5255 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5257 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5258 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5259 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5260 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5263 arch_spin_unlock(&tr->max_lock);
5266 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
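/*
 * Userspace drives this through the tracing_cpumask file: the write
 * handler below parses a hex cpumask (illustratively,
 * "echo 0f > tracing_cpumask" to limit tracing to CPUs 0-3) with
 * cpumask_parse_user() and hands it to tracing_set_cpumask().
 */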
5272 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5273 size_t count, loff_t *ppos)
5275 struct trace_array *tr = file_inode(filp)->i_private;
5276 cpumask_var_t tracing_cpumask_new;
5279 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5282 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5286 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5290 free_cpumask_var(tracing_cpumask_new);
5295 free_cpumask_var(tracing_cpumask_new);
5300 static const struct file_operations tracing_cpumask_fops = {
5301 .open = tracing_open_generic_tr,
5302 .read = tracing_cpumask_read,
5303 .write = tracing_cpumask_write,
5304 .release = tracing_release_generic_tr,
5305 .llseek = generic_file_llseek,
5308 static int tracing_trace_options_show(struct seq_file *m, void *v)
5310 struct tracer_opt *trace_opts;
5311 struct trace_array *tr = m->private;
5315 mutex_lock(&trace_types_lock);
5316 tracer_flags = tr->current_trace->flags->val;
5317 trace_opts = tr->current_trace->flags->opts;
5319 for (i = 0; trace_options[i]; i++) {
5320 if (tr->trace_flags & (1 << i))
5321 seq_printf(m, "%s\n", trace_options[i]);
5323 seq_printf(m, "no%s\n", trace_options[i]);
5326 for (i = 0; trace_opts[i].name; i++) {
5327 if (tracer_flags & trace_opts[i].bit)
5328 seq_printf(m, "%s\n", trace_opts[i].name);
5330 seq_printf(m, "no%s\n", trace_opts[i].name);
5332 mutex_unlock(&trace_types_lock);
5337 static int __set_tracer_option(struct trace_array *tr,
5338 struct tracer_flags *tracer_flags,
5339 struct tracer_opt *opts, int neg)
5341 struct tracer *trace = tracer_flags->trace;
5344 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5349 tracer_flags->val &= ~opts->bit;
5351 tracer_flags->val |= opts->bit;
5355 /* Try to assign a tracer specific option */
5356 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5358 struct tracer *trace = tr->current_trace;
5359 struct tracer_flags *tracer_flags = trace->flags;
5360 struct tracer_opt *opts = NULL;
5363 for (i = 0; tracer_flags->opts[i].name; i++) {
5364 opts = &tracer_flags->opts[i];
5366 if (strcmp(cmp, opts->name) == 0)
5367 return __set_tracer_option(tr, trace->flags, opts, neg);
5373 /* Some tracers require overwrite to stay enabled */
5374 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5376 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5382 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5386 if ((mask == TRACE_ITER_RECORD_TGID) ||
5387 (mask == TRACE_ITER_RECORD_CMD))
5388 lockdep_assert_held(&event_mutex);
5390 /* do nothing if flag is already set */
5391 if (!!(tr->trace_flags & mask) == !!enabled)
5394 /* Give the tracer a chance to approve the change */
5395 if (tr->current_trace->flag_changed)
5396 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5400 tr->trace_flags |= mask;
5402 tr->trace_flags &= ~mask;
5404 if (mask == TRACE_ITER_RECORD_CMD)
5405 trace_event_enable_cmd_record(enabled);
5407 if (mask == TRACE_ITER_RECORD_TGID) {
5409 tgid_map_max = pid_max;
5410 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5414 * Pairs with smp_load_acquire() in
5415 * trace_find_tgid_ptr() to ensure that if it observes
5416 * the tgid_map we just allocated then it also observes
5417 * the corresponding tgid_map_max value.
5419 smp_store_release(&tgid_map, map);
5422 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5426 trace_event_enable_tgid_record(enabled);
5429 if (mask == TRACE_ITER_EVENT_FORK)
5430 trace_event_follow_fork(tr, enabled);
5432 if (mask == TRACE_ITER_FUNC_FORK)
5433 ftrace_pid_follow_fork(tr, enabled);
5435 if (mask == TRACE_ITER_OVERWRITE) {
5436 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5437 #ifdef CONFIG_TRACER_MAX_TRACE
5438 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5442 if (mask == TRACE_ITER_PRINTK) {
5443 trace_printk_start_stop_comm(enabled);
5444 trace_printk_control(enabled);
5450 int trace_set_options(struct trace_array *tr, char *option)
5455 size_t orig_len = strlen(option);
5458 cmp = strstrip(option);
5460 len = str_has_prefix(cmp, "no");
5466 mutex_lock(&event_mutex);
5467 mutex_lock(&trace_types_lock);
5469 ret = match_string(trace_options, -1, cmp);
5470 /* If no option could be set, test the specific tracer options */
5472 ret = set_tracer_option(tr, cmp, neg);
5474 ret = set_tracer_flag(tr, 1 << ret, !neg);
5476 mutex_unlock(&trace_types_lock);
5477 mutex_unlock(&event_mutex);
5480 * If the first trailing whitespace is replaced with '\0' by strstrip,
5481 * turn it back into a space.
5483 if (orig_len > strlen(option))
5484 option[strlen(option)] = ' ';
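/*
 * Boot-time options arrive as a comma-separated list on the kernel
 * command line (illustratively, trace_options=sym-offset,noirq-info);
 * apply_trace_boot_options() below feeds each entry to
 * trace_set_options() one at a time.
 */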
5489 static void __init apply_trace_boot_options(void)
5491 char *buf = trace_boot_options_buf;
5495 option = strsep(&buf, ",");
5501 trace_set_options(&global_trace, option);
5503 /* Put back the comma to allow this to be called again */
5510 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5511 size_t cnt, loff_t *ppos)
5513 struct seq_file *m = filp->private_data;
5514 struct trace_array *tr = m->private;
5518 if (cnt >= sizeof(buf))
5521 if (copy_from_user(buf, ubuf, cnt))
5526 ret = trace_set_options(tr, buf);
5535 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5537 struct trace_array *tr = inode->i_private;
5540 ret = tracing_check_open_get_tr(tr);
5544 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5546 trace_array_put(tr);
5551 static const struct file_operations tracing_iter_fops = {
5552 .open = tracing_trace_options_open,
5554 .llseek = seq_lseek,
5555 .release = tracing_single_release_tr,
5556 .write = tracing_trace_options_write,
5559 static const char readme_msg[] =
5560 "tracing mini-HOWTO:\n\n"
5561 "# echo 0 > tracing_on : quick way to disable tracing\n"
5562 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5563 " Important files:\n"
5564 " trace\t\t\t- The static contents of the buffer\n"
5565 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5566 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5567 " current_tracer\t- function and latency tracers\n"
5568 " available_tracers\t- list of configured tracers for current_tracer\n"
5569 " error_log\t- error log for failed commands (that support it)\n"
5570 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5571 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5572 " trace_clock\t\t- change the clock used to order events\n"
5573 " local: Per cpu clock but may not be synced across CPUs\n"
5574 " global: Synced across CPUs but slows tracing down.\n"
5575 " counter: Not a clock, but just an increment\n"
5576 " uptime: Jiffy counter from time of boot\n"
5577 " perf: Same clock that perf events use\n"
5578 #ifdef CONFIG_X86_64
5579 " x86-tsc: TSC cycle counter\n"
5581 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5582 " delta: Delta difference against a buffer-wide timestamp\n"
5583 " absolute: Absolute (standalone) timestamp\n"
5584 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5585 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5586 " tracing_cpumask\t- Limit which CPUs to trace\n"
5587 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5588 "\t\t\t Remove sub-buffer with rmdir\n"
5589 " trace_options\t\t- Set format or modify how tracing happens\n"
5590 "\t\t\t Disable an option by prefixing 'no' to the\n"
5591 "\t\t\t option name\n"
5592 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5593 #ifdef CONFIG_DYNAMIC_FTRACE
5594 "\n available_filter_functions - list of functions that can be filtered on\n"
5595 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5596 "\t\t\t functions\n"
5597 "\t accepts: func_full_name or glob-matching-pattern\n"
5598 "\t modules: Can select a group via module\n"
5599 "\t Format: :mod:<module-name>\n"
5600 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5601 "\t triggers: a command to perform when function is hit\n"
5602 "\t Format: <function>:<trigger>[:count]\n"
5603 "\t trigger: traceon, traceoff\n"
5604 "\t\t enable_event:<system>:<event>\n"
5605 "\t\t disable_event:<system>:<event>\n"
5606 #ifdef CONFIG_STACKTRACE
5609 #ifdef CONFIG_TRACER_SNAPSHOT
5614 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5615 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5616 "\t The first one will disable tracing every time do_fault is hit\n"
5617 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5618 "\t The first time do trap is hit and it disables tracing, the\n"
5619 "\t counter will decrement to 2. If tracing is already disabled,\n"
5620 "\t the counter will not decrement. It only decrements when the\n"
5621 "\t trigger did work\n"
5622 "\t To remove trigger without count:\n"
5623 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5624 "\t To remove trigger with a count:\n"
5625 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5626 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5627 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5628 "\t modules: Can select a group via module command :mod:\n"
5629 "\t Does not accept triggers\n"
5630 #endif /* CONFIG_DYNAMIC_FTRACE */
5631 #ifdef CONFIG_FUNCTION_TRACER
5632 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5634 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5637 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5638 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5639 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5640 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5643 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5644 "\t\t\t snapshot buffer. Read the contents for more\n"
5645 "\t\t\t information\n"
5647 #ifdef CONFIG_STACK_TRACER
5648 " stack_trace\t\t- Shows the max stack trace when active\n"
5649 " stack_max_size\t- Shows current max stack size that was traced\n"
5650 "\t\t\t Write into this file to reset the max size (trigger a\n"
5651 "\t\t\t new trace)\n"
5652 #ifdef CONFIG_DYNAMIC_FTRACE
5653 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5656 #endif /* CONFIG_STACK_TRACER */
5657 #ifdef CONFIG_DYNAMIC_EVENTS
5658 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5659 "\t\t\t Write into this file to define/undefine new trace events.\n"
5661 #ifdef CONFIG_KPROBE_EVENTS
5662 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5663 "\t\t\t Write into this file to define/undefine new trace events.\n"
5665 #ifdef CONFIG_UPROBE_EVENTS
5666 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5667 "\t\t\t Write into this file to define/undefine new trace events.\n"
5669 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5670 "\t accepts: event-definitions (one definition per line)\n"
5671 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5672 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5673 #ifdef CONFIG_HIST_TRIGGERS
5674 "\t s:[synthetic/]<event> <field> [<field>]\n"
5676 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5677 "\t -:[<group>/][<event>]\n"
5678 #ifdef CONFIG_KPROBE_EVENTS
5679 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5680 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5682 #ifdef CONFIG_UPROBE_EVENTS
5683 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5685 "\t args: <name>=fetcharg[:type]\n"
5686 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5687 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5688 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5690 "\t $stack<index>, $stack, $retval, $comm,\n"
5692 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5693 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5694 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5695 "\t symstr, <type>\\[<array-size>\\]\n"
5696 #ifdef CONFIG_HIST_TRIGGERS
5697 "\t field: <stype> <name>;\n"
5698 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5699 "\t [unsigned] char/int/long\n"
5701 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5702 "\t of the <attached-group>/<attached-event>.\n"
5704 " events/\t\t- Directory containing all trace event subsystems:\n"
5705 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5706 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5707 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5709 " filter\t\t- If set, only events passing filter are traced\n"
5710 " events/<system>/<event>/\t- Directory containing control files for\n"
5712 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5713 " filter\t\t- If set, only events passing filter are traced\n"
5714 " trigger\t\t- If set, a command to perform when event is hit\n"
5715 "\t Format: <trigger>[:count][if <filter>]\n"
5716 "\t trigger: traceon, traceoff\n"
5717 "\t enable_event:<system>:<event>\n"
5718 "\t disable_event:<system>:<event>\n"
5719 #ifdef CONFIG_HIST_TRIGGERS
5720 "\t enable_hist:<system>:<event>\n"
5721 "\t disable_hist:<system>:<event>\n"
5723 #ifdef CONFIG_STACKTRACE
5726 #ifdef CONFIG_TRACER_SNAPSHOT
5729 #ifdef CONFIG_HIST_TRIGGERS
5730 "\t\t hist (see below)\n"
5732 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5733 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5734 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5735 "\t events/block/block_unplug/trigger\n"
5736 "\t The first disables tracing every time block_unplug is hit.\n"
5737 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5738 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5739 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5740 "\t Like function triggers, the counter is only decremented if it\n"
5741 "\t enabled or disabled tracing.\n"
5742 "\t To remove a trigger without a count:\n"
5743 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5744 "\t To remove a trigger with a count:\n"
5745 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5746 "\t Filters can be ignored when removing a trigger.\n"
5747 #ifdef CONFIG_HIST_TRIGGERS
5748 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5749 "\t Format: hist:keys=<field1[,field2,...]>\n"
5750 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5751 "\t [:values=<field1[,field2,...]>]\n"
5752 "\t [:sort=<field1[,field2,...]>]\n"
5753 "\t [:size=#entries]\n"
5754 "\t [:pause][:continue][:clear]\n"
5755 "\t [:name=histname1]\n"
5756 "\t [:nohitcount]\n"
5757 "\t [:<handler>.<action>]\n"
5758 "\t [if <filter>]\n\n"
5759 "\t Note, special fields can be used as well:\n"
5760 "\t common_timestamp - to record current timestamp\n"
5761 "\t common_cpu - to record the CPU the event happened on\n"
5763 "\t A hist trigger variable can be:\n"
5764 "\t - a reference to a field e.g. x=current_timestamp,\n"
5765 "\t - a reference to another variable e.g. y=$x,\n"
5766 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5767 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5769 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5770 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5771 "\t variable reference, field or numeric literal.\n"
5773 "\t When a matching event is hit, an entry is added to a hash\n"
5774 "\t table using the key(s) and value(s) named, and the value of a\n"
5775 "\t sum called 'hitcount' is incremented. Keys and values\n"
5776 "\t correspond to fields in the event's format description. Keys\n"
5777 "\t can be any field, or the special string 'common_stacktrace'.\n"
5778 "\t Compound keys consisting of up to two fields can be specified\n"
5779 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5780 "\t fields. Sort keys consisting of up to two fields can be\n"
5781 "\t specified using the 'sort' keyword. The sort direction can\n"
5782 "\t be modified by appending '.descending' or '.ascending' to a\n"
5783 "\t sort field. The 'size' parameter can be used to specify more\n"
5784 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5785 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5786 "\t its histogram data will be shared with other triggers of the\n"
5787 "\t same name, and trigger hits will update this common data.\n\n"
5788 "\t Reading the 'hist' file for the event will dump the hash\n"
5789 "\t table in its entirety to stdout. If there are multiple hist\n"
5790 "\t triggers attached to an event, there will be a table for each\n"
5791 "\t trigger in the output. The table displayed for a named\n"
5792 "\t trigger will be the same as any other instance having the\n"
5793 "\t same name. The default format used to display a given field\n"
5794 "\t can be modified by appending any of the following modifiers\n"
5795 "\t to the field name, as applicable:\n\n"
5796 "\t .hex display a number as a hex value\n"
5797 "\t .sym display an address as a symbol\n"
5798 "\t .sym-offset display an address as a symbol and offset\n"
5799 "\t .execname display a common_pid as a program name\n"
5800 "\t .syscall display a syscall id as a syscall name\n"
5801 "\t .log2 display log2 value rather than raw number\n"
5802 "\t .buckets=size display values in groups of size rather than raw number\n"
5803 "\t .usecs display a common_timestamp in microseconds\n"
5804 "\t .percent display a number of percentage value\n"
5805 "\t .graph display a bar-graph of a value\n\n"
5806 "\t The 'pause' parameter can be used to pause an existing hist\n"
5807 "\t trigger or to start a hist trigger but not log any events\n"
5808 "\t until told to do so. 'continue' can be used to start or\n"
5809 "\t restart a paused hist trigger.\n\n"
5810 "\t The 'clear' parameter will clear the contents of a running\n"
5811 "\t hist trigger and leave its current paused/active state\n"
5813 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5814 "\t raw hitcount in the histogram.\n\n"
5815 "\t The enable_hist and disable_hist triggers can be used to\n"
5816 "\t have one event conditionally start and stop another event's\n"
5817 "\t already-attached hist trigger. The syntax is analogous to\n"
5818 "\t the enable_event and disable_event triggers.\n\n"
5819 "\t Hist trigger handlers and actions are executed whenever a\n"
5820 "\t a histogram entry is added or updated. They take the form:\n\n"
5821 "\t <handler>.<action>\n\n"
5822 "\t The available handlers are:\n\n"
5823 "\t onmatch(matching.event) - invoke on addition or update\n"
5824 "\t onmax(var) - invoke if var exceeds current max\n"
5825 "\t onchange(var) - invoke action if var changes\n\n"
5826 "\t The available actions are:\n\n"
5827 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5828 "\t save(field,...) - save current event fields\n"
5829 #ifdef CONFIG_TRACER_SNAPSHOT
5830 "\t snapshot() - snapshot the trace buffer\n\n"
5832 #ifdef CONFIG_SYNTH_EVENTS
5833 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5834 "\t Write into this file to define/undefine new synthetic events.\n"
5835 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5841 tracing_readme_read(struct file *filp, char __user *ubuf,
5842 size_t cnt, loff_t *ppos)
5844 return simple_read_from_buffer(ubuf, cnt, ppos,
5845 readme_msg, strlen(readme_msg));
5848 static const struct file_operations tracing_readme_fops = {
5849 .open = tracing_open_generic,
5850 .read = tracing_readme_read,
5851 .llseek = generic_file_llseek,
5854 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5858 return trace_find_tgid_ptr(pid);
5861 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5865 return trace_find_tgid_ptr(pid);
5868 static void saved_tgids_stop(struct seq_file *m, void *v)
5872 static int saved_tgids_show(struct seq_file *m, void *v)
5874 int *entry = (int *)v;
5875 int pid = entry - tgid_map;
5881 seq_printf(m, "%d %d\n", pid, tgid);
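/* Each line of the saved_tgids file is a "<pid> <tgid>" pair. */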
5885 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5886 .start = saved_tgids_start,
5887 .stop = saved_tgids_stop,
5888 .next = saved_tgids_next,
5889 .show = saved_tgids_show,
5892 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5896 ret = tracing_check_open_get_tr(NULL);
5900 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5904 static const struct file_operations tracing_saved_tgids_fops = {
5905 .open = tracing_saved_tgids_open,
5907 .llseek = seq_lseek,
5908 .release = seq_release,
5911 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5913 unsigned int *ptr = v;
5915 if (*pos || m->count)
5920 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5922 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5931 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5937 arch_spin_lock(&trace_cmdline_lock);
5939 v = &savedcmd->map_cmdline_to_pid[0];
5941 v = saved_cmdlines_next(m, v, &l);
5949 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5951 arch_spin_unlock(&trace_cmdline_lock);
5955 static int saved_cmdlines_show(struct seq_file *m, void *v)
5957 char buf[TASK_COMM_LEN];
5958 unsigned int *pid = v;
5960 __trace_find_cmdline(*pid, buf);
5961 seq_printf(m, "%d %s\n", *pid, buf);
5965 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5966 .start = saved_cmdlines_start,
5967 .next = saved_cmdlines_next,
5968 .stop = saved_cmdlines_stop,
5969 .show = saved_cmdlines_show,
5972 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5976 ret = tracing_check_open_get_tr(NULL);
5980 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5983 static const struct file_operations tracing_saved_cmdlines_fops = {
5984 .open = tracing_saved_cmdlines_open,
5986 .llseek = seq_lseek,
5987 .release = seq_release,
5991 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5992 size_t cnt, loff_t *ppos)
5998 arch_spin_lock(&trace_cmdline_lock);
5999 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6000 arch_spin_unlock(&trace_cmdline_lock);
6003 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6006 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6008 kfree(s->saved_cmdlines);
6009 kfree(s->map_cmdline_to_pid);
6013 static int tracing_resize_saved_cmdlines(unsigned int val)
6015 struct saved_cmdlines_buffer *s, *savedcmd_temp;
6017 s = kmalloc(sizeof(*s), GFP_KERNEL);
6021 if (allocate_cmdlines_buffer(val, s) < 0) {
6027 arch_spin_lock(&trace_cmdline_lock);
6028 savedcmd_temp = savedcmd;
6030 arch_spin_unlock(&trace_cmdline_lock);
6032 free_saved_cmdlines_buffer(savedcmd_temp);
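/*
 * Resizing via the saved_cmdlines_size file (illustratively,
 * "echo 1024 > saved_cmdlines_size") swaps in the freshly allocated
 * buffer under trace_cmdline_lock and frees the old one afterwards.
 */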
6038 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6039 size_t cnt, loff_t *ppos)
6044 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6048 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6049 if (!val || val > PID_MAX_DEFAULT)
6052 ret = tracing_resize_saved_cmdlines((unsigned int)val);
6061 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6062 .open = tracing_open_generic,
6063 .read = tracing_saved_cmdlines_size_read,
6064 .write = tracing_saved_cmdlines_size_write,
6067 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6068 static union trace_eval_map_item *
6069 update_eval_map(union trace_eval_map_item *ptr)
6071 if (!ptr->map.eval_string) {
6072 if (ptr->tail.next) {
6073 ptr = ptr->tail.next;
6074 /* Set ptr to the next real item (skip head) */
6082 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6084 union trace_eval_map_item *ptr = v;
6087 * Paranoid! If ptr points to end, we don't want to increment past it.
6088 * This really should never happen.
6091 ptr = update_eval_map(ptr);
6092 if (WARN_ON_ONCE(!ptr))
6096 ptr = update_eval_map(ptr);
6101 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6103 union trace_eval_map_item *v;
6106 mutex_lock(&trace_eval_mutex);
6108 v = trace_eval_maps;
6112 while (v && l < *pos) {
6113 v = eval_map_next(m, v, &l);
6119 static void eval_map_stop(struct seq_file *m, void *v)
6121 mutex_unlock(&trace_eval_mutex);
6124 static int eval_map_show(struct seq_file *m, void *v)
6126 union trace_eval_map_item *ptr = v;
6128 seq_printf(m, "%s %ld (%s)\n",
6129 ptr->map.eval_string, ptr->map.eval_value,
6135 static const struct seq_operations tracing_eval_map_seq_ops = {
6136 .start = eval_map_start,
6137 .next = eval_map_next,
6138 .stop = eval_map_stop,
6139 .show = eval_map_show,
6142 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6146 ret = tracing_check_open_get_tr(NULL);
6150 return seq_open(filp, &tracing_eval_map_seq_ops);
6153 static const struct file_operations tracing_eval_map_fops = {
6154 .open = tracing_eval_map_open,
6156 .llseek = seq_lseek,
6157 .release = seq_release,
6160 static inline union trace_eval_map_item *
6161 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6163 /* Return tail of array given the head */
6164 return ptr + ptr->head.length + 1;
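/*
 * Illustrative layout of one eval map array holding N maps:
 *   [0]      head  (module pointer + length N)
 *   [1..N]   the maps themselves
 *   [N+1]    tail  (pointer to the next module's array)
 * which is why the tail lives at ptr + head.length + 1.
 */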
6168 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6171 struct trace_eval_map **stop;
6172 struct trace_eval_map **map;
6173 union trace_eval_map_item *map_array;
6174 union trace_eval_map_item *ptr;
6179 * The trace_eval_maps contains the map plus a head and tail item,
6180 * where the head holds the module and length of array, and the
6181 * tail holds a pointer to the next list.
6183 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6185 pr_warn("Unable to allocate trace eval mapping\n");
6189 mutex_lock(&trace_eval_mutex);
6191 if (!trace_eval_maps)
6192 trace_eval_maps = map_array;
6194 ptr = trace_eval_maps;
6196 ptr = trace_eval_jmp_to_tail(ptr);
6197 if (!ptr->tail.next)
6199 ptr = ptr->tail.next;
6202 ptr->tail.next = map_array;
6204 map_array->head.mod = mod;
6205 map_array->head.length = len;
6208 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6209 map_array->map = **map;
6212 memset(map_array, 0, sizeof(*map_array));
6214 mutex_unlock(&trace_eval_mutex);
6217 static void trace_create_eval_file(struct dentry *d_tracer)
6219 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6220 NULL, &tracing_eval_map_fops);
6223 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6224 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6225 static inline void trace_insert_eval_map_file(struct module *mod,
6226 struct trace_eval_map **start, int len) { }
6227 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6229 static void trace_insert_eval_map(struct module *mod,
6230 struct trace_eval_map **start, int len)
6232 struct trace_eval_map **map;
6239 trace_event_eval_update(map, len);
6241 trace_insert_eval_map_file(mod, start, len);
6245 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6246 size_t cnt, loff_t *ppos)
6248 struct trace_array *tr = filp->private_data;
6249 char buf[MAX_TRACER_SIZE+2];
6252 mutex_lock(&trace_types_lock);
6253 r = sprintf(buf, "%s\n", tr->current_trace->name);
6254 mutex_unlock(&trace_types_lock);
6256 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6259 int tracer_init(struct tracer *t, struct trace_array *tr)
6261 tracing_reset_online_cpus(&tr->array_buffer);
6265 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6269 for_each_tracing_cpu(cpu)
6270 per_cpu_ptr(buf->data, cpu)->entries = val;
6273 #ifdef CONFIG_TRACER_MAX_TRACE
6274 /* resize @trace_buf to the size of @size_buf's entries */
6275 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6276 struct array_buffer *size_buf, int cpu_id)
6280 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6281 for_each_tracing_cpu(cpu) {
6282 ret = ring_buffer_resize(trace_buf->buffer,
6283 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6286 per_cpu_ptr(trace_buf->data, cpu)->entries =
6287 per_cpu_ptr(size_buf->data, cpu)->entries;
6290 ret = ring_buffer_resize(trace_buf->buffer,
6291 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6293 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6294 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6299 #endif /* CONFIG_TRACER_MAX_TRACE */
6301 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6302 unsigned long size, int cpu)
6307 * If kernel or user changes the size of the ring buffer
6308 * we use the size that was given, and we can forget about
6309 * expanding it later.
6311 ring_buffer_expanded = true;
6313 /* May be called before buffers are initialized */
6314 if (!tr->array_buffer.buffer)
6317 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6321 #ifdef CONFIG_TRACER_MAX_TRACE
6322 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6323 !tr->current_trace->use_max_tr)
6326 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6328 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6329 &tr->array_buffer, cpu);
6332 * AARGH! We are left with different
6333 * size max buffer!!!!
6334 * The max buffer is our "snapshot" buffer.
6335 * When a tracer needs a snapshot (one of the
6336 * latency tracers), it swaps the max buffer
6337 * with the saved snapshot. We succeeded in updating
6338 * the size of the main buffer, but failed to
6339 * update the size of the max buffer. But when we tried
6340 * to reset the main buffer to the original size, we
6341 * failed there too. This is very unlikely to
6342 * happen, but if it does, warn and kill all
6346 tracing_disabled = 1;
6351 if (cpu == RING_BUFFER_ALL_CPUS)
6352 set_buffer_entries(&tr->max_buffer, size);
6354 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6357 #endif /* CONFIG_TRACER_MAX_TRACE */
6359 if (cpu == RING_BUFFER_ALL_CPUS)
6360 set_buffer_entries(&tr->array_buffer, size);
6362 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6367 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6368 unsigned long size, int cpu_id)
6372 mutex_lock(&trace_types_lock);
6374 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6375 /* make sure this cpu is enabled in the mask */
6376 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6382 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6387 mutex_unlock(&trace_types_lock);
6394 * tracing_update_buffers - used by tracing facility to expand ring buffers
6396 * To save memory when tracing is never used on a system that has it
6397 * configured in, the ring buffers are set to a minimum size. But once
6398 * a user starts to use the tracing facility, they need to grow
6399 * to their default size.
6401 * This function is to be called when a tracer is about to be used.
6403 int tracing_update_buffers(void)
6407 mutex_lock(&trace_types_lock);
6408 if (!ring_buffer_expanded)
6409 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6410 RING_BUFFER_ALL_CPUS);
6411 mutex_unlock(&trace_types_lock);
6416 struct trace_option_dentry;
6419 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6422 * Used to clear out the tracer before deletion of an instance.
6423 * Must have trace_types_lock held.
6425 static void tracing_set_nop(struct trace_array *tr)
6427 if (tr->current_trace == &nop_trace)
6430 tr->current_trace->enabled--;
6432 if (tr->current_trace->reset)
6433 tr->current_trace->reset(tr);
6435 tr->current_trace = &nop_trace;
6438 static bool tracer_options_updated;
6440 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6442 /* Only enable if the directory has been created already. */
6446 /* Only create trace option files after update_tracer_options finish */
6447 if (!tracer_options_updated)
6450 create_trace_option_files(tr, t);
6453 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6456 #ifdef CONFIG_TRACER_MAX_TRACE
6461 mutex_lock(&trace_types_lock);
6463 if (!ring_buffer_expanded) {
6464 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6465 RING_BUFFER_ALL_CPUS);
6471 for (t = trace_types; t; t = t->next) {
6472 if (strcmp(t->name, buf) == 0)
6479 if (t == tr->current_trace)
6482 #ifdef CONFIG_TRACER_SNAPSHOT
6483 if (t->use_max_tr) {
6484 local_irq_disable();
6485 arch_spin_lock(&tr->max_lock);
6486 if (tr->cond_snapshot)
6488 arch_spin_unlock(&tr->max_lock);
6494 /* Some tracers won't work on kernel command line */
6495 if (system_state < SYSTEM_RUNNING && t->noboot) {
6496 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6501 /* Some tracers are only allowed for the top level buffer */
6502 if (!trace_ok_for_array(t, tr)) {
6507 /* If trace pipe files are being read, we can't change the tracer */
6508 if (tr->trace_ref) {
6513 trace_branch_disable();
6515 tr->current_trace->enabled--;
6517 if (tr->current_trace->reset)
6518 tr->current_trace->reset(tr);
6520 #ifdef CONFIG_TRACER_MAX_TRACE
6521 had_max_tr = tr->current_trace->use_max_tr;
6523 /* Current trace needs to be nop_trace before synchronize_rcu */
6524 tr->current_trace = &nop_trace;
6526 if (had_max_tr && !t->use_max_tr) {
6528 * We need to make sure that the update_max_tr sees that
6529 * current_trace changed to nop_trace to keep it from
6530 * swapping the buffers after we resize it.
6531 * The update_max_tr is called with interrupts disabled,
6532 * so a synchronized_sched() is sufficient.
6538 if (t->use_max_tr && !tr->allocated_snapshot) {
6539 ret = tracing_alloc_snapshot_instance(tr);
6544 tr->current_trace = &nop_trace;
6548 ret = tracer_init(t, tr);
6553 tr->current_trace = t;
6554 tr->current_trace->enabled++;
6555 trace_branch_enable(tr);
6557 mutex_unlock(&trace_types_lock);
6563 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6564 size_t cnt, loff_t *ppos)
6566 struct trace_array *tr = filp->private_data;
6567 char buf[MAX_TRACER_SIZE+1];
6574 if (cnt > MAX_TRACER_SIZE)
6575 cnt = MAX_TRACER_SIZE;
6577 if (copy_from_user(buf, ubuf, cnt))
6584 err = tracing_set_tracer(tr, name);
6594 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6595 size_t cnt, loff_t *ppos)
6600 r = snprintf(buf, sizeof(buf), "%ld\n",
6601 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6602 if (r > sizeof(buf))
6604 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
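/*
 * The value is kept in nanoseconds internally but shown in microseconds
 * here; (unsigned long)-1 is passed through as -1. The write path below
 * does the reverse conversion.
 */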
6608 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6609 size_t cnt, loff_t *ppos)
6614 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6624 tracing_thresh_read(struct file *filp, char __user *ubuf,
6625 size_t cnt, loff_t *ppos)
6627 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6631 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6632 size_t cnt, loff_t *ppos)
6634 struct trace_array *tr = filp->private_data;
6637 mutex_lock(&trace_types_lock);
6638 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6642 if (tr->current_trace->update_thresh) {
6643 ret = tr->current_trace->update_thresh(tr);
6650 mutex_unlock(&trace_types_lock);
6655 #ifdef CONFIG_TRACER_MAX_TRACE
6658 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6659 size_t cnt, loff_t *ppos)
6661 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6665 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6666 size_t cnt, loff_t *ppos)
6668 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6673 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6675 struct trace_array *tr = inode->i_private;
6676 struct trace_iterator *iter;
6679 ret = tracing_check_open_get_tr(tr);
6683 mutex_lock(&trace_types_lock);
6685 /* create a buffer to store the information to pass to userspace */
6686 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6689 __trace_array_put(tr);
6693 trace_seq_init(&iter->seq);
6694 iter->trace = tr->current_trace;
6696 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6701 /* trace pipe does not show start of buffer */
6702 cpumask_setall(iter->started);
6704 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6705 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6707 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6708 if (trace_clocks[tr->clock_id].in_ns)
6709 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6712 iter->array_buffer = &tr->array_buffer;
6713 iter->cpu_file = tracing_get_cpu(inode);
6714 mutex_init(&iter->mutex);
6715 filp->private_data = iter;
6717 if (iter->trace->pipe_open)
6718 iter->trace->pipe_open(iter);
6720 nonseekable_open(inode, filp);
6724 mutex_unlock(&trace_types_lock);
6729 __trace_array_put(tr);
6730 mutex_unlock(&trace_types_lock);
6734 static int tracing_release_pipe(struct inode *inode, struct file *file)
6736 struct trace_iterator *iter = file->private_data;
6737 struct trace_array *tr = inode->i_private;
6739 mutex_lock(&trace_types_lock);
6743 if (iter->trace->pipe_close)
6744 iter->trace->pipe_close(iter);
6746 mutex_unlock(&trace_types_lock);
6748 free_cpumask_var(iter->started);
6750 mutex_destroy(&iter->mutex);
6753 trace_array_put(tr);
6759 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6761 struct trace_array *tr = iter->tr;
6763 /* Iterators are static, they should be filled or empty */
6764 if (trace_buffer_iter(iter, iter->cpu_file))
6765 return EPOLLIN | EPOLLRDNORM;
6767 if (tr->trace_flags & TRACE_ITER_BLOCK)
6769 * Always select as readable when in blocking mode
6771 return EPOLLIN | EPOLLRDNORM;
6773 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6774 filp, poll_table, iter->tr->buffer_percent);
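/*
 * Note: tr->buffer_percent controls how full the ring buffer must be
 * before poll/epoll waiters are woken (0 is expected to mean "any data").
 */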
6778 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6780 struct trace_iterator *iter = filp->private_data;
6782 return trace_poll(iter, filp, poll_table);
6785 /* Must be called with iter->mutex held. */
6786 static int tracing_wait_pipe(struct file *filp)
6788 struct trace_iterator *iter = filp->private_data;
6791 while (trace_empty(iter)) {
6793 if ((filp->f_flags & O_NONBLOCK)) {
6798 * We block until we read something and tracing is disabled.
6799 * We still block if tracing is disabled, but we have never
6800 * read anything. This allows a user to cat this file, and
6801 * then enable tracing. But after we have read something,
6802 * we give an EOF when tracing is again disabled.
6804 * iter->pos will be 0 if we haven't read anything.
6806 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6809 mutex_unlock(&iter->mutex);
6811 ret = wait_on_pipe(iter, 0);
6813 mutex_lock(&iter->mutex);
6826 tracing_read_pipe(struct file *filp, char __user *ubuf,
6827 size_t cnt, loff_t *ppos)
6829 struct trace_iterator *iter = filp->private_data;
6833 * Avoid more than one consumer on a single file descriptor
6834 * This is just a matter of traces coherency, the ring buffer itself
6837 mutex_lock(&iter->mutex);
6839 /* return any leftover data */
6840 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6844 trace_seq_init(&iter->seq);
6846 if (iter->trace->read) {
6847 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6853 sret = tracing_wait_pipe(filp);
6857 /* stop when tracing is finished */
6858 if (trace_empty(iter)) {
6863 if (cnt >= PAGE_SIZE)
6864 cnt = PAGE_SIZE - 1;
6866 /* reset all but tr, trace, and overruns */
6867 trace_iterator_reset(iter);
6868 cpumask_clear(iter->started);
6869 trace_seq_init(&iter->seq);
6871 trace_event_read_lock();
6872 trace_access_lock(iter->cpu_file);
6873 while (trace_find_next_entry_inc(iter) != NULL) {
6874 enum print_line_t ret;
6875 int save_len = iter->seq.seq.len;
6877 ret = print_trace_line(iter);
6878 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6880 * If one print_trace_line() fills entire trace_seq in one shot,
6881 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6882 * In this case, we need to consume it; otherwise, the loop will peek
6883 * this event next time, resulting in an infinite loop.
6885 if (save_len == 0) {
6887 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6888 trace_consume(iter);
6892 /* In other cases, don't print partial lines */
6893 iter->seq.seq.len = save_len;
6896 if (ret != TRACE_TYPE_NO_CONSUME)
6897 trace_consume(iter);
6899 if (trace_seq_used(&iter->seq) >= cnt)
6903 * Setting the full flag means we reached the trace_seq buffer
6904 * size and we should have left via the partial output condition above.
6905 * One of the trace_seq_* functions is not used properly.
6907 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6910 trace_access_unlock(iter->cpu_file);
6911 trace_event_read_unlock();
6913 /* Now copy what we have to the user */
6914 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6915 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6916 trace_seq_init(&iter->seq);
6919 * If there was nothing to send to user, in spite of consuming trace
6920 * entries, go back to wait for more entries.
6926 mutex_unlock(&iter->mutex);
6931 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6934 __free_page(spd->pages[idx]);
6938 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6944 /* Seq buffer is page-sized, exactly what we need. */
6946 save_len = iter->seq.seq.len;
6947 ret = print_trace_line(iter);
6949 if (trace_seq_has_overflowed(&iter->seq)) {
6950 iter->seq.seq.len = save_len;
6955 * This should not be hit, because it should only
6956 * be set if the iter->seq overflowed. But check it
6957 * anyway to be safe.
6959 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6960 iter->seq.seq.len = save_len;
6964 count = trace_seq_used(&iter->seq) - save_len;
6967 iter->seq.seq.len = save_len;
6971 if (ret != TRACE_TYPE_NO_CONSUME)
6972 trace_consume(iter);
6974 if (!trace_find_next_entry_inc(iter)) {
6984 static ssize_t tracing_splice_read_pipe(struct file *filp,
6986 struct pipe_inode_info *pipe,
6990 struct page *pages_def[PIPE_DEF_BUFFERS];
6991 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6992 struct trace_iterator *iter = filp->private_data;
6993 struct splice_pipe_desc spd = {
6995 .partial = partial_def,
6996 .nr_pages = 0, /* This gets updated below. */
6997 .nr_pages_max = PIPE_DEF_BUFFERS,
6998 .ops = &default_pipe_buf_ops,
6999 .spd_release = tracing_spd_release_pipe,
7005 if (splice_grow_spd(pipe, &spd))
7008 mutex_lock(&iter->mutex);
7010 if (iter->trace->splice_read) {
7011 ret = iter->trace->splice_read(iter, filp,
7012 ppos, pipe, len, flags);
7017 ret = tracing_wait_pipe(filp);
7021 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7026 trace_event_read_lock();
7027 trace_access_lock(iter->cpu_file);
7029 /* Fill as many pages as possible. */
7030 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7031 spd.pages[i] = alloc_page(GFP_KERNEL);
7035 rem = tracing_fill_pipe_page(rem, iter);
7037 /* Copy the data into the page, so we can start over. */
7038 ret = trace_seq_to_buffer(&iter->seq,
7039 page_address(spd.pages[i]),
7040 trace_seq_used(&iter->seq));
7042 __free_page(spd.pages[i]);
7045 spd.partial[i].offset = 0;
7046 spd.partial[i].len = trace_seq_used(&iter->seq);
7048 trace_seq_init(&iter->seq);
7051 trace_access_unlock(iter->cpu_file);
7052 trace_event_read_unlock();
7053 mutex_unlock(&iter->mutex);
7058 ret = splice_to_pipe(pipe, &spd);
7062 splice_shrink_spd(&spd);
7066 mutex_unlock(&iter->mutex);
7071 tracing_entries_read(struct file *filp, char __user *ubuf,
7072 size_t cnt, loff_t *ppos)
7074 struct inode *inode = file_inode(filp);
7075 struct trace_array *tr = inode->i_private;
7076 int cpu = tracing_get_cpu(inode);
7081 mutex_lock(&trace_types_lock);
7083 if (cpu == RING_BUFFER_ALL_CPUS) {
7084 int cpu, buf_size_same;
7089 /* check if all cpu sizes are same */
7090 for_each_tracing_cpu(cpu) {
7091 /* fill in the size from first enabled cpu */
7093 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7094 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7100 if (buf_size_same) {
7101 if (!ring_buffer_expanded)
7102 r = sprintf(buf, "%lu (expanded: %lu)\n",
7104 trace_buf_size >> 10);
7106 r = sprintf(buf, "%lu\n", size >> 10);
7108 r = sprintf(buf, "X\n");
7110 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7112 mutex_unlock(&trace_types_lock);
7114 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7119 tracing_entries_write(struct file *filp, const char __user *ubuf,
7120 size_t cnt, loff_t *ppos)
7122 struct inode *inode = file_inode(filp);
7123 struct trace_array *tr = inode->i_private;
7127 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7131 /* must have at least 1 entry */
7135 /* value is in KB */
7137 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7147 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7148 size_t cnt, loff_t *ppos)
7150 struct trace_array *tr = filp->private_data;
7153 unsigned long size = 0, expanded_size = 0;
7155 mutex_lock(&trace_types_lock);
7156 for_each_tracing_cpu(cpu) {
7157 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7158 if (!ring_buffer_expanded)
7159 expanded_size += trace_buf_size >> 10;
7161 if (ring_buffer_expanded)
7162 r = sprintf(buf, "%lu\n", size);
7164 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7165 mutex_unlock(&trace_types_lock);
7167 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7171 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7172 size_t cnt, loff_t *ppos)
7175 * There is no need to read what the user has written; this function
7176 * is just to make sure that there is no error when "echo" is used
7185 tracing_free_buffer_release(struct inode *inode, struct file *filp)
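/*
 * Writing to free_buffer is a no-op (see the stub above); the work happens
 * on close: tracing may be turned off (when TRACE_ITER_STOP_ON_FREE is set)
 * and the ring buffer is shrunk to zero. A typical (illustrative) use is to
 * hold the file open while tracing and let the close free the buffer.
 */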
7187 struct trace_array *tr = inode->i_private;
7189 /* disable tracing ? */
7190 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7191 tracer_tracing_off(tr);
7192 /* resize the ring buffer to 0 */
7193 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7195 trace_array_put(tr);
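/*
 * trace_marker: strings written here (illustratively,
 * "echo hello > trace_marker") are injected into the ring buffer as
 * TRACE_PRINT events by tracing_mark_write() below.
 */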
7201 tracing_mark_write(struct file *filp, const char __user *ubuf,
7202 size_t cnt, loff_t *fpos)
7204 struct trace_array *tr = filp->private_data;
7205 struct ring_buffer_event *event;
7206 enum event_trigger_type tt = ETT_NONE;
7207 struct trace_buffer *buffer;
7208 struct print_entry *entry;
7213 /* Used in tracing_mark_raw_write() as well */
7214 #define FAULTED_STR "<faulted>"
7215 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7217 if (tracing_disabled)
7220 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7223 if (cnt > TRACE_BUF_SIZE)
7224 cnt = TRACE_BUF_SIZE;
7226 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7228 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7230 /* If less than "<faulted>", then make sure we can still add that */
7231 if (cnt < FAULTED_SIZE)
7232 size += FAULTED_SIZE - cnt;
7234 buffer = tr->array_buffer.buffer;
7235 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7237 if (unlikely(!event))
7238 /* Ring buffer disabled, return as if not open for write */
7241 entry = ring_buffer_event_data(event);
7242 entry->ip = _THIS_IP_;
7244 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7246 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7252 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7253 /* do not add \n before testing triggers, but add \0 */
7254 entry->buf[cnt] = '\0';
7255 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7258 if (entry->buf[cnt - 1] != '\n') {
7259 entry->buf[cnt] = '\n';
7260 entry->buf[cnt + 1] = '\0';
7262 entry->buf[cnt] = '\0';
7264 if (static_branch_unlikely(&trace_marker_exports_enabled))
7265 ftrace_exports(event, TRACE_EXPORT_MARKER);
7266 __buffer_unlock_commit(buffer, event);
7269 event_triggers_post_call(tr->trace_marker_file, tt);
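/*
 * Illustrative user-space sketch (not part of this file): anything written to
 * trace_marker is recorded as a TRACE_PRINT event (up to TRACE_BUF_SIZE
 * bytes), which is handy for correlating application activity with kernel
 * events. The path assumes tracefs is mounted at /sys/kernel/tracing.
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	if (fd >= 0) {
 *		const char msg[] = "frame 42 rendered\n";
 *
 *		write(fd, msg, sizeof(msg) - 1);
 *		close(fd);
 *	}
 */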
7274 /* Limit it for now to 3K (including tag) */
7275 #define RAW_DATA_MAX_SIZE (1024*3)
7278 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7279 size_t cnt, loff_t *fpos)
7281 struct trace_array *tr = filp->private_data;
7282 struct ring_buffer_event *event;
7283 struct trace_buffer *buffer;
7284 struct raw_data_entry *entry;
7289 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7291 if (tracing_disabled)
7294 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7297 /* The marker must at least have a tag id */
7298 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7301 if (cnt > TRACE_BUF_SIZE)
7302 cnt = TRACE_BUF_SIZE;
7304 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7306 size = sizeof(*entry) + cnt;
7307 if (cnt < FAULT_SIZE_ID)
7308 size += FAULT_SIZE_ID - cnt;
7310 buffer = tr->array_buffer.buffer;
7311 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7314 /* Ring buffer disabled, return as if not open for write */
7317 entry = ring_buffer_event_data(event);
7319 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7322 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7327 __buffer_unlock_commit(buffer, event);
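/*
 * Illustrative user-space sketch (not part of this file): trace_marker_raw
 * expects a binary blob whose first sizeof(unsigned int) bytes are a tag id
 * (see the size check above); the rest is opaque payload for post-processing
 * tools.
 *
 *	struct {
 *		unsigned int id;
 *		char payload[8];
 *	} rec = { .id = 0x1234, .payload = "rawdata" };
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, &rec, sizeof(rec));
 *		close(fd);
 *	}
 */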
7332 static int tracing_clock_show(struct seq_file *m, void *v)
7334 struct trace_array *tr = m->private;
7337 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7339 "%s%s%s%s", i ? " " : "",
7340 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7341 i == tr->clock_id ? "]" : "");
7347 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7351 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7352 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7355 if (i == ARRAY_SIZE(trace_clocks))
7358 mutex_lock(&trace_types_lock);
7362 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7365 * The new clock may not be consistent with the previous clock.
7366 * Reset the buffer so that it doesn't have incomparable timestamps.
7368 tracing_reset_online_cpus(&tr->array_buffer);
7370 #ifdef CONFIG_TRACER_MAX_TRACE
7371 if (tr->max_buffer.buffer)
7372 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7373 tracing_reset_online_cpus(&tr->max_buffer);
7376 mutex_unlock(&trace_types_lock);
7381 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7382 size_t cnt, loff_t *fpos)
7384 struct seq_file *m = filp->private_data;
7385 struct trace_array *tr = m->private;
7387 const char *clockstr;
7390 if (cnt >= sizeof(buf))
7393 if (copy_from_user(buf, ubuf, cnt))
7398 clockstr = strstrip(buf);
7400 ret = tracing_set_clock(tr, clockstr);
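/*
 * Illustrative user-space sketch (not part of this file): reading trace_clock
 * lists the available clocks with the current one in brackets; writing a
 * clock name switches to it (and, as noted above, resets the buffers).
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "mono", 4);	// e.g. switch to the monotonic clock
 *		close(fd);
 *	}
 */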
7409 static int tracing_clock_open(struct inode *inode, struct file *file)
7411 struct trace_array *tr = inode->i_private;
7414 ret = tracing_check_open_get_tr(tr);
7418 ret = single_open(file, tracing_clock_show, inode->i_private);
7420 trace_array_put(tr);
7425 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7427 struct trace_array *tr = m->private;
7429 mutex_lock(&trace_types_lock);
7431 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7432 seq_puts(m, "delta [absolute]\n");
7434 seq_puts(m, "[delta] absolute\n");
7436 mutex_unlock(&trace_types_lock);
7441 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7443 struct trace_array *tr = inode->i_private;
7446 ret = tracing_check_open_get_tr(tr);
7450 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7452 trace_array_put(tr);
7457 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7459 if (rbe == this_cpu_read(trace_buffered_event))
7460 return ring_buffer_time_stamp(buffer);
7462 return ring_buffer_event_time_stamp(buffer, rbe);
7466 * Set or disable using the per CPU trace_buffered_event when possible.
7468 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7472 mutex_lock(&trace_types_lock);
7474 if (set && tr->no_filter_buffering_ref++)
7478 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7483 --tr->no_filter_buffering_ref;
7486 mutex_unlock(&trace_types_lock);
7491 struct ftrace_buffer_info {
7492 struct trace_iterator iter;
7494 unsigned int spare_cpu;
7498 #ifdef CONFIG_TRACER_SNAPSHOT
7499 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7501 struct trace_array *tr = inode->i_private;
7502 struct trace_iterator *iter;
7506 ret = tracing_check_open_get_tr(tr);
7510 if (file->f_mode & FMODE_READ) {
7511 iter = __tracing_open(inode, file, true);
7513 ret = PTR_ERR(iter);
7515 /* Writes still need the seq_file to hold the private data */
7517 m = kzalloc(sizeof(*m), GFP_KERNEL);
7520 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7528 iter->array_buffer = &tr->max_buffer;
7529 iter->cpu_file = tracing_get_cpu(inode);
7531 file->private_data = m;
7535 trace_array_put(tr);
7541 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7544 struct seq_file *m = filp->private_data;
7545 struct trace_iterator *iter = m->private;
7546 struct trace_array *tr = iter->tr;
7550 ret = tracing_update_buffers();
7554 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7558 mutex_lock(&trace_types_lock);
7560 if (tr->current_trace->use_max_tr) {
7565 local_irq_disable();
7566 arch_spin_lock(&tr->max_lock);
7567 if (tr->cond_snapshot)
7569 arch_spin_unlock(&tr->max_lock);
7576 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7580 if (tr->allocated_snapshot)
7584 /* Only allow per-cpu swap if the ring buffer supports it */
7585 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7586 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7591 if (tr->allocated_snapshot)
7592 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7593 &tr->array_buffer, iter->cpu_file);
7595 ret = tracing_alloc_snapshot_instance(tr);
7598 local_irq_disable();
7599 /* Now, we're going to swap */
7600 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7601 update_max_tr(tr, current, smp_processor_id(), NULL);
7603 update_max_tr_single(tr, current, iter->cpu_file);
7607 if (tr->allocated_snapshot) {
7608 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7609 tracing_reset_online_cpus(&tr->max_buffer);
7611 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7621 mutex_unlock(&trace_types_lock);
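/*
 * Illustrative user-space sketch (not part of this file): writing '1' to the
 * snapshot file allocates the max buffer if necessary and swaps it with the
 * live buffer (see update_max_tr() above); reading the file then shows the
 * frozen trace. Writing '0' frees the snapshot buffer again.
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// take a snapshot now
 *		close(fd);
 *	}
 */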
7625 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7627 struct seq_file *m = file->private_data;
7630 ret = tracing_release(inode, file);
7632 if (file->f_mode & FMODE_READ)
7635 /* If write only, the seq_file is just a stub */
7643 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7644 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7645 size_t count, loff_t *ppos);
7646 static int tracing_buffers_release(struct inode *inode, struct file *file);
7647 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7648 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7650 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7652 struct ftrace_buffer_info *info;
7655 /* The following checks for tracefs lockdown */
7656 ret = tracing_buffers_open(inode, filp);
7660 info = filp->private_data;
7662 if (info->iter.trace->use_max_tr) {
7663 tracing_buffers_release(inode, filp);
7667 info->iter.snapshot = true;
7668 info->iter.array_buffer = &info->iter.tr->max_buffer;
7673 #endif /* CONFIG_TRACER_SNAPSHOT */
7676 static const struct file_operations tracing_thresh_fops = {
7677 .open = tracing_open_generic,
7678 .read = tracing_thresh_read,
7679 .write = tracing_thresh_write,
7680 .llseek = generic_file_llseek,
7683 #ifdef CONFIG_TRACER_MAX_TRACE
7684 static const struct file_operations tracing_max_lat_fops = {
7685 .open = tracing_open_generic,
7686 .read = tracing_max_lat_read,
7687 .write = tracing_max_lat_write,
7688 .llseek = generic_file_llseek,
7692 static const struct file_operations set_tracer_fops = {
7693 .open = tracing_open_generic,
7694 .read = tracing_set_trace_read,
7695 .write = tracing_set_trace_write,
7696 .llseek = generic_file_llseek,
7699 static const struct file_operations tracing_pipe_fops = {
7700 .open = tracing_open_pipe,
7701 .poll = tracing_poll_pipe,
7702 .read = tracing_read_pipe,
7703 .splice_read = tracing_splice_read_pipe,
7704 .release = tracing_release_pipe,
7705 .llseek = no_llseek,
7708 static const struct file_operations tracing_entries_fops = {
7709 .open = tracing_open_generic_tr,
7710 .read = tracing_entries_read,
7711 .write = tracing_entries_write,
7712 .llseek = generic_file_llseek,
7713 .release = tracing_release_generic_tr,
7716 static const struct file_operations tracing_total_entries_fops = {
7717 .open = tracing_open_generic_tr,
7718 .read = tracing_total_entries_read,
7719 .llseek = generic_file_llseek,
7720 .release = tracing_release_generic_tr,
7723 static const struct file_operations tracing_free_buffer_fops = {
7724 .open = tracing_open_generic_tr,
7725 .write = tracing_free_buffer_write,
7726 .release = tracing_free_buffer_release,
7729 static const struct file_operations tracing_mark_fops = {
7730 .open = tracing_mark_open,
7731 .write = tracing_mark_write,
7732 .release = tracing_release_generic_tr,
7735 static const struct file_operations tracing_mark_raw_fops = {
7736 .open = tracing_mark_open,
7737 .write = tracing_mark_raw_write,
7738 .release = tracing_release_generic_tr,
7741 static const struct file_operations trace_clock_fops = {
7742 .open = tracing_clock_open,
7744 .llseek = seq_lseek,
7745 .release = tracing_single_release_tr,
7746 .write = tracing_clock_write,
7749 static const struct file_operations trace_time_stamp_mode_fops = {
7750 .open = tracing_time_stamp_mode_open,
7752 .llseek = seq_lseek,
7753 .release = tracing_single_release_tr,
7756 #ifdef CONFIG_TRACER_SNAPSHOT
7757 static const struct file_operations snapshot_fops = {
7758 .open = tracing_snapshot_open,
7760 .write = tracing_snapshot_write,
7761 .llseek = tracing_lseek,
7762 .release = tracing_snapshot_release,
7765 static const struct file_operations snapshot_raw_fops = {
7766 .open = snapshot_raw_open,
7767 .read = tracing_buffers_read,
7768 .release = tracing_buffers_release,
7769 .splice_read = tracing_buffers_splice_read,
7770 .llseek = no_llseek,
7773 #endif /* CONFIG_TRACER_SNAPSHOT */
7776 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7777 * @filp: The active open file structure
7778 * @ubuf: The userspace provided buffer holding the value to write
7779 * @cnt: The number of bytes available to read from @ubuf
7780 * @ppos: The current "file" position
7782 * This function implements the write interface for a struct trace_min_max_param.
7783 * The filp->private_data must point to a trace_min_max_param structure that
7784 * defines where to write the value, the min and the max acceptable values,
7785 * and a lock to protect the write.
7788 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7790 struct trace_min_max_param *param = filp->private_data;
7797 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7802 mutex_lock(param->lock);
7804 if (param->min && val < *param->min)
7807 if (param->max && val > *param->max)
7814 mutex_unlock(param->lock);
7823 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7824 * @filp: The active open file structure
7825 * @ubuf: The userspace provided buffer to read value into
7826 * @cnt: The maximum number of bytes to read
7827 * @ppos: The current "file" position
7829 * This function implements the read interface for a struct trace_min_max_param.
7830 * The filp->private_data must point to a trace_min_max_param struct with valid data.
7834 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7836 struct trace_min_max_param *param = filp->private_data;
7837 char buf[U64_STR_SIZE];
7846 if (cnt > sizeof(buf))
7849 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7851 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7854 const struct file_operations trace_min_max_fops = {
7855 .open = tracing_open_generic,
7856 .read = trace_min_max_read,
7857 .write = trace_min_max_write,
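/*
 * Illustrative in-kernel sketch (hypothetical names): a tracer that wants a
 * bounded u64 knob in tracefs can wire it up to trace_min_max_fops roughly
 * like this.
 *
 *	static u64 my_val = 50, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent, &my_param,
 *			  &trace_min_max_fops);
 */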
7860 #define TRACING_LOG_ERRS_MAX 8
7861 #define TRACING_LOG_LOC_MAX 128
7863 #define CMD_PREFIX " Command: "
7866 const char **errs; /* ptr to loc-specific array of err strings */
7867 u8 type; /* index into errs -> specific err string */
7868 u16 pos; /* caret position */
7872 struct tracing_log_err {
7873 struct list_head list;
7874 struct err_info info;
7875 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7876 char *cmd; /* what caused err */
7879 static DEFINE_MUTEX(tracing_err_log_lock);
7881 static struct tracing_log_err *alloc_tracing_log_err(int len)
7883 struct tracing_log_err *err;
7885 err = kzalloc(sizeof(*err), GFP_KERNEL);
7887 return ERR_PTR(-ENOMEM);
7889 err->cmd = kzalloc(len, GFP_KERNEL);
7892 return ERR_PTR(-ENOMEM);
7898 static void free_tracing_log_err(struct tracing_log_err *err)
7904 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7907 struct tracing_log_err *err;
7910 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7911 err = alloc_tracing_log_err(len);
7912 if (PTR_ERR(err) != -ENOMEM)
7913 tr->n_err_log_entries++;
7917 cmd = kzalloc(len, GFP_KERNEL);
7919 return ERR_PTR(-ENOMEM);
7920 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7923 list_del(&err->list);
7929 * err_pos - find the position of a string within a command for error careting
7930 * @cmd: The tracing command that caused the error
7931 * @str: The string to position the caret at within @cmd
7933 * Finds the position of the first occurrence of @str within @cmd. The
7934 * return value can be passed to tracing_log_err() for caret placement
7937 * Returns the index within @cmd of the first occurrence of @str or 0
7938 * if @str was not found.
7940 unsigned int err_pos(char *cmd, const char *str)
7944 if (WARN_ON(!strlen(cmd)))
7947 found = strstr(cmd, str);
7955 * tracing_log_err - write an error to the tracing error log
7956 * @tr: The associated trace array for the error (NULL for top level array)
7957 * @loc: A string describing where the error occurred
7958 * @cmd: The tracing command that caused the error
7959 * @errs: The array of loc-specific static error strings
7960 * @type: The index into errs[], which produces the specific static err string
7961 * @pos: The position the caret should be placed in the cmd
7963 * Writes an error into tracing/error_log of the form:
7965 * <loc>: error: <text>
7969 * tracing/error_log is a small log file containing the last
7970 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7971 * unless there has been a tracing error, and the error log can be
7972 * cleared and have its memory freed by writing the empty string in
7973 * truncation mode to it i.e. echo > tracing/error_log.
7975 * NOTE: the @errs array along with the @type param are used to
7976 * produce a static error string - this string is not copied and saved
7977 * when the error is logged - only a pointer to it is saved. See
7978 * existing callers for examples of how static strings are typically
7979 * defined for use with tracing_log_err().
7981 void tracing_log_err(struct trace_array *tr,
7982 const char *loc, const char *cmd,
7983 const char **errs, u8 type, u16 pos)
7985 struct tracing_log_err *err;
7991 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7993 mutex_lock(&tracing_err_log_lock);
7994 err = get_tracing_log_err(tr, len);
7995 if (PTR_ERR(err) == -ENOMEM) {
7996 mutex_unlock(&tracing_err_log_lock);
8000 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8001 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8003 err->info.errs = errs;
8004 err->info.type = type;
8005 err->info.pos = pos;
8006 err->info.ts = local_clock();
8008 list_add_tail(&err->list, &tr->err_log);
8009 mutex_unlock(&tracing_err_log_lock);
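/*
 * Illustrative in-kernel sketch (hypothetical names): callers keep a static
 * array of error strings and pass the index of the one that applies, plus a
 * caret position obtained from err_pos().
 *
 *	static const char *my_errs[] = {
 *		"Invalid argument",
 *		"Value out of range",
 *	};
 *
 *	tracing_log_err(tr, "my_cmd_parser", cmd, my_errs,
 *			1, err_pos(cmd, "bad_token"));
 */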
8012 static void clear_tracing_err_log(struct trace_array *tr)
8014 struct tracing_log_err *err, *next;
8016 mutex_lock(&tracing_err_log_lock);
8017 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8018 list_del(&err->list);
8019 free_tracing_log_err(err);
8022 tr->n_err_log_entries = 0;
8023 mutex_unlock(&tracing_err_log_lock);
8026 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8028 struct trace_array *tr = m->private;
8030 mutex_lock(&tracing_err_log_lock);
8032 return seq_list_start(&tr->err_log, *pos);
8035 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8037 struct trace_array *tr = m->private;
8039 return seq_list_next(v, &tr->err_log, pos);
8042 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8044 mutex_unlock(&tracing_err_log_lock);
8047 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8051 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8053 for (i = 0; i < pos; i++)
8058 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8060 struct tracing_log_err *err = v;
8063 const char *err_text = err->info.errs[err->info.type];
8064 u64 sec = err->info.ts;
8067 nsec = do_div(sec, NSEC_PER_SEC);
8068 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8069 err->loc, err_text);
8070 seq_printf(m, "%s", err->cmd);
8071 tracing_err_log_show_pos(m, err->info.pos);
8077 static const struct seq_operations tracing_err_log_seq_ops = {
8078 .start = tracing_err_log_seq_start,
8079 .next = tracing_err_log_seq_next,
8080 .stop = tracing_err_log_seq_stop,
8081 .show = tracing_err_log_seq_show
8084 static int tracing_err_log_open(struct inode *inode, struct file *file)
8086 struct trace_array *tr = inode->i_private;
8089 ret = tracing_check_open_get_tr(tr);
8093 /* If this file was opened for write, then erase contents */
8094 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8095 clear_tracing_err_log(tr);
8097 if (file->f_mode & FMODE_READ) {
8098 ret = seq_open(file, &tracing_err_log_seq_ops);
8100 struct seq_file *m = file->private_data;
8103 trace_array_put(tr);
8109 static ssize_t tracing_err_log_write(struct file *file,
8110 const char __user *buffer,
8111 size_t count, loff_t *ppos)
8116 static int tracing_err_log_release(struct inode *inode, struct file *file)
8118 struct trace_array *tr = inode->i_private;
8120 trace_array_put(tr);
8122 if (file->f_mode & FMODE_READ)
8123 seq_release(inode, file);
8128 static const struct file_operations tracing_err_log_fops = {
8129 .open = tracing_err_log_open,
8130 .write = tracing_err_log_write,
8132 .llseek = seq_lseek,
8133 .release = tracing_err_log_release,
8136 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8138 struct trace_array *tr = inode->i_private;
8139 struct ftrace_buffer_info *info;
8142 ret = tracing_check_open_get_tr(tr);
8146 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8148 trace_array_put(tr);
8152 mutex_lock(&trace_types_lock);
8155 info->iter.cpu_file = tracing_get_cpu(inode);
8156 info->iter.trace = tr->current_trace;
8157 info->iter.array_buffer = &tr->array_buffer;
8159 /* Force reading ring buffer for first read */
8160 info->read = (unsigned int)-1;
8162 filp->private_data = info;
8166 mutex_unlock(&trace_types_lock);
8168 ret = nonseekable_open(inode, filp);
8170 trace_array_put(tr);
8176 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8178 struct ftrace_buffer_info *info = filp->private_data;
8179 struct trace_iterator *iter = &info->iter;
8181 return trace_poll(iter, filp, poll_table);
8185 tracing_buffers_read(struct file *filp, char __user *ubuf,
8186 size_t count, loff_t *ppos)
8188 struct ftrace_buffer_info *info = filp->private_data;
8189 struct trace_iterator *iter = &info->iter;
8196 #ifdef CONFIG_TRACER_MAX_TRACE
8197 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8202 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8204 if (IS_ERR(info->spare)) {
8205 ret = PTR_ERR(info->spare);
8208 info->spare_cpu = iter->cpu_file;
8214 /* Do we have previous read data to read? */
8215 if (info->read < PAGE_SIZE)
8219 trace_access_lock(iter->cpu_file);
8220 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8224 trace_access_unlock(iter->cpu_file);
8227 if (trace_empty(iter)) {
8228 if ((filp->f_flags & O_NONBLOCK))
8231 ret = wait_on_pipe(iter, 0);
8242 size = PAGE_SIZE - info->read;
8246 ret = copy_to_user(ubuf, info->spare + info->read, size);
8258 static int tracing_buffers_release(struct inode *inode, struct file *file)
8260 struct ftrace_buffer_info *info = file->private_data;
8261 struct trace_iterator *iter = &info->iter;
8263 mutex_lock(&trace_types_lock);
8265 iter->tr->trace_ref--;
8267 __trace_array_put(iter->tr);
8270 /* Make sure the waiters see the new wait_index */
8273 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8276 ring_buffer_free_read_page(iter->array_buffer->buffer,
8277 info->spare_cpu, info->spare);
8280 mutex_unlock(&trace_types_lock);
8286 struct trace_buffer *buffer;
8289 refcount_t refcount;
8292 static void buffer_ref_release(struct buffer_ref *ref)
8294 if (!refcount_dec_and_test(&ref->refcount))
8296 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8300 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8301 struct pipe_buffer *buf)
8303 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8305 buffer_ref_release(ref);
8309 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8310 struct pipe_buffer *buf)
8312 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8314 if (refcount_read(&ref->refcount) > INT_MAX/2)
8317 refcount_inc(&ref->refcount);
8321 /* Pipe buffer operations for a buffer. */
8322 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8323 .release = buffer_pipe_buf_release,
8324 .get = buffer_pipe_buf_get,
8328 * Callback from splice_to_pipe(), if we need to release some pages
8329 * at the end of the spd in case we errored out while filling the pipe.
8331 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8333 struct buffer_ref *ref =
8334 (struct buffer_ref *)spd->partial[i].private;
8336 buffer_ref_release(ref);
8337 spd->partial[i].private = 0;
8341 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8342 struct pipe_inode_info *pipe, size_t len,
8345 struct ftrace_buffer_info *info = file->private_data;
8346 struct trace_iterator *iter = &info->iter;
8347 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8348 struct page *pages_def[PIPE_DEF_BUFFERS];
8349 struct splice_pipe_desc spd = {
8351 .partial = partial_def,
8352 .nr_pages_max = PIPE_DEF_BUFFERS,
8353 .ops = &buffer_pipe_buf_ops,
8354 .spd_release = buffer_spd_release,
8356 struct buffer_ref *ref;
8360 #ifdef CONFIG_TRACER_MAX_TRACE
8361 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8365 if (*ppos & (PAGE_SIZE - 1))
8368 if (len & (PAGE_SIZE - 1)) {
8369 if (len < PAGE_SIZE)
8374 if (splice_grow_spd(pipe, &spd))
8378 trace_access_lock(iter->cpu_file);
8379 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8381 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8385 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8391 refcount_set(&ref->refcount, 1);
8392 ref->buffer = iter->array_buffer->buffer;
8393 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8394 if (IS_ERR(ref->page)) {
8395 ret = PTR_ERR(ref->page);
8400 ref->cpu = iter->cpu_file;
8402 r = ring_buffer_read_page(ref->buffer, &ref->page,
8403 len, iter->cpu_file, 1);
8405 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8411 page = virt_to_page(ref->page);
8413 spd.pages[i] = page;
8414 spd.partial[i].len = PAGE_SIZE;
8415 spd.partial[i].offset = 0;
8416 spd.partial[i].private = (unsigned long)ref;
8420 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8423 trace_access_unlock(iter->cpu_file);
8426 /* did we read anything? */
8427 if (!spd.nr_pages) {
8434 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8437 wait_index = READ_ONCE(iter->wait_index);
8439 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8443 /* No need to wait after waking up when tracing is off */
8444 if (!tracer_tracing_is_on(iter->tr))
8447 /* Make sure we see the new wait_index */
8449 if (wait_index != iter->wait_index)
8455 ret = splice_to_pipe(pipe, &spd);
8457 splice_shrink_spd(&spd);
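/*
 * Illustrative user-space sketch (not part of this file, assumes 4 KiB pages
 * and omits error handling): readers such as trace-cmd splice whole pages out
 * of per_cpu/cpuN/trace_pipe_raw; both the offset and the length must be page
 * aligned (see the checks above).
 *
 *	int raw = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		       O_RDONLY);
 *	int out = open("trace-cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *	int pfd[2];
 *
 *	if (raw >= 0 && out >= 0 && pipe(pfd) == 0) {
 *		ssize_t n = splice(raw, NULL, pfd[1], NULL, 4096,
 *				   SPLICE_F_NONBLOCK);
 *		if (n > 0)
 *			splice(pfd[0], NULL, out, NULL, n, 0);
 *	}
 */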
8462 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8463 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8465 struct ftrace_buffer_info *info = file->private_data;
8466 struct trace_iterator *iter = &info->iter;
8469 return -ENOIOCTLCMD;
8471 mutex_lock(&trace_types_lock);
8474 /* Make sure the waiters see the new wait_index */
8477 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8479 mutex_unlock(&trace_types_lock);
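/*
 * Illustrative user-space sketch (not part of this file): a reader blocked in
 * read() or splice() on trace_pipe_raw can be released from another thread
 * with the wake-up ioctl described above.
 *
 *	ioctl(raw_fd, 0);	// cmd 0 wakes up all waiters on this buffer
 */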
8483 static const struct file_operations tracing_buffers_fops = {
8484 .open = tracing_buffers_open,
8485 .read = tracing_buffers_read,
8486 .poll = tracing_buffers_poll,
8487 .release = tracing_buffers_release,
8488 .splice_read = tracing_buffers_splice_read,
8489 .unlocked_ioctl = tracing_buffers_ioctl,
8490 .llseek = no_llseek,
8494 tracing_stats_read(struct file *filp, char __user *ubuf,
8495 size_t count, loff_t *ppos)
8497 struct inode *inode = file_inode(filp);
8498 struct trace_array *tr = inode->i_private;
8499 struct array_buffer *trace_buf = &tr->array_buffer;
8500 int cpu = tracing_get_cpu(inode);
8501 struct trace_seq *s;
8503 unsigned long long t;
8504 unsigned long usec_rem;
8506 s = kmalloc(sizeof(*s), GFP_KERNEL);
8512 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8513 trace_seq_printf(s, "entries: %ld\n", cnt);
8515 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8516 trace_seq_printf(s, "overrun: %ld\n", cnt);
8518 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8519 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8521 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8522 trace_seq_printf(s, "bytes: %ld\n", cnt);
8524 if (trace_clocks[tr->clock_id].in_ns) {
8525 /* local or global for trace_clock */
8526 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8527 usec_rem = do_div(t, USEC_PER_SEC);
8528 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8531 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8532 usec_rem = do_div(t, USEC_PER_SEC);
8533 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8535 /* counter or tsc mode for trace_clock */
8536 trace_seq_printf(s, "oldest event ts: %llu\n",
8537 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8539 trace_seq_printf(s, "now ts: %llu\n",
8540 ring_buffer_time_stamp(trace_buf->buffer));
8543 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8544 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8546 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8547 trace_seq_printf(s, "read events: %ld\n", cnt);
8549 count = simple_read_from_buffer(ubuf, count, ppos,
8550 s->buffer, trace_seq_used(s));
8557 static const struct file_operations tracing_stats_fops = {
8558 .open = tracing_open_generic_tr,
8559 .read = tracing_stats_read,
8560 .llseek = generic_file_llseek,
8561 .release = tracing_release_generic_tr,
8564 #ifdef CONFIG_DYNAMIC_FTRACE
8567 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8568 size_t cnt, loff_t *ppos)
8574 /* 256 should be plenty to hold the amount needed */
8575 buf = kmalloc(256, GFP_KERNEL);
8579 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8580 ftrace_update_tot_cnt,
8581 ftrace_number_of_pages,
8582 ftrace_number_of_groups);
8584 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8589 static const struct file_operations tracing_dyn_info_fops = {
8590 .open = tracing_open_generic,
8591 .read = tracing_read_dyn_info,
8592 .llseek = generic_file_llseek,
8594 #endif /* CONFIG_DYNAMIC_FTRACE */
8596 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8598 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8599 struct trace_array *tr, struct ftrace_probe_ops *ops,
8602 tracing_snapshot_instance(tr);
8606 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8607 struct trace_array *tr, struct ftrace_probe_ops *ops,
8610 struct ftrace_func_mapper *mapper = data;
8614 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8624 tracing_snapshot_instance(tr);
8628 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8629 struct ftrace_probe_ops *ops, void *data)
8631 struct ftrace_func_mapper *mapper = data;
8634 seq_printf(m, "%ps:", (void *)ip);
8636 seq_puts(m, "snapshot");
8639 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8642 seq_printf(m, ":count=%ld\n", *count);
8644 seq_puts(m, ":unlimited\n");
8650 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8651 unsigned long ip, void *init_data, void **data)
8653 struct ftrace_func_mapper *mapper = *data;
8656 mapper = allocate_ftrace_func_mapper();
8662 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8666 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8667 unsigned long ip, void *data)
8669 struct ftrace_func_mapper *mapper = data;
8674 free_ftrace_func_mapper(mapper, NULL);
8678 ftrace_func_mapper_remove_ip(mapper, ip);
8681 static struct ftrace_probe_ops snapshot_probe_ops = {
8682 .func = ftrace_snapshot,
8683 .print = ftrace_snapshot_print,
8686 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8687 .func = ftrace_count_snapshot,
8688 .print = ftrace_snapshot_print,
8689 .init = ftrace_snapshot_init,
8690 .free = ftrace_snapshot_free,
8694 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8695 char *glob, char *cmd, char *param, int enable)
8697 struct ftrace_probe_ops *ops;
8698 void *count = (void *)-1;
8705 /* hash funcs only work with set_ftrace_filter */
8709 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8712 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8717 number = strsep(¶m, ":");
8719 if (!strlen(number))
8723 * We use the callback data field (which is a pointer) as our counter.
8726 ret = kstrtoul(number, 0, (unsigned long *)&count);
8731 ret = tracing_alloc_snapshot_instance(tr);
8735 ret = register_ftrace_function_probe(glob, tr, ops, count);
8738 return ret < 0 ? ret : 0;
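/*
 * Illustrative user-space sketch (not part of this file): the "snapshot"
 * command registered below is used through set_ftrace_filter, e.g. to take a
 * single snapshot the first time schedule() is hit:
 *
 *	int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
 *	if (fd >= 0) {
 *		const char cmd[] = "schedule:snapshot:1";
 *
 *		write(fd, cmd, sizeof(cmd) - 1);
 *		close(fd);
 *	}
 */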
8741 static struct ftrace_func_command ftrace_snapshot_cmd = {
8743 .func = ftrace_trace_snapshot_callback,
8746 static __init int register_snapshot_cmd(void)
8748 return register_ftrace_command(&ftrace_snapshot_cmd);
8751 static inline __init int register_snapshot_cmd(void) { return 0; }
8752 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8754 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8756 if (WARN_ON(!tr->dir))
8757 return ERR_PTR(-ENODEV);
8759 /* Top directory uses NULL as the parent */
8760 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8763 /* All sub buffers have a descriptor */
8767 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8769 struct dentry *d_tracer;
8772 return tr->percpu_dir;
8774 d_tracer = tracing_get_dentry(tr);
8775 if (IS_ERR(d_tracer))
8778 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8780 MEM_FAIL(!tr->percpu_dir,
8781 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8783 return tr->percpu_dir;
8786 static struct dentry *
8787 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8788 void *data, long cpu, const struct file_operations *fops)
8790 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8792 if (ret) /* See tracing_get_cpu() */
8793 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8798 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8800 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8801 struct dentry *d_cpu;
8802 char cpu_dir[30]; /* 30 characters should be more than enough */
8807 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8808 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8810 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8814 /* per cpu trace_pipe */
8815 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8816 tr, cpu, &tracing_pipe_fops);
8819 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8820 tr, cpu, &tracing_fops);
8822 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8823 tr, cpu, &tracing_buffers_fops);
8825 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8826 tr, cpu, &tracing_stats_fops);
8828 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8829 tr, cpu, &tracing_entries_fops);
8831 #ifdef CONFIG_TRACER_SNAPSHOT
8832 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8833 tr, cpu, &snapshot_fops);
8835 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8836 tr, cpu, &snapshot_raw_fops);
8840 #ifdef CONFIG_FTRACE_SELFTEST
8841 /* Let selftest have access to static functions in this file */
8842 #include "trace_selftest.c"
8846 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8849 struct trace_option_dentry *topt = filp->private_data;
8852 if (topt->flags->val & topt->opt->bit)
8857 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8861 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8864 struct trace_option_dentry *topt = filp->private_data;
8868 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8872 if (val != 0 && val != 1)
8875 if (!!(topt->flags->val & topt->opt->bit) != val) {
8876 mutex_lock(&trace_types_lock);
8877 ret = __set_tracer_option(topt->tr, topt->flags,
8879 mutex_unlock(&trace_types_lock);
8890 static const struct file_operations trace_options_fops = {
8891 .open = tracing_open_generic,
8892 .read = trace_options_read,
8893 .write = trace_options_write,
8894 .llseek = generic_file_llseek,
8898 * In order to pass in both the trace_array descriptor as well as the index
8899 * to the flag that the trace option file represents, the trace_array
8900 * has a character array of trace_flags_index[], which holds the index
8901 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8902 * The address of this character array is passed to the flag option file
8903 * read/write callbacks.
8905 * In order to extract both the index and the trace_array descriptor,
8906 * get_tr_index() uses the following algorithm.
8910 * As the data pointer holds the address of one entry in that index array,
8913 * dereferencing it (idx = *ptr) yields the flag's bit index. Subtracting
8914 * that index from the pointer then takes us back to the start of the array:
8916 * ptr - idx == &index[0]
8918 * Then a simple container_of() from that pointer gets us to the
8919 * trace_array descriptor.
8921 static void get_tr_index(void *data, struct trace_array **ptr,
8922 unsigned int *pindex)
8924 *pindex = *(unsigned char *)data;
8926 *ptr = container_of(data - *pindex, struct trace_array,
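/*
 * Worked example (illustrative): if data points at tr->trace_flags_index[3],
 * then *pindex == 3 (since index[i] == i, see the comment above), so
 *
 *	data - *pindex == &tr->trace_flags_index[0]
 *
 * and container_of() on that address recovers tr itself.
 */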
8931 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8934 void *tr_index = filp->private_data;
8935 struct trace_array *tr;
8939 get_tr_index(tr_index, &tr, &index);
8941 if (tr->trace_flags & (1 << index))
8946 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8950 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8953 void *tr_index = filp->private_data;
8954 struct trace_array *tr;
8959 get_tr_index(tr_index, &tr, &index);
8961 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8965 if (val != 0 && val != 1)
8968 mutex_lock(&event_mutex);
8969 mutex_lock(&trace_types_lock);
8970 ret = set_tracer_flag(tr, 1 << index, val);
8971 mutex_unlock(&trace_types_lock);
8972 mutex_unlock(&event_mutex);
8982 static const struct file_operations trace_options_core_fops = {
8983 .open = tracing_open_generic,
8984 .read = trace_options_core_read,
8985 .write = trace_options_core_write,
8986 .llseek = generic_file_llseek,
8989 struct dentry *trace_create_file(const char *name,
8991 struct dentry *parent,
8993 const struct file_operations *fops)
8997 ret = tracefs_create_file(name, mode, parent, data, fops);
8999 pr_warn("Could not create tracefs '%s' entry\n", name);
9005 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9007 struct dentry *d_tracer;
9012 d_tracer = tracing_get_dentry(tr);
9013 if (IS_ERR(d_tracer))
9016 tr->options = tracefs_create_dir("options", d_tracer);
9018 pr_warn("Could not create tracefs directory 'options'\n");
9026 create_trace_option_file(struct trace_array *tr,
9027 struct trace_option_dentry *topt,
9028 struct tracer_flags *flags,
9029 struct tracer_opt *opt)
9031 struct dentry *t_options;
9033 t_options = trace_options_init_dentry(tr);
9037 topt->flags = flags;
9041 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9042 t_options, topt, &trace_options_fops);
9047 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9049 struct trace_option_dentry *topts;
9050 struct trace_options *tr_topts;
9051 struct tracer_flags *flags;
9052 struct tracer_opt *opts;
9059 flags = tracer->flags;
9061 if (!flags || !flags->opts)
9065 * If this is an instance, only create flags for tracers
9066 * the instance may have.
9068 if (!trace_ok_for_array(tracer, tr))
9071 for (i = 0; i < tr->nr_topts; i++) {
9072 /* Make sure there are no duplicate flags. */
9073 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9079 for (cnt = 0; opts[cnt].name; cnt++)
9082 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9086 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9093 tr->topts = tr_topts;
9094 tr->topts[tr->nr_topts].tracer = tracer;
9095 tr->topts[tr->nr_topts].topts = topts;
9098 for (cnt = 0; opts[cnt].name; cnt++) {
9099 create_trace_option_file(tr, &topts[cnt], flags,
9101 MEM_FAIL(topts[cnt].entry == NULL,
9102 "Failed to create trace option: %s",
9107 static struct dentry *
9108 create_trace_option_core_file(struct trace_array *tr,
9109 const char *option, long index)
9111 struct dentry *t_options;
9113 t_options = trace_options_init_dentry(tr);
9117 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9118 (void *)&tr->trace_flags_index[index],
9119 &trace_options_core_fops);
9122 static void create_trace_options_dir(struct trace_array *tr)
9124 struct dentry *t_options;
9125 bool top_level = tr == &global_trace;
9128 t_options = trace_options_init_dentry(tr);
9132 for (i = 0; trace_options[i]; i++) {
9134 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9135 create_trace_option_core_file(tr, trace_options[i], i);
9140 rb_simple_read(struct file *filp, char __user *ubuf,
9141 size_t cnt, loff_t *ppos)
9143 struct trace_array *tr = filp->private_data;
9147 r = tracer_tracing_is_on(tr);
9148 r = sprintf(buf, "%d\n", r);
9150 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9154 rb_simple_write(struct file *filp, const char __user *ubuf,
9155 size_t cnt, loff_t *ppos)
9157 struct trace_array *tr = filp->private_data;
9158 struct trace_buffer *buffer = tr->array_buffer.buffer;
9162 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9167 mutex_lock(&trace_types_lock);
9168 if (!!val == tracer_tracing_is_on(tr)) {
9169 val = 0; /* do nothing */
9171 tracer_tracing_on(tr);
9172 if (tr->current_trace->start)
9173 tr->current_trace->start(tr);
9175 tracer_tracing_off(tr);
9176 if (tr->current_trace->stop)
9177 tr->current_trace->stop(tr);
9178 /* Wake up any waiters */
9179 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9181 mutex_unlock(&trace_types_lock);
9189 static const struct file_operations rb_simple_fops = {
9190 .open = tracing_open_generic_tr,
9191 .read = rb_simple_read,
9192 .write = rb_simple_write,
9193 .release = tracing_release_generic_tr,
9194 .llseek = default_llseek,
9198 buffer_percent_read(struct file *filp, char __user *ubuf,
9199 size_t cnt, loff_t *ppos)
9201 struct trace_array *tr = filp->private_data;
9205 r = tr->buffer_percent;
9206 r = sprintf(buf, "%d\n", r);
9208 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9212 buffer_percent_write(struct file *filp, const char __user *ubuf,
9213 size_t cnt, loff_t *ppos)
9215 struct trace_array *tr = filp->private_data;
9219 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9226 tr->buffer_percent = val;
9233 static const struct file_operations buffer_percent_fops = {
9234 .open = tracing_open_generic_tr,
9235 .read = buffer_percent_read,
9236 .write = buffer_percent_write,
9237 .release = tracing_release_generic_tr,
9238 .llseek = default_llseek,
9241 static struct dentry *trace_instance_dir;
9244 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9247 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9249 enum ring_buffer_flags rb_flags;
9251 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9255 buf->buffer = ring_buffer_alloc(size, rb_flags);
9259 buf->data = alloc_percpu(struct trace_array_cpu);
9261 ring_buffer_free(buf->buffer);
9266 /* Allocate the first page for all buffers */
9267 set_buffer_entries(&tr->array_buffer,
9268 ring_buffer_size(tr->array_buffer.buffer, 0));
9273 static void free_trace_buffer(struct array_buffer *buf)
9276 ring_buffer_free(buf->buffer);
9278 free_percpu(buf->data);
9283 static int allocate_trace_buffers(struct trace_array *tr, int size)
9287 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9291 #ifdef CONFIG_TRACER_MAX_TRACE
9292 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9293 allocate_snapshot ? size : 1);
9294 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9295 free_trace_buffer(&tr->array_buffer);
9298 tr->allocated_snapshot = allocate_snapshot;
9300 allocate_snapshot = false;
9306 static void free_trace_buffers(struct trace_array *tr)
9311 free_trace_buffer(&tr->array_buffer);
9313 #ifdef CONFIG_TRACER_MAX_TRACE
9314 free_trace_buffer(&tr->max_buffer);
9318 static void init_trace_flags_index(struct trace_array *tr)
9322 /* Used by the trace options files */
9323 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9324 tr->trace_flags_index[i] = i;
9327 static void __update_tracer_options(struct trace_array *tr)
9331 for (t = trace_types; t; t = t->next)
9332 add_tracer_options(tr, t);
9335 static void update_tracer_options(struct trace_array *tr)
9337 mutex_lock(&trace_types_lock);
9338 tracer_options_updated = true;
9339 __update_tracer_options(tr);
9340 mutex_unlock(&trace_types_lock);
9343 /* Must have trace_types_lock held */
9344 struct trace_array *trace_array_find(const char *instance)
9346 struct trace_array *tr, *found = NULL;
9348 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9349 if (tr->name && strcmp(tr->name, instance) == 0) {
9358 struct trace_array *trace_array_find_get(const char *instance)
9360 struct trace_array *tr;
9362 mutex_lock(&trace_types_lock);
9363 tr = trace_array_find(instance);
9366 mutex_unlock(&trace_types_lock);
9371 static int trace_array_create_dir(struct trace_array *tr)
9375 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9379 ret = event_trace_add_tracer(tr->dir, tr);
9381 tracefs_remove(tr->dir);
9385 init_tracer_tracefs(tr, tr->dir);
9386 __update_tracer_options(tr);
9391 static struct trace_array *trace_array_create(const char *name)
9393 struct trace_array *tr;
9397 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9399 return ERR_PTR(ret);
9401 tr->name = kstrdup(name, GFP_KERNEL);
9405 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9408 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9410 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9412 raw_spin_lock_init(&tr->start_lock);
9414 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9416 tr->current_trace = &nop_trace;
9418 INIT_LIST_HEAD(&tr->systems);
9419 INIT_LIST_HEAD(&tr->events);
9420 INIT_LIST_HEAD(&tr->hist_vars);
9421 INIT_LIST_HEAD(&tr->err_log);
9423 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9426 if (ftrace_allocate_ftrace_ops(tr) < 0)
9429 ftrace_init_trace_array(tr);
9431 init_trace_flags_index(tr);
9433 if (trace_instance_dir) {
9434 ret = trace_array_create_dir(tr);
9438 __trace_early_add_events(tr);
9440 list_add(&tr->list, &ftrace_trace_arrays);
9447 ftrace_free_ftrace_ops(tr);
9448 free_trace_buffers(tr);
9449 free_cpumask_var(tr->tracing_cpumask);
9453 return ERR_PTR(ret);
9456 static int instance_mkdir(const char *name)
9458 struct trace_array *tr;
9461 mutex_lock(&event_mutex);
9462 mutex_lock(&trace_types_lock);
9465 if (trace_array_find(name))
9468 tr = trace_array_create(name);
9470 ret = PTR_ERR_OR_ZERO(tr);
9473 mutex_unlock(&trace_types_lock);
9474 mutex_unlock(&event_mutex);
9479 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9480 * @name: The name of the trace array to be looked up/created.
9482 * Returns a pointer to the trace array with the given name, or
9483 * NULL if it cannot be created.
9485 * NOTE: This function increments the reference counter associated with the
9486 * trace array returned. This makes sure it cannot be freed while in use.
9487 * Use trace_array_put() once the trace array is no longer needed.
9488 * If the trace_array is to be freed, trace_array_destroy() needs to
9489 * be called after the trace_array_put(), or simply let user space delete
9490 * it from the tracefs instances directory. But until the
9491 * trace_array_put() is called, user space can not delete it.
9494 struct trace_array *trace_array_get_by_name(const char *name)
9496 struct trace_array *tr;
9498 mutex_lock(&event_mutex);
9499 mutex_lock(&trace_types_lock);
9501 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9502 if (tr->name && strcmp(tr->name, name) == 0)
9506 tr = trace_array_create(name);
9514 mutex_unlock(&trace_types_lock);
9515 mutex_unlock(&event_mutex);
9518 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
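/*
 * Illustrative in-kernel sketch (hypothetical instance name): a module can
 * create or look up its own instance, use it, and tear it down again; see
 * samples/ftrace/ in the kernel tree for a complete module example.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		// ... enable events on / record into the instance ...
 *		trace_array_put(tr);
 *		trace_array_destroy(tr);  // only if the instance should go away
 *	}
 */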
9520 static int __remove_instance(struct trace_array *tr)
9524 /* Reference counter for a newly created trace array = 1. */
9525 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9528 list_del(&tr->list);
9530 /* Disable all the flags that were enabled coming in */
9531 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9532 if ((1 << i) & ZEROED_TRACE_FLAGS)
9533 set_tracer_flag(tr, 1 << i, 0);
9536 tracing_set_nop(tr);
9537 clear_ftrace_function_probes(tr);
9538 event_trace_del_tracer(tr);
9539 ftrace_clear_pids(tr);
9540 ftrace_destroy_function_files(tr);
9541 tracefs_remove(tr->dir);
9542 free_percpu(tr->last_func_repeats);
9543 free_trace_buffers(tr);
9544 clear_tracing_err_log(tr);
9546 for (i = 0; i < tr->nr_topts; i++) {
9547 kfree(tr->topts[i].topts);
9551 free_cpumask_var(tr->tracing_cpumask);
9558 int trace_array_destroy(struct trace_array *this_tr)
9560 struct trace_array *tr;
9566 mutex_lock(&event_mutex);
9567 mutex_lock(&trace_types_lock);
9571 /* Make sure the trace array exists before destroying it. */
9572 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9573 if (tr == this_tr) {
9574 ret = __remove_instance(tr);
9579 mutex_unlock(&trace_types_lock);
9580 mutex_unlock(&event_mutex);
9584 EXPORT_SYMBOL_GPL(trace_array_destroy);
9586 static int instance_rmdir(const char *name)
9588 struct trace_array *tr;
9591 mutex_lock(&event_mutex);
9592 mutex_lock(&trace_types_lock);
9595 tr = trace_array_find(name);
9597 ret = __remove_instance(tr);
9599 mutex_unlock(&trace_types_lock);
9600 mutex_unlock(&event_mutex);
9605 static __init void create_trace_instances(struct dentry *d_tracer)
9607 struct trace_array *tr;
9609 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9612 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9615 mutex_lock(&event_mutex);
9616 mutex_lock(&trace_types_lock);
9618 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9621 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9622 "Failed to create instance directory\n"))
9626 mutex_unlock(&trace_types_lock);
9627 mutex_unlock(&event_mutex);
9631 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9633 struct trace_event_file *file;
9636 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9637 tr, &show_traces_fops);
9639 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9640 tr, &set_tracer_fops);
9642 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9643 tr, &tracing_cpumask_fops);
9645 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9646 tr, &tracing_iter_fops);
9648 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9651 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9652 tr, &tracing_pipe_fops);
9654 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9655 tr, &tracing_entries_fops);
9657 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9658 tr, &tracing_total_entries_fops);
9660 trace_create_file("free_buffer", 0200, d_tracer,
9661 tr, &tracing_free_buffer_fops);
9663 trace_create_file("trace_marker", 0220, d_tracer,
9664 tr, &tracing_mark_fops);
9666 file = __find_event_file(tr, "ftrace", "print");
9667 if (file && file->dir)
9668 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9669 file, &event_trigger_fops);
9670 tr->trace_marker_file = file;
9672 trace_create_file("trace_marker_raw", 0220, d_tracer,
9673 tr, &tracing_mark_raw_fops);
9675 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9678 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9679 tr, &rb_simple_fops);
9681 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9682 &trace_time_stamp_mode_fops);
9684 tr->buffer_percent = 50;
9686 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9687 tr, &buffer_percent_fops);
9689 create_trace_options_dir(tr);
9691 #ifdef CONFIG_TRACER_MAX_TRACE
9692 trace_create_maxlat_file(tr, d_tracer);
9695 if (ftrace_create_function_files(tr, d_tracer))
9696 MEM_FAIL(1, "Could not allocate function filter files");
9698 #ifdef CONFIG_TRACER_SNAPSHOT
9699 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9700 tr, &snapshot_fops);
9703 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9704 tr, &tracing_err_log_fops);
9706 for_each_tracing_cpu(cpu)
9707 tracing_init_tracefs_percpu(tr, cpu);
9709 ftrace_init_tracefs(tr, d_tracer);
9712 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9714 struct vfsmount *mnt;
9715 struct file_system_type *type;
9718 * To maintain backward compatibility for tools that mount
9719 * debugfs to get to the tracing facility, tracefs is automatically
9720 * mounted to the debugfs/tracing directory.
9722 type = get_fs_type("tracefs");
9725 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9726 put_filesystem(type);
9735 * tracing_init_dentry - initialize top level trace array
9737 * This is called when creating files or directories in the tracing
9738 * directory. It is called via fs_initcall() by any of the boot up code
9739 * and returns zero on success or a negative errno if the tracing directory is unavailable.
9741 int tracing_init_dentry(void)
9743 struct trace_array *tr = &global_trace;
9745 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9746 pr_warn("Tracing disabled due to lockdown\n");
9750 /* The top level trace array uses NULL as parent */
9754 if (WARN_ON(!tracefs_initialized()))
9758 * As there may still be users that expect the tracing
9759 * files to exist in debugfs/tracing, we must automount
9760 * the tracefs file system there, so older tools still
9761 * work with the newer kernel.
9763 tr->dir = debugfs_create_automount("tracing", NULL,
9764 trace_automount, NULL);
9769 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9770 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9772 static struct workqueue_struct *eval_map_wq __initdata;
9773 static struct work_struct eval_map_work __initdata;
9774 static struct work_struct tracerfs_init_work __initdata;
9776 static void __init eval_map_work_func(struct work_struct *work)
9780 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9781 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9784 static int __init trace_eval_init(void)
9786 INIT_WORK(&eval_map_work, eval_map_work_func);
9788 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9790 pr_err("Unable to allocate eval_map_wq\n");
9792 eval_map_work_func(&eval_map_work);
9796 queue_work(eval_map_wq, &eval_map_work);
9800 subsys_initcall(trace_eval_init);
9802 static int __init trace_eval_sync(void)
9804 /* Make sure the eval map updates are finished */
9806 destroy_workqueue(eval_map_wq);
9810 late_initcall_sync(trace_eval_sync);
9813 #ifdef CONFIG_MODULES
9814 static void trace_module_add_evals(struct module *mod)
9816 if (!mod->num_trace_evals)
9820 * Modules with bad taint do not have events created; do
9821 * not bother with enums either.
9823 if (trace_module_has_bad_taint(mod))
9826 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9829 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9830 static void trace_module_remove_evals(struct module *mod)
9832 union trace_eval_map_item *map;
9833 union trace_eval_map_item **last = &trace_eval_maps;
9835 if (!mod->num_trace_evals)
9838 mutex_lock(&trace_eval_mutex);
9840 map = trace_eval_maps;
9843 if (map->head.mod == mod)
9845 map = trace_eval_jmp_to_tail(map);
9846 last = &map->tail.next;
9847 map = map->tail.next;
9852 *last = trace_eval_jmp_to_tail(map)->tail.next;
9855 mutex_unlock(&trace_eval_mutex);
9858 static inline void trace_module_remove_evals(struct module *mod) { }
9859 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9861 static int trace_module_notify(struct notifier_block *self,
9862 unsigned long val, void *data)
9864 struct module *mod = data;
9867 case MODULE_STATE_COMING:
9868 trace_module_add_evals(mod);
9870 case MODULE_STATE_GOING:
9871 trace_module_remove_evals(mod);
9878 static struct notifier_block trace_module_nb = {
9879 .notifier_call = trace_module_notify,
9882 #endif /* CONFIG_MODULES */
9884 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9889 init_tracer_tracefs(&global_trace, NULL);
9890 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9892 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9893 &global_trace, &tracing_thresh_fops);
9895 trace_create_file("README", TRACE_MODE_READ, NULL,
9896 NULL, &tracing_readme_fops);
9898 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9899 NULL, &tracing_saved_cmdlines_fops);
9901 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9902 NULL, &tracing_saved_cmdlines_size_fops);
9904 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9905 NULL, &tracing_saved_tgids_fops);
9907 trace_create_eval_file(NULL);
9909 #ifdef CONFIG_MODULES
9910 register_module_notifier(&trace_module_nb);
9913 #ifdef CONFIG_DYNAMIC_FTRACE
9914 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9915 NULL, &tracing_dyn_info_fops);
9918 create_trace_instances(NULL);
9920 update_tracer_options(&global_trace);
9923 static __init int tracer_init_tracefs(void)
9927 trace_access_lock_init();
9929 ret = tracing_init_dentry();
9934 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9935 queue_work(eval_map_wq, &tracerfs_init_work);
9937 tracer_init_tracefs_work_func(NULL);
9940 rv_init_interface();
9945 fs_initcall(tracer_init_tracefs);
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
/*
 * The idea is to execute the following die/panic callback early, in order
 * to avoid showing irrelevant information in the trace (like other panic
 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
 * warnings get disabled (to prevent potential log flooding).
 */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused)
{
	if (!ftrace_dump_on_oops)
		return NOTIFY_DONE;

	/* The die notifier requires DIE_OOPS to trigger */
	if (self == &trace_die_notifier && ev != DIE_OOPS)
		return NOTIFY_DONE;

	ftrace_dump(ftrace_dump_on_oops);

	return NOTIFY_DONE;
}
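/*
 * Illustrative example (not part of this file): the dump-on-oops path above
 * is normally enabled with the "ftrace_dump_on_oops" kernel command line
 * option or the kernel.ftrace_dump_on_oops sysctl, e.g.:
 *
 *	ftrace_dump_on_oops		dump every CPU's buffer on an oops
 *	ftrace_dump_on_oops=orig_cpu	dump only the CPU that oopsed
 */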
/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
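/*
 * Set up an iterator over the global trace buffer. Used by ftrace_dump()
 * below, which may run from oops/panic context, so the iterator's temp and
 * format buffers point at static storage rather than anything allocated.
 */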
void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &global_trace.array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
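/*
 * Dump the contents of the ring buffer to the console with printk() at
 * KERN_TRACE (KERN_EMERG) level. This is the path used on oops/panic and
 * by sysrq-z; tracing is turned off for the duration and only one dump
 * may run at a time.
 */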
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE " (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
#define WRITE_BUFSIZE  4096
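/*
 * trace_parse_run_command - run a callback for each line of a user write
 *
 * Copy the user buffer in WRITE_BUFSIZE chunks, split it on newlines,
 * strip '#' comments and feed each resulting line to @createfn.
 */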
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;
		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;
			/* Remove comments */
			tmp = strchr(buf, '#');
			if (tmp)
				*tmp = '\0';
			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;
		} while (done < count);
	}
	ret = done;
out:
	kfree(kbuf);
	return ret;
}
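/*
 * Illustrative usage sketch (not part of this file): a tracefs write
 * handler hands each line of user input to its own parser, e.g.:
 *
 *	static ssize_t probes_write(struct file *file, const char __user *buf,
 *				    size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buf, count, ppos,
 *					       create_or_delete_trace_kprobe);
 *	}
 *
 * which is essentially how kernel/trace/trace_kprobe.c implements the
 * kprobe_events interface.
 */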
#ifdef CONFIG_TRACER_MAX_TRACE
__init static bool tr_needs_alloc_snapshot(const char *name)
{
	char *test;
	int len = strlen(name);
	bool ret;

	if (!boot_snapshot_index)
		return false;

	if (strncmp(name, boot_snapshot_info, len) == 0 &&
	    boot_snapshot_info[len] == '\t')
		return true;

	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
	if (!test)
		return false;

	sprintf(test, "\t%s\t", name);
	ret = strstr(boot_snapshot_info, test) == NULL;
	kfree(test);
	return ret;
}

__init static void do_allocate_snapshot(const char *name)
{
	if (!tr_needs_alloc_snapshot(name))
		return;
	/*
	 * When allocate_snapshot is set, the next call to
	 * allocate_trace_buffers() (called by trace_array_get_by_name())
	 * will allocate the snapshot buffer. That will also clear
	 * this flag.
	 */
	allocate_snapshot = true;
}
#else
static inline void do_allocate_snapshot(const char *name) { }
#endif
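/*
 * Create the trace instances requested on the kernel command line (the
 * "trace_instance=" option). boot_instance_info holds tab-separated
 * entries, each an instance name optionally followed by a comma-separated
 * list of events to enable in that instance.
 */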
__init static void enable_instances(void)
{
	struct trace_array *tr;
	char *curr_str;
	char *str;
	char *tok;

	/* A tab is always appended */
	boot_instance_info[boot_instance_index - 1] = '\0';
	str = boot_instance_info;

	while ((curr_str = strsep(&str, "\t"))) {
		tok = strsep(&curr_str, ",");

		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
			do_allocate_snapshot(tok);

		tr = trace_array_get_by_name(tok);
		if (!tr) {
			pr_warn("Failed to create instance buffer %s\n", curr_str);
			continue;
		}
		/* Allow user space to delete it */
		trace_array_put(tr);

		while ((tok = strsep(&curr_str, ","))) {
			early_enable_events(tr, tok, true);
		}
	}
}
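/*
 * Allocate the global (top level) trace buffers and register the core
 * tracing infrastructure. Called from early_trace_init(); on failure each
 * allocation is unwound through the labels at the end of the function.
 */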
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
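/*
 * Take a boot-time snapshot of every instance that allocated a snapshot
 * buffer. Only does anything when a snapshot was requested on the kernel
 * command line (the "ftrace_boot_snapshot" option sets snapshot_at_boot).
 */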
void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_MAX_TRACE
	struct trace_array *tr;

	if (!snapshot_at_boot)
		return;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->allocated_snapshot)
			continue;
		tracing_snapshot_instance(tr);
		trace_array_puts(tr, "** Boot snapshot taken **\n");
	}
#endif
}
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();
}
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}
__init static void clear_boot_tracer(void)
{
	/*
	 * The default tracer at boot buffer is an init section.
	 * This function is called in lateinit. If we did not
	 * find the boot tracer, then clear it out, to prevent
	 * later registration from accessing the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);