tracing: Use temp buffer when filtering events
[android-x86/kernel.git] kernel/trace/trace_events.c
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10
11 #define pr_fmt(fmt) fmt
12
13 #include <linux/workqueue.h>
14 #include <linux/spinlock.h>
15 #include <linux/kthread.h>
16 #include <linux/tracefs.h>
17 #include <linux/uaccess.h>
18 #include <linux/vmalloc.h>
19 #include <linux/module.h>
20 #include <linux/ctype.h>
21 #include <linux/sort.h>
22 #include <linux/slab.h>
23 #include <linux/delay.h>
24
25 #include <trace/events/sched.h>
26
27 #include <asm/setup.h>
28
29 #include "trace_output.h"
30
31 #undef TRACE_SYSTEM
32 #define TRACE_SYSTEM "TRACE_SYSTEM"
33
34 DEFINE_MUTEX(event_mutex);
35
36 LIST_HEAD(ftrace_events);
37 static LIST_HEAD(ftrace_generic_fields);
38 static LIST_HEAD(ftrace_common_fields);
39
40 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
41
42 static struct kmem_cache *field_cachep;
43 static struct kmem_cache *file_cachep;
44
45 static inline int system_refcount(struct event_subsystem *system)
46 {
47         return system->ref_count;
48 }
49
50 static int system_refcount_inc(struct event_subsystem *system)
51 {
52         return system->ref_count++;
53 }
54
55 static int system_refcount_dec(struct event_subsystem *system)
56 {
57         return --system->ref_count;
58 }
59
60 /* Double loops, do not use break, only goto's work */
61 #define do_for_each_event_file(tr, file)                        \
62         list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
63                 list_for_each_entry(file, &tr->events, list)
64
65 #define do_for_each_event_file_safe(tr, file)                   \
66         list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
67                 struct trace_event_file *___n;                          \
68                 list_for_each_entry_safe(file, ___n, &tr->events, list)
69
70 #define while_for_each_event_file()             \
71         }
72
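/*
 * Illustrative usage sketch (added for clarity, not part of the upstream
 * file): the macro pair above expands to a nested loop over every
 * trace_array and every event file within it, used roughly like this:
 *
 *	struct trace_array *tr;
 *	struct trace_event_file *file;
 *
 *	do_for_each_event_file(tr, file) {
 *		if (!(file->flags & EVENT_FILE_FL_ENABLED))
 *			continue;
 *		... act on "file" ...
 *	} while_for_each_event_file();
 *
 * As the comment above the macros warns, "break" would only leave the
 * inner loop of the expansion; use "goto" to bail out of both (see
 * trace_event_enable_cmd_record() below for a real caller).
 */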
73 static struct list_head *
74 trace_get_fields(struct trace_event_call *event_call)
75 {
76         if (!event_call->class->get_fields)
77                 return &event_call->class->fields;
78         return event_call->class->get_fields(event_call);
79 }
80
81 static struct ftrace_event_field *
82 __find_event_field(struct list_head *head, char *name)
83 {
84         struct ftrace_event_field *field;
85
86         list_for_each_entry(field, head, link) {
87                 if (!strcmp(field->name, name))
88                         return field;
89         }
90
91         return NULL;
92 }
93
94 struct ftrace_event_field *
95 trace_find_event_field(struct trace_event_call *call, char *name)
96 {
97         struct ftrace_event_field *field;
98         struct list_head *head;
99
100         head = trace_get_fields(call);
101         field = __find_event_field(head, name);
102         if (field)
103                 return field;
104
105         field = __find_event_field(&ftrace_generic_fields, name);
106         if (field)
107                 return field;
108
109         return __find_event_field(&ftrace_common_fields, name);
110 }
111
112 static int __trace_define_field(struct list_head *head, const char *type,
113                                 const char *name, int offset, int size,
114                                 int is_signed, int filter_type)
115 {
116         struct ftrace_event_field *field;
117
118         field = kmem_cache_alloc(field_cachep, GFP_TRACE);
119         if (!field)
120                 return -ENOMEM;
121
122         field->name = name;
123         field->type = type;
124
125         if (filter_type == FILTER_OTHER)
126                 field->filter_type = filter_assign_type(type);
127         else
128                 field->filter_type = filter_type;
129
130         field->offset = offset;
131         field->size = size;
132         field->is_signed = is_signed;
133
134         list_add(&field->link, head);
135
136         return 0;
137 }
138
139 int trace_define_field(struct trace_event_call *call, const char *type,
140                        const char *name, int offset, int size, int is_signed,
141                        int filter_type)
142 {
143         struct list_head *head;
144
145         if (WARN_ON(!call->class))
146                 return 0;
147
148         head = trace_get_fields(call);
149         return __trace_define_field(head, type, name, offset, size,
150                                     is_signed, filter_type);
151 }
152 EXPORT_SYMBOL_GPL(trace_define_field);
153
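/*
 * Hedged example (hypothetical, not taken from this file): an event class
 * that defines its own fields would typically call trace_define_field()
 * from its define_fields() callback, along the lines of
 *
 *	struct my_entry {
 *		struct trace_entry	ent;
 *		unsigned long		ip;
 *	};
 *
 *	ret = trace_define_field(call, "unsigned long", "ip",
 *				 offsetof(struct my_entry, ip),
 *				 sizeof(unsigned long),
 *				 is_signed_type(unsigned long), FILTER_OTHER);
 *
 * The struct and field names are made up for illustration; the
 * offsetof/sizeof/is_signed_type pattern mirrors the __common_field()
 * macro below.
 */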
154 #define __generic_field(type, item, filter_type)                        \
155         ret = __trace_define_field(&ftrace_generic_fields, #type,       \
156                                    #item, 0, 0, is_signed_type(type),   \
157                                    filter_type);                        \
158         if (ret)                                                        \
159                 return ret;
160
161 #define __common_field(type, item)                                      \
162         ret = __trace_define_field(&ftrace_common_fields, #type,        \
163                                    "common_" #item,                     \
164                                    offsetof(typeof(ent), item),         \
165                                    sizeof(ent.item),                    \
166                                    is_signed_type(type), FILTER_OTHER); \
167         if (ret)                                                        \
168                 return ret;
169
170 static int trace_define_generic_fields(void)
171 {
172         int ret;
173
174         __generic_field(int, CPU, FILTER_CPU);
175         __generic_field(int, cpu, FILTER_CPU);
176         __generic_field(char *, COMM, FILTER_COMM);
177         __generic_field(char *, comm, FILTER_COMM);
178
179         return ret;
180 }
181
182 static int trace_define_common_fields(void)
183 {
184         int ret;
185         struct trace_entry ent;
186
187         __common_field(unsigned short, type);
188         __common_field(unsigned char, flags);
189         __common_field(unsigned char, preempt_count);
190         __common_field(int, pid);
191
192         return ret;
193 }
194
195 static void trace_destroy_fields(struct trace_event_call *call)
196 {
197         struct ftrace_event_field *field, *next;
198         struct list_head *head;
199
200         head = trace_get_fields(call);
201         list_for_each_entry_safe(field, next, head, link) {
202                 list_del(&field->link);
203                 kmem_cache_free(field_cachep, field);
204         }
205 }
206
207 int trace_event_raw_init(struct trace_event_call *call)
208 {
209         int id;
210
211         id = register_trace_event(&call->event);
212         if (!id)
213                 return -ENODEV;
214
215         return 0;
216 }
217 EXPORT_SYMBOL_GPL(trace_event_raw_init);
218
219 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
220 {
221         struct trace_array *tr = trace_file->tr;
222         struct trace_array_cpu *data;
223         struct trace_pid_list *pid_list;
224
225         pid_list = rcu_dereference_sched(tr->filtered_pids);
226         if (!pid_list)
227                 return false;
228
229         data = this_cpu_ptr(tr->trace_buffer.data);
230
231         return data->ignore_pid;
232 }
233 EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
234
235 void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
236                                  struct trace_event_file *trace_file,
237                                  unsigned long len)
238 {
239         struct trace_event_call *event_call = trace_file->event_call;
240
241         if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
242             trace_event_ignore_this_pid(trace_file))
243                 return NULL;
244
245         local_save_flags(fbuffer->flags);
246         fbuffer->pc = preempt_count();
247         fbuffer->trace_file = trace_file;
248
249         fbuffer->event =
250                 trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
251                                                 event_call->event.type, len,
252                                                 fbuffer->flags, fbuffer->pc);
253         if (!fbuffer->event)
254                 return NULL;
255
256         fbuffer->entry = ring_buffer_event_data(fbuffer->event);
257         return fbuffer->entry;
258 }
259 EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
260
261 static DEFINE_SPINLOCK(tracepoint_iter_lock);
262
263 static void output_printk(struct trace_event_buffer *fbuffer)
264 {
265         struct trace_event_call *event_call;
266         struct trace_event *event;
267         unsigned long flags;
268         struct trace_iterator *iter = tracepoint_print_iter;
269
270         if (!iter)
271                 return;
272
273         event_call = fbuffer->trace_file->event_call;
274         if (!event_call || !event_call->event.funcs ||
275             !event_call->event.funcs->trace)
276                 return;
277
278         event = &fbuffer->trace_file->event_call->event;
279
280         spin_lock_irqsave(&tracepoint_iter_lock, flags);
281         trace_seq_init(&iter->seq);
282         iter->ent = fbuffer->entry;
283         event_call->event.funcs->trace(iter, 0, event);
284         trace_seq_putc(&iter->seq, 0);
285         printk("%s", iter->seq.buffer);
286
287         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
288 }
289
290 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
291 {
292         if (tracepoint_printk)
293                 output_printk(fbuffer);
294
295         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
296                                     fbuffer->event, fbuffer->entry,
297                                     fbuffer->flags, fbuffer->pc);
298 }
299 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
300
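/*
 * Illustrative sketch of the reserve/fill/commit pattern implemented by
 * the two helpers above (hypothetical probe, not code from this file):
 *
 *	struct trace_event_buffer fbuffer;
 *	struct my_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->ip = ip;		(fill in the event-specific fields)
 *	trace_event_buffer_commit(&fbuffer);
 *
 * trace_event_buffer_reserve() returns NULL either when the event is
 * filtered out by PID filtering or when the ring buffer reservation
 * fails; in both cases the probe simply drops the event.
 */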
301 int trace_event_reg(struct trace_event_call *call,
302                     enum trace_reg type, void *data)
303 {
304         struct trace_event_file *file = data;
305
306         WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
307         switch (type) {
308         case TRACE_REG_REGISTER:
309                 return tracepoint_probe_register(call->tp,
310                                                  call->class->probe,
311                                                  file);
312         case TRACE_REG_UNREGISTER:
313                 tracepoint_probe_unregister(call->tp,
314                                             call->class->probe,
315                                             file);
316                 return 0;
317
318 #ifdef CONFIG_PERF_EVENTS
319         case TRACE_REG_PERF_REGISTER:
320                 return tracepoint_probe_register(call->tp,
321                                                  call->class->perf_probe,
322                                                  call);
323         case TRACE_REG_PERF_UNREGISTER:
324                 tracepoint_probe_unregister(call->tp,
325                                             call->class->perf_probe,
326                                             call);
327                 return 0;
328         case TRACE_REG_PERF_OPEN:
329         case TRACE_REG_PERF_CLOSE:
330         case TRACE_REG_PERF_ADD:
331         case TRACE_REG_PERF_DEL:
332                 return 0;
333 #endif
334         }
335         return 0;
336 }
337 EXPORT_SYMBOL_GPL(trace_event_reg);
338
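/*
 * Note added for clarity (not in the upstream file): trace_event_reg()
 * only applies to tracepoint-backed events, hence the WARN_ON() on
 * TRACE_EVENT_FL_TRACEPOINT above. It simply maps the generic
 * TRACE_REG_* requests onto tracepoint_probe_register()/unregister(),
 * using the ftrace probe for normal tracing and the perf probe for the
 * perf cases.
 */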
339 void trace_event_enable_cmd_record(bool enable)
340 {
341         struct trace_event_file *file;
342         struct trace_array *tr;
343
344         mutex_lock(&event_mutex);
345         do_for_each_event_file(tr, file) {
346
347                 if (!(file->flags & EVENT_FILE_FL_ENABLED))
348                         continue;
349
350                 if (enable) {
351                         tracing_start_cmdline_record();
352                         set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
353                 } else {
354                         tracing_stop_cmdline_record();
355                         clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
356                 }
357         } while_for_each_event_file();
358         mutex_unlock(&event_mutex);
359 }
360
361 static int __ftrace_event_enable_disable(struct trace_event_file *file,
362                                          int enable, int soft_disable)
363 {
364         struct trace_event_call *call = file->event_call;
365         struct trace_array *tr = file->tr;
366         unsigned long file_flags = file->flags;
367         int ret = 0;
368         int disable;
369
370         switch (enable) {
371         case 0:
372                 /*
373                  * When soft_disable is set and enable is cleared, the sm_ref
374                  * reference counter is decremented. If it reaches 0, we want
375                  * to clear the SOFT_DISABLED flag but leave the event in the
376                  * state that it was. That is, if the event was enabled and
377                  * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
378                  * is set we do not want the event to be enabled before we
379                  * clear the bit.
380                  *
381                  * When soft_disable is not set but the SOFT_MODE flag is,
382                  * we do nothing. Do not disable the tracepoint, otherwise
383                  * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
384                  */
385                 if (soft_disable) {
386                         if (atomic_dec_return(&file->sm_ref) > 0)
387                                 break;
388                         disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
389                         clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
390                 } else
391                         disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
392
393                 if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
394                         clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
395                         if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
396                                 tracing_stop_cmdline_record();
397                                 clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
398                         }
399                         call->class->reg(call, TRACE_REG_UNREGISTER, file);
400                 }
401                 /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
402                 if (file->flags & EVENT_FILE_FL_SOFT_MODE)
403                         set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
404                 else
405                         clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
406                 break;
407         case 1:
408                 /*
409                  * When soft_disable is set and enable is set, we want to
410                  * register the tracepoint for the event, but leave the event
411                  * as is. That means, if the event was already enabled, we do
412                  * nothing (but set SOFT_MODE). If the event is disabled, we
413                  * set SOFT_DISABLED before enabling the event tracepoint, so
414                  * it still seems to be disabled.
415                  */
416                 if (!soft_disable)
417                         clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
418                 else {
419                         if (atomic_inc_return(&file->sm_ref) > 1)
420                                 break;
421                         set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
422                 }
423
424                 if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
425
426                         /* Keep the event disabled, when going to SOFT_MODE. */
427                         if (soft_disable)
428                                 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
429
430                         if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
431                                 tracing_start_cmdline_record();
432                                 set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
433                         }
434                         ret = call->class->reg(call, TRACE_REG_REGISTER, file);
435                         if (ret) {
436                                 tracing_stop_cmdline_record();
437                                 pr_info("event trace: Could not enable event "
438                                         "%s\n", trace_event_name(call));
439                                 break;
440                         }
441                         set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
442
443                         /* WAS_ENABLED gets set but never cleared. */
444                         call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
445                 }
446                 break;
447         }
448
449         /* Enable or disable use of trace_buffered_event */
450         if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
451             (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
452                 if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
453                         trace_buffered_event_enable();
454                 else
455                         trace_buffered_event_disable();
456         }
457
458         return ret;
459 }
460
461 int trace_event_enable_disable(struct trace_event_file *file,
462                                int enable, int soft_disable)
463 {
464         return __ftrace_event_enable_disable(file, enable, soft_disable);
465 }
466
467 static int ftrace_event_enable_disable(struct trace_event_file *file,
468                                        int enable)
469 {
470         return __ftrace_event_enable_disable(file, enable, 0);
471 }
472
473 static void ftrace_clear_events(struct trace_array *tr)
474 {
475         struct trace_event_file *file;
476
477         mutex_lock(&event_mutex);
478         list_for_each_entry(file, &tr->events, list) {
479                 ftrace_event_enable_disable(file, 0);
480         }
481         mutex_unlock(&event_mutex);
482 }
483
484 /* Shouldn't this be in a header? */
485 extern int pid_max;
486
487 /* Returns true if found in filter */
488 static bool
489 find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
490 {
491         /*
492          * If pid_max changed after filtered_pids was created, we
493          * by default ignore all pids greater than the previous pid_max.
494          */
495         if (search_pid >= filtered_pids->pid_max)
496                 return false;
497
498         return test_bit(search_pid, filtered_pids->pids);
499 }
500
501 static bool
502 ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
503 {
504         /*
505          * Return false, because if filtered_pids does not exist,
506          * all pids are good to trace.
507          */
508         if (!filtered_pids)
509                 return false;
510
511         return !find_filtered_pid(filtered_pids, task->pid);
512 }
513
514 static void filter_add_remove_task(struct trace_pid_list *pid_list,
515                                    struct task_struct *self,
516                                    struct task_struct *task)
517 {
518         if (!pid_list)
519                 return;
520
521         /* For forks, we only add if the forking task is listed */
522         if (self) {
523                 if (!find_filtered_pid(pid_list, self->pid))
524                         return;
525         }
526
527         /* Sorry, but we don't support pid_max changing after setting */
528         if (task->pid >= pid_list->pid_max)
529                 return;
530
531         /* "self" is set for forks, and NULL for exits */
532         if (self)
533                 set_bit(task->pid, pid_list->pids);
534         else
535                 clear_bit(task->pid, pid_list->pids);
536 }
537
538 static void
539 event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
540 {
541         struct trace_pid_list *pid_list;
542         struct trace_array *tr = data;
543
544         pid_list = rcu_dereference_sched(tr->filtered_pids);
545         filter_add_remove_task(pid_list, NULL, task);
546 }
547
548 static void
549 event_filter_pid_sched_process_fork(void *data,
550                                     struct task_struct *self,
551                                     struct task_struct *task)
552 {
553         struct trace_pid_list *pid_list;
554         struct trace_array *tr = data;
555
556         pid_list = rcu_dereference_sched(tr->filtered_pids);
557         filter_add_remove_task(pid_list, self, task);
558 }
559
560 void trace_event_follow_fork(struct trace_array *tr, bool enable)
561 {
562         if (enable) {
563                 register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
564                                                        tr, INT_MIN);
565                 register_trace_prio_sched_process_exit(event_filter_pid_sched_process_exit,
566                                                        tr, INT_MAX);
567         } else {
568                 unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
569                                                     tr);
570                 unregister_trace_sched_process_exit(event_filter_pid_sched_process_exit,
571                                                     tr);
572         }
573 }
574
575 static void
576 event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
577                     struct task_struct *prev, struct task_struct *next)
578 {
579         struct trace_array *tr = data;
580         struct trace_pid_list *pid_list;
581
582         pid_list = rcu_dereference_sched(tr->filtered_pids);
583
584         this_cpu_write(tr->trace_buffer.data->ignore_pid,
585                        ignore_this_task(pid_list, prev) &&
586                        ignore_this_task(pid_list, next));
587 }
588
589 static void
590 event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
591                     struct task_struct *prev, struct task_struct *next)
592 {
593         struct trace_array *tr = data;
594         struct trace_pid_list *pid_list;
595
596         pid_list = rcu_dereference_sched(tr->filtered_pids);
597
598         this_cpu_write(tr->trace_buffer.data->ignore_pid,
599                        ignore_this_task(pid_list, next));
600 }
601
602 static void
603 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
604 {
605         struct trace_array *tr = data;
606         struct trace_pid_list *pid_list;
607
608         /* Nothing to do if we are already tracing */
609         if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
610                 return;
611
612         pid_list = rcu_dereference_sched(tr->filtered_pids);
613
614         this_cpu_write(tr->trace_buffer.data->ignore_pid,
615                        ignore_this_task(pid_list, task));
616 }
617
618 static void
619 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
620 {
621         struct trace_array *tr = data;
622         struct trace_pid_list *pid_list;
623
624         /* Nothing to do if we are not tracing */
625         if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
626                 return;
627
628         pid_list = rcu_dereference_sched(tr->filtered_pids);
629
630         /* Set tracing if current is enabled */
631         this_cpu_write(tr->trace_buffer.data->ignore_pid,
632                        ignore_this_task(pid_list, current));
633 }
634
635 static void __ftrace_clear_event_pids(struct trace_array *tr)
636 {
637         struct trace_pid_list *pid_list;
638         struct trace_event_file *file;
639         int cpu;
640
641         pid_list = rcu_dereference_protected(tr->filtered_pids,
642                                              lockdep_is_held(&event_mutex));
643         if (!pid_list)
644                 return;
645
646         unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
647         unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
648
649         unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
650         unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
651
652         unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
653         unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
654
655         unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
656         unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
657
658         list_for_each_entry(file, &tr->events, list) {
659                 clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
660         }
661
662         for_each_possible_cpu(cpu)
663                 per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
664
665         rcu_assign_pointer(tr->filtered_pids, NULL);
666
667         /* Wait till all users are no longer using pid filtering */
668         synchronize_sched();
669
670         vfree(pid_list->pids);
671         kfree(pid_list);
672 }
673
674 static void ftrace_clear_event_pids(struct trace_array *tr)
675 {
676         mutex_lock(&event_mutex);
677         __ftrace_clear_event_pids(tr);
678         mutex_unlock(&event_mutex);
679 }
680
681 static void __put_system(struct event_subsystem *system)
682 {
683         struct event_filter *filter = system->filter;
684
685         WARN_ON_ONCE(system_refcount(system) == 0);
686         if (system_refcount_dec(system))
687                 return;
688
689         list_del(&system->list);
690
691         if (filter) {
692                 kfree(filter->filter_string);
693                 kfree(filter);
694         }
695         kfree_const(system->name);
696         kfree(system);
697 }
698
699 static void __get_system(struct event_subsystem *system)
700 {
701         WARN_ON_ONCE(system_refcount(system) == 0);
702         system_refcount_inc(system);
703 }
704
705 static void __get_system_dir(struct trace_subsystem_dir *dir)
706 {
707         WARN_ON_ONCE(dir->ref_count == 0);
708         dir->ref_count++;
709         __get_system(dir->subsystem);
710 }
711
712 static void __put_system_dir(struct trace_subsystem_dir *dir)
713 {
714         WARN_ON_ONCE(dir->ref_count == 0);
715         /* If the subsystem is about to be freed, the dir must be too */
716         WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
717
718         __put_system(dir->subsystem);
719         if (!--dir->ref_count)
720                 kfree(dir);
721 }
722
723 static void put_system(struct trace_subsystem_dir *dir)
724 {
725         mutex_lock(&event_mutex);
726         __put_system_dir(dir);
727         mutex_unlock(&event_mutex);
728 }
729
730 static void remove_subsystem(struct trace_subsystem_dir *dir)
731 {
732         if (!dir)
733                 return;
734
735         if (!--dir->nr_events) {
736                 tracefs_remove_recursive(dir->entry);
737                 list_del(&dir->list);
738                 __put_system_dir(dir);
739         }
740 }
741
742 static void remove_event_file_dir(struct trace_event_file *file)
743 {
744         struct dentry *dir = file->dir;
745         struct dentry *child;
746
747         if (dir) {
748                 spin_lock(&dir->d_lock);        /* probably unneeded */
749                 list_for_each_entry(child, &dir->d_subdirs, d_child) {
750                         if (d_really_is_positive(child))        /* probably unneeded */
751                                 d_inode(child)->i_private = NULL;
752                 }
753                 spin_unlock(&dir->d_lock);
754
755                 tracefs_remove_recursive(dir);
756         }
757
758         list_del(&file->list);
759         remove_subsystem(file->system);
760         free_event_filter(file->filter);
761         kmem_cache_free(file_cachep, file);
762 }
763
764 /*
765  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
766  */
767 static int
768 __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
769                               const char *sub, const char *event, int set)
770 {
771         struct trace_event_file *file;
772         struct trace_event_call *call;
773         const char *name;
774         int ret = -EINVAL;
775
776         list_for_each_entry(file, &tr->events, list) {
777
778                 call = file->event_call;
779                 name = trace_event_name(call);
780
781                 if (!name || !call->class || !call->class->reg)
782                         continue;
783
784                 if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
785                         continue;
786
787                 if (match &&
788                     strcmp(match, name) != 0 &&
789                     strcmp(match, call->class->system) != 0)
790                         continue;
791
792                 if (sub && strcmp(sub, call->class->system) != 0)
793                         continue;
794
795                 if (event && strcmp(event, name) != 0)
796                         continue;
797
798                 ftrace_event_enable_disable(file, set);
799
800                 ret = 0;
801         }
802
803         return ret;
804 }
805
806 static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
807                                   const char *sub, const char *event, int set)
808 {
809         int ret;
810
811         mutex_lock(&event_mutex);
812         ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
813         mutex_unlock(&event_mutex);
814
815         return ret;
816 }
817
818 static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
819 {
820         char *event = NULL, *sub = NULL, *match;
821         int ret;
822
823         /*
824          * The buf format can be <subsystem>:<event-name>
825          *  *:<event-name> means any event by that name.
826          *  :<event-name> is the same.
827          *
828          *  <subsystem>:* means all events in that subsystem
829          *  <subsystem>: means the same.
830          *
831          *  <name> (no ':') means all events in a subsystem with
832          *  the name <name> or any event that matches <name>
833          */
834
835         match = strsep(&buf, ":");
836         if (buf) {
837                 sub = match;
838                 event = buf;
839                 match = NULL;
840
841                 if (!strlen(sub) || strcmp(sub, "*") == 0)
842                         sub = NULL;
843                 if (!strlen(event) || strcmp(event, "*") == 0)
844                         event = NULL;
845         }
846
847         ret = __ftrace_set_clr_event(tr, match, sub, event, set);
848
849         /* Put back the colon to allow this to be called again */
850         if (buf)
851                 *(buf - 1) = ':';
852
853         return ret;
854 }
855
856 /**
857  * trace_set_clr_event - enable or disable an event
858  * @system: system name to match (NULL for any system)
859  * @event: event name to match (NULL for all events, within system)
860  * @set: 1 to enable, 0 to disable
861  *
862  * This is a way for other parts of the kernel to enable or disable
863  * event recording.
864  *
865  * Returns 0 on success, -EINVAL if the parameters do not match any
866  * registered events.
867  */
868 int trace_set_clr_event(const char *system, const char *event, int set)
869 {
870         struct trace_array *tr = top_trace_array();
871
872         if (!tr)
873                 return -ENODEV;
874
875         return __ftrace_set_clr_event(tr, NULL, system, event, set);
876 }
877 EXPORT_SYMBOL_GPL(trace_set_clr_event);
878
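/*
 * Hedged usage sketch (not from this file): other kernel code can flip a
 * single event with, for example,
 *
 *	ret = trace_set_clr_event("sched", "sched_switch", 1);
 *
 * or disable every event in a subsystem by passing a NULL event name:
 *
 *	ret = trace_set_clr_event("sched", NULL, 0);
 *
 * Both calls return -EINVAL if nothing matched, as the kernel-doc above
 * notes.
 */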
879 /* 128 should be much more than enough */
880 #define EVENT_BUF_SIZE          127
881
882 static ssize_t
883 ftrace_event_write(struct file *file, const char __user *ubuf,
884                    size_t cnt, loff_t *ppos)
885 {
886         struct trace_parser parser;
887         struct seq_file *m = file->private_data;
888         struct trace_array *tr = m->private;
889         ssize_t read, ret;
890
891         if (!cnt)
892                 return 0;
893
894         ret = tracing_update_buffers();
895         if (ret < 0)
896                 return ret;
897
898         if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
899                 return -ENOMEM;
900
901         read = trace_get_user(&parser, ubuf, cnt, ppos);
902
903         if (read >= 0 && trace_parser_loaded((&parser))) {
904                 int set = 1;
905
906                 if (*parser.buffer == '!')
907                         set = 0;
908
909                 parser.buffer[parser.idx] = 0;
910
911                 ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
912                 if (ret)
913                         goto out_put;
914         }
915
916         ret = read;
917
918  out_put:
919         trace_parser_put(&parser);
920
921         return ret;
922 }
923
924 static void *
925 t_next(struct seq_file *m, void *v, loff_t *pos)
926 {
927         struct trace_event_file *file = v;
928         struct trace_event_call *call;
929         struct trace_array *tr = m->private;
930
931         (*pos)++;
932
933         list_for_each_entry_continue(file, &tr->events, list) {
934                 call = file->event_call;
935                 /*
936                  * The ftrace subsystem is for showing formats only.
937                  * They can not be enabled or disabled via the event files.
938                  */
939                 if (call->class && call->class->reg &&
940                     !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
941                         return file;
942         }
943
944         return NULL;
945 }
946
947 static void *t_start(struct seq_file *m, loff_t *pos)
948 {
949         struct trace_event_file *file;
950         struct trace_array *tr = m->private;
951         loff_t l;
952
953         mutex_lock(&event_mutex);
954
955         file = list_entry(&tr->events, struct trace_event_file, list);
956         for (l = 0; l <= *pos; ) {
957                 file = t_next(m, file, &l);
958                 if (!file)
959                         break;
960         }
961         return file;
962 }
963
964 static void *
965 s_next(struct seq_file *m, void *v, loff_t *pos)
966 {
967         struct trace_event_file *file = v;
968         struct trace_array *tr = m->private;
969
970         (*pos)++;
971
972         list_for_each_entry_continue(file, &tr->events, list) {
973                 if (file->flags & EVENT_FILE_FL_ENABLED)
974                         return file;
975         }
976
977         return NULL;
978 }
979
980 static void *s_start(struct seq_file *m, loff_t *pos)
981 {
982         struct trace_event_file *file;
983         struct trace_array *tr = m->private;
984         loff_t l;
985
986         mutex_lock(&event_mutex);
987
988         file = list_entry(&tr->events, struct trace_event_file, list);
989         for (l = 0; l <= *pos; ) {
990                 file = s_next(m, file, &l);
991                 if (!file)
992                         break;
993         }
994         return file;
995 }
996
997 static int t_show(struct seq_file *m, void *v)
998 {
999         struct trace_event_file *file = v;
1000         struct trace_event_call *call = file->event_call;
1001
1002         if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
1003                 seq_printf(m, "%s:", call->class->system);
1004         seq_printf(m, "%s\n", trace_event_name(call));
1005
1006         return 0;
1007 }
1008
1009 static void t_stop(struct seq_file *m, void *p)
1010 {
1011         mutex_unlock(&event_mutex);
1012 }
1013
1014 static void *
1015 p_next(struct seq_file *m, void *v, loff_t *pos)
1016 {
1017         struct trace_array *tr = m->private;
1018         struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
1019         unsigned long pid = (unsigned long)v;
1020
1021         (*pos)++;
1022
1023                 /* pid already is +1 of the actual previous bit */
1024         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
1025
1026         /* Return pid + 1 to allow zero to be represented */
1027         if (pid < pid_list->pid_max)
1028                 return (void *)(pid + 1);
1029
1030         return NULL;
1031 }
1032
1033 static void *p_start(struct seq_file *m, loff_t *pos)
1034         __acquires(RCU)
1035 {
1036         struct trace_pid_list *pid_list;
1037         struct trace_array *tr = m->private;
1038         unsigned long pid;
1039         loff_t l = 0;
1040
1041         /*
1042          * Grab the mutex, to keep calls to p_next() having the same
1043          * tr->filtered_pids as p_start() has.
1044          * If we just passed the tr->filtered_pids around, then RCU would
1045          * have been enough, but doing that makes things more complex.
1046          */
1047         mutex_lock(&event_mutex);
1048         rcu_read_lock_sched();
1049
1050         pid_list = rcu_dereference_sched(tr->filtered_pids);
1051
1052         if (!pid_list)
1053                 return NULL;
1054
1055         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
1056         if (pid >= pid_list->pid_max)
1057                 return NULL;
1058
1059         /* Return pid + 1 so that zero can be the exit value */
1060         for (pid++; pid && l < *pos;
1061              pid = (unsigned long)p_next(m, (void *)pid, &l))
1062                 ;
1063         return (void *)pid;
1064 }
1065
1066 static void p_stop(struct seq_file *m, void *p)
1067         __releases(RCU)
1068 {
1069         rcu_read_unlock_sched();
1070         mutex_unlock(&event_mutex);
1071 }
1072
1073 static int p_show(struct seq_file *m, void *v)
1074 {
1075         unsigned long pid = (unsigned long)v - 1;
1076
1077         seq_printf(m, "%lu\n", pid);
1078         return 0;
1079 }
1080
1081 static ssize_t
1082 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1083                   loff_t *ppos)
1084 {
1085         struct trace_event_file *file;
1086         unsigned long flags;
1087         char buf[4] = "0";
1088
1089         mutex_lock(&event_mutex);
1090         file = event_file_data(filp);
1091         if (likely(file))
1092                 flags = file->flags;
1093         mutex_unlock(&event_mutex);
1094
1095         if (!file)
1096                 return -ENODEV;
1097
1098         if (flags & EVENT_FILE_FL_ENABLED &&
1099             !(flags & EVENT_FILE_FL_SOFT_DISABLED))
1100                 strcpy(buf, "1");
1101
1102         if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
1103             flags & EVENT_FILE_FL_SOFT_MODE)
1104                 strcat(buf, "*");
1105
1106         strcat(buf, "\n");
1107
1108         return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
1109 }
1110
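/*
 * Added note: given the logic above, the per-event "enable" file reads
 * back "0" or "1", with a trailing "*" when the event is in soft mode or
 * soft-disabled (e.g. "0*\n"), which is why buf[] is sized to hold at
 * most four bytes.
 */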
1111 static ssize_t
1112 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1113                    loff_t *ppos)
1114 {
1115         struct trace_event_file *file;
1116         unsigned long val;
1117         int ret;
1118
1119         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1120         if (ret)
1121                 return ret;
1122
1123         ret = tracing_update_buffers();
1124         if (ret < 0)
1125                 return ret;
1126
1127         switch (val) {
1128         case 0:
1129         case 1:
1130                 ret = -ENODEV;
1131                 mutex_lock(&event_mutex);
1132                 file = event_file_data(filp);
1133                 if (likely(file))
1134                         ret = ftrace_event_enable_disable(file, val);
1135                 mutex_unlock(&event_mutex);
1136                 break;
1137
1138         default:
1139                 return -EINVAL;
1140         }
1141
1142         *ppos += cnt;
1143
1144         return ret ? ret : cnt;
1145 }
1146
1147 static ssize_t
1148 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1149                    loff_t *ppos)
1150 {
1151         const char set_to_char[4] = { '?', '0', '1', 'X' };
1152         struct trace_subsystem_dir *dir = filp->private_data;
1153         struct event_subsystem *system = dir->subsystem;
1154         struct trace_event_call *call;
1155         struct trace_event_file *file;
1156         struct trace_array *tr = dir->tr;
1157         char buf[2];
1158         int set = 0;
1159         int ret;
1160
1161         mutex_lock(&event_mutex);
1162         list_for_each_entry(file, &tr->events, list) {
1163                 call = file->event_call;
1164                 if (!trace_event_name(call) || !call->class || !call->class->reg)
1165                         continue;
1166
1167                 if (system && strcmp(call->class->system, system->name) != 0)
1168                         continue;
1169
1170                 /*
1171                  * We need to find out if all the events are set
1172                  * or if all events are cleared, or if we have
1173                  * a mixture.
1174                  */
1175                 set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
1176
1177                 /*
1178                  * If we have a mixture, no need to look further.
1179                  */
1180                 if (set == 3)
1181                         break;
1182         }
1183         mutex_unlock(&event_mutex);
1184
1185         buf[0] = set_to_char[set];
1186         buf[1] = '\n';
1187
1188         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
1189
1190         return ret;
1191 }
1192
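/*
 * Added note: a subsystem "enable" file therefore reads back "0" when
 * every matching event is disabled, "1" when every matching event is
 * enabled, "X" when they are mixed (set == 3 above), and "?" only if no
 * events matched at all.
 */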
1193 static ssize_t
1194 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1195                     loff_t *ppos)
1196 {
1197         struct trace_subsystem_dir *dir = filp->private_data;
1198         struct event_subsystem *system = dir->subsystem;
1199         const char *name = NULL;
1200         unsigned long val;
1201         ssize_t ret;
1202
1203         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1204         if (ret)
1205                 return ret;
1206
1207         ret = tracing_update_buffers();
1208         if (ret < 0)
1209                 return ret;
1210
1211         if (val != 0 && val != 1)
1212                 return -EINVAL;
1213
1214         /*
1215          * Opening of "enable" adds a ref count to system,
1216          * so the name is safe to use.
1217          */
1218         if (system)
1219                 name = system->name;
1220
1221         ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
1222         if (ret)
1223                 goto out;
1224
1225         ret = cnt;
1226
1227 out:
1228         *ppos += cnt;
1229
1230         return ret;
1231 }
1232
1233 enum {
1234         FORMAT_HEADER           = 1,
1235         FORMAT_FIELD_SEPERATOR  = 2,
1236         FORMAT_PRINTFMT         = 3,
1237 };
1238
1239 static void *f_next(struct seq_file *m, void *v, loff_t *pos)
1240 {
1241         struct trace_event_call *call = event_file_data(m->private);
1242         struct list_head *common_head = &ftrace_common_fields;
1243         struct list_head *head = trace_get_fields(call);
1244         struct list_head *node = v;
1245
1246         (*pos)++;
1247
1248         switch ((unsigned long)v) {
1249         case FORMAT_HEADER:
1250                 node = common_head;
1251                 break;
1252
1253         case FORMAT_FIELD_SEPERATOR:
1254                 node = head;
1255                 break;
1256
1257         case FORMAT_PRINTFMT:
1258                 /* all done */
1259                 return NULL;
1260         }
1261
1262         node = node->prev;
1263         if (node == common_head)
1264                 return (void *)FORMAT_FIELD_SEPERATOR;
1265         else if (node == head)
1266                 return (void *)FORMAT_PRINTFMT;
1267         else
1268                 return node;
1269 }
1270
1271 static int f_show(struct seq_file *m, void *v)
1272 {
1273         struct trace_event_call *call = event_file_data(m->private);
1274         struct ftrace_event_field *field;
1275         const char *array_descriptor;
1276
1277         switch ((unsigned long)v) {
1278         case FORMAT_HEADER:
1279                 seq_printf(m, "name: %s\n", trace_event_name(call));
1280                 seq_printf(m, "ID: %d\n", call->event.type);
1281                 seq_puts(m, "format:\n");
1282                 return 0;
1283
1284         case FORMAT_FIELD_SEPERATOR:
1285                 seq_putc(m, '\n');
1286                 return 0;
1287
1288         case FORMAT_PRINTFMT:
1289                 seq_printf(m, "\nprint fmt: %s\n",
1290                            call->print_fmt);
1291                 return 0;
1292         }
1293
1294         field = list_entry(v, struct ftrace_event_field, link);
1295         /*
1296          * Smartly shows the array type (except dynamic array).
1297          * Normal:
1298          *      field:TYPE VAR
1299          * If TYPE := TYPE[LEN], it is shown:
1300          *      field:TYPE VAR[LEN]
1301          */
1302         array_descriptor = strchr(field->type, '[');
1303
1304         if (!strncmp(field->type, "__data_loc", 10))
1305                 array_descriptor = NULL;
1306
1307         if (!array_descriptor)
1308                 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1309                            field->type, field->name, field->offset,
1310                            field->size, !!field->is_signed);
1311         else
1312                 seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1313                            (int)(array_descriptor - field->type),
1314                            field->type, field->name,
1315                            array_descriptor, field->offset,
1316                            field->size, !!field->is_signed);
1317
1318         return 0;
1319 }
1320
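/*
 * Illustration (reconstructed from the seq_printf() calls above, not
 * captured from a running system): a per-event "format" file rendered by
 * f_show() looks roughly like
 *
 *	name: <event name>
 *	ID: <event type id>
 *	format:
 *		field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *		(remaining common fields, a blank separator line, then the
 *		 event's own fields)
 *
 *	print fmt: <print format string>
 *
 * Array fields are printed as "TYPE VAR[LEN]" rather than "TYPE[LEN] VAR",
 * which is what the array_descriptor handling in f_show() above does.
 */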
1321 static void *f_start(struct seq_file *m, loff_t *pos)
1322 {
1323         void *p = (void *)FORMAT_HEADER;
1324         loff_t l = 0;
1325
1326         /* ->stop() is called even if ->start() fails */
1327         mutex_lock(&event_mutex);
1328         if (!event_file_data(m->private))
1329                 return ERR_PTR(-ENODEV);
1330
1331         while (l < *pos && p)
1332                 p = f_next(m, p, &l);
1333
1334         return p;
1335 }
1336
1337 static void f_stop(struct seq_file *m, void *p)
1338 {
1339         mutex_unlock(&event_mutex);
1340 }
1341
1342 static const struct seq_operations trace_format_seq_ops = {
1343         .start          = f_start,
1344         .next           = f_next,
1345         .stop           = f_stop,
1346         .show           = f_show,
1347 };
1348
1349 static int trace_format_open(struct inode *inode, struct file *file)
1350 {
1351         struct seq_file *m;
1352         int ret;
1353
1354         ret = seq_open(file, &trace_format_seq_ops);
1355         if (ret < 0)
1356                 return ret;
1357
1358         m = file->private_data;
1359         m->private = file;
1360
1361         return 0;
1362 }
1363
1364 static ssize_t
1365 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1366 {
1367         int id = (long)event_file_data(filp);
1368         char buf[32];
1369         int len;
1370
1371         if (*ppos)
1372                 return 0;
1373
1374         if (unlikely(!id))
1375                 return -ENODEV;
1376
1377         len = sprintf(buf, "%d\n", id);
1378
1379         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
1380 }
1381
1382 static ssize_t
1383 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1384                   loff_t *ppos)
1385 {
1386         struct trace_event_file *file;
1387         struct trace_seq *s;
1388         int r = -ENODEV;
1389
1390         if (*ppos)
1391                 return 0;
1392
1393         s = kmalloc(sizeof(*s), GFP_KERNEL);
1394
1395         if (!s)
1396                 return -ENOMEM;
1397
1398         trace_seq_init(s);
1399
1400         mutex_lock(&event_mutex);
1401         file = event_file_data(filp);
1402         if (file)
1403                 print_event_filter(file, s);
1404         mutex_unlock(&event_mutex);
1405
1406         if (file)
1407                 r = simple_read_from_buffer(ubuf, cnt, ppos,
1408                                             s->buffer, trace_seq_used(s));
1409
1410         kfree(s);
1411
1412         return r;
1413 }
1414
1415 static ssize_t
1416 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1417                    loff_t *ppos)
1418 {
1419         struct trace_event_file *file;
1420         char *buf;
1421         int err = -ENODEV;
1422
1423         if (cnt >= PAGE_SIZE)
1424                 return -EINVAL;
1425
1426         buf = memdup_user_nul(ubuf, cnt);
1427         if (IS_ERR(buf))
1428                 return PTR_ERR(buf);
1429
1430         mutex_lock(&event_mutex);
1431         file = event_file_data(filp);
1432         if (file)
1433                 err = apply_event_filter(file, buf);
1434         mutex_unlock(&event_mutex);
1435
1436         kfree(buf);
1437         if (err < 0)
1438                 return err;
1439
1440         *ppos += cnt;
1441
1442         return cnt;
1443 }
1444
1445 static LIST_HEAD(event_subsystems);
1446
1447 static int subsystem_open(struct inode *inode, struct file *filp)
1448 {
1449         struct event_subsystem *system = NULL;
1450         struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
1451         struct trace_array *tr;
1452         int ret;
1453
1454         if (tracing_is_disabled())
1455                 return -ENODEV;
1456
1457         /* Make sure the system still exists */
1458         mutex_lock(&trace_types_lock);
1459         mutex_lock(&event_mutex);
1460         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1461                 list_for_each_entry(dir, &tr->systems, list) {
1462                         if (dir == inode->i_private) {
1463                                 /* Don't open systems with no events */
1464                                 if (dir->nr_events) {
1465                                         __get_system_dir(dir);
1466                                         system = dir->subsystem;
1467                                 }
1468                                 goto exit_loop;
1469                         }
1470                 }
1471         }
1472  exit_loop:
1473         mutex_unlock(&event_mutex);
1474         mutex_unlock(&trace_types_lock);
1475
1476         if (!system)
1477                 return -ENODEV;
1478
1479         /* Some versions of gcc think dir can be uninitialized here */
1480         WARN_ON(!dir);
1481
1482         /* Still need to increment the ref count of the system */
1483         if (trace_array_get(tr) < 0) {
1484                 put_system(dir);
1485                 return -ENODEV;
1486         }
1487
1488         ret = tracing_open_generic(inode, filp);
1489         if (ret < 0) {
1490                 trace_array_put(tr);
1491                 put_system(dir);
1492         }
1493
1494         return ret;
1495 }
1496
1497 static int system_tr_open(struct inode *inode, struct file *filp)
1498 {
1499         struct trace_subsystem_dir *dir;
1500         struct trace_array *tr = inode->i_private;
1501         int ret;
1502
1503         if (tracing_is_disabled())
1504                 return -ENODEV;
1505
1506         if (trace_array_get(tr) < 0)
1507                 return -ENODEV;
1508
1509         /* Make a temporary dir that has no system but points to tr */
1510         dir = kzalloc(sizeof(*dir), GFP_KERNEL);
1511         if (!dir) {
1512                 trace_array_put(tr);
1513                 return -ENOMEM;
1514         }
1515
1516         dir->tr = tr;
1517
1518         ret = tracing_open_generic(inode, filp);
1519         if (ret < 0) {
1520                 trace_array_put(tr);
1521                 kfree(dir);
1522                 return ret;
1523         }
1524
1525         filp->private_data = dir;
1526
1527         return 0;
1528 }
1529
1530 static int subsystem_release(struct inode *inode, struct file *file)
1531 {
1532         struct trace_subsystem_dir *dir = file->private_data;
1533
1534         trace_array_put(dir->tr);
1535
1536         /*
1537          * If dir->subsystem is NULL, then this is a temporary
1538          * descriptor that was made for a trace_array to enable
1539          * all subsystems.
1540          */
1541         if (dir->subsystem)
1542                 put_system(dir);
1543         else
1544                 kfree(dir);
1545
1546         return 0;
1547 }
1548
1549 static ssize_t
1550 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1551                       loff_t *ppos)
1552 {
1553         struct trace_subsystem_dir *dir = filp->private_data;
1554         struct event_subsystem *system = dir->subsystem;
1555         struct trace_seq *s;
1556         int r;
1557
1558         if (*ppos)
1559                 return 0;
1560
1561         s = kmalloc(sizeof(*s), GFP_KERNEL);
1562         if (!s)
1563                 return -ENOMEM;
1564
1565         trace_seq_init(s);
1566
1567         print_subsystem_event_filter(system, s);
1568         r = simple_read_from_buffer(ubuf, cnt, ppos,
1569                                     s->buffer, trace_seq_used(s));
1570
1571         kfree(s);
1572
1573         return r;
1574 }
1575
1576 static ssize_t
1577 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1578                        loff_t *ppos)
1579 {
1580         struct trace_subsystem_dir *dir = filp->private_data;
1581         char *buf;
1582         int err;
1583
1584         if (cnt >= PAGE_SIZE)
1585                 return -EINVAL;
1586
1587         buf = memdup_user_nul(ubuf, cnt);
1588         if (IS_ERR(buf))
1589                 return PTR_ERR(buf);
1590
1591         err = apply_subsystem_event_filter(dir, buf);
1592         kfree(buf);
1593         if (err < 0)
1594                 return err;
1595
1596         *ppos += cnt;
1597
1598         return cnt;
1599 }
1600
1601 static ssize_t
1602 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1603 {
1604         int (*func)(struct trace_seq *s) = filp->private_data;
1605         struct trace_seq *s;
1606         int r;
1607
1608         if (*ppos)
1609                 return 0;
1610
1611         s = kmalloc(sizeof(*s), GFP_KERNEL);
1612         if (!s)
1613                 return -ENOMEM;
1614
1615         trace_seq_init(s);
1616
1617         func(s);
1618         r = simple_read_from_buffer(ubuf, cnt, ppos,
1619                                     s->buffer, trace_seq_used(s));
1620
1621         kfree(s);
1622
1623         return r;
1624 }
1625
1626 static void ignore_task_cpu(void *data)
1627 {
1628         struct trace_array *tr = data;
1629         struct trace_pid_list *pid_list;
1630
1631         /*
1632          * This function is called by on_each_cpu() while the
1633          * event_mutex is held.
1634          */
1635         pid_list = rcu_dereference_protected(tr->filtered_pids,
1636                                              mutex_is_locked(&event_mutex));
1637
1638         this_cpu_write(tr->trace_buffer.data->ignore_pid,
1639                        ignore_this_task(pid_list, current));
1640 }
1641
1642 static ssize_t
1643 ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
1644                        size_t cnt, loff_t *ppos)
1645 {
1646         struct seq_file *m = filp->private_data;
1647         struct trace_array *tr = m->private;
1648         struct trace_pid_list *filtered_pids = NULL;
1649         struct trace_pid_list *pid_list;
1650         struct trace_event_file *file;
1651         struct trace_parser parser;
1652         unsigned long val;
1653         loff_t this_pos;
1654         ssize_t read = 0;
1655         ssize_t ret = 0;
1656         pid_t pid;
1657         int nr_pids = 0;
1658
1659         if (!cnt)
1660                 return 0;
1661
1662         ret = tracing_update_buffers();
1663         if (ret < 0)
1664                 return ret;
1665
1666         if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
1667                 return -ENOMEM;
1668
1669         mutex_lock(&event_mutex);
1670         filtered_pids = rcu_dereference_protected(tr->filtered_pids,
1671                                              lockdep_is_held(&event_mutex));
1672
1673         /*
1674          * Always create a new array. The write is an all-or-nothing
1675          * operation: a new array is built for the pids written by the
1676          * user and only swapped in on success. If the operation fails,
1677          * the current list is not modified.
1678          */
1679         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
1680         if (!pid_list) {
1681                 read = -ENOMEM;
1682                 goto out;
1683         }
1684         pid_list->pid_max = READ_ONCE(pid_max);
1685         /* Only truncating will shrink pid_max */
1686         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
1687                 pid_list->pid_max = filtered_pids->pid_max;
1688         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
1689         if (!pid_list->pids) {
1690                 kfree(pid_list);
1691                 read = -ENOMEM;
1692                 goto out;
1693         }
1694         if (filtered_pids) {
1695                 /* copy the current bits to the new max */
1696                 pid = find_first_bit(filtered_pids->pids,
1697                                      filtered_pids->pid_max);
1698                 while (pid < filtered_pids->pid_max) {
1699                         set_bit(pid, pid_list->pids);
1700                         pid = find_next_bit(filtered_pids->pids,
1701                                             filtered_pids->pid_max,
1702                                             pid + 1);
1703                         nr_pids++;
1704                 }
1705         }
1706
1707         while (cnt > 0) {
1708
1709                 this_pos = 0;
1710
1711                 ret = trace_get_user(&parser, ubuf, cnt, &this_pos);
1712                 if (ret < 0 || !trace_parser_loaded(&parser))
1713                         break;
1714
1715                 read += ret;
1716                 ubuf += ret;
1717                 cnt -= ret;
1718
1719                 parser.buffer[parser.idx] = 0;
1720
1721                 ret = -EINVAL;
1722                 if (kstrtoul(parser.buffer, 0, &val))
1723                         break;
1724                 if (val >= pid_list->pid_max)
1725                         break;
1726
1727                 pid = (pid_t)val;
1728
1729                 set_bit(pid, pid_list->pids);
1730                 nr_pids++;
1731
1732                 trace_parser_clear(&parser);
1733                 ret = 0;
1734         }
1735         trace_parser_put(&parser);
1736
1737         if (ret < 0) {
1738                 vfree(pid_list->pids);
1739                 kfree(pid_list);
1740                 read = ret;
1741                 goto out;
1742         }
1743
1744         if (!nr_pids) {
1745                 /* Cleared the list of pids */
1746                 vfree(pid_list->pids);
1747                 kfree(pid_list);
1748                 read = ret;
1749                 if (!filtered_pids)
1750                         goto out;
1751                 pid_list = NULL;
1752         }
1753         rcu_assign_pointer(tr->filtered_pids, pid_list);
1754
1755         list_for_each_entry(file, &tr->events, list) {
1756                 set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
1757         }
1758
1759         if (filtered_pids) {
1760                 synchronize_sched();
1761
1762                 vfree(filtered_pids->pids);
1763                 kfree(filtered_pids);
1764         } else {
1765                 /*
1766                  * Register a probe that is called before all other probes
1767                  * to set ignore_pid if next or prev do not match.
1768                  * Register a probe that is called after all other probes
1769                  * to only keep ignore_pid set if next pid matches.
1770                  */
1771                 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
1772                                                  tr, INT_MAX);
1773                 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
1774                                                  tr, 0);
1775
1776                 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
1777                                                  tr, INT_MAX);
1778                 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
1779                                                  tr, 0);
1780
1781                 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
1782                                                      tr, INT_MAX);
1783                 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
1784                                                      tr, 0);
1785
1786                 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
1787                                                  tr, INT_MAX);
1788                 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
1789                                                  tr, 0);
1790         }
1791
1792         /*
1793          * Ignoring of pids is done at task switch. But we have to
1794          * check for those tasks that are currently running.
1795          * Always do this in case a pid was appended or removed.
1796          */
1797         on_each_cpu(ignore_task_cpu, tr, 1);
1798
1799  out:
1800         mutex_unlock(&event_mutex);
1801
1802         ret = read;
1803         if (read > 0)
1804                 *ppos += read;
1805
1806         return ret;
1807 }
1808
1809 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
1810 static int ftrace_event_set_open(struct inode *inode, struct file *file);
1811 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
1812 static int ftrace_event_release(struct inode *inode, struct file *file);
1813
1814 static const struct seq_operations show_event_seq_ops = {
1815         .start = t_start,
1816         .next = t_next,
1817         .show = t_show,
1818         .stop = t_stop,
1819 };
1820
1821 static const struct seq_operations show_set_event_seq_ops = {
1822         .start = s_start,
1823         .next = s_next,
1824         .show = t_show,
1825         .stop = t_stop,
1826 };
1827
1828 static const struct seq_operations show_set_pid_seq_ops = {
1829         .start = p_start,
1830         .next = p_next,
1831         .show = p_show,
1832         .stop = p_stop,
1833 };
1834
1835 static const struct file_operations ftrace_avail_fops = {
1836         .open = ftrace_event_avail_open,
1837         .read = seq_read,
1838         .llseek = seq_lseek,
1839         .release = seq_release,
1840 };
1841
1842 static const struct file_operations ftrace_set_event_fops = {
1843         .open = ftrace_event_set_open,
1844         .read = seq_read,
1845         .write = ftrace_event_write,
1846         .llseek = seq_lseek,
1847         .release = ftrace_event_release,
1848 };
1849
1850 static const struct file_operations ftrace_set_event_pid_fops = {
1851         .open = ftrace_event_set_pid_open,
1852         .read = seq_read,
1853         .write = ftrace_event_pid_write,
1854         .llseek = seq_lseek,
1855         .release = ftrace_event_release,
1856 };
1857
1858 static const struct file_operations ftrace_enable_fops = {
1859         .open = tracing_open_generic,
1860         .read = event_enable_read,
1861         .write = event_enable_write,
1862         .llseek = default_llseek,
1863 };
1864
1865 static const struct file_operations ftrace_event_format_fops = {
1866         .open = trace_format_open,
1867         .read = seq_read,
1868         .llseek = seq_lseek,
1869         .release = seq_release,
1870 };
1871
1872 static const struct file_operations ftrace_event_id_fops = {
1873         .read = event_id_read,
1874         .llseek = default_llseek,
1875 };
1876
1877 static const struct file_operations ftrace_event_filter_fops = {
1878         .open = tracing_open_generic,
1879         .read = event_filter_read,
1880         .write = event_filter_write,
1881         .llseek = default_llseek,
1882 };
1883
1884 static const struct file_operations ftrace_subsystem_filter_fops = {
1885         .open = subsystem_open,
1886         .read = subsystem_filter_read,
1887         .write = subsystem_filter_write,
1888         .llseek = default_llseek,
1889         .release = subsystem_release,
1890 };
1891
1892 static const struct file_operations ftrace_system_enable_fops = {
1893         .open = subsystem_open,
1894         .read = system_enable_read,
1895         .write = system_enable_write,
1896         .llseek = default_llseek,
1897         .release = subsystem_release,
1898 };
1899
1900 static const struct file_operations ftrace_tr_enable_fops = {
1901         .open = system_tr_open,
1902         .read = system_enable_read,
1903         .write = system_enable_write,
1904         .llseek = default_llseek,
1905         .release = subsystem_release,
1906 };
1907
1908 static const struct file_operations ftrace_show_header_fops = {
1909         .open = tracing_open_generic,
1910         .read = show_header,
1911         .llseek = default_llseek,
1912 };
1913
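/*
 * Common open helper for the seq_file based event listing files: run
 * seq_open() with the given operations and stash the trace_array
 * (inode->i_private) in the seq_file's private field for the iterators.
 */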
1914 static int
1915 ftrace_event_open(struct inode *inode, struct file *file,
1916                   const struct seq_operations *seq_ops)
1917 {
1918         struct seq_file *m;
1919         int ret;
1920
1921         ret = seq_open(file, seq_ops);
1922         if (ret < 0)
1923                 return ret;
1924         m = file->private_data;
1925         /* copy tr over to seq ops */
1926         m->private = inode->i_private;
1927
1928         return ret;
1929 }
1930
1931 static int ftrace_event_release(struct inode *inode, struct file *file)
1932 {
1933         struct trace_array *tr = inode->i_private;
1934
1935         trace_array_put(tr);
1936
1937         return seq_release(inode, file);
1938 }
1939
1940 static int
1941 ftrace_event_avail_open(struct inode *inode, struct file *file)
1942 {
1943         const struct seq_operations *seq_ops = &show_event_seq_ops;
1944
1945         return ftrace_event_open(inode, file, seq_ops);
1946 }
1947
1948 static int
1949 ftrace_event_set_open(struct inode *inode, struct file *file)
1950 {
1951         const struct seq_operations *seq_ops = &show_set_event_seq_ops;
1952         struct trace_array *tr = inode->i_private;
1953         int ret;
1954
1955         if (trace_array_get(tr) < 0)
1956                 return -ENODEV;
1957
1958         if ((file->f_mode & FMODE_WRITE) &&
1959             (file->f_flags & O_TRUNC))
1960                 ftrace_clear_events(tr);
1961
1962         ret = ftrace_event_open(inode, file, seq_ops);
1963         if (ret < 0)
1964                 trace_array_put(tr);
1965         return ret;
1966 }
1967
1968 static int
1969 ftrace_event_set_pid_open(struct inode *inode, struct file *file)
1970 {
1971         const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
1972         struct trace_array *tr = inode->i_private;
1973         int ret;
1974
1975         if (trace_array_get(tr) < 0)
1976                 return -ENODEV;
1977
1978         if ((file->f_mode & FMODE_WRITE) &&
1979             (file->f_flags & O_TRUNC))
1980                 ftrace_clear_event_pids(tr);
1981
1982         ret = ftrace_event_open(inode, file, seq_ops);
1983         if (ret < 0)
1984                 trace_array_put(tr);
1985         return ret;
1986 }
1987
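/*
 * Allocate and initialize a new event_subsystem: take an initial
 * reference, duplicate the name (kstrdup_const() avoids copying names
 * that live in rodata), allocate an empty filter, and add the subsystem
 * to the global event_subsystems list. Returns NULL on allocation
 * failure.
 */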
1988 static struct event_subsystem *
1989 create_new_subsystem(const char *name)
1990 {
1991         struct event_subsystem *system;
1992
1993         /* need to create new entry */
1994         system = kmalloc(sizeof(*system), GFP_KERNEL);
1995         if (!system)
1996                 return NULL;
1997
1998         system->ref_count = 1;
1999
2000         /* Only allocate if dynamic (kprobes and modules) */
2001         system->name = kstrdup_const(name, GFP_KERNEL);
2002         if (!system->name)
2003                 goto out_free;
2004
2005         system->filter = NULL;
2006
2007         system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
2008         if (!system->filter)
2009                 goto out_free;
2010
2011         list_add(&system->list, &event_subsystems);
2012
2013         return system;
2014
2015  out_free:
2016         kfree_const(system->name);
2017         kfree(system);
2018         return NULL;
2019 }
2020
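/*
 * Find or create the tracefs directory for an event subsystem within a
 * trace instance. If this instance already has a directory for the
 * subsystem, just bump its event count; otherwise reuse (or create) the
 * event_subsystem and create its directory along with the "filter" and
 * "enable" files.
 */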
2021 static struct dentry *
2022 event_subsystem_dir(struct trace_array *tr, const char *name,
2023                     struct trace_event_file *file, struct dentry *parent)
2024 {
2025         struct trace_subsystem_dir *dir;
2026         struct event_subsystem *system;
2027         struct dentry *entry;
2028
2029         /* First see if we have already created this dir */
2030         list_for_each_entry(dir, &tr->systems, list) {
2031                 system = dir->subsystem;
2032                 if (strcmp(system->name, name) == 0) {
2033                         dir->nr_events++;
2034                         file->system = dir;
2035                         return dir->entry;
2036                 }
2037         }
2038
2039         /* Now see if the system itself exists. */
2040         list_for_each_entry(system, &event_subsystems, list) {
2041                 if (strcmp(system->name, name) == 0)
2042                         break;
2043         }
2044         /* Reset system variable when not found */
2045         if (&system->list == &event_subsystems)
2046                 system = NULL;
2047
2048         dir = kmalloc(sizeof(*dir), GFP_KERNEL);
2049         if (!dir)
2050                 goto out_fail;
2051
2052         if (!system) {
2053                 system = create_new_subsystem(name);
2054                 if (!system)
2055                         goto out_free;
2056         } else
2057                 __get_system(system);
2058
2059         dir->entry = tracefs_create_dir(name, parent);
2060         if (!dir->entry) {
2061                 pr_warn("Failed to create system directory %s\n", name);
2062                 __put_system(system);
2063                 goto out_free;
2064         }
2065
2066         dir->tr = tr;
2067         dir->ref_count = 1;
2068         dir->nr_events = 1;
2069         dir->subsystem = system;
2070         file->system = dir;
2071
2072         entry = tracefs_create_file("filter", 0644, dir->entry, dir,
2073                                     &ftrace_subsystem_filter_fops);
2074         if (!entry) {
2075                 kfree(system->filter);
2076                 system->filter = NULL;
2077                 pr_warn("Could not create tracefs '%s/filter' entry\n", name);
2078         }
2079
2080         trace_create_file("enable", 0644, dir->entry, dir,
2081                           &ftrace_system_enable_fops);
2082
2083         list_add(&dir->list, &tr->systems);
2084
2085         return dir->entry;
2086
2087  out_free:
2088         kfree(dir);
2089  out_fail:
2090         /* Only print this message if the failure was due to memory allocation */
2091         if (!dir || !system)
2092                 pr_warn("No memory to create event subsystem %s\n", name);
2093         return NULL;
2094 }
2095
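/*
 * Create the tracefs directory for a single event and populate it with
 * the standard control files: "enable", "id" (when perf events are
 * configured), "filter", "trigger", "hist" (with hist triggers), and
 * "format".
 */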
2096 static int
2097 event_create_dir(struct dentry *parent, struct trace_event_file *file)
2098 {
2099         struct trace_event_call *call = file->event_call;
2100         struct trace_array *tr = file->tr;
2101         struct list_head *head;
2102         struct dentry *d_events;
2103         const char *name;
2104         int ret;
2105
2106         /*
2107          * If the trace point header did not define TRACE_SYSTEM
2108          * then the system would be called "TRACE_SYSTEM".
2109          */
2110         if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
2111                 d_events = event_subsystem_dir(tr, call->class->system, file, parent);
2112                 if (!d_events)
2113                         return -ENOMEM;
2114         } else
2115                 d_events = parent;
2116
2117         name = trace_event_name(call);
2118         file->dir = tracefs_create_dir(name, d_events);
2119         if (!file->dir) {
2120                 pr_warn("Could not create tracefs '%s' directory\n", name);
2121                 return -1;
2122         }
2123
2124         if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
2125                 trace_create_file("enable", 0644, file->dir, file,
2126                                   &ftrace_enable_fops);
2127
2128 #ifdef CONFIG_PERF_EVENTS
2129         if (call->event.type && call->class->reg)
2130                 trace_create_file("id", 0444, file->dir,
2131                                   (void *)(long)call->event.type,
2132                                   &ftrace_event_id_fops);
2133 #endif
2134
2135         /*
2136          * Other events may have the same class. Only update
2137          * the fields if they are not already defined.
2138          */
2139         head = trace_get_fields(call);
2140         if (list_empty(head)) {
2141                 ret = call->class->define_fields(call);
2142                 if (ret < 0) {
2143                         pr_warn("Could not initialize trace point events/%s\n",
2144                                 name);
2145                         return -1;
2146                 }
2147         }
2148         trace_create_file("filter", 0644, file->dir, file,
2149                           &ftrace_event_filter_fops);
2150
2151         trace_create_file("trigger", 0644, file->dir, file,
2152                           &event_trigger_fops);
2153
2154 #ifdef CONFIG_HIST_TRIGGERS
2155         trace_create_file("hist", 0444, file->dir, file,
2156                           &event_hist_fops);
2157 #endif
2158         trace_create_file("format", 0444, file->dir, call,
2159                           &ftrace_event_format_fops);
2160
2161         return 0;
2162 }
2163
2164 static void remove_event_from_tracers(struct trace_event_call *call)
2165 {
2166         struct trace_event_file *file;
2167         struct trace_array *tr;
2168
2169         do_for_each_event_file_safe(tr, file) {
2170                 if (file->event_call != call)
2171                         continue;
2172
2173                 remove_event_file_dir(file);
2174                 /*
2175                  * The do_for_each_event_file_safe() is
2176                  * a double loop. After finding the call for this
2177                  * trace_array, we use break to jump to the next
2178                  * trace_array.
2179                  */
2180                 break;
2181         } while_for_each_event_file();
2182 }
2183
2184 static void event_remove(struct trace_event_call *call)
2185 {
2186         struct trace_array *tr;
2187         struct trace_event_file *file;
2188
2189         do_for_each_event_file(tr, file) {
2190                 if (file->event_call != call)
2191                         continue;
2192                 ftrace_event_enable_disable(file, 0);
2193                 /*
2194                  * The do_for_each_event_file() is
2195                  * a double loop. After finding the call for this
2196                  * trace_array, we use break to jump to the next
2197                  * trace_array.
2198                  */
2199                 break;
2200         } while_for_each_event_file();
2201
2202         if (call->event.funcs)
2203                 __unregister_trace_event(&call->event);
2204         remove_event_from_tracers(call);
2205         list_del(&call->list);
2206 }
2207
2208 static int event_init(struct trace_event_call *call)
2209 {
2210         int ret = 0;
2211         const char *name;
2212
2213         name = trace_event_name(call);
2214         if (WARN_ON(!name))
2215                 return -EINVAL;
2216
2217         if (call->class->raw_init) {
2218                 ret = call->class->raw_init(call);
2219                 if (ret < 0 && ret != -ENOSYS)
2220                         pr_warn("Could not initialize trace events/%s\n", name);
2221         }
2222
2223         return ret;
2224 }
2225
2226 static int
2227 __register_event(struct trace_event_call *call, struct module *mod)
2228 {
2229         int ret;
2230
2231         ret = event_init(call);
2232         if (ret < 0)
2233                 return ret;
2234
2235         list_add(&call->list, &ftrace_events);
2236         call->mod = mod;
2237
2238         return 0;
2239 }
2240
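/*
 * Replace an enum name of length @len at @ptr in the print_fmt string
 * with its numeric value, shifting the rest of the string into place.
 * Returns a pointer just past the inserted number, or NULL if the
 * number needs more room than the name provided. For example, with FOO
 * mapped to 3, "REC->x == FOO" becomes "REC->x == 3".
 */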
2241 static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
2242 {
2243         int rlen;
2244         int elen;
2245
2246         /* Find the length of the enum value as a string */
2247         elen = snprintf(ptr, 0, "%ld", map->enum_value);
2248         /* Make sure there's enough room to replace the string with the value */
2249         if (len < elen)
2250                 return NULL;
2251
2252         snprintf(ptr, elen + 1, "%ld", map->enum_value);
2253
2254         /* Get the rest of the string of ptr */
2255         rlen = strlen(ptr + len);
2256         memmove(ptr + elen, ptr + len, rlen);
2257         /* Make sure we end the new string */
2258         ptr[elen + rlen] = 0;
2259
2260         return ptr + elen;
2261 }
2262
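/*
 * Scan an event's print_fmt and substitute every standalone occurrence
 * of map->enum_string with its value. String literals, numbers, and
 * other identifiers (including member accesses via '.' or '->') are
 * skipped so that only whole-token matches are replaced.
 */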
2263 static void update_event_printk(struct trace_event_call *call,
2264                                 struct trace_enum_map *map)
2265 {
2266         char *ptr;
2267         int quote = 0;
2268         int len = strlen(map->enum_string);
2269
2270         for (ptr = call->print_fmt; *ptr; ptr++) {
2271                 if (*ptr == '\\') {
2272                         ptr++;
2273                         /* paranoid */
2274                         if (!*ptr)
2275                                 break;
2276                         continue;
2277                 }
2278                 if (*ptr == '"') {
2279                         quote ^= 1;
2280                         continue;
2281                 }
2282                 if (quote)
2283                         continue;
2284                 if (isdigit(*ptr)) {
2285                         /* skip numbers */
2286                         do {
2287                                 ptr++;
2288                                 /* Check for alpha chars like ULL */
2289                         } while (isalnum(*ptr));
2290                         if (!*ptr)
2291                                 break;
2292                         /*
2293                          * A number must have some kind of delimiter after
2294                          * it, and we can ignore that too.
2295                          */
2296                         continue;
2297                 }
2298                 if (isalpha(*ptr) || *ptr == '_') {
2299                         if (strncmp(map->enum_string, ptr, len) == 0 &&
2300                             !isalnum(ptr[len]) && ptr[len] != '_') {
2301                                 ptr = enum_replace(ptr, map, len);
2302                                 /* Hmm, enum string smaller than value */
2303                                 if (WARN_ON_ONCE(!ptr))
2304                                         return;
2305                                 /*
2306                                  * No need to decrement here, as enum_replace()
2307                                  * returns the pointer to the character past
2308                                  * the enum, and two enums cannot be placed
2309                                  * back to back without something in between.
2310                                  * We can skip that something in between.
2311                                  */
2312                                 continue;
2313                         }
2314                 skip_more:
2315                         do {
2316                                 ptr++;
2317                         } while (isalnum(*ptr) || *ptr == '_');
2318                         if (!*ptr)
2319                                 break;
2320                         /*
2321                          * If what comes after this variable is a '.' or
2322                          * '->' then we can continue to ignore that string.
2323                          */
2324                         if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2325                                 ptr += *ptr == '.' ? 1 : 2;
2326                                 if (!*ptr)
2327                                         break;
2328                                 goto skip_more;
2329                         }
2330                         /*
2331                          * Once again, we can skip the delimiter that came
2332                          * after the string.
2333                          */
2334                         continue;
2335                 }
2336         }
2337 }
2338
2339 void trace_event_enum_update(struct trace_enum_map **map, int len)
2340 {
2341         struct trace_event_call *call, *p;
2342         const char *last_system = NULL;
2343         int last_i;
2344         int i;
2345
2346         down_write(&trace_event_sem);
2347         list_for_each_entry_safe(call, p, &ftrace_events, list) {
2348                 /* events are usually grouped together with systems */
2349                 if (!last_system || call->class->system != last_system) {
2350                         last_i = 0;
2351                         last_system = call->class->system;
2352                 }
2353
2354                 for (i = last_i; i < len; i++) {
2355                         if (call->class->system == map[i]->system) {
2356                                 /* Save the first system if need be */
2357                                 if (!last_i)
2358                                         last_i = i;
2359                                 update_event_printk(call, map[i]);
2360                         }
2361                 }
2362         }
2363         up_write(&trace_event_sem);
2364 }
2365
2366 static struct trace_event_file *
2367 trace_create_new_event(struct trace_event_call *call,
2368                        struct trace_array *tr)
2369 {
2370         struct trace_event_file *file;
2371
2372         file = kmem_cache_alloc(file_cachep, GFP_TRACE);
2373         if (!file)
2374                 return NULL;
2375
2376         file->event_call = call;
2377         file->tr = tr;
2378         atomic_set(&file->sm_ref, 0);
2379         atomic_set(&file->tm_ref, 0);
2380         INIT_LIST_HEAD(&file->triggers);
2381         list_add(&file->list, &tr->events);
2382
2383         return file;
2384 }
2385
2386 /* Add an event to a trace directory */
2387 static int
2388 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
2389 {
2390         struct trace_event_file *file;
2391
2392         file = trace_create_new_event(call, tr);
2393         if (!file)
2394                 return -ENOMEM;
2395
2396         return event_create_dir(tr->event_dir, file);
2397 }
2398
2399 /*
2400  * Just create a descriptor for early init. A descriptor is required
2401  * for enabling events at boot. We want to enable events before
2402  * the filesystem is initialized.
2403  */
2404 static __init int
2405 __trace_early_add_new_event(struct trace_event_call *call,
2406                             struct trace_array *tr)
2407 {
2408         struct trace_event_file *file;
2409
2410         file = trace_create_new_event(call, tr);
2411         if (!file)
2412                 return -ENOMEM;
2413
2414         return 0;
2415 }
2416
2417 struct ftrace_module_file_ops;
2418 static void __add_event_to_tracers(struct trace_event_call *call);
2419
2420 /* Add an additional event_call dynamically */
2421 int trace_add_event_call(struct trace_event_call *call)
2422 {
2423         int ret;
2424         mutex_lock(&trace_types_lock);
2425         mutex_lock(&event_mutex);
2426
2427         ret = __register_event(call, NULL);
2428         if (ret >= 0)
2429                 __add_event_to_tracers(call);
2430
2431         mutex_unlock(&event_mutex);
2432         mutex_unlock(&trace_types_lock);
2433         return ret;
2434 }
2435
2436 /*
2437  * Must be called under locking of trace_types_lock, event_mutex and
2438  * trace_event_sem.
2439  */
2440 static void __trace_remove_event_call(struct trace_event_call *call)
2441 {
2442         event_remove(call);
2443         trace_destroy_fields(call);
2444         free_event_filter(call->filter);
2445         call->filter = NULL;
2446 }
2447
2448 static int probe_remove_event_call(struct trace_event_call *call)
2449 {
2450         struct trace_array *tr;
2451         struct trace_event_file *file;
2452
2453 #ifdef CONFIG_PERF_EVENTS
2454         if (call->perf_refcount)
2455                 return -EBUSY;
2456 #endif
2457         do_for_each_event_file(tr, file) {
2458                 if (file->event_call != call)
2459                         continue;
2460                 /*
2461                  * We can't rely on the ftrace_event_enable_disable(enable => 0)
2462                  * that we are about to do; EVENT_FILE_FL_SOFT_MODE can suppress
2463                  * TRACE_REG_UNREGISTER.
2464                  */
2465                 if (file->flags & EVENT_FILE_FL_ENABLED)
2466                         return -EBUSY;
2467                 /*
2468                  * The do_for_each_event_file() is
2469                  * a double loop. After finding the call for this
2470                  * trace_array, we use break to jump to the next
2471                  * trace_array.
2472                  */
2473                 break;
2474         } while_for_each_event_file();
2475
2476         __trace_remove_event_call(call);
2477
2478         return 0;
2479 }
2480
2481 /* Remove an event_call */
2482 int trace_remove_event_call(struct trace_event_call *call)
2483 {
2484         int ret;
2485
2486         mutex_lock(&trace_types_lock);
2487         mutex_lock(&event_mutex);
2488         down_write(&trace_event_sem);
2489         ret = probe_remove_event_call(call);
2490         up_write(&trace_event_sem);
2491         mutex_unlock(&event_mutex);
2492         mutex_unlock(&trace_types_lock);
2493
2494         return ret;
2495 }
2496
2497 #define for_each_event(event, start, end)                       \
2498         for (event = start;                                     \
2499              (unsigned long)event < (unsigned long)end;         \
2500              event++)
2501
2502 #ifdef CONFIG_MODULES
2503
2504 static void trace_module_add_events(struct module *mod)
2505 {
2506         struct trace_event_call **call, **start, **end;
2507
2508         if (!mod->num_trace_events)
2509                 return;
2510
2511         /* Don't add infrastructure for mods without tracepoints */
2512         if (trace_module_has_bad_taint(mod)) {
2513                 pr_err("%s: module has bad taint, not creating trace events\n",
2514                        mod->name);
2515                 return;
2516         }
2517
2518         start = mod->trace_events;
2519         end = mod->trace_events + mod->num_trace_events;
2520
2521         for_each_event(call, start, end) {
2522                 __register_event(*call, mod);
2523                 __add_event_to_tracers(*call);
2524         }
2525 }
2526
2527 static void trace_module_remove_events(struct module *mod)
2528 {
2529         struct trace_event_call *call, *p;
2530         bool clear_trace = false;
2531
2532         down_write(&trace_event_sem);
2533         list_for_each_entry_safe(call, p, &ftrace_events, list) {
2534                 if (call->mod == mod) {
2535                         if (call->flags & TRACE_EVENT_FL_WAS_ENABLED)
2536                                 clear_trace = true;
2537                         __trace_remove_event_call(call);
2538                 }
2539         }
2540         up_write(&trace_event_sem);
2541
2542         /*
2543          * It is safest to reset the ring buffer if the module being unloaded
2544          * registered any events that were used. The only worry is if
2545          * a new module gets loaded, and takes on the same id as the events
2546          * of this module. When printing out the buffer, traced events left
2547          * over from this module may be passed to the new module events and
2548          * unexpected results may occur.
2549          */
2550         if (clear_trace)
2551                 tracing_reset_all_online_cpus();
2552 }
2553
2554 static int trace_module_notify(struct notifier_block *self,
2555                                unsigned long val, void *data)
2556 {
2557         struct module *mod = data;
2558
2559         mutex_lock(&trace_types_lock);
2560         mutex_lock(&event_mutex);
2561         switch (val) {
2562         case MODULE_STATE_COMING:
2563                 trace_module_add_events(mod);
2564                 break;
2565         case MODULE_STATE_GOING:
2566                 trace_module_remove_events(mod);
2567                 break;
2568         }
2569         mutex_unlock(&event_mutex);
2570         mutex_unlock(&trace_types_lock);
2571
2572         return 0;
2573 }
2574
2575 static struct notifier_block trace_module_nb = {
2576         .notifier_call = trace_module_notify,
2577         .priority = 1, /* higher than trace.c module notify */
2578 };
2579 #endif /* CONFIG_MODULES */
2580
2581 /* Create a new event directory structure for a trace directory. */
2582 static void
2583 __trace_add_event_dirs(struct trace_array *tr)
2584 {
2585         struct trace_event_call *call;
2586         int ret;
2587
2588         list_for_each_entry(call, &ftrace_events, list) {
2589                 ret = __trace_add_new_event(call, tr);
2590                 if (ret < 0)
2591                         pr_warn("Could not create directory for event %s\n",
2592                                 trace_event_name(call));
2593         }
2594 }
2595
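/*
 * Look up the trace_event_file for @system/@event in a trace instance,
 * skipping events that cannot be registered or that are marked
 * TRACE_EVENT_FL_IGNORE_ENABLE.
 */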
2596 struct trace_event_file *
2597 find_event_file(struct trace_array *tr, const char *system,  const char *event)
2598 {
2599         struct trace_event_file *file;
2600         struct trace_event_call *call;
2601         const char *name;
2602
2603         list_for_each_entry(file, &tr->events, list) {
2604
2605                 call = file->event_call;
2606                 name = trace_event_name(call);
2607
2608                 if (!name || !call->class || !call->class->reg)
2609                         continue;
2610
2611                 if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
2612                         continue;
2613
2614                 if (strcmp(event, name) == 0 &&
2615                     strcmp(system, call->class->system) == 0)
2616                         return file;
2617         }
2618         return NULL;
2619 }
2620
2621 #ifdef CONFIG_DYNAMIC_FTRACE
2622
2623 /* Avoid typos */
2624 #define ENABLE_EVENT_STR        "enable_event"
2625 #define DISABLE_EVENT_STR       "disable_event"
2626
2627 struct event_probe_data {
2628         struct trace_event_file *file;
2629         unsigned long                   count;
2630         int                             ref;
2631         bool                            enable;
2632 };
2633
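/*
 * Function probe callbacks used by the enable_event/disable_event
 * commands below. When the probed function is hit, the target event is
 * soft-enabled or soft-disabled by flipping its SOFT_DISABLED bit; the
 * "count" variant only does so a limited number of times.
 */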
2634 static void
2635 event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)
2636 {
2637         struct event_probe_data **pdata = (struct event_probe_data **)_data;
2638         struct event_probe_data *data = *pdata;
2639
2640         if (!data)
2641                 return;
2642
2643         if (data->enable)
2644                 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2645         else
2646                 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2647 }
2648
2649 static void
2650 event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data)
2651 {
2652         struct event_probe_data **pdata = (struct event_probe_data **)_data;
2653         struct event_probe_data *data = *pdata;
2654
2655         if (!data)
2656                 return;
2657
2658         if (!data->count)
2659                 return;
2660
2661         /* Skip if the event is in a state we want to switch to */
2662         if (data->enable == !(data->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
2663                 return;
2664
2665         if (data->count != -1)
2666                 (data->count)--;
2667
2668         event_enable_probe(ip, parent_ip, _data);
2669 }
2670
2671 static int
2672 event_enable_print(struct seq_file *m, unsigned long ip,
2673                       struct ftrace_probe_ops *ops, void *_data)
2674 {
2675         struct event_probe_data *data = _data;
2676
2677         seq_printf(m, "%ps:", (void *)ip);
2678
2679         seq_printf(m, "%s:%s:%s",
2680                    data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
2681                    data->file->event_call->class->system,
2682                    trace_event_name(data->file->event_call));
2683
2684         if (data->count == -1)
2685                 seq_puts(m, ":unlimited\n");
2686         else
2687                 seq_printf(m, ":count=%ld\n", data->count);
2688
2689         return 0;
2690 }
2691
2692 static int
2693 event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip,
2694                   void **_data)
2695 {
2696         struct event_probe_data **pdata = (struct event_probe_data **)_data;
2697         struct event_probe_data *data = *pdata;
2698
2699         data->ref++;
2700         return 0;
2701 }
2702
2703 static void
2704 event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip,
2705                   void **_data)
2706 {
2707         struct event_probe_data **pdata = (struct event_probe_data **)_data;
2708         struct event_probe_data *data = *pdata;
2709
2710         if (WARN_ON_ONCE(data->ref <= 0))
2711                 return;
2712
2713         data->ref--;
2714         if (!data->ref) {
2715                 /* Remove the SOFT_MODE flag */
2716                 __ftrace_event_enable_disable(data->file, 0, 1);
2717                 module_put(data->file->event_call->mod);
2718                 kfree(data);
2719         }
2720         *pdata = NULL;
2721 }
2722
2723 static struct ftrace_probe_ops event_enable_probe_ops = {
2724         .func                   = event_enable_probe,
2725         .print                  = event_enable_print,
2726         .init                   = event_enable_init,
2727         .free                   = event_enable_free,
2728 };
2729
2730 static struct ftrace_probe_ops event_enable_count_probe_ops = {
2731         .func                   = event_enable_count_probe,
2732         .print                  = event_enable_print,
2733         .init                   = event_enable_init,
2734         .free                   = event_enable_free,
2735 };
2736
2737 static struct ftrace_probe_ops event_disable_probe_ops = {
2738         .func                   = event_enable_probe,
2739         .print                  = event_enable_print,
2740         .init                   = event_enable_init,
2741         .free                   = event_enable_free,
2742 };
2743
2744 static struct ftrace_probe_ops event_disable_count_probe_ops = {
2745         .func                   = event_enable_count_probe,
2746         .print                  = event_enable_print,
2747         .init                   = event_enable_init,
2748         .free                   = event_enable_free,
2749 };
2750
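/*
 * Implements the enable_event/disable_event commands written to
 * set_ftrace_filter. The expected form is roughly:
 *
 *   <function>:enable_event:<system>:<event>[:count]
 *
 * e.g. "schedule:enable_event:sched:sched_switch:1". A function probe
 * is registered that soft-enables (or soft-disables) the event when the
 * probed function is hit, optionally only @count times. A leading '!'
 * in @glob unregisters the probe instead.
 */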
2751 static int
2752 event_enable_func(struct ftrace_hash *hash,
2753                   char *glob, char *cmd, char *param, int enabled)
2754 {
2755         struct trace_array *tr = top_trace_array();
2756         struct trace_event_file *file;
2757         struct ftrace_probe_ops *ops;
2758         struct event_probe_data *data;
2759         const char *system;
2760         const char *event;
2761         char *number;
2762         bool enable;
2763         int ret;
2764
2765         if (!tr)
2766                 return -ENODEV;
2767
2768         /* hash funcs only work with set_ftrace_filter */
2769         if (!enabled || !param)
2770                 return -EINVAL;
2771
2772         system = strsep(&param, ":");
2773         if (!param)
2774                 return -EINVAL;
2775
2776         event = strsep(&param, ":");
2777
2778         mutex_lock(&event_mutex);
2779
2780         ret = -EINVAL;
2781         file = find_event_file(tr, system, event);
2782         if (!file)
2783                 goto out;
2784
2785         enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
2786
2787         if (enable)
2788                 ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
2789         else
2790                 ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
2791
2792         if (glob[0] == '!') {
2793                 unregister_ftrace_function_probe_func(glob+1, ops);
2794                 ret = 0;
2795                 goto out;
2796         }
2797
2798         ret = -ENOMEM;
2799         data = kzalloc(sizeof(*data), GFP_KERNEL);
2800         if (!data)
2801                 goto out;
2802
2803         data->enable = enable;
2804         data->count = -1;
2805         data->file = file;
2806
2807         if (!param)
2808                 goto out_reg;
2809
2810         number = strsep(&param, ":");
2811
2812         ret = -EINVAL;
2813         if (!strlen(number))
2814                 goto out_free;
2815
2816         /*
2817          * We use the callback data field (which is a pointer)
2818          * as our counter.
2819          */
2820         ret = kstrtoul(number, 0, &data->count);
2821         if (ret)
2822                 goto out_free;
2823
2824  out_reg:
2825         /* Don't let event modules unload while probe registered */
2826         ret = try_module_get(file->event_call->mod);
2827         if (!ret) {
2828                 ret = -EBUSY;
2829                 goto out_free;
2830         }
2831
2832         ret = __ftrace_event_enable_disable(file, 1, 1);
2833         if (ret < 0)
2834                 goto out_put;
2835         ret = register_ftrace_function_probe(glob, ops, data);
2836         /*
2837          * On success, the above returns the number of functions enabled,
2838          * but if it didn't find any functions it returns zero.
2839          * Treat finding no functions as a failure too.
2840          */
2841         if (!ret) {
2842                 ret = -ENOENT;
2843                 goto out_disable;
2844         } else if (ret < 0)
2845                 goto out_disable;
2846         /* Just return zero, not the number of enabled functions */
2847         ret = 0;
2848  out:
2849         mutex_unlock(&event_mutex);
2850         return ret;
2851
2852  out_disable:
2853         __ftrace_event_enable_disable(file, 0, 1);
2854  out_put:
2855         module_put(file->event_call->mod);
2856  out_free:
2857         kfree(data);
2858         goto out;
2859 }
2860
2861 static struct ftrace_func_command event_enable_cmd = {
2862         .name                   = ENABLE_EVENT_STR,
2863         .func                   = event_enable_func,
2864 };
2865
2866 static struct ftrace_func_command event_disable_cmd = {
2867         .name                   = DISABLE_EVENT_STR,
2868         .func                   = event_enable_func,
2869 };
2870
2871 static __init int register_event_cmds(void)
2872 {
2873         int ret;
2874
2875         ret = register_ftrace_command(&event_enable_cmd);
2876         if (WARN_ON(ret < 0))
2877                 return ret;
2878         ret = register_ftrace_command(&event_disable_cmd);
2879         if (WARN_ON(ret < 0))
2880                 unregister_ftrace_command(&event_enable_cmd);
2881         return ret;
2882 }
2883 #else
2884 static inline int register_event_cmds(void) { return 0; }
2885 #endif /* CONFIG_DYNAMIC_FTRACE */
2886
2887 /*
2888  * The top level array has already had its trace_event_file
2889  * descriptors created in order to allow for early events to
2890  * be recorded. This function is called after the tracefs has been
2891  * initialized, and we now have to create the files associated
2892  * to the events.
2893  */
2894 static __init void
2895 __trace_early_add_event_dirs(struct trace_array *tr)
2896 {
2897         struct trace_event_file *file;
2898         int ret;
2899
2900
2901         list_for_each_entry(file, &tr->events, list) {
2902                 ret = event_create_dir(tr->event_dir, file);
2903                 if (ret < 0)
2904                         pr_warn("Could not create directory for event %s\n",
2905                                 trace_event_name(file->event_call));
2906         }
2907 }
2908
2909 /*
2910  * For early boot up, the top trace array needs to have
2911  * a list of events that can be enabled. This must be done before
2912  * the filesystem is set up in order to allow events to be traced
2913  * early.
2914  */
2915 static __init void
2916 __trace_early_add_events(struct trace_array *tr)
2917 {
2918         struct trace_event_call *call;
2919         int ret;
2920
2921         list_for_each_entry(call, &ftrace_events, list) {
2922                 /* Early boot up should not have any modules loaded */
2923                 if (WARN_ON_ONCE(call->mod))
2924                         continue;
2925
2926                 ret = __trace_early_add_new_event(call, tr);
2927                 if (ret < 0)
2928                         pr_warn("Could not create early event %s\n",
2929                                 trace_event_name(call));
2930         }
2931 }
2932
2933 /* Remove the event directory structure for a trace directory. */
2934 static void
2935 __trace_remove_event_dirs(struct trace_array *tr)
2936 {
2937         struct trace_event_file *file, *next;
2938
2939         list_for_each_entry_safe(file, next, &tr->events, list)
2940                 remove_event_file_dir(file);
2941 }
2942
2943 static void __add_event_to_tracers(struct trace_event_call *call)
2944 {
2945         struct trace_array *tr;
2946
2947         list_for_each_entry(tr, &ftrace_trace_arrays, list)
2948                 __trace_add_new_event(call, tr);
2949 }
2950
2951 extern struct trace_event_call *__start_ftrace_events[];
2952 extern struct trace_event_call *__stop_ftrace_events[];
2953
2954 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
2955
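/*
 * Parse the "trace_event=" boot parameter: the comma-separated list of
 * events is saved in bootup_event_buf and enabled later by
 * early_enable_events(). It also marks the ring buffer as expanded and
 * disables the startup self tests. For example:
 *
 *   trace_event=sched:sched_switch,irq:irq_handler_entry
 */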
2956 static __init int setup_trace_event(char *str)
2957 {
2958         strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
2959         ring_buffer_expanded = true;
2960         tracing_selftest_disabled = true;
2961
2962         return 1;
2963 }
2964 __setup("trace_event=", setup_trace_event);
2965
2966 /* Expects to have event_mutex held when called */
2967 static int
2968 create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
2969 {
2970         struct dentry *d_events;
2971         struct dentry *entry;
2972
2973         entry = tracefs_create_file("set_event", 0644, parent,
2974                                     tr, &ftrace_set_event_fops);
2975         if (!entry) {
2976                 pr_warn("Could not create tracefs 'set_event' entry\n");
2977                 return -ENOMEM;
2978         }
2979
2980         d_events = tracefs_create_dir("events", parent);
2981         if (!d_events) {
2982                 pr_warn("Could not create tracefs 'events' directory\n");
2983                 return -ENOMEM;
2984         }
2985
2986         entry = tracefs_create_file("set_event_pid", 0644, parent,
2987                                     tr, &ftrace_set_event_pid_fops);
2988
2989         /* ring buffer internal formats */
2990         trace_create_file("header_page", 0444, d_events,
2991                           ring_buffer_print_page_header,
2992                           &ftrace_show_header_fops);
2993
2994         trace_create_file("header_event", 0444, d_events,
2995                           ring_buffer_print_entry_header,
2996                           &ftrace_show_header_fops);
2997
2998         trace_create_file("enable", 0644, d_events,
2999                           tr, &ftrace_tr_enable_fops);
3000
3001         tr->event_dir = d_events;
3002
3003         return 0;
3004 }
3005
3006 /**
3007  * event_trace_add_tracer - add an instance of a trace_array to events
3008  * @parent: The parent dentry to place the files/directories for events in
3009  * @tr: The trace array associated with these events
3010  *
3011  * When a new instance is created, it needs to set up its events
3012  * directory, as well as other files associated with events. It also
3013  * creates the event hierarchy in the @parent/events directory.
3014  *
3015  * Returns 0 on success.
3016  */
3017 int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
3018 {
3019         int ret;
3020
3021         mutex_lock(&event_mutex);
3022
3023         ret = create_event_toplevel_files(parent, tr);
3024         if (ret)
3025                 goto out_unlock;
3026
3027         down_write(&trace_event_sem);
3028         __trace_add_event_dirs(tr);
3029         up_write(&trace_event_sem);
3030
3031  out_unlock:
3032         mutex_unlock(&event_mutex);
3033
3034         return ret;
3035 }
3036
3037 /*
3038  * The top trace array already had its file descriptors created.
3039  * Now the files themselves need to be created.
3040  */
3041 static __init int
3042 early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
3043 {
3044         int ret;
3045
3046         mutex_lock(&event_mutex);
3047
3048         ret = create_event_toplevel_files(parent, tr);
3049         if (ret)
3050                 goto out_unlock;
3051
3052         down_write(&trace_event_sem);
3053         __trace_early_add_event_dirs(tr);
3054         up_write(&trace_event_sem);
3055
3056  out_unlock:
3057         mutex_unlock(&event_mutex);
3058
3059         return ret;
3060 }
3061
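/*
 * Tear down the events of a trace instance: clear triggers and pid
 * filters, disable all events, wait for RCU readers to finish, then
 * remove the per-event directories along with the instance's "events"
 * directory itself.
 */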
3062 int event_trace_del_tracer(struct trace_array *tr)
3063 {
3064         mutex_lock(&event_mutex);
3065
3066         /* Disable any event triggers and associated soft-disabled events */
3067         clear_event_triggers(tr);
3068
3069         /* Clear the pid list */
3070         __ftrace_clear_event_pids(tr);
3071
3072         /* Disable any running events */
3073         __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
3074
3075         /* Accesses to events are within rcu_read_lock_sched() */
3076         synchronize_sched();
3077
3078         down_write(&trace_event_sem);
3079         __trace_remove_event_dirs(tr);
3080         tracefs_remove_recursive(tr->event_dir);
3081         up_write(&trace_event_sem);
3082
3083         tr->event_dir = NULL;
3084
3085         mutex_unlock(&event_mutex);
3086
3087         return 0;
3088 }
3089
3090 static __init int event_trace_memsetup(void)
3091 {
3092         field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
3093         file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
3094         return 0;
3095 }
3096
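/*
 * Walk the comma-separated list saved from the "trace_event=" boot
 * parameter and enable each event. When @disable_first is set (the
 * later pass from event_trace_enable_again()), disable the event first
 * so that syscall events are properly restarted.
 */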
3097 static __init void
3098 early_enable_events(struct trace_array *tr, bool disable_first)
3099 {
3100         char *buf = bootup_event_buf;
3101         char *token;
3102         int ret;
3103
3104         while (true) {
3105                 token = strsep(&buf, ",");
3106
3107                 if (!token)
3108                         break;
3109
3110                 if (*token) {
3111                         /* Restarting syscalls requires that we stop them first */
3112                         if (disable_first)
3113                                 ftrace_set_clr_event(tr, token, 0);
3114
3115                         ret = ftrace_set_clr_event(tr, token, 1);
3116                         if (ret)
3117                                 pr_warn("Failed to enable trace event: %s\n", token);
3118                 }
3119
3120                 /* Put back the comma to allow this to be called again */
3121                 if (buf)
3122                         *(buf - 1) = ',';
3123         }
3124 }
3125
3126 static __init int event_trace_enable(void)
3127 {
3128         struct trace_array *tr = top_trace_array();
3129         struct trace_event_call **iter, *call;
3130         int ret;
3131
3132         if (!tr)
3133                 return -ENODEV;
3134
3135         for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
3136
3137                 call = *iter;
3138                 ret = event_init(call);
3139                 if (!ret)
3140                         list_add(&call->list, &ftrace_events);
3141         }
3142
3143         /*
3144          * We need the top trace array to have a working set of trace
3145          * points at early init, before the debug files and directories
3146          * are created. Create the file entries now, and attach them
3147          * to the actual file dentries later.
3148          */
3149         __trace_early_add_events(tr);
3150
3151         early_enable_events(tr, false);
3152
3153         trace_printk_start_comm();
3154
3155         register_event_cmds();
3156
3157         register_trigger_cmds();
3158
3159         return 0;
3160 }
3161
3162 /*
3163  * event_trace_enable() is called from trace_event_init() first to
3164  * initialize events and perhaps start any events that are on the
3165  * command line. Unfortunately, there are some events that will not
3166  * start this early, like the system call tracepoints that need
3167  * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
3168  * is called before pid 1 starts, and this flag is never set, making
3169  * the syscall tracepoint never get reached, but the event is enabled
3170  * regardless (and not doing anything).
3171  */
3172 static __init int event_trace_enable_again(void)
3173 {
3174         struct trace_array *tr;
3175
3176         tr = top_trace_array();
3177         if (!tr)
3178                 return -ENODEV;
3179
3180         early_enable_events(tr, true);
3181
3182         return 0;
3183 }
3184
3185 early_initcall(event_trace_enable_again);
3186
3187 static __init int event_trace_init(void)
3188 {
3189         struct trace_array *tr;
3190         struct dentry *d_tracer;
3191         struct dentry *entry;
3192         int ret;
3193
3194         tr = top_trace_array();
3195         if (!tr)
3196                 return -ENODEV;
3197
3198         d_tracer = tracing_init_dentry();
3199         if (IS_ERR(d_tracer))
3200                 return 0;
3201
3202         entry = tracefs_create_file("available_events", 0444, d_tracer,
3203                                     tr, &ftrace_avail_fops);
3204         if (!entry)
3205                 pr_warn("Could not create tracefs 'available_events' entry\n");
3206
3207         if (trace_define_generic_fields())
3208                 pr_warn("tracing: Failed to allocate generic fields");
3209
3210         if (trace_define_common_fields())
3211                 pr_warn("tracing: Failed to allocate common fields");
3212
3213         ret = early_event_add_tracer(d_tracer, tr);
3214         if (ret)
3215                 return ret;
3216
3217 #ifdef CONFIG_MODULES
3218         ret = register_module_notifier(&trace_module_nb);
3219         if (ret)
3220                 pr_warn("Failed to register trace events module notifier\n");
3221 #endif
3222         return 0;
3223 }
3224
3225 void __init trace_event_init(void)
3226 {
3227         event_trace_memsetup();
3228         init_ftrace_syscalls();
3229         event_trace_enable();
3230 }
3231
3232 fs_initcall(event_trace_init);
3233
3234 #ifdef CONFIG_FTRACE_STARTUP_TEST
3235
3236 static DEFINE_SPINLOCK(test_spinlock);
3237 static DEFINE_SPINLOCK(test_spinlock_irq);
3238 static DEFINE_MUTEX(test_mutex);
3239
3240 static __init void test_work(struct work_struct *dummy)
3241 {
3242         spin_lock(&test_spinlock);
3243         spin_lock_irq(&test_spinlock_irq);
3244         udelay(1);
3245         spin_unlock_irq(&test_spinlock_irq);
3246         spin_unlock(&test_spinlock);
3247
3248         mutex_lock(&test_mutex);
3249         msleep(1);
3250         mutex_unlock(&test_mutex);
3251 }
3252
3253 static __init int event_test_thread(void *unused)
3254 {
3255         void *test_malloc;
3256
3257         test_malloc = kmalloc(1234, GFP_KERNEL);
3258         if (!test_malloc)
3259                 pr_info("failed to kmalloc\n");
3260
3261         schedule_on_each_cpu(test_work);
3262
3263         kfree(test_malloc);
3264
3265         set_current_state(TASK_INTERRUPTIBLE);
3266         while (!kthread_should_stop()) {
3267                 schedule();
3268                 set_current_state(TASK_INTERRUPTIBLE);
3269         }
3270         __set_current_state(TASK_RUNNING);
3271
3272         return 0;
3273 }
3274
3275 /*
3276  * Do various things that may trigger events.
3277  */
3278 static __init void event_test_stuff(void)
3279 {
3280         struct task_struct *test_thread;
3281
3282         test_thread = kthread_run(event_test_thread, NULL, "test-events");
             /* kthread_run() returns an ERR_PTR on failure, never NULL */
             if (WARN_ON(IS_ERR(test_thread)))
                     return;
3283         msleep(1);
3284         kthread_stop(test_thread);
3285 }
3286
3287 /*
3288  * For every trace event defined, we will test each trace point separately,
3289  * and then by groups, and finally all trace points.
3290  */
3291 static __init void event_trace_self_tests(void)
3292 {
3293         struct trace_subsystem_dir *dir;
3294         struct trace_event_file *file;
3295         struct trace_event_call *call;
3296         struct event_subsystem *system;
3297         struct trace_array *tr;
3298         int ret;
3299
3300         tr = top_trace_array();
3301         if (!tr)
3302                 return;
3303
3304         pr_info("Running tests on trace events:\n");
3305
3306         list_for_each_entry(file, &tr->events, list) {
3307
3308                 call = file->event_call;
3309
3310                 /* Only test those that have a probe */
3311                 if (!call->class || !call->class->probe)
3312                         continue;
3313
3314 /*
3315  * Testing syscall events here is pretty useless, but we still
3316  * do it if configured, even though it is time consuming.
3317  * What we really need is a user thread to perform the
3318  * syscalls as we test.
3319  */
3320 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
3321                 if (call->class->system &&
3322                     strcmp(call->class->system, "syscalls") == 0)
3323                         continue;
3324 #endif
3325
3326                 pr_info("Testing event %s: ", trace_event_name(call));
3327
3328                 /*
3329                  * If an event is already enabled, someone is using
3330                  * it and the self test should not be on.
3331                  */
3332                 if (file->flags & EVENT_FILE_FL_ENABLED) {
3333                         pr_warn("Enabled event during self test!\n");
3334                         WARN_ON_ONCE(1);
3335                         continue;
3336                 }
3337
3338                 ftrace_event_enable_disable(file, 1);
3339                 event_test_stuff();
3340                 ftrace_event_enable_disable(file, 0);
3341
3342                 pr_cont("OK\n");
3343         }
3344
3345         /* Now test at the sub system level */
3346
3347         pr_info("Running tests on trace event systems:\n");
3348
3349         list_for_each_entry(dir, &tr->systems, list) {
3350
3351                 system = dir->subsystem;
3352
3353                 /* the ftrace system is special, skip it */
3354                 if (strcmp(system->name, "ftrace") == 0)
3355                         continue;
3356
3357                 pr_info("Testing event system %s: ", system->name);
3358
3359                 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
3360                 if (WARN_ON_ONCE(ret)) {
3361                         pr_warn("error enabling system %s\n",
3362                                 system->name);
3363                         continue;
3364                 }
3365
3366                 event_test_stuff();
3367
3368                 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
3369                 if (WARN_ON_ONCE(ret)) {
3370                         pr_warn("error disabling system %s\n",
3371                                 system->name);
3372                         continue;
3373                 }
3374
3375                 pr_cont("OK\n");
3376         }
3377
3378         /* Test with all events enabled */
3379
3380         pr_info("Running tests on all trace events:\n");
3381         pr_info("Testing all events: ");
3382
3383         ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
3384         if (WARN_ON_ONCE(ret)) {
3385                 pr_warn("error enabling all events\n");
3386                 return;
3387         }
3388
3389         event_test_stuff();
3390
3391         /* Now disable all events again */
3392         ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
3393         if (WARN_ON_ONCE(ret)) {
3394                 pr_warn("error disabling all events\n");
3395                 return;
3396         }
3397
3398         pr_cont("OK\n");
3399 }
3400
3401 #ifdef CONFIG_FUNCTION_TRACER
3402
3403 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
3404
3405 static struct trace_event_file event_trace_file __initdata;
3406
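/*
 * Callback hooked into the function tracer while the self test runs.
 * Records a TRACE_FN entry for the traced function, using a per-CPU
 * counter to bail out on recursion while the ring buffer is written.
 */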
3407 static void __init
3408 function_test_events_call(unsigned long ip, unsigned long parent_ip,
3409                           struct ftrace_ops *op, struct pt_regs *pt_regs)
3410 {
3411         struct ring_buffer_event *event;
3412         struct ring_buffer *buffer;
3413         struct ftrace_entry *entry;
3414         unsigned long flags;
3415         long disabled;
3416         int cpu;
3417         int pc;
3418
3419         pc = preempt_count();
3420         preempt_disable_notrace();
3421         cpu = raw_smp_processor_id();
3422         disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
3423
3424         if (disabled != 1)
3425                 goto out;
3426
3427         local_save_flags(flags);
3428
3429         event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
3430                                                 TRACE_FN, sizeof(*entry),
3431                                                 flags, pc);
3432         if (!event)
3433                 goto out;
3434         entry   = ring_buffer_event_data(event);
3435         entry->ip                       = ip;
3436         entry->parent_ip                = parent_ip;
3437
3438         event_trigger_unlock_commit(&event_trace_file, buffer, event,
3439                                     entry, flags, pc);
3440  out:
3441         atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
3442         preempt_enable_notrace();
3443 }
3444
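/* ftrace_ops used to register function_test_events_call() above */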
3445 static struct ftrace_ops trace_ops __initdata =
3446 {
3447         .func = function_test_events_call,
3448         .flags = FTRACE_OPS_FL_RECURSION_SAFE,
3449 };
3450
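/*
 * Register the ftrace_ops above so that every traced function records
 * a TRACE_FN event, then re-run the full event self tests with the
 * function tracer active.
 */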
3451 static __init void event_trace_self_test_with_function(void)
3452 {
3453         int ret;
3454
3455         event_trace_file.tr = top_trace_array();
3456         if (WARN_ON(!event_trace_file.tr))
3457                 return;
3458
3459         ret = register_ftrace_function(&trace_ops);
3460         if (WARN_ON(ret < 0)) {
3461                 pr_info("Failed to enable function tracer for event tests\n");
3462                 return;
3463         }
3464         pr_info("Running tests again, along with the function tracer\n");
3465         event_trace_self_tests();
3466         unregister_ftrace_function(&trace_ops);
3467 }
3468 #else
3469 static __init void event_trace_self_test_with_function(void)
3470 {
3471 }
3472 #endif /* CONFIG_FUNCTION_TRACER */
3473
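/*
 * Entry point for the startup self tests, run as a late_initcall().
 * Does nothing if tracing_selftest_disabled has been set.
 */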
3474 static __init int event_trace_self_tests_init(void)
3475 {
3476         if (!tracing_selftest_disabled) {
3477                 event_trace_self_tests();
3478                 event_trace_self_test_with_function();
3479         }
3480
3481         return 0;
3482 }
3483
3484 late_initcall(event_trace_self_tests_init);
3485
3486 #endif /* CONFIG_FTRACE_STARTUP_TEST */