Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

[uclinux-h8/linux.git] / kernel / events / core.c
diff --git a/kernel/events/core.c b/kernel/events/core.c

index 5c964e8..e965cfa 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -36,7 +36,7 @@
  #include <linux/kernel_stat.h>
  #include <linux/cgroup.h>
  #include <linux/perf_event.h>
-#include <linux/ftrace_event.h>
+#include <linux/trace_events.h>
  #include <linux/hw_breakpoint.h>
  #include <linux/mm_types.h>
  #include <linux/module.h>
@@ -51,9 +51,11 @@
  
  static struct workqueue_struct *perf_wq;
  
+typedef int (*remote_function_f)(void *);
+
  struct remote_function_call {
         struct task_struct      *p;
-       int                     (*func)(void *info);
+       remote_function_f       func;
         void                    *info;
         int                     ret;
  };
@@ -86,7 +88,7 @@ static void remote_function(void *data)
   *         -EAGAIN - when the process moved away
   */
  static int
-task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
+task_function_call(struct task_struct *p, remote_function_f func, void *info)
  {
         struct remote_function_call data = {
                 .p      = p,
@@ -110,7 +112,7 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
   *
   * returns: @func return value or -ENXIO when the cpu is offline
   */
-static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
+static int cpu_function_call(int cpu, remote_function_f func, void *info)
  {
         struct remote_function_call data = {
                 .p      = NULL,
@@ -747,62 +749,31 @@ perf_cgroup_mark_enabled(struct perf_event *event,
  /*
   * function must be called with interrupts disabled
   */
-static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr)
+static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
  {
         struct perf_cpu_context *cpuctx;
-       enum hrtimer_restart ret = HRTIMER_NORESTART;
         int rotations = 0;
  
         WARN_ON(!irqs_disabled());
  
         cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
-
         rotations = perf_rotate_context(cpuctx);
  
-       /*
-        * arm timer if needed
-        */
-       if (rotations) {
+       raw_spin_lock(&cpuctx->hrtimer_lock);
+       if (rotations)
                 hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
-               ret = HRTIMER_RESTART;
-       }
-
-       return ret;
-}
-
-/* CPU is going down */
-void perf_cpu_hrtimer_cancel(int cpu)
-{
-       struct perf_cpu_context *cpuctx;
-       struct pmu *pmu;
-       unsigned long flags;
-
-       if (WARN_ON(cpu != smp_processor_id()))
-               return;
-
-       local_irq_save(flags);
-
-       rcu_read_lock();
-
-       list_for_each_entry_rcu(pmu, &pmus, entry) {
-               cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-               if (pmu->task_ctx_nr == perf_sw_context)
-                       continue;
-
-               hrtimer_cancel(&cpuctx->hrtimer);
-       }
-
-       rcu_read_unlock();
+       else
+               cpuctx->hrtimer_active = 0;
+       raw_spin_unlock(&cpuctx->hrtimer_lock);
  
-       local_irq_restore(flags);
+       return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
  }
  
-static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
+static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
  {
-       struct hrtimer *hr = &cpuctx->hrtimer;
+       struct hrtimer *timer = &cpuctx->hrtimer;
         struct pmu *pmu = cpuctx->ctx.pmu;
-       int timer;
+       u64 interval;
  
         /* no multiplexing needed for SW PMU */
         if (pmu->task_ctx_nr == perf_sw_context)
@@ -812,31 +783,36 @@ static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
          * check default is sane, if not set then force to
          * default interval (1/tick)
          */
-       timer = pmu->hrtimer_interval_ms;
-       if (timer < 1)
-               timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
+       interval = pmu->hrtimer_interval_ms;
+       if (interval < 1)
+               interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
  
-       cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
+       cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
  
-       hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
-       hr->function = perf_cpu_hrtimer_handler;
+       raw_spin_lock_init(&cpuctx->hrtimer_lock);
+       hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+       timer->function = perf_mux_hrtimer_handler;
  }
  
-static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx)
+static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
  {
-       struct hrtimer *hr = &cpuctx->hrtimer;
+       struct hrtimer *timer = &cpuctx->hrtimer;
         struct pmu *pmu = cpuctx->ctx.pmu;
+       unsigned long flags;
  
         /* not for SW PMU */
         if (pmu->task_ctx_nr == perf_sw_context)
-               return;
+               return 0;
  
-       if (hrtimer_active(hr))
-               return;
+       raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
+       if (!cpuctx->hrtimer_active) {
+               cpuctx->hrtimer_active = 1;
+               hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
+               hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+       }
+       raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
  
-       if (!hrtimer_callback_running(hr))
-               __hrtimer_start_range_ns(hr, cpuctx->hrtimer_interval,
-                                        0, HRTIMER_MODE_REL_PINNED, 0);
+       return 0;
  }
  
  void perf_pmu_disable(struct pmu *pmu)
@@ -913,10 +889,30 @@ static void put_ctx(struct perf_event_context *ctx)
   * Those places that change perf_event::ctx will hold both
   * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
   *
- * Lock ordering is by mutex address. There is one other site where
- * perf_event_context::mutex nests and that is put_event(). But remember that
- * that is a parent<->child context relation, and migration does not affect
- * children, therefore these two orderings should not interact.
+ * Lock ordering is by mutex address. There are two other sites where
+ * perf_event_context::mutex nests and those are:
+ *
+ *  - perf_event_exit_task_context()   [ child , 0 ]
+ *      __perf_event_exit_task()
+ *        sync_child_event()
+ *          put_event()                        [ parent, 1 ]
+ *
+ *  - perf_event_init_context()                [ parent, 0 ]
+ *      inherit_task_group()
+ *        inherit_group()
+ *          inherit_event()
+ *            perf_event_alloc()
+ *              perf_init_event()
+ *                perf_try_init_event()        [ child , 1 ]
+ *
+ * While it appears there is an obvious deadlock here -- the parent and child
+ * nesting levels are inverted between the two. This is in fact safe because
+ * life-time rules separate them. That is an exiting task cannot fork, and a
+ * spawning task cannot (yet) exit.
+ *
+ * But remember that these are parent<->child context relations, and
+ * migration does not affect children, therefore these two orderings should not
+ * interact.
   *
   * The change in perf_event::ctx does not affect children (as claimed above)
   * because the sys_perf_event_open() case will install a new event and break
@@ -1506,11 +1502,17 @@ static int __init perf_workqueue_init(void)
  
  core_initcall(perf_workqueue_init);
  
+static inline int pmu_filter_match(struct perf_event *event)
+{
+       struct pmu *pmu = event->pmu;
+       return pmu->filter_match ? pmu->filter_match(event) : 1;
+}
+
  static inline int
  event_filter_match(struct perf_event *event)
  {
         return (event->cpu == -1 || event->cpu == smp_processor_id())
-           && perf_cgroup_match(event);
+           && perf_cgroup_match(event) && pmu_filter_match(event);
  }
  
  static void
@@ -1915,7 +1917,7 @@ group_sched_in(struct perf_event *group_event,
  
         if (event_sched_in(group_event, cpuctx, ctx)) {
                 pmu->cancel_txn(pmu);
-               perf_cpu_hrtimer_restart(cpuctx);
+               perf_mux_hrtimer_restart(cpuctx);
                 return -EAGAIN;
         }
  
@@ -1962,7 +1964,7 @@ group_error:
  
         pmu->cancel_txn(pmu);
  
-       perf_cpu_hrtimer_restart(cpuctx);
+       perf_mux_hrtimer_restart(cpuctx);
  
         return -EAGAIN;
  }
@@ -2235,7 +2237,7 @@ static int __perf_event_enable(void *info)
                  */
                 if (leader != event) {
                         group_sched_out(leader, cpuctx, ctx);
-                       perf_cpu_hrtimer_restart(cpuctx);
+                       perf_mux_hrtimer_restart(cpuctx);
                 }
                 if (leader->attr.pinned) {
                         update_group_times(leader);
@@ -3422,7 +3424,6 @@ static void free_event_rcu(struct rcu_head *head)
         if (event->ns)
                 put_pid_ns(event->ns);
         perf_event_free_filter(event);
-       perf_event_free_bpf_prog(event);
         kfree(event);
  }
  
@@ -3553,6 +3554,8 @@ static void __free_event(struct perf_event *event)
                         put_callchain_buffers();
         }
  
+       perf_event_free_bpf_prog(event);
+
         if (event->destroy)
                 event->destroy(event);
  
@@ -3657,9 +3660,6 @@ static void perf_remove_from_owner(struct perf_event *event)
         }
  }
  
-/*
- * Called when the last reference to the file is gone.
- */
  static void put_event(struct perf_event *event)
  {
         struct perf_event_context *ctx;
@@ -3697,6 +3697,9 @@ int perf_event_release_kernel(struct perf_event *event)
  }
  EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  
+/*
+ * Called when the last reference to the file is gone.
+ */
  static int perf_release(struct inode *inode, struct file *file)
  {
         put_event(file->private_data);
@@ -4310,20 +4313,20 @@ static void ring_buffer_attach(struct perf_event *event,
                 WARN_ON_ONCE(event->rcu_pending);
  
                 old_rb = event->rb;
-               event->rcu_batches = get_state_synchronize_rcu();
-               event->rcu_pending = 1;
-
                 spin_lock_irqsave(&old_rb->event_lock, flags);
                 list_del_rcu(&event->rb_entry);
                 spin_unlock_irqrestore(&old_rb->event_lock, flags);
-       }
  
-       if (event->rcu_pending && rb) {
-               cond_synchronize_rcu(event->rcu_batches);
-               event->rcu_pending = 0;
+               event->rcu_batches = get_state_synchronize_rcu();
+               event->rcu_pending = 1;
         }
  
         if (rb) {
+               if (event->rcu_pending) {
+                       cond_synchronize_rcu(event->rcu_batches);
+                       event->rcu_pending = 0;
+               }
+
                 spin_lock_irqsave(&rb->event_lock, flags);
                 list_add_rcu(&event->rb_entry, &rb->event_list);
                 spin_unlock_irqrestore(&rb->event_lock, flags);
@@ -5360,9 +5363,9 @@ void perf_prepare_sample(struct perf_event_header *header,
         }
  }
  
-static void perf_event_output(struct perf_event *event,
-                               struct perf_sample_data *data,
-                               struct pt_regs *regs)
+void perf_event_output(struct perf_event *event,
+                       struct perf_sample_data *data,
+                       struct pt_regs *regs)
  {
         struct perf_output_handle handle;
         struct perf_event_header header;
@@ -5954,6 +5957,39 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
  }
  
  /*
+ * Lost/dropped samples logging
+ */
+void perf_log_lost_samples(struct perf_event *event, u64 lost)
+{
+       struct perf_output_handle handle;
+       struct perf_sample_data sample;
+       int ret;
+
+       struct {
+               struct perf_event_header        header;
+               u64                             lost;
+       } lost_samples_event = {
+               .header = {
+                       .type = PERF_RECORD_LOST_SAMPLES,
+                       .misc = 0,
+                       .size = sizeof(lost_samples_event),
+               },
+               .lost           = lost,
+       };
+
+       perf_event_header__init_id(&lost_samples_event.header, &sample, event);
+
+       ret = perf_output_begin(&handle, event,
+                               lost_samples_event.header.size);
+       if (ret)
+               return;
+
+       perf_output_put(&handle, lost_samples_event);
+       perf_event__output_id_sample(event, &handle, &sample);
+       perf_output_end(&handle);
+}
+
+/*
   * IRQ throttle logging
   */
  
@@ -6843,9 +6879,8 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
         } else {
                 period = max_t(u64, 10000, hwc->sample_period);
         }
-       __hrtimer_start_range_ns(&hwc->hrtimer,
-                               ns_to_ktime(period), 0,
-                               HRTIMER_MODE_REL_PINNED, 0);
+       hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
+                     HRTIMER_MODE_REL_PINNED);
  }
  
  static void perf_swevent_cancel_hrtimer(struct perf_event *event)
@@ -7146,6 +7181,8 @@ perf_event_mux_interval_ms_show(struct device *dev,
         return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
  }
  
+static DEFINE_MUTEX(mux_interval_mutex);
+
  static ssize_t
  perf_event_mux_interval_ms_store(struct device *dev,
                                  struct device_attribute *attr,
@@ -7165,17 +7202,21 @@ perf_event_mux_interval_ms_store(struct device *dev,
         if (timer == pmu->hrtimer_interval_ms)
                 return count;
  
+       mutex_lock(&mux_interval_mutex);
         pmu->hrtimer_interval_ms = timer;
  
         /* update all cpuctx for this PMU */
-       for_each_possible_cpu(cpu) {
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
                 struct perf_cpu_context *cpuctx;
                 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
                 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
  
-               if (hrtimer_active(&cpuctx->hrtimer))
-                       hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval);
+               cpu_function_call(cpu,
+                       (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
         }
+       put_online_cpus();
+       mutex_unlock(&mux_interval_mutex);
  
         return count;
  }
@@ -7280,7 +7321,7 @@ skip_type:
                 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
                 cpuctx->ctx.pmu = pmu;
  
-               __perf_cpu_hrtimer_init(cpuctx, cpu);
+               __perf_mux_hrtimer_init(cpuctx, cpu);
  
                 cpuctx->unique_pmu = pmu;
         }
@@ -7364,7 +7405,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
                 return -ENODEV;
  
         if (event->group_leader != event) {
-               ctx = perf_event_ctx_lock(event->group_leader);
+               /*
+                * This ctx->mutex can nest when we're called through
+                * inheritance. See the perf_event_ctx_lock_nested() comment.
+                */
+               ctx = perf_event_ctx_lock_nested(event->group_leader,
+                                                SINGLE_DEPTH_NESTING);
                 BUG_ON(!ctx);
         }