perf: Fix data race between pin_count increment/decrement
[android-x86/kernel.git] kernel/events/core.c
index 8b94eb6..dd740f9 100644
@@ -2086,6 +2086,7 @@ __perf_remove_from_context(struct perf_event *event,
 
        if (!ctx->nr_events && ctx->is_active) {
                ctx->is_active = 0;
+               ctx->rotate_necessary = 0;
                if (ctx->task) {
                        WARN_ON_ONCE(cpuctx->task_ctx != ctx);
                        cpuctx->task_ctx = NULL;
@@ -2961,6 +2962,13 @@ static void ctx_sched_out(struct perf_event_context *ctx,
        if (is_active & EVENT_FLEXIBLE) {
                list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list)
                        group_sched_out(event, cpuctx, ctx);
+
+               /*
+                * Since we cleared EVENT_FLEXIBLE, also clear
+                * rotate_necessary; it will be reset by
+                * ctx_flexible_sched_in() when needed.
+                */
+               ctx->rotate_necessary = 0;
        }
        perf_pmu_enable(ctx->pmu);
 }
@@ -3319,10 +3327,13 @@ static int flexible_sched_in(struct perf_event *event, void *data)
                return 0;
 
        if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
-               if (!group_sched_in(event, sid->cpuctx, sid->ctx))
-                       list_add_tail(&event->active_list, &sid->ctx->flexible_active);
-               else
+               int ret = group_sched_in(event, sid->cpuctx, sid->ctx);
+               if (ret) {
                        sid->can_add_hw = 0;
+                       sid->ctx->rotate_necessary = 1;
+                       return 0;
+               }
+               list_add_tail(&event->active_list, &sid->ctx->flexible_active);
        }
 
        return 0;
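
The two hunks above set up a simple handshake: flexible_sched_in() sets ctx->rotate_necessary when a flexible group fails to get onto the hardware, and ctx_sched_out() clears it once the flexible events are taken off the PMU. Below is a minimal user-space sketch of that handshake, with invented names and a plain struct standing in for the real perf context; it is an illustration of the flag protocol, not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a perf event context: only the fields this sketch needs. */
struct toy_ctx {
	int nr_flexible;	/* flexible groups wanting to run */
	int hw_slots;		/* counters the fake PMU offers */
	bool rotate_necessary;	/* set when at least one group did not fit */
};

/* Rough analogue of flexible_sched_in(): on failure, ask for rotation. */
static void toy_sched_in(struct toy_ctx *ctx)
{
	int scheduled = ctx->nr_flexible < ctx->hw_slots ?
			ctx->nr_flexible : ctx->hw_slots;

	if (scheduled < ctx->nr_flexible)
		ctx->rotate_necessary = true;
}

/* Rough analogue of ctx_sched_out(EVENT_FLEXIBLE): nothing active, clear it. */
static void toy_sched_out(struct toy_ctx *ctx)
{
	ctx->rotate_necessary = false;
}

int main(void)
{
	struct toy_ctx ctx = { .nr_flexible = 4, .hw_slots = 2 };

	toy_sched_in(&ctx);
	printf("after sched_in:  rotate_necessary=%d\n", ctx.rotate_necessary);
	toy_sched_out(&ctx);
	printf("after sched_out: rotate_necessary=%d\n", ctx.rotate_necessary);
	return 0;
}

The flag simply records "something was left unscheduled" and is consumed and cleared by the paths that change that fact, which is what lets perf_rotate_context() skip contexts that have nothing to gain from rotation.
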
@@ -3680,34 +3691,45 @@ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
        perf_event_groups_insert(&ctx->flexible_groups, event);
 }
 
+/* pick an event from the flexible_groups to rotate */
 static inline struct perf_event *
-ctx_first_active(struct perf_event_context *ctx)
+ctx_event_to_rotate(struct perf_event_context *ctx)
 {
-       return list_first_entry_or_null(&ctx->flexible_active,
-                                       struct perf_event, active_list);
+       struct perf_event *event;
+
+       /* pick the first active flexible event */
+       event = list_first_entry_or_null(&ctx->flexible_active,
+                                        struct perf_event, active_list);
+
+       /* if no active flexible event, pick the first event */
+       if (!event) {
+               event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
+                                     typeof(*event), group_node);
+       }
+
+       /*
+        * Unconditionally clear rotate_necessary; if ctx_flexible_sched_in()
+        * finds there are unschedulable events, it will set it again.
+        */
+       ctx->rotate_necessary = 0;
+
+       return event;
 }
 
 static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
        struct perf_event *cpu_event = NULL, *task_event = NULL;
-       bool cpu_rotate = false, task_rotate = false;
-       struct perf_event_context *ctx = NULL;
+       struct perf_event_context *task_ctx = NULL;
+       int cpu_rotate, task_rotate;
 
        /*
         * Since we run this from IRQ context, nobody can install new
         * events, thus the event count values are stable.
         */
 
-       if (cpuctx->ctx.nr_events) {
-               if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
-                       cpu_rotate = true;
-       }
-
-       ctx = cpuctx->task_ctx;
-       if (ctx && ctx->nr_events) {
-               if (ctx->nr_events != ctx->nr_active)
-                       task_rotate = true;
-       }
+       cpu_rotate = cpuctx->ctx.rotate_necessary;
+       task_ctx = cpuctx->task_ctx;
+       task_rotate = task_ctx ? task_ctx->rotate_necessary : 0;
 
        if (!(cpu_rotate || task_rotate))
                return false;
@@ -3716,25 +3738,25 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
        perf_pmu_disable(cpuctx->ctx.pmu);
 
        if (task_rotate)
-               task_event = ctx_first_active(ctx);
+               task_event = ctx_event_to_rotate(task_ctx);
        if (cpu_rotate)
-               cpu_event = ctx_first_active(&cpuctx->ctx);
+               cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
 
        /*
         * As per the order given at ctx_resched() first 'pop' task flexible
         * and then, if needed CPU flexible.
         */
-       if (task_event || (ctx && cpu_event))
-               ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+       if (task_event || (task_ctx && cpu_event))
+               ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE);
        if (cpu_event)
                cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 
        if (task_event)
-               rotate_ctx(ctx, task_event);
+               rotate_ctx(task_ctx, task_event);
        if (cpu_event)
                rotate_ctx(&cpuctx->ctx, cpu_event);
 
-       perf_event_sched_in(cpuctx, ctx, current);
+       perf_event_sched_in(cpuctx, task_ctx, current);
 
        perf_pmu_enable(cpuctx->ctx.pmu);
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
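
ctx_event_to_rotate() above prefers the first event on the active list and only falls back to the first entry of the flexible_groups tree when nothing is active, so a context whose flexible events never got scheduled still rotates. The toy illustration below shows that selection order only; the names are invented and a plain array stands in for the kernel's list and rbtree.

#include <stddef.h>
#include <stdio.h>

struct toy_event {
	const char *name;
	int active;	/* 1 if currently scheduled on the PMU */
};

/*
 * Stand-in for ctx_event_to_rotate(): the "active list" is the subset of
 * events with .active set, and array order stands in for the group tree.
 */
static struct toy_event *pick_to_rotate(struct toy_event *ev, size_t n)
{
	size_t i;

	/* prefer the first active flexible event */
	for (i = 0; i < n; i++)
		if (ev[i].active)
			return &ev[i];

	/* otherwise fall back to the first event in the group tree */
	return n ? &ev[0] : NULL;
}

int main(void)
{
	struct toy_event ev[] = {
		{ "cycles",       0 },
		{ "instructions", 0 },
		{ "cache-misses", 0 },
	};
	struct toy_event *victim = pick_to_rotate(ev, 3);

	/* nothing is active, so the fallback picks "cycles" */
	printf("rotate: %s\n", victim ? victim->name : "(none)");
	return 0;
}
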
@@ -4133,7 +4155,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
                ctx = &cpuctx->ctx;
                get_ctx(ctx);
+               raw_spin_lock_irqsave(&ctx->lock, flags);
                ++ctx->pin_count;
+               raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
                return ctx;
        }
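
The hunk above wraps the pin_count increment in ctx->lock because the count is also modified under that lock elsewhere, and a bare ++ on shared data is not atomic. A user-space pthreads analogue of the fix follows; the names are invented, it is not the kernel's locking primitive, and it only demonstrates why the locked increment is needed (compile with -pthread).

#include <pthread.h>
#include <stdio.h>

/* Toy analogue of a context whose pin count is protected by a lock. */
struct pinned_ctx {
	pthread_mutex_t lock;
	int pin_count;
};

static struct pinned_ctx ctx = { .lock = PTHREAD_MUTEX_INITIALIZER };

/* Mirror of the patched path: take the lock around the increment. */
static void *pin_many(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		pthread_mutex_lock(&ctx.lock);
		++ctx.pin_count;	/* a plain ++ without the lock can lose updates */
		pthread_mutex_unlock(&ctx.lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, pin_many, NULL);
	pthread_create(&b, NULL, pin_many, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* With the lock held around each update the result is exactly 200000. */
	printf("pin_count = %d\n", ctx.pin_count);
	return 0;
}
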
@@ -5475,11 +5499,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
 static void perf_mmap_close(struct vm_area_struct *vma)
 {
        struct perf_event *event = vma->vm_file->private_data;
-
        struct ring_buffer *rb = ring_buffer_get(event);
        struct user_struct *mmap_user = rb->mmap_user;
        int mmap_locked = rb->mmap_locked;
        unsigned long size = perf_data_size(rb);
+       bool detach_rest = false;
 
        if (event->pmu->event_unmapped)
                event->pmu->event_unmapped(event, vma->vm_mm);
@@ -5510,7 +5534,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
                mutex_unlock(&event->mmap_mutex);
        }
 
-       atomic_dec(&rb->mmap_count);
+       if (atomic_dec_and_test(&rb->mmap_count))
+               detach_rest = true;
 
        if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
                goto out_put;
@@ -5519,7 +5544,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        mutex_unlock(&event->mmap_mutex);
 
        /* If there's still other mmap()s of this buffer, we're done. */
-       if (atomic_read(&rb->mmap_count))
+       if (!detach_rest)
                goto out_put;
 
        /*
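
The perf_mmap_close() hunks switch from "decrement, then re-read the counter later" to capturing the result of the decrement itself in detach_rest, so the decision to tear down the remaining mappings is made at the exact moment the count hits zero rather than after a window in which another mapping could appear. The C11-atomics sketch below shows only that pattern, with invented names and no attempt to mirror the rest of the function.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int mmap_count = 2;	/* two mappings of one ring buffer */

/* Mirrors the patched perf_mmap_close(): remember the dec-and-test result. */
static void toy_mmap_close(int id)
{
	bool detach_rest = false;

	/* fetch_sub returns the old value; old == 1 means we just hit zero */
	if (atomic_fetch_sub(&mmap_count, 1) == 1)
		detach_rest = true;

	/*
	 * Re-reading mmap_count at this point (what the old code did) could
	 * observe a value already changed by a concurrent mapper, so two
	 * closers could both skip, or both attempt, the final detach.
	 */
	if (detach_rest)
		printf("closer %d detaches the remaining events\n", id);
	else
		printf("closer %d: buffer still mapped elsewhere\n", id);
}

int main(void)
{
	toy_mmap_close(1);
	toy_mmap_close(2);
	return 0;
}
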