perf: Fix data race between pin_count increment/decrement
[android-x86/kernel.git] kernel/events/core.c
index 8b94eb6..dd740f9 100644
@@ -2086,6 +2086,7 @@ __perf_remove_from_context(struct perf_event *event,
 
        if (!ctx->nr_events && ctx->is_active) {
                ctx->is_active = 0;
+               ctx->rotate_necessary = 0;
                if (ctx->task) {
                        WARN_ON_ONCE(cpuctx->task_ctx != ctx);
                        cpuctx->task_ctx = NULL;
@@ -2961,6 +2962,13 @@ static void ctx_sched_out(struct perf_event_context *ctx,
        if (is_active & EVENT_FLEXIBLE) {
                list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list)
                        group_sched_out(event, cpuctx, ctx);
+
+               /*
+                * Since we cleared EVENT_FLEXIBLE, also clear
+                * rotate_necessary; it will be reset by
+                * ctx_flexible_sched_in() when needed.
+                */
+               ctx->rotate_necessary = 0;
        }
        perf_pmu_enable(ctx->pmu);
 }
@@ -3319,10 +3327,13 @@ static int flexible_sched_in(struct perf_event *event, void *data)
                return 0;
 
        if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
-               if (!group_sched_in(event, sid->cpuctx, sid->ctx))
-                       list_add_tail(&event->active_list, &sid->ctx->flexible_active);
-               else
+               int ret = group_sched_in(event, sid->cpuctx, sid->ctx);
+               if (ret) {
                        sid->can_add_hw = 0;
+                       sid->ctx->rotate_necessary = 1;
+                       return 0;
+               }
+               list_add_tail(&event->active_list, &sid->ctx->flexible_active);
        }
 
        return 0;
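
The two hunks above set up a simple handshake: flexible_sched_in() sets ctx->rotate_necessary when a flexible group fails to get onto the hardware, and ctx_sched_out() clears it once the flexible events are taken off the PMU. Below is a minimal user-space sketch of that handshake, with invented names and a plain struct standing in for the real perf context; it is an illustration of the flag protocol, not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a perf event context: only the fields this sketch needs. */
struct toy_ctx {
	int nr_flexible;	/* flexible groups wanting to run */
	int hw_slots;		/* counters the fake PMU offers */
	bool rotate_necessary;	/* set when at least one group did not fit */
};

/* Rough analogue of flexible_sched_in(): on failure, ask for rotation. */
static void toy_sched_in(struct toy_ctx *ctx)
{
	int scheduled = ctx->nr_flexible < ctx->hw_slots ?
			ctx->nr_flexible : ctx->hw_slots;

	if (scheduled < ctx->nr_flexible)
		ctx->rotate_necessary = true;
}

/* Rough analogue of ctx_sched_out(EVENT_FLEXIBLE): nothing active, clear it. */
static void toy_sched_out(struct toy_ctx *ctx)
{
	ctx->rotate_necessary = false;
}

int main(void)
{
	struct toy_ctx ctx = { .nr_flexible = 4, .hw_slots = 2 };

	toy_sched_in(&ctx);
	printf("after sched_in:  rotate_necessary=%d\n", ctx.rotate_necessary);
	toy_sched_out(&ctx);
	printf("after sched_out: rotate_necessary=%d\n", ctx.rotate_necessary);
	return 0;
}

The flag simply records "something was left unscheduled" and is consumed and cleared by the paths that change that fact, which is what lets perf_rotate_context() skip contexts that have nothing to gain from rotation.
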
@@ -3680,34 +3691,45 @@ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
        perf_event_groups_insert(&ctx->flexible_groups, event);
 }
 
+/* pick an event from the flexible_groups to rotate */
 static inline struct perf_event *
-ctx_first_active(struct perf_event_context *ctx)
+ctx_event_to_rotate(struct perf_event_context *ctx)
 {
-       return list_first_entry_or_null(&ctx->flexible_active,
-                                       struct perf_event, active_list);
+       struct perf_event *event;
+
+       /* pick the first active flexible event */
+       event = list_first_entry_or_null(&ctx->flexible_active,
+                                        struct perf_event, active_list);
+
+       /* if no active flexible event, pick the first event */
+       if (!event) {
+               event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
+                                     typeof(*event), group_node);
+       }
+
+       /*
+        * Unconditionally clear rotate_necessary; if ctx_flexible_sched_in()
+        * finds there are unschedulable events, it will set it again.
+        */
+       ctx->rotate_necessary = 0;
+
+       return event;
 }
 
 static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
        struct perf_event *cpu_event = NULL, *task_event = NULL;
-       bool cpu_rotate = false, task_rotate = false;
-       struct perf_event_context *ctx = NULL;
+       struct perf_event_context *task_ctx = NULL;
+       int cpu_rotate, task_rotate;
 
        /*
         * Since we run this from IRQ context, nobody can install new
         * events, thus the event count values are stable.
         */
 
-       if (cpuctx->ctx.nr_events) {
-               if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
-                       cpu_rotate = true;
-       }
-
-       ctx = cpuctx->task_ctx;
-       if (ctx && ctx->nr_events) {
-               if (ctx->nr_events != ctx->nr_active)
-                       task_rotate = true;
-       }
+       cpu_rotate = cpuctx->ctx.rotate_necessary;
+       task_ctx = cpuctx->task_ctx;
+       task_rotate = task_ctx ? task_ctx->rotate_necessary : 0;
 
        if (!(cpu_rotate || task_rotate))
                return false;
@@ -3716,25 +3738,25 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
        perf_pmu_disable(cpuctx->ctx.pmu);
 
        if (task_rotate)
-               task_event = ctx_first_active(ctx);
+               task_event = ctx_event_to_rotate(task_ctx);
        if (cpu_rotate)
-               cpu_event = ctx_first_active(&cpuctx->ctx);
+               cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
 
        /*
         * As per the order given at ctx_resched() first 'pop' task flexible
         * and then, if needed CPU flexible.
         */
-       if (task_event || (ctx && cpu_event))
-               ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+       if (task_event || (task_ctx && cpu_event))
+               ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE);
        if (cpu_event)
                cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 
        if (task_event)
-               rotate_ctx(ctx, task_event);
+               rotate_ctx(task_ctx, task_event);
        if (cpu_event)
                rotate_ctx(&cpuctx->ctx, cpu_event);
 
-       perf_event_sched_in(cpuctx, ctx, current);
+       perf_event_sched_in(cpuctx, task_ctx, current);
 
        perf_pmu_enable(cpuctx->ctx.pmu);
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
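
ctx_event_to_rotate() above prefers the first event on the active list and only falls back to the first entry of the flexible_groups tree when nothing is active, so a context whose flexible events never got scheduled still rotates. The toy illustration below shows that selection order only; the names are invented and a plain array stands in for the kernel's list and rbtree.

#include <stddef.h>
#include <stdio.h>

struct toy_event {
	const char *name;
	int active;	/* 1 if currently scheduled on the PMU */
};

/*
 * Stand-in for ctx_event_to_rotate(): the "active list" is the subset of
 * events with .active set, and array order stands in for the group tree.
 */
static struct toy_event *pick_to_rotate(struct toy_event *ev, size_t n)
{
	size_t i;

	/* prefer the first active flexible event */
	for (i = 0; i < n; i++)
		if (ev[i].active)
			return &ev[i];

	/* otherwise fall back to the first event in the group tree */
	return n ? &ev[0] : NULL;
}

int main(void)
{
	struct toy_event ev[] = {
		{ "cycles",       0 },
		{ "instructions", 0 },
		{ "cache-misses", 0 },
	};
	struct toy_event *victim = pick_to_rotate(ev, 3);

	/* nothing is active, so the fallback picks "cycles" */
	printf("rotate: %s\n", victim ? victim->name : "(none)");
	return 0;
}
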
@@ -4133,7 +4155,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
                ctx = &cpuctx->ctx;
                get_ctx(ctx);
+               raw_spin_lock_irqsave(&ctx->lock, flags);
                ++ctx->pin_count;
+               raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
                return ctx;
        }
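
The hunk above wraps the pin_count increment in ctx->lock because the count is also modified under that lock elsewhere, and a bare ++ on shared data is not atomic. A user-space pthreads analogue of the fix follows; the names are invented, it is not the kernel's locking primitive, and it only demonstrates why the locked increment is needed (compile with -pthread).

#include <pthread.h>
#include <stdio.h>

/* Toy analogue of a context whose pin count is protected by a lock. */
struct pinned_ctx {
	pthread_mutex_t lock;
	int pin_count;
};

static struct pinned_ctx ctx = { .lock = PTHREAD_MUTEX_INITIALIZER };

/* Mirror of the patched path: take the lock around the increment. */
static void *pin_many(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		pthread_mutex_lock(&ctx.lock);
		++ctx.pin_count;	/* a plain ++ without the lock can lose updates */
		pthread_mutex_unlock(&ctx.lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, pin_many, NULL);
	pthread_create(&b, NULL, pin_many, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* With the lock held around each update the result is exactly 200000. */
	printf("pin_count = %d\n", ctx.pin_count);
	return 0;
}
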
@@ -5475,11 +5499,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
 static void perf_mmap_close(struct vm_area_struct *vma)
 {
        struct perf_event *event = vma->vm_file->private_data;
-
        struct ring_buffer *rb = ring_buffer_get(event);
        struct user_struct *mmap_user = rb->mmap_user;
        int mmap_locked = rb->mmap_locked;
        unsigned long size = perf_data_size(rb);
+       bool detach_rest = false;
 
        if (event->pmu->event_unmapped)
                event->pmu->event_unmapped(event, vma->vm_mm);
@@ -5510,7 +5534,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
                mutex_unlock(&event->mmap_mutex);
        }
 
-       atomic_dec(&rb->mmap_count);
+       if (atomic_dec_and_test(&rb->mmap_count))
+               detach_rest = true;
 
        if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
                goto out_put;
@@ -5519,7 +5544,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        mutex_unlock(&event->mmap_mutex);
 
        /* If there's still other mmap()s of this buffer, we're done. */
-       if (atomic_read(&rb->mmap_count))
+       if (!detach_rest)
                goto out_put;
 
        /*
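
The perf_mmap_close() hunks switch from "decrement, then re-read the counter later" to capturing the result of the decrement itself in detach_rest, so the decision to tear down the remaining mappings is made at the exact moment the count hits zero rather than after a window in which another mapping could appear. The C11-atomics sketch below shows only that pattern, with invented names and no attempt to mirror the rest of the function.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int mmap_count = 2;	/* two mappings of one ring buffer */

/* Mirrors the patched perf_mmap_close(): remember the dec-and-test result. */
static void toy_mmap_close(int id)
{
	bool detach_rest = false;

	/* fetch_sub returns the old value; old == 1 means we just hit zero */
	if (atomic_fetch_sub(&mmap_count, 1) == 1)
		detach_rest = true;

	/*
	 * Re-reading mmap_count at this point (what the old code did) could
	 * observe a value already changed by a concurrent mapper, so two
	 * closers could both skip, or both attempt, the final detach.
	 */
	if (detach_rest)
		printf("closer %d detaches the remaining events\n", id);
	else
		printf("closer %d: buffer still mapped elsewhere\n", id);
}

int main(void)
{
	toy_mmap_close(1);
	toy_mmap_close(2);
	return 0;
}
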