OSDN Git Service

sched/vtime: Prevent unstable evaluation of WARN(vtime->state)
authorChris Wilson <chris@chris-wilson.co.uk>
Thu, 23 Jan 2020 18:08:49 +0000 (19:08 +0100)
committerIngo Molnar <mingo@kernel.org>
Fri, 6 Mar 2020 11:57:16 +0000 (12:57 +0100)
As the vtime is sampled under loose seqcount protection by kcpustat, the
vtime fields may change as the code flows. Where logic dictates a field
has a static value, use a READ_ONCE.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Fixes: 74722bb223d0 ("sched/vtime: Bring up complete kcpustat accessor")
Link: https://lkml.kernel.org/r/20200123180849.28486-1-frederic@kernel.org
kernel/sched/cputime.c

index cff3e65..dac9104 100644 (file)
@@ -909,8 +909,10 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
        } while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 
-static int vtime_state_check(struct vtime *vtime, int cpu)
+static int vtime_state_fetch(struct vtime *vtime, int cpu)
 {
+       int state = READ_ONCE(vtime->state);
+
        /*
         * We raced against a context switch, fetch the
         * kcpustat task again.
@@ -927,10 +929,10 @@ static int vtime_state_check(struct vtime *vtime, int cpu)
         *
         * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
         */
-       if (vtime->state == VTIME_INACTIVE)
+       if (state == VTIME_INACTIVE)
                return -EAGAIN;
 
-       return 0;
+       return state;
 }
 
 static u64 kcpustat_user_vtime(struct vtime *vtime)
@@ -949,14 +951,15 @@ static int kcpustat_field_vtime(u64 *cpustat,
 {
        struct vtime *vtime = &tsk->vtime;
        unsigned int seq;
-       int err;
 
        do {
+               int state;
+
                seq = read_seqcount_begin(&vtime->seqcount);
 
-               err = vtime_state_check(vtime, cpu);
-               if (err < 0)
-                       return err;
+               state = vtime_state_fetch(vtime, cpu);
+               if (state < 0)
+                       return state;
 
                *val = cpustat[usage];
 
@@ -969,7 +972,7 @@ static int kcpustat_field_vtime(u64 *cpustat,
                 */
                switch (usage) {
                case CPUTIME_SYSTEM:
-                       if (vtime->state == VTIME_SYS)
+                       if (state == VTIME_SYS)
                                *val += vtime->stime + vtime_delta(vtime);
                        break;
                case CPUTIME_USER:
@@ -981,11 +984,11 @@ static int kcpustat_field_vtime(u64 *cpustat,
                                *val += kcpustat_user_vtime(vtime);
                        break;
                case CPUTIME_GUEST:
-                       if (vtime->state == VTIME_GUEST && task_nice(tsk) <= 0)
+                       if (state == VTIME_GUEST && task_nice(tsk) <= 0)
                                *val += vtime->gtime + vtime_delta(vtime);
                        break;
                case CPUTIME_GUEST_NICE:
-                       if (vtime->state == VTIME_GUEST && task_nice(tsk) > 0)
+                       if (state == VTIME_GUEST && task_nice(tsk) > 0)
                                *val += vtime->gtime + vtime_delta(vtime);
                        break;
                default:
@@ -1036,23 +1039,23 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
 {
        struct vtime *vtime = &tsk->vtime;
        unsigned int seq;
-       int err;
 
        do {
                u64 *cpustat;
                u64 delta;
+               int state;
 
                seq = read_seqcount_begin(&vtime->seqcount);
 
-               err = vtime_state_check(vtime, cpu);
-               if (err < 0)
-                       return err;
+               state = vtime_state_fetch(vtime, cpu);
+               if (state < 0)
+                       return state;
 
                *dst = *src;
                cpustat = dst->cpustat;
 
                /* Task is sleeping, dead or idle, nothing to add */
-               if (vtime->state < VTIME_SYS)
+               if (state < VTIME_SYS)
                        continue;
 
                delta = vtime_delta(vtime);
@@ -1061,15 +1064,15 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
                 * Task runs either in user (including guest) or kernel space,
                 * add pending nohz time to the right place.
                 */
-               if (vtime->state == VTIME_SYS) {
+               if (state == VTIME_SYS) {
                        cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
-               } else if (vtime->state == VTIME_USER) {
+               } else if (state == VTIME_USER) {
                        if (task_nice(tsk) > 0)
                                cpustat[CPUTIME_NICE] += vtime->utime + delta;
                        else
                                cpustat[CPUTIME_USER] += vtime->utime + delta;
                } else {
-                       WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+                       WARN_ON_ONCE(state != VTIME_GUEST);
                        if (task_nice(tsk) > 0) {
                                cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
                                cpustat[CPUTIME_NICE] += vtime->gtime + delta;
@@ -1080,7 +1083,7 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
                }
        } while (read_seqcount_retry(&vtime->seqcount, seq));
 
-       return err;
+       return 0;
 }
 
 void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)