sched/vtime: Prevent unstable evaluation of WARN(vtime->state)

As the vtime is sampled under loose seqcount protection by kcpustat, the
vtime fields may change as the code flows. Where logic dictates a field
has a static value, use a READ_ONCE.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Fixes: 74722bb223 ("sched/vtime: Bring up complete kcpustat accessor")
Link: https://lkml.kernel.org/r/20200123180849.28486-1-frederic@kernel.org
This commit is contained in:
Chris Wilson 2020-01-23 19:08:49 +01:00 committed by Ingo Molnar
parent 1b10d388d0
commit f1dfdab694
1 changed file with 22 additions and 19 deletions

View File

@@ -909,8 +909,10 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
} while (read_seqcount_retry(&vtime->seqcount, seq)); } while (read_seqcount_retry(&vtime->seqcount, seq));
} }
static int vtime_state_check(struct vtime *vtime, int cpu) static int vtime_state_fetch(struct vtime *vtime, int cpu)
{ {
int state = READ_ONCE(vtime->state);
/* /*
* We raced against a context switch, fetch the * We raced against a context switch, fetch the
* kcpustat task again. * kcpustat task again.
@@ -927,10 +929,10 @@ static int vtime_state_check(struct vtime *vtime, int cpu)
* *
* Case 1) is ok but 2) is not. So wait for a safe VTIME state. * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
*/ */
if (vtime->state == VTIME_INACTIVE) if (state == VTIME_INACTIVE)
return -EAGAIN; return -EAGAIN;
return 0; return state;
} }
static u64 kcpustat_user_vtime(struct vtime *vtime) static u64 kcpustat_user_vtime(struct vtime *vtime)
@@ -949,14 +951,15 @@ static int kcpustat_field_vtime(u64 *cpustat,
{ {
struct vtime *vtime = &tsk->vtime; struct vtime *vtime = &tsk->vtime;
unsigned int seq; unsigned int seq;
int err;
do { do {
int state;
seq = read_seqcount_begin(&vtime->seqcount); seq = read_seqcount_begin(&vtime->seqcount);
err = vtime_state_check(vtime, cpu); state = vtime_state_fetch(vtime, cpu);
if (err < 0) if (state < 0)
return err; return state;
*val = cpustat[usage]; *val = cpustat[usage];
@@ -969,7 +972,7 @@ static int kcpustat_field_vtime(u64 *cpustat,
*/ */
switch (usage) { switch (usage) {
case CPUTIME_SYSTEM: case CPUTIME_SYSTEM:
if (vtime->state == VTIME_SYS) if (state == VTIME_SYS)
*val += vtime->stime + vtime_delta(vtime); *val += vtime->stime + vtime_delta(vtime);
break; break;
case CPUTIME_USER: case CPUTIME_USER:
@@ -981,11 +984,11 @@ static int kcpustat_field_vtime(u64 *cpustat,
*val += kcpustat_user_vtime(vtime); *val += kcpustat_user_vtime(vtime);
break; break;
case CPUTIME_GUEST: case CPUTIME_GUEST:
if (vtime->state == VTIME_GUEST && task_nice(tsk) <= 0) if (state == VTIME_GUEST && task_nice(tsk) <= 0)
*val += vtime->gtime + vtime_delta(vtime); *val += vtime->gtime + vtime_delta(vtime);
break; break;
case CPUTIME_GUEST_NICE: case CPUTIME_GUEST_NICE:
if (vtime->state == VTIME_GUEST && task_nice(tsk) > 0) if (state == VTIME_GUEST && task_nice(tsk) > 0)
*val += vtime->gtime + vtime_delta(vtime); *val += vtime->gtime + vtime_delta(vtime);
break; break;
default: default:
@@ -1036,23 +1039,23 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
{ {
struct vtime *vtime = &tsk->vtime; struct vtime *vtime = &tsk->vtime;
unsigned int seq; unsigned int seq;
int err;
do { do {
u64 *cpustat; u64 *cpustat;
u64 delta; u64 delta;
int state;
seq = read_seqcount_begin(&vtime->seqcount); seq = read_seqcount_begin(&vtime->seqcount);
err = vtime_state_check(vtime, cpu); state = vtime_state_fetch(vtime, cpu);
if (err < 0) if (state < 0)
return err; return state;
*dst = *src; *dst = *src;
cpustat = dst->cpustat; cpustat = dst->cpustat;
/* Task is sleeping, dead or idle, nothing to add */ /* Task is sleeping, dead or idle, nothing to add */
if (vtime->state < VTIME_SYS) if (state < VTIME_SYS)
continue; continue;
delta = vtime_delta(vtime); delta = vtime_delta(vtime);
@@ -1061,15 +1064,15 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
* Task runs either in user (including guest) or kernel space, * Task runs either in user (including guest) or kernel space,
* add pending nohz time to the right place. * add pending nohz time to the right place.
*/ */
if (vtime->state == VTIME_SYS) { if (state == VTIME_SYS) {
cpustat[CPUTIME_SYSTEM] += vtime->stime + delta; cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
} else if (vtime->state == VTIME_USER) { } else if (state == VTIME_USER) {
if (task_nice(tsk) > 0) if (task_nice(tsk) > 0)
cpustat[CPUTIME_NICE] += vtime->utime + delta; cpustat[CPUTIME_NICE] += vtime->utime + delta;
else else
cpustat[CPUTIME_USER] += vtime->utime + delta; cpustat[CPUTIME_USER] += vtime->utime + delta;
} else { } else {
WARN_ON_ONCE(vtime->state != VTIME_GUEST); WARN_ON_ONCE(state != VTIME_GUEST);
if (task_nice(tsk) > 0) { if (task_nice(tsk) > 0) {
cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta; cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
cpustat[CPUTIME_NICE] += vtime->gtime + delta; cpustat[CPUTIME_NICE] += vtime->gtime + delta;
@@ -1080,7 +1083,7 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
} }
} while (read_seqcount_retry(&vtime->seqcount, seq)); } while (read_seqcount_retry(&vtime->seqcount, seq));
return err; return 0;
} }
void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu) void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)