rcu/nocb: Fix NOCB kthreads spawn failure with rcu_nocb_rdp_deoffload() direct call

If the rcuog/o[p] kthreads spawn failed, the offloaded rdp needs to
be explicitly deoffloaded, otherwise the target rdp is still considered
offloaded even though nothing actually handles the callbacks.

Signed-off-by: Zqiang <qiang1.zhang@intel.com>
Cc: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Uladzislau Rezki <uladzislau.rezki@sony.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
This commit is contained in:
Zqiang 2022-04-19 14:23:20 +02:00 committed by Paul E. McKenney
parent 24a57affd2
commit 3a5761dc02

View file

@ -986,10 +986,7 @@ static int rdp_offload_toggle(struct rcu_data *rdp,
}
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
if (wake_gp)
wake_up_process(rdp_gp->nocb_gp_kthread);
return 0;
return wake_gp;
}
static long rcu_nocb_rdp_deoffload(void *arg)
@ -997,9 +994,15 @@ static long rcu_nocb_rdp_deoffload(void *arg)
struct rcu_data *rdp = arg;
struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long flags;
int ret;
int wake_gp;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
/*
* rcu_nocb_rdp_deoffload() may be called directly if
* rcuog/o[p] spawn failed, because at this time the rdp->cpu
* is not online yet.
*/
WARN_ON_ONCE((rdp->cpu != raw_smp_processor_id()) && cpu_online(rdp->cpu));
pr_info("De-offloading %d\n", rdp->cpu);
@ -1023,10 +1026,41 @@ static long rcu_nocb_rdp_deoffload(void *arg)
*/
rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
invoke_rcu_core();
ret = rdp_offload_toggle(rdp, false, flags);
swait_event_exclusive(rdp->nocb_state_wq,
!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
SEGCBLIST_KTHREAD_GP));
wake_gp = rdp_offload_toggle(rdp, false, flags);
mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
if (rdp_gp->nocb_gp_kthread) {
if (wake_gp)
wake_up_process(rdp_gp->nocb_gp_kthread);
/*
* If rcuo[p] kthread spawn failed, directly remove SEGCBLIST_KTHREAD_CB.
* Just wait SEGCBLIST_KTHREAD_GP to be cleared by rcuog.
*/
if (!rdp->nocb_cb_kthread) {
rcu_nocb_lock_irqsave(rdp, flags);
rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
rcu_nocb_unlock_irqrestore(rdp, flags);
}
swait_event_exclusive(rdp->nocb_state_wq,
!rcu_segcblist_test_flags(cblist,
SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP));
} else {
/*
* No kthread to clear the flags for us or remove the rdp from the nocb list
* to iterate. Do it here instead. Locking doesn't look stricly necessary
* but we stick to paranoia in this rare path.
*/
rcu_nocb_lock_irqsave(rdp, flags);
rcu_segcblist_clear_flags(&rdp->cblist,
SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
rcu_nocb_unlock_irqrestore(rdp, flags);
list_del(&rdp->nocb_entry_rdp);
}
mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
/*
* Lock one last time to acquire latest callback updates from kthreads
* so we can later handle callbacks locally without locking.
@ -1047,7 +1081,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
return ret;
return 0;
}
int rcu_nocb_cpu_deoffload(int cpu)
@ -1079,7 +1113,8 @@ static long rcu_nocb_rdp_offload(void *arg)
struct rcu_data *rdp = arg;
struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long flags;
int ret;
int wake_gp;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
/*
@ -1089,6 +1124,9 @@ static long rcu_nocb_rdp_offload(void *arg)
if (!rdp->nocb_gp_rdp)
return -EINVAL;
if (WARN_ON_ONCE(!rdp_gp->nocb_gp_kthread))
return -EINVAL;
pr_info("Offloading %d\n", rdp->cpu);
/*
@ -1113,7 +1151,9 @@ static long rcu_nocb_rdp_offload(void *arg)
* WRITE flags READ callbacks
* rcu_nocb_unlock() rcu_nocb_unlock()
*/
ret = rdp_offload_toggle(rdp, true, flags);
wake_gp = rdp_offload_toggle(rdp, true, flags);
if (wake_gp)
wake_up_process(rdp_gp->nocb_gp_kthread);
swait_event_exclusive(rdp->nocb_state_wq,
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
@ -1126,7 +1166,7 @@ static long rcu_nocb_rdp_offload(void *arg)
rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE);
rcu_nocb_unlock_irqrestore(rdp, flags);
return ret;
return 0;
}
int rcu_nocb_cpu_offload(int cpu)
@ -1248,7 +1288,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
"rcuog/%d", rdp_gp->cpu);
if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) {
mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
return;
goto end;
}
WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
if (kthread_prio)
@ -1260,12 +1300,20 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
t = kthread_run(rcu_nocb_cb_kthread, rdp,
"rcuo%c/%d", rcu_state.abbr, cpu);
if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
return;
goto end;
if (kthread_prio)
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
WRITE_ONCE(rdp->nocb_cb_kthread, t);
WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
return;
end:
mutex_lock(&rcu_state.barrier_mutex);
if (rcu_rdp_is_offloaded(rdp)) {
rcu_nocb_rdp_deoffload(rdp);
cpumask_clear_cpu(cpu, rcu_nocb_mask);
}
mutex_unlock(&rcu_state.barrier_mutex);
}
/* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */