Merge branch 'exp.2022.05.11a' into HEAD

exp.2022.05.11a: Expedited-grace-period latency-reduction updates.
Paul E. McKenney 2022-05-11 11:49:35 -07:00
commit ce13389053
10 changed files with 268 additions and 36 deletions

Documentation/RCU/stallwarn.rst

@@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
Stall-warning messages may be enabled and disabled completely via
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
--------------------------------
Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
the expedited grace period. This parameter defines the period
of time that RCU will wait from the beginning of an expedited
grace period until it issues an RCU CPU stall warning. This time
period is normally 20 milliseconds on Android devices. A zero
value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
after conversion to milliseconds.
This configuration parameter may be changed at runtime via the
/sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout; however,
this parameter is checked only at the beginning of a cycle. If you
are in a current stall cycle, setting it to a new value will change
the timeout for the -next- stall.
Stall-warning messages may be enabled and disabled completely via
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
RCU_STALL_DELAY_DELTA
---------------------
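As the new stallwarn.rst text above explains, the expedited timeout is
tunable at runtime through sysfs. A minimal userspace sketch (not part of
this commit; assumes root and a kernel built with CONFIG_RCU_STALL_COMMON=y)
that reads the current value and then raises it to 100 ms, which takes
effect for the next stall cycle:

/* Hypothetical userspace sketch, not part of this commit. */
#include <stdio.h>

#define PARAM "/sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout"

int main(void)
{
        int cur_ms;
        FILE *f = fopen(PARAM, "r+");

        if (!f) {
                perror(PARAM);
                return 1;
        }
        if (fscanf(f, "%d", &cur_ms) == 1)
                printf("current expedited stall timeout: %d ms\n", cur_ms);
        rewind(f);
        fprintf(f, "100\n");    /* applies to the -next- stall cycle */
        return fclose(f) ? 1 : 0;
}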

Documentation/admin-guide/kernel-parameters.txt

@@ -4893,6 +4893,18 @@

	rcupdate.rcu_cpu_stall_timeout= [KNL]
			Set timeout for RCU CPU stall warning messages.
			The value is in seconds and the maximum allowed
			value is 300 seconds.

	rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
			Set timeout for expedited RCU CPU stall warning
			messages. The value is in milliseconds and the
			maximum allowed value is 21000 milliseconds.
			Please note that this value is adjusted to the
			architecture's timer-tick resolution. Setting
			this to zero causes the value from
			rcupdate.rcu_cpu_stall_timeout to be used (after
			conversion from seconds to milliseconds).

	rcupdate.rcu_expedited= [KNL]
			Use expedited grace-period primitives, for
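For illustration, a simplified userspace model (not from this commit) of
the fallback and cap just documented; the kernel's real calculation in
rcu_exp_jiffies_till_stall_check() works in jiffies, so values are
additionally rounded to the tick resolution:

/* Illustration only: model the documented zero-fallback and the
 * 21000 ms maximum.  The in-kernel calculation is jiffies-based. */
#include <stdio.h>

static int exp_stall_ms(int exp_timeout_ms, int stall_timeout_s)
{
        int ms = exp_timeout_ms ? exp_timeout_ms : stall_timeout_s * 1000;

        return ms > 21000 ? 21000 : ms;         /* documented maximum */
}

int main(void)
{
        printf("%d\n", exp_stall_ms(0, 21));    /* fallback: 21000 */
        printf("%d\n", exp_stall_ms(20, 21));   /* Android default: 20 */
        return 0;
}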

kernel/rcu/Kconfig

@@ -220,6 +220,20 @@ config RCU_BOOST_DELAY
	  Accept the default if unsure.

config RCU_EXP_KTHREAD
	bool "Perform RCU expedited work in a real-time kthread"
	depends on RCU_BOOST && RCU_EXPERT
	default !PREEMPT_RT && NR_CPUS <= 32
	help
	  Use this option to further reduce the latencies of expedited
	  grace periods at the expense of being more disruptive.

	  This option is disabled by default on PREEMPT_RT=y kernels,
	  which disable expedited grace periods after boot by
	  unconditionally setting rcupdate.rcu_normal_after_boot=1.

	  Accept the default if unsure.

config RCU_NOCB_CPU
	bool "Offload RCU callback processing from boot-selected CPUs"
	depends on TREE_RCU
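The option above moves expedited-grace-period work from workqueues to a
dedicated real-time kthread. A minimal sketch of the kthread_worker API it
builds on (hypothetical demo_* names; it mirrors the
rcu_start_exp_gp_kworkers() hunk in kernel/rcu/tree.c below, which runs as
built-in code, so availability of these symbols in loadable modules is not
guaranteed):

/* Sketch, not from this commit: create a dedicated worker kthread,
 * raise it to SCHED_FIFO, and queue one work item to it. */
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <uapi/linux/sched/types.h>

static struct kthread_worker *demo_worker;
static struct kthread_work demo_work;

static void demo_work_fn(struct kthread_work *work)
{
        pr_info("running in a SCHED_FIFO kthread worker\n");
}

static int __init demo_worker_init(void)
{
        struct sched_param param = { .sched_priority = 1 };

        demo_worker = kthread_create_worker(0, "demo_kworker");
        if (IS_ERR(demo_worker))
                return PTR_ERR(demo_worker);
        sched_setscheduler_nocheck(demo_worker->task, SCHED_FIFO, &param);
        kthread_init_work(&demo_work, demo_work_fn);
        kthread_queue_work(demo_worker, &demo_work);
        return 0;
}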

kernel/rcu/Kconfig.debug

@@ -82,6 +82,20 @@ config RCU_CPU_STALL_TIMEOUT
	  RCU grace period persists, additional CPU stall warnings are
	  printed at more widely spaced intervals.

config RCU_EXP_CPU_STALL_TIMEOUT
	int "Expedited RCU CPU stall timeout in milliseconds"
	depends on RCU_STALL_COMMON
	range 0 21000
	default 20 if ANDROID
	default 0 if !ANDROID
	help
	  If a given expedited RCU grace period extends more than the
	  specified number of milliseconds, a CPU stall warning is printed.
	  If the RCU grace period persists, additional CPU stall warnings
	  are printed at more widely spaced intervals. A value of zero
	  says to use the RCU_CPU_STALL_TIMEOUT value converted from
	  seconds to milliseconds.

config RCU_TRACE
	bool "Enable tracing for RCU"
	depends on DEBUG_KERNEL

kernel/rcu/rcu.h

@@ -210,7 +210,9 @@ static inline bool rcu_stall_is_suppressed_at_boot(void)
extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_exp_cpu_stall_timeout;
int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);
static inline bool rcu_stall_is_suppressed(void)
{
@@ -536,7 +538,12 @@ int rcu_get_gp_kthreads_prio(void);
void rcu_fwd_progress_check(unsigned long j);
void rcu_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
#ifdef CONFIG_RCU_EXP_KTHREAD
extern struct kthread_worker *rcu_exp_gp_kworker;
extern struct kthread_worker *rcu_exp_par_gp_kworker;
#else /* !CONFIG_RCU_EXP_KTHREAD */
extern struct workqueue_struct *rcu_par_gp_wq;
#endif /* CONFIG_RCU_EXP_KTHREAD */
void rcu_gp_slow_register(atomic_t *rgssp);
void rcu_gp_slow_unregister(atomic_t *rgssp);
#endif /* #else #ifdef CONFIG_TINY_RCU */

kernel/rcu/tree.c

@@ -4526,6 +4526,51 @@ static int rcu_pm_notify(struct notifier_block *self,
	return NOTIFY_OK;
}
#ifdef CONFIG_RCU_EXP_KTHREAD
struct kthread_worker *rcu_exp_gp_kworker;
struct kthread_worker *rcu_exp_par_gp_kworker;

static void __init rcu_start_exp_gp_kworkers(void)
{
	const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
	const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
	struct sched_param param = { .sched_priority = kthread_prio };

	rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
	if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
		pr_err("Failed to create %s!\n", gp_kworker_name);
		return;
	}

	rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
	if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
		pr_err("Failed to create %s!\n", par_gp_kworker_name);
		kthread_destroy_worker(rcu_exp_gp_kworker);
		return;
	}

	sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
	sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
				   &param);
}

static inline void rcu_alloc_par_gp_wq(void)
{
}
#else /* !CONFIG_RCU_EXP_KTHREAD */
struct workqueue_struct *rcu_par_gp_wq;

static void __init rcu_start_exp_gp_kworkers(void)
{
}

static inline void rcu_alloc_par_gp_wq(void)
{
	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
	WARN_ON(!rcu_par_gp_wq);
}
#endif /* CONFIG_RCU_EXP_KTHREAD */

/*
 * Spawn the kthreads that handle RCU's grace periods.
 */
@@ -4562,6 +4607,8 @@ static int __init rcu_spawn_gp_kthread(void)
	rcu_spawn_cpu_nocb_kthread(smp_processor_id());
	rcu_spawn_one_boost_kthread(rdp->mynode);
	rcu_spawn_core_kthreads();
	/* Create kthread worker for expedited GPs */
	rcu_start_exp_gp_kworkers();
	return 0;
}
early_initcall(rcu_spawn_gp_kthread);
@@ -4807,7 +4854,6 @@ static void __init rcu_dump_rcu_node_tree(void)
}
struct workqueue_struct *rcu_gp_wq;
struct workqueue_struct *rcu_par_gp_wq;
static void __init kfree_rcu_batch_init(void)
{
@@ -4872,8 +4918,7 @@ void __init rcu_init(void)
	/* Create workqueue for Tree SRCU and for expedited GPs. */
	rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
	WARN_ON(!rcu_gp_wq);
	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
	WARN_ON(!rcu_par_gp_wq);
	rcu_alloc_par_gp_wq();

	/* Fill in default value for rcutree.qovld boot parameter. */
	/* -After- the rcu_node ->lock fields are initialized! */
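The non-kthread branch above keeps the workqueue path. A hedged sketch
(hypothetical demo_* names) of the same guarded-allocation pattern used by
rcu_alloc_par_gp_wq(): WQ_MEM_RECLAIM attaches a rescuer thread so queued
work can still make forward progress under memory pressure, which matters
for anything that may sit on the reclaim path:

/* Sketch, not from this commit: allocate a reclaim-safe workqueue
 * and fail loudly if that is impossible. */
#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;

static int __init demo_wq_init(void)
{
        demo_wq = alloc_workqueue("demo_wq", WQ_MEM_RECLAIM, 0);
        if (WARN_ON(!demo_wq))
                return -ENOMEM;
        return 0;
}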

kernel/rcu/tree.h

@@ -10,6 +10,7 @@
*/
#include <linux/cache.h>
#include <linux/kthread.h>
#include <linux/spinlock.h>
#include <linux/rtmutex.h>
#include <linux/threads.h>
@@ -23,7 +24,11 @@

/* Communicate arguments to a workqueue handler. */
struct rcu_exp_work {
	unsigned long rew_s;
#ifdef CONFIG_RCU_EXP_KTHREAD
	struct kthread_work rew_work;
#else
	struct work_struct rew_work;
#endif /* CONFIG_RCU_EXP_KTHREAD */
};
/* RCU's kthread states for tracing. */

kernel/rcu/tree_exp.h

@@ -334,15 +334,13 @@ fastpath:

/*
 * Select the CPUs within the specified rcu_node that the upcoming
 * expedited grace period needs to wait for.
 */
static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
{
	int cpu;
	unsigned long flags;
	unsigned long mask_ofl_test;
	unsigned long mask_ofl_ipi;
	int ret;
	struct rcu_exp_work *rewp =
		container_of(wp, struct rcu_exp_work, rew_work);
	struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -417,13 +415,119 @@ retry_ipi:
	rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false);
}
static void rcu_exp_sel_wait_wake(unsigned long s);

#ifdef CONFIG_RCU_EXP_KTHREAD
static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
{
	struct rcu_exp_work *rewp =
		container_of(wp, struct rcu_exp_work, rew_work);

	__sync_rcu_exp_select_node_cpus(rewp);
}

static inline bool rcu_gp_par_worker_started(void)
{
	return !!READ_ONCE(rcu_exp_par_gp_kworker);
}

static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
{
	kthread_init_work(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
	/*
	 * Use rcu_exp_par_gp_kworker, because flushing a work item from
	 * another work item on the same kthread worker can result in
	 * deadlock.
	 */
	kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work);
}

static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
{
	kthread_flush_work(&rnp->rew.rew_work);
}

/*
 * Work-queue handler to drive an expedited grace period forward.
 */
static void wait_rcu_exp_gp(struct kthread_work *wp)
{
	struct rcu_exp_work *rewp;

	rewp = container_of(wp, struct rcu_exp_work, rew_work);
	rcu_exp_sel_wait_wake(rewp->rew_s);
}

static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
{
	kthread_init_work(&rew->rew_work, wait_rcu_exp_gp);
	kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work);
}

static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
{
}
#else /* !CONFIG_RCU_EXP_KTHREAD */
static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
{
	struct rcu_exp_work *rewp =
		container_of(wp, struct rcu_exp_work, rew_work);

	__sync_rcu_exp_select_node_cpus(rewp);
}

static inline bool rcu_gp_par_worker_started(void)
{
	return !!READ_ONCE(rcu_par_gp_wq);
}

static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
{
	int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);

	INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
	/* If all offline, queue the work on an unbound CPU. */
	if (unlikely(cpu > rnp->grphi - rnp->grplo))
		cpu = WORK_CPU_UNBOUND;
	else
		cpu += rnp->grplo;
	queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
}

static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
{
	flush_work(&rnp->rew.rew_work);
}

/*
 * Work-queue handler to drive an expedited grace period forward.
 */
static void wait_rcu_exp_gp(struct work_struct *wp)
{
	struct rcu_exp_work *rewp;

	rewp = container_of(wp, struct rcu_exp_work, rew_work);
	rcu_exp_sel_wait_wake(rewp->rew_s);
}

static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
{
	INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp);
	queue_work(rcu_gp_wq, &rew->rew_work);
}

static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
{
	destroy_work_on_stack(&rew->rew_work);
}
#endif /* CONFIG_RCU_EXP_KTHREAD */

/*
 * Select the nodes that the upcoming expedited grace period needs
 * to wait for.
 */
static void sync_rcu_exp_select_cpus(void)
{
	int cpu;
	struct rcu_node *rnp;

	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("reset"));
@@ -435,28 +539,21 @@ static void sync_rcu_exp_select_cpus(void)
		rnp->exp_need_flush = false;
		if (!READ_ONCE(rnp->expmask))
			continue; /* Avoid early boot non-existent wq. */
		if (!READ_ONCE(rcu_par_gp_wq) ||
		if (!rcu_gp_par_worker_started() ||
		    rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
		    rcu_is_last_leaf_node(rnp)) {
			/* No workqueues yet or last leaf, do direct call. */
			/* No worker started yet or last leaf, do direct call. */
			sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
			continue;
		}
		INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
		cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
		/* If all offline, queue the work on an unbound CPU. */
		if (unlikely(cpu > rnp->grphi - rnp->grplo))
			cpu = WORK_CPU_UNBOUND;
		else
			cpu += rnp->grplo;
		queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
		sync_rcu_exp_select_cpus_queue_work(rnp);
		rnp->exp_need_flush = true;
	}

	/* Wait for workqueue jobs (if any) to complete. */
	/* Wait for jobs (if any) to complete. */
	rcu_for_each_leaf_node(rnp)
		if (rnp->exp_need_flush)
			flush_work(&rnp->rew.rew_work);
			sync_rcu_exp_select_cpus_flush_work(rnp);
}
/*
@@ -496,7 +593,7 @@ static void synchronize_rcu_expedited_wait(void)
	struct rcu_node *rnp_root = rcu_get_root();

	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
	jiffies_stall = rcu_jiffies_till_stall_check();
	jiffies_stall = rcu_exp_jiffies_till_stall_check();
	jiffies_start = jiffies;
	if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
		if (synchronize_rcu_expedited_wait_once(1))
@@ -571,7 +668,7 @@ static void synchronize_rcu_expedited_wait(void)
				dump_cpu_task(cpu);
			}
		}
		jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
		jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
	}
}
@@ -622,17 +719,6 @@ static void rcu_exp_sel_wait_wake(unsigned long s)
	rcu_exp_wait_wake(s);
}

/*
 * Work-queue handler to drive an expedited grace period forward.
 */
static void wait_rcu_exp_gp(struct work_struct *wp)
{
	struct rcu_exp_work *rewp;

	rewp = container_of(wp, struct rcu_exp_work, rew_work);
	rcu_exp_sel_wait_wake(rewp->rew_s);
}
#ifdef CONFIG_PREEMPT_RCU
/*
@@ -848,20 +934,19 @@ void synchronize_rcu_expedited(void)
	} else {
		/* Marshall arguments & schedule the expedited grace period. */
		rew.rew_s = s;
		INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
		queue_work(rcu_gp_wq, &rew.rew_work);
		synchronize_rcu_expedited_queue_work(&rew);
	}

	/* Wait for expedited grace period to complete. */
	rnp = rcu_get_root();
	wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
		   sync_exp_work_done(s));
	smp_mb(); /* Workqueue actions happen before return. */
	smp_mb(); /* Work actions happen before return. */

	/* Let the next expedited grace period start. */
	mutex_unlock(&rcu_state.exp_mutex);

	if (likely(!boottime))
		destroy_work_on_stack(&rew.rew_work);
		synchronize_rcu_expedited_destroy_work(&rew);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
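Note how the tree_exp.h hunks above duplicate only thin wrappers: a single
__sync_rcu_exp_select_node_cpus() serves both backends, and each wrapper
recovers the enclosing rcu_exp_work with container_of(). A generic sketch
of that dispatch idiom (CONFIG_DEMO_KTHREAD and all demo_* names are
hypothetical):

/* Sketch of the dual-backend dispatch idiom used above.  The payload
 * embeds whichever work type the kernel is configured for; a thin
 * per-backend wrapper recovers the payload and calls the common
 * handler. */
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

struct demo_work {
        unsigned long arg;
#ifdef CONFIG_DEMO_KTHREAD
        struct kthread_work w;
#else
        struct work_struct w;
#endif
};

static void demo_handler(struct demo_work *dw)
{
        pr_info("demo arg=%lu\n", dw->arg);
}

#ifdef CONFIG_DEMO_KTHREAD
static void demo_work_fn(struct kthread_work *w)
{
        demo_handler(container_of(w, struct demo_work, w));
}
#else
static void demo_work_fn(struct work_struct *w)
{
        demo_handler(container_of(w, struct demo_work, w));
}
#endif

Only one definition of demo_work_fn() is ever compiled, so queueing and
flushing helpers can stay free of #ifdefs at every call site, which is the
same payoff the RCU code gets from its *_queue_work()/*_flush_work()
wrappers.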

kernel/rcu/tree_stall.h

@@ -25,6 +25,34 @@ int sysctl_max_rcu_stall_to_panic __read_mostly;
#define RCU_STALL_MIGHT_DIV 8
#define RCU_STALL_MIGHT_MIN (2 * HZ)

int rcu_exp_jiffies_till_stall_check(void)
{
	int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout);
	int exp_stall_delay_delta = 0;
	int till_stall_check;

	// Zero says to use rcu_cpu_stall_timeout, but in milliseconds.
	if (!cpu_stall_timeout)
		cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check());

	// Limit check must be consistent with the Kconfig limits for
	// CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range.
	// The minimum clamped value is "2UL", because at least one full
	// tick has to be guaranteed.
	till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ);

	if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
		WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));

#ifdef CONFIG_PROVE_RCU
	/* Add extra ~25% out of till_stall_check. */
	exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1;
#endif

	return till_stall_check + exp_stall_delay_delta;
}
EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check);
/* Limit-check stall timeouts specified at boottime and runtime. */
int rcu_jiffies_till_stall_check(void)
{
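A worked userspace model of the jiffies arithmetic in
rcu_exp_jiffies_till_stall_check() above, using the Android default of
20 ms (illustration only; HZ=100 and the CONFIG_PROVE_RCU slack are
assumptions):

#include <stdio.h>

#define HZ 100                                  /* assumed tick rate */

static unsigned long msecs_to_jiffies(unsigned int ms)
{
        return ((unsigned long)ms * HZ + 999) / 1000;   /* round up */
}

int main(void)
{
        unsigned long j = msecs_to_jiffies(20); /* -> 2 jiffies */

        if (j < 2)
                j = 2;                  /* at least one full tick */
        else if (j > 21UL * HZ)
                j = 21UL * HZ;          /* Kconfig maximum, 21000 ms */
        /* Model the ~25% CONFIG_PROVE_RCU slack against false positives. */
        printf("%lu jiffies + %lu slack\n", j, j * 25 / 100 + 1);
        return 0;                       /* prints "2 jiffies + 1 slack" */
}

At HZ=1000 the same 20 ms setting yields 20 + 6 jiffies instead, which is
why the documentation above warns that the value is adjusted to the
timer-tick resolution.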

kernel/rcu/update.c

@@ -506,6 +506,8 @@ EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
module_param(rcu_cpu_stall_suppress, int, 0644);
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
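The module_param(..., 0644) declarations above are what surface these knobs
as the writable /sys/module/rcupdate/parameters/ files used earlier in this
series. A minimal out-of-tree sketch (hypothetical module) of the same
pattern:

/* Sketch, not from this commit: a module parameter with 0644
 * permissions appears as a root-writable file under
 * /sys/module/<module>/parameters/. */
#include <linux/module.h>
#include <linux/moduleparam.h>

static int demo_timeout_ms = 20;
module_param(demo_timeout_ms, int, 0644);
MODULE_PARM_DESC(demo_timeout_ms, "Demo stall timeout in milliseconds");

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("module_param() demo");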