rcu: Remove the RCU_FAST_NO_HZ Kconfig option

All of the uses of CONFIG_RCU_FAST_NO_HZ=y that I have seen involve
systems with RCU callbacks offloaded.  In this situation, all that this
Kconfig option does is slow down idle entry/exit with an additional
allways-taken early exit.  If this is the only use case, then this
Kconfig option nothing but an attractive nuisance that needs to go away.

This commit therefore removes the RCU_FAST_NO_HZ Kconfig option.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
This commit is contained in:
Paul E. McKenney 2021-09-27 14:18:51 -07:00
parent 24eab6e1ff
commit e2c73a6860
11 changed files with 7 additions and 269 deletions

View File

@ -254,17 +254,6 @@ period (in this case 2603), the grace-period sequence number (7075), and
an estimate of the total number of RCU callbacks queued across all CPUs
(625 in this case).
In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed
for each CPU::
0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 dyntick_enabled: 1
The "last_accelerate:" prints the low-order 16 bits (in hex) of the
jiffies counter when this CPU last invoked rcu_try_advance_all_cbs()
from rcu_needs_cpu() or last invoked rcu_accelerate_cbs() from
rcu_prepare_for_idle(). "dyntick_enabled: 1" indicates that dyntick-idle
processing is enabled.
If the grace period ends just as the stall warning starts printing,
there will be a spurious stall-warning message, which will include
the following::

View File

@ -4489,10 +4489,6 @@
on rcutree.qhimark at boot time and to zero to
disable more aggressive help enlistment.
rcutree.rcu_idle_gp_delay= [KNL]
Set wakeup interval for idle CPUs that have
RCU callbacks (RCU_FAST_NO_HZ=y).
rcutree.rcu_kick_kthreads= [KNL]
Cause the grace-period kthread to get an extra
wake_up() if it sleeps three times longer than

View File

@ -184,16 +184,12 @@ There are situations in which idle CPUs cannot be permitted to
enter either dyntick-idle mode or adaptive-tick mode, the most
common being when that CPU has RCU callbacks pending.
The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs
to enter dyntick-idle mode or adaptive-tick mode anyway. In this case,
a timer will awaken these CPUs every four jiffies in order to ensure
that the RCU callbacks are processed in a timely fashion.
Another approach is to offload RCU callback processing to "rcuo" kthreads
Avoid this by offloading RCU callback processing to "rcuo" kthreads
using the CONFIG_RCU_NOCB_CPU=y Kconfig option. The specific CPUs to
offload may be selected using The "rcu_nocbs=" kernel boot parameter,
which takes a comma-separated list of CPUs and CPU ranges, for example,
"1,3-5" selects CPUs 1, 3, 4, and 5.
"1,3-5" selects CPUs 1, 3, 4, and 5. Note that CPUs specified by
the "nohz_full" kernel boot parameter are also offloaded.
The offloaded CPUs will never queue RCU callbacks, and therefore RCU
never prevents offloaded CPUs from entering either dyntick-idle mode

View File

@ -169,24 +169,6 @@ config RCU_FANOUT_LEAF
Take the default if unsure.
config RCU_FAST_NO_HZ
bool "Accelerate last non-dyntick-idle CPU's grace periods"
depends on NO_HZ_COMMON && SMP && RCU_EXPERT
default n
help
This option permits CPUs to enter dynticks-idle state even if
they have RCU callbacks queued, and prevents RCU from waking
these CPUs up more than roughly once every four jiffies (by
default, you can adjust this using the rcutree.rcu_idle_gp_delay
parameter), thus improving energy efficiency. On the other
hand, this option increases the duration of RCU grace periods,
for example, slowing down synchronize_rcu().
Say Y if energy efficiency is critically important, and you
don't care about increased grace-period durations.
Say N if you are unsure.
config RCU_BOOST
bool "Enable RCU priority boosting"
depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT

View File

@ -624,7 +624,6 @@ static noinstr void rcu_eqs_enter(bool user)
instrumentation_begin();
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
@ -768,9 +767,6 @@ noinstr void rcu_nmi_exit(void)
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
if (!in_nmi())
rcu_prepare_for_idle();
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
instrumentation_end();
@ -872,7 +868,6 @@ static void noinstr rcu_eqs_exit(bool user)
// instrumentation for the noinstr rcu_dynticks_eqs_exit()
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
WRITE_ONCE(rdp->dynticks_nesting, 1);
@ -1014,12 +1009,6 @@ noinstr void rcu_nmi_enter(void)
rcu_dynticks_eqs_exit();
// ... but is watching here.
if (!in_nmi()) {
instrumentation_begin();
rcu_cleanup_after_idle();
instrumentation_end();
}
instrumentation_begin();
// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));

View File

@ -189,11 +189,6 @@ struct rcu_data {
bool rcu_urgent_qs; /* GP old need light quiescent state. */
bool rcu_forced_tick; /* Forced tick to provide QS. */
bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */
#ifdef CONFIG_RCU_FAST_NO_HZ
unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */
unsigned long last_advance_all; /* Last jiffy CBs were all advanced. */
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
/* 4) rcu_barrier(), OOM callbacks, and expediting. */
struct rcu_head barrier_head;
@ -419,8 +414,6 @@ static bool rcu_is_callbacks_kthread(void);
static void rcu_cpu_kthread_setup(unsigned int cpu);
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
static void __init rcu_spawn_boost_kthreads(void);
static void rcu_cleanup_after_idle(void);
static void rcu_prepare_for_idle(void);
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
static void rcu_preempt_deferred_qs(struct task_struct *t);

View File

@ -51,8 +51,6 @@ static void __init rcu_bootup_announce_oddness(void)
RCU_FANOUT);
if (rcu_fanout_exact)
pr_info("\tHierarchical RCU autobalancing is disabled.\n");
if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
if (IS_ENABLED(CONFIG_PROVE_RCU))
pr_info("\tRCU lockdep checking is enabled.\n");
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
@ -1253,16 +1251,14 @@ static void __init rcu_spawn_boost_kthreads(void)
#endif /* #else #ifdef CONFIG_RCU_BOOST */
#if !defined(CONFIG_RCU_FAST_NO_HZ)
/*
* Check to see if any future non-offloaded RCU-related work will need
* to be done by the current CPU, even if none need be done immediately,
* returning 1 if so. This function is part of the RCU implementation;
* it is -not- an exported member of the RCU API.
*
* Because we not have RCU_FAST_NO_HZ, just check whether or not this
* CPU has RCU callbacks queued.
* Just check whether or not this CPU has non-offloaded RCU callbacks
* queued.
*/
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
@ -1271,183 +1267,6 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}
/*
* Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
* after it.
*/
static void rcu_cleanup_after_idle(void)
{
}
/*
* Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
* is nothing.
*/
static void rcu_prepare_for_idle(void)
{
}
#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
/*
* This code is invoked when a CPU goes idle, at which point we want
* to have the CPU do everything required for RCU so that it can enter
* the energy-efficient dyntick-idle mode.
*
* The following preprocessor symbol controls this:
*
* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
* to sleep in dyntick-idle mode with RCU callbacks pending. This
* is sized to be roughly one RCU grace period. Those energy-efficiency
* benchmarkers who might otherwise be tempted to set this to a large
* number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
* system. And if you are -that- concerned about energy efficiency,
* just power the system down and be done with it!
*
* The value below works well in practice. If future workloads require
* adjustment, they can be converted into kernel config parameters, though
* making the state machine smarter might be a better option.
*/
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
module_param(rcu_idle_gp_delay, int, 0644);
/*
* Try to advance callbacks on the current CPU, but only if it has been
* awhile since the last time we did so. Afterwards, if there are any
* callbacks ready for immediate invocation, return true.
*/
static bool __maybe_unused rcu_try_advance_all_cbs(void)
{
bool cbs_ready = false;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp;
/* Exit early if we advanced recently. */
if (jiffies == rdp->last_advance_all)
return false;
rdp->last_advance_all = jiffies;
rnp = rdp->mynode;
/*
* Don't bother checking unless a grace period has
* completed since we last checked and there are
* callbacks not yet ready to invoke.
*/
if ((rcu_seq_completed_gp(rdp->gp_seq,
rcu_seq_current(&rnp->gp_seq)) ||
unlikely(READ_ONCE(rdp->gpwrap))) &&
rcu_segcblist_pend_cbs(&rdp->cblist))
note_gp_changes(rdp);
if (rcu_segcblist_ready_cbs(&rdp->cblist))
cbs_ready = true;
return cbs_ready;
}
/*
* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
* to invoke. If the CPU has callbacks, try to advance them. Tell the
* caller about what to set the timeout.
*
* The caller must have disabled interrupts.
*/
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
unsigned long dj;
lockdep_assert_irqs_disabled();
/* If no non-offloaded callbacks, RCU doesn't need the CPU. */
if (rcu_segcblist_empty(&rdp->cblist) ||
rcu_rdp_is_offloaded(rdp)) {
*nextevt = KTIME_MAX;
return 0;
}
/* Attempt to advance callbacks. */
if (rcu_try_advance_all_cbs()) {
/* Some ready to invoke, so initiate later invocation. */
invoke_rcu_core();
return 1;
}
rdp->last_accelerate = jiffies;
/* Request timer and round. */
dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
*nextevt = basemono + dj * TICK_NSEC;
return 0;
}
/*
* Prepare a CPU for idle from an RCU perspective. The first major task is to
* sense whether nohz mode has been enabled or disabled via sysfs. The second
* major task is to accelerate (that is, assign grace-period numbers to) any
* recently arrived callbacks.
*
* The caller must have disabled interrupts.
*/
static void rcu_prepare_for_idle(void)
{
bool needwake;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp;
int tne;
lockdep_assert_irqs_disabled();
if (rcu_rdp_is_offloaded(rdp))
return;
/* Handle nohz enablement switches conservatively. */
tne = READ_ONCE(tick_nohz_active);
if (tne != rdp->tick_nohz_enabled_snap) {
if (!rcu_segcblist_empty(&rdp->cblist))
invoke_rcu_core(); /* force nohz to see update. */
rdp->tick_nohz_enabled_snap = tne;
return;
}
if (!tne)
return;
/*
* If we have not yet accelerated this jiffy, accelerate all
* callbacks on this CPU.
*/
if (rdp->last_accelerate == jiffies)
return;
rdp->last_accelerate = jiffies;
if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
rnp = rdp->mynode;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
needwake = rcu_accelerate_cbs(rnp, rdp);
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
if (needwake)
rcu_gp_kthread_wake();
}
}
/*
* Clean up for exit from idle. Attempt to advance callbacks based on
* any grace periods that elapsed while the CPU was idle, and if any
* callbacks are now ready to invoke, initiate invocation.
*/
static void rcu_cleanup_after_idle(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
lockdep_assert_irqs_disabled();
if (rcu_rdp_is_offloaded(rdp))
return;
if (rcu_try_advance_all_cbs())
invoke_rcu_core();
}
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
/*
* Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
* grace-period kthread will do force_quiescent_state() processing?

View File

@ -347,26 +347,6 @@ static void rcu_dump_cpu_stacks(void)
}
}
#ifdef CONFIG_RCU_FAST_NO_HZ
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d",
rdp->last_accelerate & 0xffff, jiffies & 0xffff,
!!rdp->tick_nohz_enabled_snap);
}
#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
*cp = '\0';
}
#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
static const char * const gp_state_names[] = {
[RCU_GP_IDLE] = "RCU_GP_IDLE",
[RCU_GP_WAIT_GPS] = "RCU_GP_WAIT_GPS",
@ -408,13 +388,12 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
* of RCU grace periods that this CPU is ignorant of, for example, "1"
* if the CPU was aware of the previous grace period.
*
* Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
* Also print out idle info.
*/
static void print_cpu_stall_info(int cpu)
{
unsigned long delta;
bool falsepositive;
char fast_no_hz[72];
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
char *ticks_title;
unsigned long ticks_value;
@ -432,11 +411,10 @@ static void print_cpu_stall_info(int cpu)
ticks_title = "ticks this GP";
ticks_value = rdp->ticks_this_gp;
}
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
falsepositive = rcu_is_gp_kthread_starving(NULL) &&
rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s%s\n",
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
cpu,
"O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
@ -449,7 +427,6 @@ static void print_cpu_stall_info(int cpu)
rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
fast_no_hz,
falsepositive ? " (false positive?)" : "");
}

View File

@ -6,7 +6,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
CONFIG_MAXSMP=y

View File

@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_TRACE=y
CONFIG_RCU_FANOUT=4
CONFIG_RCU_FANOUT_LEAF=3

View File

@ -15,7 +15,6 @@ CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs.
CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with
rcu_nocbs=0, and one with all rcu_nocbs CPUs.
CONFIG_RCU_TRACE -- Do half.