Merge branches 'doc.2023.01.05a', 'fixes.2023.01.23a', 'kvfree.2023.01.03a', 'srcu.2023.01.03a', 'srcu-always.2023.02.02a', 'tasks.2023.01.03a', 'torture.2023.01.05a' and 'torturescript.2023.01.03a' into HEAD

doc.2023.01.05a: Documentation update.
fixes.2023.01.23a: Miscellaneous fixes.
kvfree.2023.01.03a: kvfree_rcu() updates.
srcu.2023.01.03a: SRCU updates.
srcu-always.2023.02.02a: Finish making SRCU be unconditionally available.
tasks.2023.01.03a: Tasks-RCU updates.
torture.2023.01.05a: Torture-test updates.
torturescript.2023.01.03a: Torture-test scripting updates.
This commit is contained in:
Paul E. McKenney 2023-02-02 16:33:43 -08:00
39 changed files with 1019 additions and 515 deletions

View file

@ -5113,6 +5113,11 @@
rcupdate.rcu_cpu_stall_timeout to be used (after
conversion from seconds to milliseconds).
rcupdate.rcu_exp_stall_task_details= [KNL]
Print stack dumps of any tasks blocking the
current expedited RCU grace period during an
expedited RCU CPU stall warning.
rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for
example, synchronize_rcu_expedited() instead

View file

@ -181,7 +181,6 @@ void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers);
#ifdef CONFIG_SRCU
static DEFINE_MUTEX(device_links_lock);
DEFINE_STATIC_SRCU(device_links_srcu);
@ -220,47 +219,6 @@ static void device_link_remove_from_lists(struct device_link *link)
list_del_rcu(&link->s_node);
list_del_rcu(&link->c_node);
}
#else /* !CONFIG_SRCU */
static DECLARE_RWSEM(device_links_lock);
/*
 * !CONFIG_SRCU variant: acquire the device_links_lock rw-semaphore for
 * writing.
 */
static inline void device_links_write_lock(void)
{
down_write(&device_links_lock);
}
/*
 * !CONFIG_SRCU variant: release the write side of the device_links_lock
 * rw-semaphore.
 */
static inline void device_links_write_unlock(void)
{
up_write(&device_links_lock);
}
/*
 * !CONFIG_SRCU variant: acquire the device_links_lock rw-semaphore for
 * reading.
 *
 * Always returns 0; in this variant the return value carries no
 * information.
 */
int device_links_read_lock(void)
{
down_read(&device_links_lock);
return 0;
}
/*
 * !CONFIG_SRCU variant: release the read side of the device_links_lock
 * rw-semaphore.
 *
 * @not_used: ignored in this variant.
 */
void device_links_read_unlock(int not_used)
{
up_read(&device_links_lock);
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Lockdep helper (CONFIG_DEBUG_LOCK_ALLOC only): return the result of
 * lockdep_is_held() for device_links_lock.
 */
int device_links_read_lock_held(void)
{
return lockdep_is_held(&device_links_lock);
}
#endif
/* !CONFIG_SRCU variant: intentionally empty, there is nothing to wait for. */
static inline void device_link_synchronize_removal(void)
{
}
/*
 * !CONFIG_SRCU variant: unlink @link from both lists it sits on (via its
 * ->s_node and ->c_node members) using plain list_del().
 */
static void device_link_remove_from_lists(struct device_link *link)
{
list_del(&link->s_node);
list_del(&link->c_node);
}
#endif /* !CONFIG_SRCU */
static bool device_is_ancestor(struct device *dev, struct device *target)
{

View file

@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
menuconfig DAX
tristate "DAX: direct access to differentiated memory"
select SRCU
default m if NVDIMM_DAX
if DAX

View file

@ -2,7 +2,6 @@
config STM
tristate "System Trace Module devices"
select CONFIGFS_FS
select SRCU
help
A System Trace Module (STM) is a device exporting data in System
Trace Protocol (STP) format as defined by MIPI STP standards.

View file

@ -6,7 +6,6 @@
menuconfig MD
bool "Multiple devices driver support (RAID and LVM)"
depends on BLOCK
select SRCU
help
Support multiple physical spindles through a single logical device.
Required for RAID and logical volume management.

View file

@ -334,7 +334,6 @@ config NETCONSOLE_DYNAMIC
config NETPOLL
def_bool NETCONSOLE
select SRCU
config NET_POLL_CONTROLLER
def_bool NETPOLL

View file

@ -258,7 +258,7 @@ config PCIE_MEDIATEK_GEN3
MediaTek SoCs.
config VMD
depends on PCI_MSI && X86_64 && SRCU && !UML
depends on PCI_MSI && X86_64 && !UML
tristate "Intel Volume Management Device Driver"
help
Adds support for the Intel Volume Management Device (VMD). VMD is a

View file

@ -17,7 +17,6 @@ config BTRFS_FS
select FS_IOMAP
select RAID6_PQ
select XOR_BLOCKS
select SRCU
depends on PAGE_SIZE_LESS_THAN_256KB
help

View file

@ -1889,7 +1889,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
}
EXPORT_SYMBOL(generic_setlease);
#if IS_ENABLED(CONFIG_SRCU)
/*
* Kernel subsystems can register to be notified on any attempt to set
* a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
@ -1923,30 +1922,6 @@ void lease_unregister_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);
#else /* !IS_ENABLED(CONFIG_SRCU) */
/* !CONFIG_SRCU stub: there is no lease notifier chain to initialize. */
static inline void
lease_notifier_chain_init(void)
{
}
/* !CONFIG_SRCU stub: lease-set attempts are not reported to anyone. */
static inline void
setlease_notifier(long arg, struct file_lock *lease)
{
}
/*
 * !CONFIG_SRCU stub: @nb is ignored and 0 is returned unconditionally, so
 * the notifier block is never actually registered anywhere.
 */
int lease_register_notifier(struct notifier_block *nb)
{
return 0;
}
EXPORT_SYMBOL_GPL(lease_register_notifier);
/* !CONFIG_SRCU stub: @nb is ignored; there is nothing to unregister. */
void lease_unregister_notifier(struct notifier_block *nb)
{
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);
#endif /* IS_ENABLED(CONFIG_SRCU) */
/**
* vfs_setlease - sets a lease on an open file
* @filp: file pointer

View file

@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config FSNOTIFY
def_bool n
select SRCU
source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"

View file

@ -6,7 +6,6 @@
config QUOTA
bool "Quota support"
select QUOTACTL
select SRCU
help
If you say Y here, you will be able to set per user limits for disk
usage (also called disk quotas). Currently, it works for the

View file

@ -139,7 +139,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
if (last) {
n->next = last->next;
n->pprev = &last->next;
rcu_assign_pointer(hlist_next_rcu(last), n);
rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
} else {
hlist_nulls_add_head_rcu(n, h);
}

View file

@ -238,6 +238,7 @@ void synchronize_rcu_tasks_rude(void);
#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
void exit_tasks_rcu_start(void);
void exit_tasks_rcu_stop(void);
void exit_tasks_rcu_finish(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
#define rcu_tasks_classic_qs(t, preempt) do { } while (0)
@ -246,6 +247,7 @@ void exit_tasks_rcu_finish(void);
#define call_rcu_tasks call_rcu
#define synchronize_rcu_tasks synchronize_rcu
static inline void exit_tasks_rcu_start(void) { }
static inline void exit_tasks_rcu_stop(void) { }
static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
@ -374,11 +376,18 @@ static inline int debug_lockdep_rcu_enabled(void)
* RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met
* @c: condition to check
* @s: informative message
*
* This checks debug_lockdep_rcu_enabled() before checking (c) to
* prevent early boot splats due to lockdep not yet being initialized,
* and rechecks it after checking (c) to prevent false-positive splats
* due to races with lockdep being disabled. See commit 3066820034b5dd
* ("rcu: Reject RCU_LOCKDEP_WARN() false positives") for more detail.
*/
#define RCU_LOCKDEP_WARN(c, s) \
do { \
static bool __section(".data.unlikely") __warned; \
if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \
if (debug_lockdep_rcu_enabled() && (c) && \
debug_lockdep_rcu_enabled() && !__warned) { \
__warned = true; \
lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
} \
@ -1004,6 +1013,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \
kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__)
#define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
#define kfree_rcu_mightsleep(ptr) kvfree_rcu_mightsleep(ptr)
#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME
#define kvfree_rcu_arg_2(ptr, rhf) \
do { \
@ -1011,8 +1023,7 @@ do { \
\
if (___p) { \
BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \
kvfree_call_rcu(&((___p)->rhf), (rcu_callback_t)(unsigned long) \
(offsetof(typeof(*(ptr)), rhf))); \
kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
} \
} while (0)
@ -1021,7 +1032,7 @@ do { \
typeof(ptr) ___p = (ptr); \
\
if (___p) \
kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \
kvfree_call_rcu(NULL, (void *) (___p)); \
} while (0)
/*

View file

@ -98,25 +98,25 @@ static inline void synchronize_rcu_expedited(void)
*/
extern void kvfree(const void *addr);
static inline void __kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head) {
call_rcu(head, func);
call_rcu(head, (rcu_callback_t) ((void *) head - ptr));
return;
}
// kvfree_rcu(one_arg) call.
might_sleep();
synchronize_rcu();
kvfree((void *) func);
kvfree(ptr);
}
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
#else
static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
__kvfree_call_rcu(head, func);
__kvfree_call_rcu(head, ptr);
}
#endif

View file

@ -33,7 +33,7 @@ static inline void rcu_virt_note_context_switch(void)
}
void synchronize_rcu_expedited(void);
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);

View file

@ -214,6 +214,34 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
return retval;
}
/**
 * srcu_down_read - register a new reader for an SRCU-protected structure.
 * @ssp: srcu_struct in which to register the new reader.
 *
 * Begin a semaphore-like SRCU read-side critical section and return the
 * index that must later be handed to the matching srcu_up_read().
 *
 * SRCU read-side critical sections may be nested, and calls to
 * srcu_down_read() may be nested in the same way that calls to
 * down_read() may be nested.  It is nevertheless illegal to wait, directly
 * or indirectly, on an SRCU grace period for the same srcu_struct from
 * within such a section.  One indirect way of waiting is to acquire a
 * mutex that is held elsewhere across a call to synchronize_srcu() or
 * synchronize_srcu_expedited().  If you want lockdep to help you keep
 * this straight, use srcu_read_lock() instead.
 *
 * Because of its semaphore-like nature, the matching srcu_up_read() may
 * be invoked from a different context, for example from another task or
 * from an irq handler.  Neither srcu_down_read() nor srcu_up_read() may
 * be invoked from an NMI handler.
 */
static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp)
{
	int idx;

	/* NMI handlers are not permitted to use this API. */
	WARN_ON_ONCE(in_nmi());
	srcu_check_nmi_safety(ssp, false);
	idx = __srcu_read_lock(ssp);
	return idx;
}
/**
* srcu_read_unlock - unregister an old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
@ -254,6 +282,23 @@ srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
__srcu_read_unlock(ssp, idx);
}
/**
 * srcu_up_read - unregister an old reader from an SRCU-protected structure.
 * @ssp: srcu_struct in which to unregister the old reader.
 * @idx: return value from corresponding srcu_down_read().
 *
 * Exit an SRCU read-side critical section, but not necessarily from
 * the same context as the matching srcu_down_read().  Must not be
 * invoked from an NMI handler.
 */
static inline void srcu_up_read(struct srcu_struct *ssp, int idx)
__releases(ssp)
{
/* Only bit 0 of @idx may be set; anything else draws a warning. */
WARN_ON_ONCE(idx & ~0x1);
/* NMI handlers are not permitted to use this API. */
WARN_ON_ONCE(in_nmi());
srcu_check_nmi_safety(ssp, false);
__srcu_read_unlock(ssp, idx);
}
/**
* smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
*

View file

@ -49,7 +49,7 @@ struct srcu_data {
struct srcu_node {
spinlock_t __private lock;
unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */
/* if greater than ->srcu_gq_seq. */
/* if greater than ->srcu_gp_seq. */
unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */
unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
struct srcu_node *srcu_parent; /* Next up in tree. */

View file

@ -1865,7 +1865,6 @@ config PERF_EVENTS
default y if PROFILING
depends on HAVE_PERF_EVENTS
select IRQ_WORK
select SRCU
help
Enable kernel support for various performance events provided
by software and hardware.

View file

@ -46,6 +46,9 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
torture_param(int, stat_interval, 60,
"Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(int, rt_boost, 2,
"Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
torture_param(int, verbose, 1,
"Enable verbose debugging printk()s");
@ -127,15 +130,50 @@ static void torture_lock_busted_write_unlock(int tid __maybe_unused)
/* BUGGY, do not use in real life!!! */
}
static void torture_boost_dummy(struct torture_random_state *trsp)
/*
 * Randomly toggle the calling task between SCHED_FIFO and the normal
 * scheduling policy.
 *
 * @trsp: per-task torture random state, or NULL to force a task that is
 *	  currently boosted back to the normal policy (used when stopping
 *	  the kthread).
 *
 * The boost modulus is cxt.nrealwriters_stress * rt_boost_factor and the
 * de-boost modulus is twice that.  rt_boost_factor is a module parameter,
 * so either modulus can be zero; a zero modulus is never used as a divisor.
 * Instead, a zero boost modulus means "never boost" and a zero de-boost
 * modulus means "de-boost immediately".
 */
static void __torture_rt_boost(struct torture_random_state *trsp)
{
	const unsigned int factor = rt_boost_factor;
	const unsigned int boost_period = cxt.nrealwriters_stress * factor;
	const unsigned int deboost_period = boost_period * 2;

	if (!rt_task(current)) {
		/*
		 * Boost priority on average once every boost_period
		 * operations.  When the task next tries to take the lock,
		 * an rtmutex will account for the new priority and do any
		 * corresponding pi-dance.
		 */
		if (!trsp || !boost_period ||
		    torture_random(trsp) % boost_period)
			return; /* common case, do nothing */
		sched_set_fifo(current);
	} else {
		/*
		 * The task remains boosted for on average another
		 * deboost_period operations, is then restored to its
		 * original priority, and so forth.
		 *
		 * When @trsp is NULL, we want to force-reset the task for
		 * stopping the kthread.
		 */
		if (trsp && deboost_period &&
		    torture_random(trsp) % deboost_period)
			return; /* common case, do nothing */
		sched_set_normal(current, 0);
	}
}
/*
 * ->task_boost hook for the non-rtmutex lock types: do the periodic
 * rt-boost only when the rt_boost module parameter is 2 ("for all lock
 * types").
 */
static void torture_rt_boost(struct torture_random_state *trsp)
{
	if (rt_boost == 2)
		__torture_rt_boost(trsp);
}
static struct lock_torture_ops lock_busted_ops = {
.writelock = torture_lock_busted_write_lock,
.write_delay = torture_lock_busted_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_lock_busted_write_unlock,
.readlock = NULL,
.read_delay = NULL,
@ -179,7 +217,7 @@ __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_ops = {
.writelock = torture_spin_lock_write_lock,
.write_delay = torture_spin_lock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_spin_lock_write_unlock,
.readlock = NULL,
.read_delay = NULL,
@ -206,7 +244,7 @@ __releases(torture_spinlock)
static struct lock_torture_ops spin_lock_irq_ops = {
.writelock = torture_spin_lock_write_lock_irq,
.write_delay = torture_spin_lock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_lock_spin_write_unlock_irq,
.readlock = NULL,
.read_delay = NULL,
@ -275,7 +313,7 @@ __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_ops = {
.writelock = torture_rwlock_write_lock,
.write_delay = torture_rwlock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_rwlock_write_unlock,
.readlock = torture_rwlock_read_lock,
.read_delay = torture_rwlock_read_delay,
@ -318,7 +356,7 @@ __releases(torture_rwlock)
static struct lock_torture_ops rw_lock_irq_ops = {
.writelock = torture_rwlock_write_lock_irq,
.write_delay = torture_rwlock_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_rwlock_write_unlock_irq,
.readlock = torture_rwlock_read_lock_irq,
.read_delay = torture_rwlock_read_delay,
@ -358,7 +396,7 @@ __releases(torture_mutex)
static struct lock_torture_ops mutex_lock_ops = {
.writelock = torture_mutex_lock,
.write_delay = torture_mutex_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,
@ -456,7 +494,7 @@ static struct lock_torture_ops ww_mutex_lock_ops = {
.exit = torture_ww_mutex_exit,
.writelock = torture_ww_mutex_lock,
.write_delay = torture_mutex_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_ww_mutex_unlock,
.readlock = NULL,
.read_delay = NULL,
@ -474,37 +512,6 @@ __acquires(torture_rtmutex)
return 0;
}
static void torture_rtmutex_boost(struct torture_random_state *trsp)
{
const unsigned int factor = 50000; /* yes, quite arbitrary */
if (!rt_task(current)) {
/*
* Boost priority once every ~50k operations. When the
* task tries to take the lock, the rtmutex it will account
* for the new priority, and do any corresponding pi-dance.
*/
if (trsp && !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor))) {
sched_set_fifo(current);
} else /* common case, do nothing */
return;
} else {
/*
* The task will remain boosted for another ~500k operations,
* then restored back to its original prio, and so forth.
*
* When @trsp is nil, we want to force-reset the task for
* stopping the kthread.
*/
if (!trsp || !(torture_random(trsp) %
(cxt.nrealwriters_stress * factor * 2))) {
sched_set_normal(current, 0);
} else /* common case, do nothing */
return;
}
}
static void torture_rtmutex_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
@ -530,10 +537,18 @@ __releases(torture_rtmutex)
rt_mutex_unlock(&torture_rtmutex);
}
/*
 * ->task_boost hook for rtmutex: do the periodic rt-boost for any nonzero
 * value of the rt_boost module parameter.
 */
static void torture_rt_boost_rtmutex(struct torture_random_state *trsp)
{
	if (rt_boost)
		__torture_rt_boost(trsp);
}
static struct lock_torture_ops rtmutex_lock_ops = {
.writelock = torture_rtmutex_lock,
.write_delay = torture_rtmutex_delay,
.task_boost = torture_rtmutex_boost,
.task_boost = torture_rt_boost_rtmutex,
.writeunlock = torture_rtmutex_unlock,
.readlock = NULL,
.read_delay = NULL,
@ -600,7 +615,7 @@ __releases(torture_rwsem)
static struct lock_torture_ops rwsem_lock_ops = {
.writelock = torture_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_rwsem_up_write,
.readlock = torture_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,
@ -652,7 +667,7 @@ static struct lock_torture_ops percpu_rwsem_lock_ops = {
.exit = torture_percpu_rwsem_exit,
.writelock = torture_percpu_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.task_boost = torture_boost_dummy,
.task_boost = torture_rt_boost,
.writeunlock = torture_percpu_rwsem_up_write,
.readlock = torture_percpu_rwsem_down_read,
.read_delay = torture_rwsem_read_delay,

View file

@ -456,7 +456,6 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh,
}
EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
#ifdef CONFIG_SRCU
/*
* SRCU notifier chain routines. Registration and unregistration
* use a mutex, and call_chain is synchronized by SRCU (no locks).
@ -573,8 +572,6 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh)
}
EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
#endif /* CONFIG_SRCU */
static ATOMIC_NOTIFIER_HEAD(die_chain);
int notrace notify_die(enum die_val val, const char *str,

View file

@ -244,7 +244,24 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
set_current_state(TASK_INTERRUPTIBLE);
if (pid_ns->pid_allocated == init_pids)
break;
/*
* Release tasks_rcu_exit_srcu to avoid the following deadlock:
*
* 1) TASK A unshare(CLONE_NEWPID)
* 2) TASK A fork() twice -> TASK B (child reaper for new ns)
* and TASK C
* 3) TASK B exits, kills TASK C, waits for TASK A to reap it
* 4) TASK A calls synchronize_rcu_tasks()
* -> synchronize_srcu(tasks_rcu_exit_srcu)
* 5) *DEADLOCK*
*
* It is considered safe to release tasks_rcu_exit_srcu here
* because we assume the current task can not be concurrently
* reaped at this point.
*/
exit_tasks_rcu_stop();
schedule();
exit_tasks_rcu_start();
}
__set_current_state(TASK_RUNNING);

View file

@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_exp_cpu_stall_timeout;
extern bool rcu_exp_stall_task_details __read_mostly;
int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);
@ -447,14 +448,20 @@ do { \
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
static inline bool rcu_gp_is_normal(void) { return true; }
static inline bool rcu_gp_is_expedited(void) { return false; }
static inline bool rcu_async_should_hurry(void) { return false; }
static inline void rcu_expedite_gp(void) { }
static inline void rcu_unexpedite_gp(void) { }
static inline void rcu_async_hurry(void) { }
static inline void rcu_async_relax(void) { }
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_gp_is_normal(void); /* Internal RCU use. */
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
bool rcu_async_should_hurry(void); /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcu_async_hurry(void);
void rcu_async_relax(void);
void rcupdate_announce_bootup_oddness(void);
#ifdef CONFIG_TASKS_RCU_GENERIC
void show_rcu_tasks_gp_kthreads(void);

View file

@ -89,7 +89,7 @@ static void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
}
/* Get the length of a segment of the rcu_segcblist structure. */
static long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg)
long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg)
{
return READ_ONCE(rsclp->seglen[seg]);
}

View file

@ -15,6 +15,8 @@ static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
return READ_ONCE(rclp->len);
}
long rcu_segcblist_get_seglen(struct rcu_segcblist *rsclp, int seg);
/* Return number of callbacks in segmented callback list by summing seglen. */
long rcu_segcblist_n_segment_cbs(struct rcu_segcblist *rsclp);

View file

@ -399,7 +399,7 @@ static int torture_readlock_not_held(void)
return rcu_read_lock_bh_held() || rcu_read_lock_sched_held();
}
static int rcu_torture_read_lock(void) __acquires(RCU)
static int rcu_torture_read_lock(void)
{
rcu_read_lock();
return 0;
@ -441,7 +441,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
}
}
static void rcu_torture_read_unlock(int idx) __releases(RCU)
static void rcu_torture_read_unlock(int idx)
{
rcu_read_unlock();
}
@ -625,7 +625,7 @@ static struct srcu_struct srcu_ctld;
static struct srcu_struct *srcu_ctlp = &srcu_ctl;
static struct rcu_torture_ops srcud_ops;
static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
static int srcu_torture_read_lock(void)
{
if (cur_ops == &srcud_ops)
return srcu_read_lock_nmisafe(srcu_ctlp);
@ -652,7 +652,7 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
}
}
static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
static void srcu_torture_read_unlock(int idx)
{
if (cur_ops == &srcud_ops)
srcu_read_unlock_nmisafe(srcu_ctlp, idx);
@ -814,13 +814,13 @@ static void synchronize_rcu_trivial(void)
}
}
static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
static int rcu_torture_read_lock_trivial(void)
{
preempt_disable();
return 0;
}
static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
static void rcu_torture_read_unlock_trivial(int idx)
{
preempt_enable();
}

View file

@ -76,6 +76,8 @@ torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");
// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
"Holdoff time before test start (s)");
// Number of typesafe_lookup structures, that is, the degree of concurrency.
torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures.");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(long, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
@ -124,7 +126,7 @@ static int exp_idx;
// Operations vector for selecting different types of tests.
struct ref_scale_ops {
void (*init)(void);
bool (*init)(void);
void (*cleanup)(void);
void (*readsection)(const int nloops);
void (*delaysection)(const int nloops, const int udl, const int ndl);
@ -162,8 +164,9 @@ static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl
}
}
static void rcu_sync_scale_init(void)
static bool rcu_sync_scale_init(void)
{
return true;
}
static struct ref_scale_ops rcu_ops = {
@ -315,9 +318,10 @@ static struct ref_scale_ops refcnt_ops = {
// Definitions for rwlock
static rwlock_t test_rwlock;
static void ref_rwlock_init(void)
static bool ref_rwlock_init(void)
{
rwlock_init(&test_rwlock);
return true;
}
static void ref_rwlock_section(const int nloops)
@ -351,9 +355,10 @@ static struct ref_scale_ops rwlock_ops = {
// Definitions for rwsem
static struct rw_semaphore test_rwsem;
static void ref_rwsem_init(void)
static bool ref_rwsem_init(void)
{
init_rwsem(&test_rwsem);
return true;
}
static void ref_rwsem_section(const int nloops)
@ -523,6 +528,237 @@ static struct ref_scale_ops clock_ops = {
.name = "clock"
};
////////////////////////////////////////////////////////////////////////
//
// Methods leveraging SLAB_TYPESAFE_BY_RCU.
//
// Item to look up in a typesafe manner.  Array of pointers to these.
// Allocated from typesafe_kmem_cachep, which is created with
// SLAB_TYPESAFE_BY_RCU.
struct refscale_typesafe {
atomic_t rts_refctr; // Used by all flavors
spinlock_t rts_lock; // Taken by typesafe_lock_acquire(), set up by the slab ctor
seqlock_t rts_seqlock; // Read by typesafe_seqlock_acquire(), set up by the slab ctor
unsigned int a; // Randomized by the ctor, incremented on each allocation and final release
unsigned int b; // Set to a * a at allocation time; checked by the reader
};
// Slab cache for refscale_typesafe structures, created by typesafe_init().
static struct kmem_cache *typesafe_kmem_cachep;
// Array of rtsarray_size pointers to the structures that readers look up.
static struct refscale_typesafe **rtsarray;
static long rtsarray_size;
// Per-CPU random state used to pick array slots and to seed ->a.
static DEFINE_TORTURE_RANDOM_PERCPU(refscale_rand);
// Acquire/release methods for the current flavor, selected by typesafe_init().
static bool (*rts_acquire)(struct refscale_typesafe *rtsp, unsigned int *start);
static bool (*rts_release)(struct refscale_typesafe *rtsp, unsigned int start);
// Conditionally acquire an explicit in-structure reference count.
// Returns true if the count was nonzero and has been incremented, false
// otherwise.  The @start argument is unused by this flavor.
static bool typesafe_ref_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
return atomic_inc_not_zero(&rtsp->rts_refctr);
}
// Unconditionally release an explicit in-structure reference count.
// Dropping the final reference bumps ->a and hands the structure back to
// the slab cache.  Always returns true; @start is unused by this flavor.
static bool typesafe_ref_release(struct refscale_typesafe *rtsp, unsigned int start)
{
	const bool last_ref = !atomic_dec_return(&rtsp->rts_refctr);

	if (last_ref) {
		// Change ->a before freeing so that readers can notice.
		WRITE_ONCE(rtsp->a, rtsp->a + 1);
		kmem_cache_free(typesafe_kmem_cachep, rtsp);
	}
	return true;
}
// Unconditionally acquire an explicit in-structure spinlock.
// Always returns true; @start is unused by this flavor.
static bool typesafe_lock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
spin_lock(&rtsp->rts_lock);
return true;
}
// Unconditionally release an explicit in-structure spinlock.
// Always returns true; @start is unused by this flavor.
static bool typesafe_lock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
spin_unlock(&rtsp->rts_lock);
return true;
}
// Unconditionally acquire an explicit in-structure sequence lock.
// Stores the value returned by read_seqbegin() through @start so that it
// can later be passed to typesafe_seqlock_release().  Always returns true.
static bool typesafe_seqlock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
*start = read_seqbegin(&rtsp->rts_seqlock);
return true;
}
// Conditionally release an explicit in-structure sequence lock.  Return
// true if this release was successful, that is, if no retry is required.
// @start is the value that typesafe_seqlock_acquire() stored.
static bool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
return !read_seqretry(&rtsp->rts_seqlock, start);
}
// Do a read-side critical section with the specified delay in
// microseconds and nanoseconds inserted so as to increase probability
// of failure.
//
// Each pass picks a random rtsarray[] slot and then, under
// rcu_read_lock(), snapshots ->a, acquires the structure using the
// current flavor's rts_acquire(), and rechecks ->a.  Both the allocation
// path and the final reference release increment ->a, so a changed value
// means that the structure was recycled and the lookup must be retried.
// After the delay and the release, ->a must still match the snapshot and
// ->b must equal a * a.
//
// Note that the loop bounds make the body execute nloops + 1 times.
static void typesafe_delay_section(const int nloops, const int udl, const int ndl)
{
unsigned int a;
unsigned int b;
int i;
long idx;
struct refscale_typesafe *rtsp;
unsigned int start;
for (i = nloops; i >= 0; i--) {
// Preemption is disabled around use of this CPU's random state.
preempt_disable();
idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size;
preempt_enable();
retry:
rcu_read_lock();
rtsp = rcu_dereference(rtsarray[idx]);
// Snapshot ->a before acquiring so that recycling can be detected.
a = READ_ONCE(rtsp->a);
if (!rts_acquire(rtsp, &start)) {
rcu_read_unlock();
goto retry;
}
// ->a changed between snapshot and acquisition: drop it and retry.
if (a != READ_ONCE(rtsp->a)) {
(void)rts_release(rtsp, start);
rcu_read_unlock();
goto retry;
}
un_delay(udl, ndl);
// Remember, seqlock read-side release can fail.
if (!rts_release(rtsp, start)) {
rcu_read_unlock();
goto retry;
}
// Consistency checks: ->a must be unchanged and ->b must be a squared.
b = READ_ONCE(rtsp->a);
WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b);
b = rtsp->b;
rcu_read_unlock();
WARN_ON_ONCE(a * a != b);
}
}
// Because the acquisition and release methods are expensive, there
// is no point in optimizing away the un_delay() function's two checks.
// Thus simply define typesafe_read_section() as a simple wrapper around
// typesafe_delay_section(), passing zero for both delay arguments.
static void typesafe_read_section(const int nloops)
{
typesafe_delay_section(nloops, 0, 0);
}
// Allocate and initialize one refscale_typesafe structure.
// Returns the new structure with a reference count of one, or NULL if
// the slab allocation fails.
static struct refscale_typesafe *typesafe_alloc_one(void)
{
struct refscale_typesafe *rtsp;
rtsp = kmem_cache_alloc(typesafe_kmem_cachep, GFP_KERNEL);
if (!rtsp)
return NULL;
atomic_set(&rtsp->rts_refctr, 1);
// Advance ->a past whatever value the ctor or a previous user left
// behind, then make ->b its square, which is the invariant that
// typesafe_delay_section() checks.
WRITE_ONCE(rtsp->a, rtsp->a + 1);
WRITE_ONCE(rtsp->b, rtsp->a * rtsp->a);
return rtsp;
}
// Slab-allocator constructor for refscale_typesafe structures created
// out of a new slab of system memory.  Initializes the spinlock and the
// sequence lock and seeds ->a with a random value.  The reference count
// and ->b are not touched here; typesafe_alloc_one() sets them.
static void refscale_typesafe_ctor(void *rtsp_in)
{
struct refscale_typesafe *rtsp = rtsp_in;
spin_lock_init(&rtsp->rts_lock);
seqlock_init(&rtsp->rts_seqlock);
// Preemption is disabled around use of this CPU's random state.
preempt_disable();
rtsp->a = torture_random(this_cpu_ptr(&refscale_rand));
preempt_enable();
}
static struct ref_scale_ops typesafe_ref_ops;
static struct ref_scale_ops typesafe_lock_ops;
static struct ref_scale_ops typesafe_seqlock_ops;
// Initialize for a typesafe test.
static bool typesafe_init(void)
{
long idx;
long si = lookup_instances;
typesafe_kmem_cachep = kmem_cache_create("refscale_typesafe",
sizeof(struct refscale_typesafe), sizeof(void *),
SLAB_TYPESAFE_BY_RCU, refscale_typesafe_ctor);
if (!typesafe_kmem_cachep)
return false;
if (si < 0)
si = -si * nr_cpu_ids;
else if (si == 0)
si = nr_cpu_ids;
rtsarray_size = si;
rtsarray = kcalloc(si, sizeof(*rtsarray), GFP_KERNEL);
if (!rtsarray)
return false;
for (idx = 0; idx < rtsarray_size; idx++) {
rtsarray[idx] = typesafe_alloc_one();
if (!rtsarray[idx])
return false;
}
if (cur_ops == &typesafe_ref_ops) {
rts_acquire = typesafe_ref_acquire;
rts_release = typesafe_ref_release;
} else if (cur_ops == &typesafe_lock_ops) {
rts_acquire = typesafe_lock_acquire;
rts_release = typesafe_lock_release;
} else if (cur_ops == &typesafe_seqlock_ops) {
rts_acquire = typesafe_seqlock_acquire;
rts_release = typesafe_seqlock_release;
} else {
WARN_ON_ONCE(1);
return false;
}
return true;
}
// Clean up after a typesafe test.
//
// This must cope with a typesafe_init() that failed partway through:
// rtsarray may be NULL, and because it is allocated zeroed and filled in
// order, any slots past a failed allocation are still NULL.  Such slots
// are skipped rather than being passed to kmem_cache_free().
static void typesafe_cleanup(void)
{
	long idx;

	if (rtsarray) {
		for (idx = 0; idx < rtsarray_size; idx++) {
			if (rtsarray[idx])
				kmem_cache_free(typesafe_kmem_cachep, rtsarray[idx]);
		}
		kfree(rtsarray);
		rtsarray = NULL;
	}
	// Reset unconditionally: typesafe_init() sets the size before it
	// attempts to allocate the array.
	rtsarray_size = 0;
	kmem_cache_destroy(typesafe_kmem_cachep);
	typesafe_kmem_cachep = NULL;
	rts_acquire = NULL;
	rts_release = NULL;
}
// The typesafe_init() function distinguishes these structures by address.
// Reference-count flavor: typesafe_init() selects typesafe_ref_acquire()
// and typesafe_ref_release() for it.
static struct ref_scale_ops typesafe_ref_ops = {
.init = typesafe_init,
.cleanup = typesafe_cleanup,
.readsection = typesafe_read_section,
.delaysection = typesafe_delay_section,
.name = "typesafe_ref"
};
// Spinlock flavor: typesafe_init() selects typesafe_lock_acquire() and
// typesafe_lock_release() for it.
static struct ref_scale_ops typesafe_lock_ops = {
.init = typesafe_init,
.cleanup = typesafe_cleanup,
.readsection = typesafe_read_section,
.delaysection = typesafe_delay_section,
.name = "typesafe_lock"
};
// Sequence-lock flavor: typesafe_init() selects typesafe_seqlock_acquire()
// and typesafe_seqlock_release() for it.
static struct ref_scale_ops typesafe_seqlock_ops = {
.init = typesafe_init,
.cleanup = typesafe_cleanup,
.readsection = typesafe_read_section,
.delaysection = typesafe_delay_section,
.name = "typesafe_seqlock"
};
static void rcu_scale_one_reader(void)
{
if (readdelay <= 0)
@ -812,6 +1048,7 @@ ref_scale_init(void)
static struct ref_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
};
if (!torture_init_begin(scale_type, verbose))
@ -833,7 +1070,10 @@ ref_scale_init(void)
goto unwind;
}
if (cur_ops->init)
cur_ops->init();
if (!cur_ops->init()) {
firsterr = -EUCLEAN;
goto unwind;
}
ref_scale_print_module_parms(cur_ops, "Start of test");

View file

@ -154,7 +154,7 @@ static void init_srcu_struct_data(struct srcu_struct *ssp)
*/
static inline bool srcu_invl_snp_seq(unsigned long s)
{
return rcu_seq_state(s) == SRCU_SNP_INIT_SEQ;
return s == SRCU_SNP_INIT_SEQ;
}
/*
@ -469,24 +469,59 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx)
/*
* If the locks are the same as the unlocks, then there must have
* been no readers on this index at some time in between. This does
* not mean that there are no more readers, as one could have read
* the current index but not have incremented the lock counter yet.
* been no readers on this index at some point in this function.
* But there might be more readers, as a task might have read
* the current ->srcu_idx but not yet have incremented its CPU's
* ->srcu_lock_count[idx] counter. In fact, it is possible
* that most of the tasks have been preempted between fetching
* ->srcu_idx and incrementing ->srcu_lock_count[idx]. And there
* could be almost (ULONG_MAX / sizeof(struct task_struct)) tasks
* in a system whose address space was fully populated with memory.
* Call this quantity Nt.
*
* So suppose that the updater is preempted here for so long
* that more than ULONG_MAX non-nested readers come and go in
* the meantime. It turns out that this cannot result in overflow
* because if a reader modifies its unlock count after we read it
* above, then that reader's next load of ->srcu_idx is guaranteed
* to get the new value, which will cause it to operate on the
* other bank of counters, where it cannot contribute to the
* overflow of these counters. This means that there is a maximum
* of 2*NR_CPUS increments, which cannot overflow given current
* systems, especially not on 64-bit systems.
* So suppose that the updater is preempted at this point in the
* code for a long time. That now-preempted updater has already
* flipped ->srcu_idx (possibly during the preceding grace period),
* done an smp_mb() (again, possibly during the preceding grace
* period), and summed up the ->srcu_unlock_count[idx] counters.
* How many times can a given one of the aforementioned Nt tasks
* increment the old ->srcu_idx value's ->srcu_lock_count[idx]
* counter, in the absence of nesting?
*
* OK, how about nesting? This does impose a limit on nesting
* of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
* especially on 64-bit systems.
* It can clearly do so once, given that it has already fetched
* the old value of ->srcu_idx and is just about to use that value
* to index its increment of ->srcu_lock_count[idx]. But as soon as
* it leaves that SRCU read-side critical section, it will increment
* ->srcu_unlock_count[idx], which must follow the updater's above
* read from that same value. Thus, as soon as the reading task does
* an smp_mb() and a later fetch from ->srcu_idx, that task will be
* guaranteed to get the new index. Except that the increment of
* ->srcu_unlock_count[idx] in __srcu_read_unlock() is after the
* smp_mb(), and the fetch from ->srcu_idx in __srcu_read_lock()
* is before the smp_mb(). Thus, that task might not see the new
* value of ->srcu_idx until the -second- __srcu_read_lock(),
* which in turn means that this task might well increment
* ->srcu_lock_count[idx] for the old value of ->srcu_idx twice,
* not just once.
*
* However, it is important to note that a given smp_mb() takes
* effect not just for the task executing it, but also for any
* later task running on that same CPU.
*
* That is, there can be almost Nt + Nc further increments of
* ->srcu_lock_count[idx] for the old index, where Nc is the number
* of CPUs. But this is OK because the size of the task_struct
* structure limits the value of Nt and current systems limit Nc
* to a few thousand.
*
* OK, but what about nesting? This does impose a limit on
* nesting of half of the size of the task_struct structure
* (measured in bytes), which should be sufficient. A late 2022
* TREE01 rcutorture run reported this size to be no less than
* 9408 bytes, allowing up to 4704 levels of nesting, which is
* comfortably beyond excessive. Especially on 64-bit systems,
* which are unlikely to be configured with an address space fully
* populated with memory, at least not anytime soon.
*/
return srcu_readers_lock_idx(ssp, idx) == unlocks;
}
@ -726,7 +761,7 @@ static void srcu_gp_start(struct srcu_struct *ssp)
int state;
if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
sdp = per_cpu_ptr(ssp->sda, 0);
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = this_cpu_ptr(ssp->sda);
lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
@ -837,7 +872,8 @@ static void srcu_gp_end(struct srcu_struct *ssp)
/* Initiate callback invocation as needed. */
ss_state = smp_load_acquire(&ssp->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_BARRIER) {
srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, 0), cbdelay);
srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()),
cbdelay);
} else {
idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
srcu_for_each_node_breadth_first(ssp, snp) {
@ -914,7 +950,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
if (snp)
for (; snp != NULL; snp = snp->srcu_parent) {
sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) ||
(!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
return;
spin_lock_irqsave_rcu_node(snp, flags);
@ -941,6 +977,9 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
*
* Note that this function also does the work of srcu_funnel_exp_start(),
* in some cases by directly invoking it.
*
* The SRCU read lock should be held around this function, and s is a
* sequence-number snapshot taken after acquiring that lock.
*/
static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
unsigned long s, bool do_norm)
@ -961,7 +1000,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
if (snp_leaf)
/* Each pass through the loop does one level of the srcu_node tree. */
for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != snp_leaf)
if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) && snp != snp_leaf)
return; /* GP already done and CBs recorded. */
spin_lock_irqsave_rcu_node(snp, flags);
snp_seq = snp->srcu_have_cbs[idx];
@ -998,8 +1037,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
/* If grace period not already done and none in progress, start it. */
if (!rcu_seq_done(&ssp->srcu_gp_seq, s) &&
/* If grace period not already in progress, start it. */
if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) &&
rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
srcu_gp_start(ssp);
@ -1059,10 +1098,11 @@ static void srcu_flip(struct srcu_struct *ssp)
/*
* Ensure that if the updater misses an __srcu_read_unlock()
* increment, that task's next __srcu_read_lock() will see the
* above counter update. Note that both this memory barrier
* and the one in srcu_readers_active_idx_check() provide the
* guarantee for __srcu_read_lock().
* increment, that task's __srcu_read_lock() following its next
* __srcu_read_lock() or __srcu_read_unlock() will see the above
* counter update. Note that both this memory barrier and the
* one in srcu_readers_active_idx_check() provide the guarantee
* for __srcu_read_lock().
*/
smp_mb(); /* D */ /* Pairs with C. */
}
@ -1161,7 +1201,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
idx = __srcu_read_lock_nmisafe(ssp);
ss_state = smp_load_acquire(&ssp->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_CALL)
sdp = per_cpu_ptr(ssp->sda, 0);
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = raw_cpu_ptr(ssp->sda);
spin_lock_irqsave_sdp_contention(sdp, &flags);
@ -1497,7 +1537,7 @@ void srcu_barrier(struct srcu_struct *ssp)
idx = __srcu_read_lock_nmisafe(ssp);
if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, get_boot_cpu_id()));
else
for_each_possible_cpu(cpu)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));

View file

@ -384,6 +384,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
{
int cpu;
unsigned long flags;
bool gpdone = poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq);
long n;
long ncbs = 0;
long ncbsnz = 0;
@ -425,21 +426,23 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids));
smp_store_release(&rtp->percpu_enqueue_lim, 1);
rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
gpdone = false;
pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
}
if (rcu_task_cb_adjust && !ncbsnz &&
poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq)) {
if (rcu_task_cb_adjust && !ncbsnz && gpdone) {
raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rtp->percpu_enqueue_lim < rtp->percpu_dequeue_lim) {
WRITE_ONCE(rtp->percpu_dequeue_lim, 1);
pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name);
}
for (cpu = rtp->percpu_dequeue_lim; cpu < nr_cpu_ids; cpu++) {
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
if (rtp->percpu_dequeue_lim == 1) {
for (cpu = rtp->percpu_dequeue_lim; cpu < nr_cpu_ids; cpu++) {
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
WARN_ON_ONCE(rcu_segcblist_n_cbs(&rtpcp->cblist));
WARN_ON_ONCE(rcu_segcblist_n_cbs(&rtpcp->cblist));
}
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
}
@ -560,8 +563,9 @@ static int __noreturn rcu_tasks_kthread(void *arg)
static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
{
/* Complain if the scheduler has not started. */
WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
"synchronize_rcu_tasks called too soon");
if (WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
"synchronize_%s() called too soon", rtp->name))
return;
// If the grace-period kthread is running, use it.
if (READ_ONCE(rtp->kthread_ptr)) {
@ -827,11 +831,21 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
static void rcu_tasks_postscan(struct list_head *hop)
{
/*
* Wait for tasks that are in the process of exiting. This
* does only part of the job, ensuring that all tasks that were
* previously exiting reach the point where they have disabled
* preemption, allowing the later synchronize_rcu() to finish
* the job.
* Exiting tasks may escape the tasklist scan. Those are vulnerable
* until their final schedule() with TASK_DEAD state. To cope with
* this, divide the fragile part of the exit path into two intersecting
* read-side critical sections:
*
* 1) An _SRCU_ read side starting before calling exit_notify(),
* which may remove the task from the tasklist, and ending after
* the final preempt_disable() call in do_exit().
*
* 2) An _RCU_ read side starting with the final preempt_disable()
* call in do_exit() and ending with the final call to schedule()
* with TASK_DEAD state.
*
* This function handles part 1), and the postgp step handles part 2)
* with a call to synchronize_rcu().
*/
synchronize_srcu(&tasks_rcu_exit_srcu);
}
@ -898,7 +912,10 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
*
* In addition, this synchronize_rcu() waits for exiting tasks
* to complete their final preempt_disable() region of execution,
* cleaning up after the synchronize_srcu() above.
* cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu),
* thereby making the whole region, from before tasklist removal
* until the final schedule() with TASK_DEAD state, an RCU Tasks
* read-side critical section.
*/
synchronize_rcu();
}
@ -988,27 +1005,42 @@ void show_rcu_tasks_classic_gp_kthread(void)
EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
#endif // !defined(CONFIG_TINY_RCU)
/* Do the srcu_read_lock() for the above synchronize_srcu(). */
/*
* Help protect against the tasklist-scan blind spot that exists while
* the task is exiting and may be removed from the tasklist. See the
* corresponding synchronize_srcu() for further details.
*/
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
{
preempt_disable();
current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
preempt_enable();
}
/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
/*
* Help protect against the tasklist-scan blind spot that exists while
* the task is exiting and may be removed from the tasklist. See the
* corresponding synchronize_srcu() for further details.
*/
void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu)
{
struct task_struct *t = current;
preempt_disable();
__srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
preempt_enable();
exit_tasks_rcu_finish_trace(t);
}
/*
* Help protect against the tasklist-scan blind spot that exists while
* the task is exiting and may be removed from the tasklist. See the
* corresponding synchronize_srcu() for further details.
*
* This first invokes exit_tasks_rcu_stop() and then passes the current
* task to exit_tasks_rcu_finish_trace().
*/
void exit_tasks_rcu_finish(void)
{
exit_tasks_rcu_stop();
exit_tasks_rcu_finish_trace(current);
}
#else /* #ifdef CONFIG_TASKS_RCU */
// Stubs used when CONFIG_TASKS_RCU is not defined: starting and stopping
// do nothing, but finishing still invokes exit_tasks_rcu_finish_trace()
// on the current task.
void exit_tasks_rcu_start(void) { }
void exit_tasks_rcu_stop(void) { }
void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
@ -1036,9 +1068,6 @@ static void rcu_tasks_be_rude(struct work_struct *work)
// Wait for one rude RCU-tasks grace period.
static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
{
if (num_online_cpus() <= 1)
return; // Fastpath for only one CPU.
rtp->n_ipis += cpumask_weight(cpu_online_mask);
schedule_on_each_cpu(rcu_tasks_be_rude);
}
@ -1815,23 +1844,21 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp)
static void rcu_tasks_initiate_self_tests(void)
{
unsigned long j = jiffies;
pr_info("Running RCU-tasks wait API self tests\n");
#ifdef CONFIG_TASKS_RCU
tests[0].runstart = j;
tests[0].runstart = jiffies;
synchronize_rcu_tasks();
call_rcu_tasks(&tests[0].rh, test_rcu_tasks_callback);
#endif
#ifdef CONFIG_TASKS_RUDE_RCU
tests[1].runstart = j;
tests[1].runstart = jiffies;
synchronize_rcu_tasks_rude();
call_rcu_tasks_rude(&tests[1].rh, test_rcu_tasks_callback);
#endif
#ifdef CONFIG_TASKS_TRACE_RCU
tests[2].runstart = j;
tests[2].runstart = jiffies;
synchronize_rcu_tasks_trace();
call_rcu_tasks_trace(&tests[2].rh, test_rcu_tasks_callback);
#endif

View file

@ -246,15 +246,12 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head) {
void *ptr = (void *) head - (unsigned long) func;
if (head)
kasan_record_aux_stack_noalloc(ptr);
}
__kvfree_call_rcu(head, func);
__kvfree_call_rcu(head, ptr);
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
#endif

View file

@ -144,14 +144,16 @@ static int rcu_scheduler_fully_active __read_mostly;
static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
unsigned long gps, unsigned long flags);
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
static bool rcu_rdp_cpu_online(struct rcu_data *rdp);
static bool rcu_init_invoked(void);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
/*
* rcuc/rcub/rcuop kthread realtime priority. The "rcuop"
@ -214,27 +216,6 @@ EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);
*/
#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays for debugging. */
/*
* Compute the mask of online CPUs for the specified rcu_node structure.
* This will not be stable unless the rcu_node structure's ->lock is
* held, but the bit corresponding to the current CPU will be stable
* in most contexts.
*/
static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
{
return READ_ONCE(rnp->qsmaskinitnext);
}
/*
* Is the CPU corresponding to the specified rcu_data structure online
* from RCU's perspective? This perspective is given by that structure's
* ->qsmaskinitnext field rather than by the global cpu_online_mask.
*/
static bool rcu_rdp_cpu_online(struct rcu_data *rdp)
{
return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode));
}
/*
* Return true if an RCU grace period is in progress. The READ_ONCE()s
* permit this function to be invoked without holding the root rcu_node
@ -734,46 +715,6 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
}
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
/*
* Is the current CPU online as far as RCU is concerned?
*
* Disable preemption to avoid false positives that could otherwise
* happen due to the current CPU number being sampled, this task being
* preempted, its old CPU being taken offline, resuming on some other CPU,
* then determining that its old CPU is now offline.
*
* Disable checking if in an NMI handler because we cannot safely
* report errors from NMI handlers anyway. In addition, it is OK to use
* RCU on an offline processor during initial boot, hence the check for
* rcu_scheduler_fully_active.
*/
bool rcu_lockdep_current_cpu_online(void)
{
struct rcu_data *rdp;
bool ret = false;
if (in_nmi() || !rcu_scheduler_fully_active)
return true;
preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
/*
* Strictly, we care here about the case where the current CPU is
* in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
* not being up to date. So arch_spin_is_locked() might have a
* false positive if it's held by some *other* CPU, but that's
* OK because that just means a false *negative* on the warning.
*/
if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock))
ret = true;
preempt_enable_notrace();
return ret;
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
/*
* When trying to report a quiescent state on behalf of some other CPU,
* it is our responsibility to check for and handle potential overflow
@ -1350,13 +1291,6 @@ static void rcu_strict_gp_boundary(void *unused)
invoke_rcu_core();
}
// Has rcu_init() been invoked? This is used (for example) to determine
// whether spinlocks may be acquired safely.
static bool rcu_init_invoked(void)
{
return !!rcu_state.n_online_cpus;
}
// Make the polled API aware of the beginning of a grace period.
static void rcu_poll_gp_seq_start(unsigned long *snap)
{
@ -2091,92 +2025,6 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
rcu_report_qs_rdp(rdp);
}
/*
* Near the end of the offline process. Trace the fact that this CPU
* is going offline.
*/
int rcutree_dying_cpu(unsigned int cpu)
{
bool blkd;
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
struct rcu_node *rnp = rdp->mynode;
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
return 0;
blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
return 0;
}
/*
* All CPUs for the specified rcu_node structure have gone offline,
* and all tasks that were preempted within an RCU read-side critical
* section while running on one of those CPUs have since exited their RCU
* read-side critical section. Some other CPU is reporting this fact with
* the specified rcu_node structure's ->lock held and interrupts disabled.
* This function therefore goes up the tree of rcu_node structures,
* clearing the corresponding bits in the ->qsmaskinit fields. Note that
* the leaf rcu_node structure's ->qsmaskinit field has already been
* updated.
*
* This function does check that the specified rcu_node structure has
* all CPUs offline and no blocked tasks, so it is OK to invoke it
* prematurely. That said, invoking it after the fact will cost you
* a needless lock acquisition. So once it has done its work, don't
* invoke it again.
*/
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
{
long mask;
struct rcu_node *rnp = rnp_leaf;
raw_lockdep_assert_held_rcu_node(rnp_leaf);
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
WARN_ON_ONCE(rnp_leaf->qsmaskinit) ||
WARN_ON_ONCE(rcu_preempt_has_tasks(rnp_leaf)))
return;
for (;;) {
mask = rnp->grpmask;
rnp = rnp->parent;
if (!rnp)
break;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
/* Between grace periods, so better already be zero! */
WARN_ON_ONCE(rnp->qsmask);
if (rnp->qsmaskinit) {
raw_spin_unlock_rcu_node(rnp);
/* irqs remain disabled. */
return;
}
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
}
}
/*
* The CPU has been completely removed, and some other CPU is reporting
* this fact from process context. Do the remainder of the cleanup.
* There can only be one CPU hotplug operation at a time, so no need for
* explicit locking.
*/
int rcutree_dead_cpu(unsigned int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
return 0;
WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
/* Adjust any no-longer-needed kthreads. */
rcu_boost_kthread_setaffinity(rnp, -1);
// Stop-machine done, so allow nohz_full to disable tick.
tick_dep_clear(TICK_DEP_BIT_RCU);
return 0;
}
/*
* Invoke any RCU callbacks that have made it to the end of their grace
* period. Throttle as specified by rdp->blimit.
@ -2209,7 +2057,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
*/
rcu_nocb_lock_irqsave(rdp, flags);
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
pending = rcu_segcblist_n_cbs(&rdp->cblist);
pending = rcu_segcblist_get_seglen(&rdp->cblist, RCU_DONE_TAIL);
div = READ_ONCE(rcu_divisor);
div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
bl = max(rdp->blimit, pending >> div);
@ -2727,10 +2575,11 @@ static void check_cb_ovld(struct rcu_data *rdp)
}
static void
__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
{
static atomic_t doublefrees;
unsigned long flags;
bool lazy;
struct rcu_data *rdp;
bool was_alldone;
@ -2755,6 +2604,7 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
kasan_record_aux_stack_noalloc(head);
local_irq_save(flags);
rdp = this_cpu_ptr(&rcu_data);
lazy = lazy_in && !rcu_async_should_hurry();
/* Add the callback to our list. */
if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) {
@ -2876,13 +2726,15 @@ EXPORT_SYMBOL_GPL(call_rcu);
/**
* struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers
* @list: List node. All blocks are linked between each other
* @gp_snap: Snapshot of RCU state for objects placed to this bulk
* @nr_records: Number of active pointers in the array
* @next: Next bulk object in the block chain
* @records: Array of the kvfree_rcu() pointers
*/
struct kvfree_rcu_bulk_data {
struct list_head list;
unsigned long gp_snap;
unsigned long nr_records;
struct kvfree_rcu_bulk_data *next;
void *records[];
};
@ -2898,26 +2750,28 @@ struct kvfree_rcu_bulk_data {
* struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
* @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
* @head_free: List of kfree_rcu() objects waiting for a grace period
* @bkvhead_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
* @bulk_head_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
* @krcp: Pointer to @kfree_rcu_cpu structure
*/
struct kfree_rcu_cpu_work {
struct rcu_work rcu_work;
struct rcu_head *head_free;
struct kvfree_rcu_bulk_data *bkvhead_free[FREE_N_CHANNELS];
struct list_head bulk_head_free[FREE_N_CHANNELS];
struct kfree_rcu_cpu *krcp;
};
/**
* struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
* @head: List of kfree_rcu() objects not yet waiting for a grace period
* @bkvhead: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
* @head_gp_snap: Snapshot of RCU state for objects placed to "@head"
* @bulk_head: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
* @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
* @lock: Synchronize access to this structure
* @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
* @initialized: The @rcu_work fields have been initialized
* @count: Number of objects for which GP not started
* @head_count: Number of objects in rcu_head singular list
* @bulk_count: Number of objects in bulk-list
* @bkvcache:
* A simple cache list that contains objects for reuse purpose.
* In order to save some per-cpu space the list is singular.
@ -2935,13 +2789,20 @@ struct kfree_rcu_cpu_work {
* the interactions with the slab allocators.
*/
struct kfree_rcu_cpu {
// Objects queued on a linked list
// through their rcu_head structures.
struct rcu_head *head;
struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS];
unsigned long head_gp_snap;
atomic_t head_count;
// Objects queued on a bulk-list.
struct list_head bulk_head[FREE_N_CHANNELS];
atomic_t bulk_count[FREE_N_CHANNELS];
struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
raw_spinlock_t lock;
struct delayed_work monitor_work;
bool initialized;
int count;
struct delayed_work page_cache_work;
atomic_t backoff_page_cache_fill;
@ -3029,82 +2890,51 @@ drain_page_cache(struct kfree_rcu_cpu *krcp)
return freed;
}
/*
* This function is invoked in workqueue context after a grace period.
* It frees all the objects queued on ->bkvhead_free or ->head_free.
*/
static void kfree_rcu_work(struct work_struct *work)
static void
kvfree_rcu_bulk(struct kfree_rcu_cpu *krcp,
struct kvfree_rcu_bulk_data *bnode, int idx)
{
unsigned long flags;
struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS], *bnext;
struct rcu_head *head, *next;
struct kfree_rcu_cpu *krcp;
struct kfree_rcu_cpu_work *krwp;
int i, j;
int i;
krwp = container_of(to_rcu_work(work),
struct kfree_rcu_cpu_work, rcu_work);
krcp = krwp->krcp;
debug_rcu_bhead_unqueue(bnode);
raw_spin_lock_irqsave(&krcp->lock, flags);
// Channels 1 and 2.
for (i = 0; i < FREE_N_CHANNELS; i++) {
bkvhead[i] = krwp->bkvhead_free[i];
krwp->bkvhead_free[i] = NULL;
}
rcu_lock_acquire(&rcu_callback_map);
if (idx == 0) { // kmalloc() / kfree().
trace_rcu_invoke_kfree_bulk_callback(
rcu_state.name, bnode->nr_records,
bnode->records);
// Channel 3.
head = krwp->head_free;
krwp->head_free = NULL;
raw_spin_unlock_irqrestore(&krcp->lock, flags);
kfree_bulk(bnode->nr_records, bnode->records);
} else { // vmalloc() / vfree().
for (i = 0; i < bnode->nr_records; i++) {
trace_rcu_invoke_kvfree_callback(
rcu_state.name, bnode->records[i], 0);
// Handle the first two channels.
for (i = 0; i < FREE_N_CHANNELS; i++) {
for (; bkvhead[i]; bkvhead[i] = bnext) {
bnext = bkvhead[i]->next;
debug_rcu_bhead_unqueue(bkvhead[i]);
rcu_lock_acquire(&rcu_callback_map);
if (i == 0) { // kmalloc() / kfree().
trace_rcu_invoke_kfree_bulk_callback(
rcu_state.name, bkvhead[i]->nr_records,
bkvhead[i]->records);
kfree_bulk(bkvhead[i]->nr_records,
bkvhead[i]->records);
} else { // vmalloc() / vfree().
for (j = 0; j < bkvhead[i]->nr_records; j++) {
trace_rcu_invoke_kvfree_callback(
rcu_state.name,
bkvhead[i]->records[j], 0);
vfree(bkvhead[i]->records[j]);
}
}
rcu_lock_release(&rcu_callback_map);
raw_spin_lock_irqsave(&krcp->lock, flags);
if (put_cached_bnode(krcp, bkvhead[i]))
bkvhead[i] = NULL;
raw_spin_unlock_irqrestore(&krcp->lock, flags);
if (bkvhead[i])
free_page((unsigned long) bkvhead[i]);
cond_resched_tasks_rcu_qs();
vfree(bnode->records[i]);
}
}
rcu_lock_release(&rcu_callback_map);
raw_spin_lock_irqsave(&krcp->lock, flags);
if (put_cached_bnode(krcp, bnode))
bnode = NULL;
raw_spin_unlock_irqrestore(&krcp->lock, flags);
if (bnode)
free_page((unsigned long) bnode);
cond_resched_tasks_rcu_qs();
}
static void
kvfree_rcu_list(struct rcu_head *head)
{
struct rcu_head *next;
/*
* This is used when the "bulk" path can not be used for the
* double-argument of kvfree_rcu(). This happens when the
* page-cache is empty, which means that objects are instead
* queued on a linked list through their rcu_head structures.
* This list is named "Channel 3".
*/
for (; head; head = next) {
unsigned long offset = (unsigned long)head->func;
void *ptr = (void *)head - offset;
void *ptr = (void *) head->func;
unsigned long offset = (void *) head - ptr;
next = head->next;
debug_rcu_head_unqueue((struct rcu_head *)ptr);
@ -3119,16 +2949,72 @@ static void kfree_rcu_work(struct work_struct *work)
}
}
/*
* This function is invoked in workqueue context after a grace period.
* It frees all the objects queued on ->bulk_head_free or ->head_free.
*
* All three channels are detached from the kfree_rcu_cpu_work structure
* while holding krcp->lock; the freeing itself is then done with that
* lock released.
*/
static void kfree_rcu_work(struct work_struct *work)
{
unsigned long flags;
struct kvfree_rcu_bulk_data *bnode, *n;
struct list_head bulk_head[FREE_N_CHANNELS];
struct rcu_head *head;
struct kfree_rcu_cpu *krcp;
struct kfree_rcu_cpu_work *krwp;
int i;
// Recover the per-batch structure that embeds this work item, and
// from it the owning kfree_rcu_cpu structure.
krwp = container_of(to_rcu_work(work),
struct kfree_rcu_cpu_work, rcu_work);
krcp = krwp->krcp;
raw_spin_lock_irqsave(&krcp->lock, flags);
// Channels 1 and 2.
// Move each bulk list onto an on-stack list head, which leaves the
// corresponding ->bulk_head_free[] list empty.
for (i = 0; i < FREE_N_CHANNELS; i++)
list_replace_init(&krwp->bulk_head_free[i], &bulk_head[i]);
// Channel 3.
head = krwp->head_free;
krwp->head_free = NULL;
raw_spin_unlock_irqrestore(&krcp->lock, flags);
// Handle the first two channels.
for (i = 0; i < FREE_N_CHANNELS; i++) {
// Start from the tail page, so a GP is likely passed for it.
// NOTE(review): list_for_each_entry_safe() is the forward iterator,
// whereas kvfree_rcu_drain_ready() uses the _reverse variant to walk
// from the tail.  Confirm which traversal order is intended here.
list_for_each_entry_safe(bnode, n, &bulk_head[i], list)
kvfree_rcu_bulk(krcp, bnode, i);
}
/*
* This is used when the "bulk" path can not be used for the
* double-argument of kvfree_rcu(). This happens when the
* page-cache is empty, which means that objects are instead
* queued on a linked list through their rcu_head structures.
* This list is named "Channel 3".
*/
kvfree_rcu_list(head);
}
static bool
need_offload_krc(struct kfree_rcu_cpu *krcp)
{
int i;
for (i = 0; i < FREE_N_CHANNELS; i++)
if (krcp->bkvhead[i])
if (!list_empty(&krcp->bulk_head[i]))
return true;
return !!krcp->head;
return !!READ_ONCE(krcp->head);
}
// Return the total number of objects currently accounted to @krcp: the
// count for the rcu_head linked list plus the count for each bulk-list
// channel.
static int krc_count(struct kfree_rcu_cpu *krcp)
{
	int total;
	int chan;

	total = atomic_read(&krcp->head_count);
	for (chan = 0; chan < FREE_N_CHANNELS; chan++) {
		int queued = atomic_read(&krcp->bulk_count[chan]);

		total += queued;
	}

	return total;
}
static void
@ -3136,7 +3022,7 @@ schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
{
long delay, delay_left;
delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES;
delay = krc_count(krcp) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES;
if (delayed_work_pending(&krcp->monitor_work)) {
delay_left = krcp->monitor_work.timer.expires - jiffies;
if (delay < delay_left)
@ -3146,6 +3032,44 @@ schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
queue_delayed_work(system_wq, &krcp->monitor_work, delay);
}
/*
* Detach from @krcp, and then free, those queued objects whose recorded
* grace-period snapshot is reported as completed by
* poll_state_synchronize_rcu().  Detaching is done under krcp->lock;
* the freeing is done after that lock has been released.
*/
static void
kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
{
struct list_head bulk_ready[FREE_N_CHANNELS];
struct kvfree_rcu_bulk_data *bnode, *n;
struct rcu_head *head_ready = NULL;
unsigned long flags;
int i;
raw_spin_lock_irqsave(&krcp->lock, flags);
for (i = 0; i < FREE_N_CHANNELS; i++) {
INIT_LIST_HEAD(&bulk_ready[i]);
// Walk each bulk list from its tail and stop at the first block
// whose snapshot is not yet ready; that block and everything
// closer to the list head stay queued.
list_for_each_entry_safe_reverse(bnode, n, &krcp->bulk_head[i], list) {
if (!poll_state_synchronize_rcu(bnode->gp_snap))
break;
// Keep ->bulk_count[] in step with the records being removed.
atomic_sub(bnode->nr_records, &krcp->bulk_count[i]);
list_move(&bnode->list, &bulk_ready[i]);
}
}
// The rcu_head list is taken as a whole or not at all, based on the
// single ->head_gp_snap snapshot.
if (krcp->head && poll_state_synchronize_rcu(krcp->head_gp_snap)) {
head_ready = krcp->head;
atomic_set(&krcp->head_count, 0);
WRITE_ONCE(krcp->head, NULL);
}
raw_spin_unlock_irqrestore(&krcp->lock, flags);
// Lock dropped: now free whatever was detached above.
for (i = 0; i < FREE_N_CHANNELS; i++) {
list_for_each_entry_safe(bnode, n, &bulk_ready[i], list)
kvfree_rcu_bulk(krcp, bnode, i);
}
if (head_ready)
kvfree_rcu_list(head_ready);
}
/*
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
*/
@ -3156,26 +3080,31 @@ static void kfree_rcu_monitor(struct work_struct *work)
unsigned long flags;
int i, j;
// Drain ready for reclaim.
kvfree_rcu_drain_ready(krcp);
raw_spin_lock_irqsave(&krcp->lock, flags);
// Attempt to start a new batch.
for (i = 0; i < KFREE_N_BATCHES; i++) {
struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]);
// Try to detach bkvhead or head and attach it over any
// Try to detach bulk_head or head and attach it over any
// available corresponding free channel. It can be that
// a previous RCU batch is in progress, it means that
// immediately to queue another one is not possible so
// in that case the monitor work is rearmed.
if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) ||
(krcp->bkvhead[1] && !krwp->bkvhead_free[1]) ||
(krcp->head && !krwp->head_free)) {
if ((!list_empty(&krcp->bulk_head[0]) && list_empty(&krwp->bulk_head_free[0])) ||
(!list_empty(&krcp->bulk_head[1]) && list_empty(&krwp->bulk_head_free[1])) ||
(READ_ONCE(krcp->head) && !krwp->head_free)) {
// Channel 1 corresponds to the SLAB-pointer bulk path.
// Channel 2 corresponds to vmalloc-pointer bulk path.
for (j = 0; j < FREE_N_CHANNELS; j++) {
if (!krwp->bkvhead_free[j]) {
krwp->bkvhead_free[j] = krcp->bkvhead[j];
krcp->bkvhead[j] = NULL;
if (list_empty(&krwp->bulk_head_free[j])) {
atomic_set(&krcp->bulk_count[j], 0);
list_replace_init(&krcp->bulk_head[j],
&krwp->bulk_head_free[j]);
}
}
@ -3183,11 +3112,10 @@ static void kfree_rcu_monitor(struct work_struct *work)
// objects queued on the linked list.
if (!krwp->head_free) {
krwp->head_free = krcp->head;
krcp->head = NULL;
atomic_set(&krcp->head_count, 0);
WRITE_ONCE(krcp->head, NULL);
}
WRITE_ONCE(krcp->count, 0);
// One work is per one batch, so there are three
// "free channels", the batch can handle. It can
// be that the work is in the pending state when
@ -3197,6 +3125,8 @@ static void kfree_rcu_monitor(struct work_struct *work)
}
}
raw_spin_unlock_irqrestore(&krcp->lock, flags);
// If there is nothing to detach, it means that our job is
// successfully done here. In case of having at least one
// of the channels that is still busy we should rearm the
@ -3204,8 +3134,6 @@ static void kfree_rcu_monitor(struct work_struct *work)
// still in progress.
if (need_offload_krc(krcp))
schedule_delayed_monitor_work(krcp);
raw_spin_unlock_irqrestore(&krcp->lock, flags);
}
static enum hrtimer_restart
@ -3288,10 +3216,11 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
return false;
idx = !!is_vmalloc_addr(ptr);
bnode = list_first_entry_or_null(&(*krcp)->bulk_head[idx],
struct kvfree_rcu_bulk_data, list);
/* Check if a new block is required. */
if (!(*krcp)->bkvhead[idx] ||
(*krcp)->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
if (!bnode || bnode->nr_records == KVFREE_BULK_MAX_ENTR) {
bnode = get_cached_bnode(*krcp);
if (!bnode && can_alloc) {
krc_this_cpu_unlock(*krcp, *flags);
@ -3315,17 +3244,15 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
if (!bnode)
return false;
/* Initialize the new block. */
// Initialize the new block and attach it.
bnode->nr_records = 0;
bnode->next = (*krcp)->bkvhead[idx];
/* Attach it to the head. */
(*krcp)->bkvhead[idx] = bnode;
list_add(&bnode->list, &(*krcp)->bulk_head[idx]);
}
/* Finally insert. */
(*krcp)->bkvhead[idx]->records
[(*krcp)->bkvhead[idx]->nr_records++] = ptr;
// Finally insert and update the GP for this page.
bnode->records[bnode->nr_records++] = ptr;
bnode->gp_snap = get_state_synchronize_rcu();
atomic_inc(&(*krcp)->bulk_count[idx]);
return true;
}
@ -3342,26 +3269,21 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
* be free'd in workqueue context. This allows us to: batch requests together to
* reduce the number of grace periods during heavy kfree_rcu()/kvfree_rcu() load.
*/
void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
unsigned long flags;
struct kfree_rcu_cpu *krcp;
bool success;
void *ptr;
if (head) {
ptr = (void *) head - (unsigned long) func;
} else {
/*
* Please note there is a limitation for the head-less
* variant, that is why there is a clear rule for such
* objects: it can be used from might_sleep() context
* only. For other places please embed an rcu_head to
* your data.
*/
/*
* Please note there is a limitation for the head-less
* variant, that is why there is a clear rule for such
* objects: it can be used from might_sleep() context
* only. For other places please embed an rcu_head to
* your data.
*/
if (!head)
might_sleep();
ptr = (unsigned long *) func;
}
// Queue the object but don't yet schedule the batch.
if (debug_rcu_head_queue(ptr)) {
@ -3382,14 +3304,16 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
// Inline if kvfree_rcu(one_arg) call.
goto unlock_return;
head->func = func;
head->func = ptr;
head->next = krcp->head;
krcp->head = head;
WRITE_ONCE(krcp->head, head);
atomic_inc(&krcp->head_count);
// Take a snapshot for this krcp.
krcp->head_gp_snap = get_state_synchronize_rcu();
success = true;
}
WRITE_ONCE(krcp->count, krcp->count + 1);
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
schedule_delayed_monitor_work(krcp);
@ -3420,7 +3344,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
for_each_possible_cpu(cpu) {
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
count += READ_ONCE(krcp->count);
count += krc_count(krcp);
count += READ_ONCE(krcp->nr_bkv_objs);
atomic_set(&krcp->backoff_page_cache_fill, 1);
}
@ -3437,7 +3361,7 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
int count;
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
count = krcp->count;
count = krc_count(krcp);
count += drain_page_cache(krcp);
kfree_rcu_monitor(&krcp->monitor_work.work);
@ -3461,15 +3385,12 @@ static struct shrinker kfree_rcu_shrinker = {
void __init kfree_rcu_scheduler_running(void)
{
int cpu;
unsigned long flags;
for_each_possible_cpu(cpu) {
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
raw_spin_lock_irqsave(&krcp->lock, flags);
if (need_offload_krc(krcp))
schedule_delayed_monitor_work(krcp);
raw_spin_unlock_irqrestore(&krcp->lock, flags);
}
}
@ -3485,9 +3406,10 @@ void __init kfree_rcu_scheduler_running(void)
*/
static int rcu_blocking_is_gp(void)
{
if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE) {
might_sleep();
return false;
might_sleep(); /* Check for RCU read-side critical section. */
}
return true;
}
@ -3711,7 +3633,9 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_full);
* If @false is returned, it is the caller's responsibility to invoke this
* function later on until it does return @true. Alternatively, the caller
* can explicitly wait for a grace period, for example, by passing @oldstate
* to cond_synchronize_rcu() or by directly invoking synchronize_rcu().
* to either cond_synchronize_rcu() or cond_synchronize_rcu_expedited()
* on the one hand or by directly invoking either synchronize_rcu() or
* synchronize_rcu_expedited() on the other.
*
* Yes, this function does not take counter wrap into account.
* But counter wrap is harmless. If the counter wraps, we have waited for
@ -3722,6 +3646,12 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_full);
* completed. Alternatively, they can use get_completed_synchronize_rcu()
* to get a guaranteed-completed grace-period state.
*
* In addition, because oldstate compresses the grace-period state for
* both normal and expedited grace periods into a single unsigned long,
* it can miss a grace period when synchronize_rcu() runs concurrently
* with synchronize_rcu_expedited(). If this is unacceptable, please
* instead use the _full() variant of these polling APIs.
*
* This function provides the same memory-ordering guarantees that
* would be provided by a synchronize_rcu() that was invoked at the call
* to the function that provided @oldstate, and that returned at the end
@ -4079,6 +4009,155 @@ void rcu_barrier(void)
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/*
 * Return the online-CPU mask of the given rcu_node structure, which is
 * a READ_ONCE() sample of its ->qsmaskinitnext field.  Unless the
 * caller holds that structure's ->lock the value can change underfoot,
 * although the bit for the current CPU will be stable in most contexts.
 */
static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
{
	unsigned long online_mask = READ_ONCE(rnp->qsmaskinitnext);

	return online_mask;
}
/*
 * Does RCU consider the CPU owning the given rcu_data structure to be
 * online?  The answer comes from the ->qsmaskinitnext field of that
 * CPU's rcu_node structure (tested against the CPU's ->grpmask bit),
 * not from the global cpu_online_mask.
 */
static bool rcu_rdp_cpu_online(struct rcu_data *rdp)
{
	unsigned long online_mask = rcu_rnp_online_cpus(rdp->mynode);

	return (online_mask & rdp->grpmask) != 0;
}
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
/*
 * Report whether RCU considers the CPU running this code to be online.
 *
 * Preemption is disabled around the check.  Otherwise the task could
 * sample its CPU number, be preempted, have that CPU taken offline,
 * resume on a different CPU, and then wrongly conclude that it is
 * running on an offline CPU.
 *
 * The check is skipped (and true returned) in NMI handlers, because
 * errors cannot safely be reported from there anyway, and until
 * rcu_scheduler_fully_active is set, because it is OK to use RCU on an
 * offline processor during initial boot.
 */
bool rcu_lockdep_current_cpu_online(void)
{
	struct rcu_data *rdp;
	bool online;

	if (in_nmi() || !rcu_scheduler_fully_active)
		return true;

	preempt_disable_notrace();
	rdp = this_cpu_ptr(&rcu_data);
	/*
	 * The case of real interest is the current CPU being inside
	 * rcu_cpu_starting(), where rdp->grpmask may legitimately not
	 * be reflected yet.  Treating a held ->ofl_lock as "online"
	 * covers that.  If some *other* CPU holds the lock, the result
	 * is a false positive here, which merely means a missed
	 * warning rather than a bogus one.
	 */
	online = rcu_rdp_cpu_online(rdp) ||
		 arch_spin_is_locked(&rcu_state.ofl_lock);
	preempt_enable_notrace();

	return online;
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
// Has rcu_init() run yet?  Callers use this, for example, to decide
// whether it is safe to acquire spinlocks.  The answer is derived from
// rcu_state.n_online_cpus being nonzero.
static bool rcu_init_invoked(void)
{
	return rcu_state.n_online_cpus != 0;
}
/*
 * Invoked near the end of the offline process for @cpu.  Emits a
 * grace-period trace event noting that the CPU is going offline:
 * "cpuofl-bgp" if the CPU's ->grpmask bit is still set in its rcu_node
 * structure's ->qsmask, "cpuofl" otherwise.  Does nothing when
 * CONFIG_HOTPLUG_CPU is disabled.  Always returns 0.
 */
int rcutree_dying_cpu(unsigned int cpu)
{
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	bool qs_pending;

	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
		return 0;

	rdp = per_cpu_ptr(&rcu_data, cpu);
	rnp = rdp->mynode;
	qs_pending = (READ_ONCE(rnp->qsmask) & rdp->grpmask) != 0;
	trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
			       qs_pending ? TPS("cpuofl-bgp") : TPS("cpuofl"));

	return 0;
}
/*
 * All CPUs for the specified rcu_node structure have gone offline,
 * and all tasks that were preempted within an RCU read-side critical
 * section while running on one of those CPUs have since exited their RCU
 * read-side critical section.  Some other CPU is reporting this fact with
 * the specified rcu_node structure's ->lock held and interrupts disabled.
 * This function therefore goes up the tree of rcu_node structures,
 * clearing the corresponding bits in the ->qsmaskinit fields.  Note that
 * the leaf rcu_node structure's ->qsmaskinit field has already been
 * updated.
 *
 * This function does check that the specified rcu_node structure has
 * all CPUs offline and no blocked tasks, so it is OK to invoke it
 * prematurely.  That said, invoking it after the fact will cost you
 * a needless lock acquisition.  So once it has done its work, don't
 * invoke it again.
 *
 * The caller's lock on @rnp_leaf is neither dropped nor re-acquired
 * here; each ancestor's ->lock is taken and released in turn.
 */
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
{
long mask;
struct rcu_node *rnp = rnp_leaf;
raw_lockdep_assert_held_rcu_node(rnp_leaf);
/*
 * Nothing to do without CPU hotplug.  Otherwise warn (once) and bail
 * out if the leaf still has bits set in ->qsmaskinit or still has
 * blocked tasks.
 */
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
WARN_ON_ONCE(rnp_leaf->qsmaskinit) ||
WARN_ON_ONCE(rcu_preempt_has_tasks(rnp_leaf)))
return;
for (;;) {
/* Remember this node's bit, then step up to its parent. */
mask = rnp->grpmask;
rnp = rnp->parent;
/* A node without a parent ends the walk. */
if (!rnp)
break;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
/* Between grace periods, so better already be zero! */
WARN_ON_ONCE(rnp->qsmask);
/* Other bits remain set in this node, so stop propagating. */
if (rnp->qsmaskinit) {
raw_spin_unlock_rcu_node(rnp);
/* irqs remain disabled. */
return;
}
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
}
}
/*
 * The CPU is now completely gone, and some other CPU is reporting that
 * from process context.  Finish the cleanup: decrement
 * rcu_state.n_online_cpus and clear the TICK_DEP_BIT_RCU tick
 * dependency.  Only one CPU-hotplug operation can be in flight at a
 * time, so no explicit locking is needed.  Does nothing when
 * CONFIG_HOTPLUG_CPU is disabled.  Always returns 0.
 */
int rcutree_dead_cpu(unsigned int cpu)
{
	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		WRITE_ONCE(rcu_state.n_online_cpus,
			   rcu_state.n_online_cpus - 1);
		// Stop-machine done, so allow nohz_full to disable tick.
		tick_dep_clear(TICK_DEP_BIT_RCU);
	}

	return 0;
}
/*
* Propagate ->qsmaskinit bits up the rcu_node tree to account for the
* first CPU in a given leaf rcu_node structure coming online. The caller
@ -4408,11 +4487,13 @@ static int rcu_pm_notify(struct notifier_block *self,
switch (action) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
rcu_async_hurry();
rcu_expedite_gp();
break;
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
rcu_unexpedite_gp();
rcu_async_relax();
break;
default:
break;
@ -4766,7 +4847,7 @@ struct workqueue_struct *rcu_gp_wq;
static void __init kfree_rcu_batch_init(void)
{
int cpu;
int i;
int i, j;
/* Clamp it to [0:100] seconds interval. */
if (rcu_delay_page_cache_fill_msec < 0 ||
@ -4786,8 +4867,14 @@ static void __init kfree_rcu_batch_init(void)
for (i = 0; i < KFREE_N_BATCHES; i++) {
INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
krcp->krw_arr[i].krcp = krcp;
for (j = 0; j < FREE_N_CHANNELS; j++)
INIT_LIST_HEAD(&krcp->krw_arr[i].bulk_head_free[j]);
}
for (i = 0; i < FREE_N_CHANNELS; i++)
INIT_LIST_HEAD(&krcp->bulk_head[i]);
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
krcp->initialized = true;
@ -4838,6 +4925,8 @@ void __init rcu_init(void)
// Kick-start any polled grace periods that started early.
if (!(per_cpu_ptr(&rcu_data, cpu)->mynode->exp_seq_poll_rq & 0x1))
(void)start_poll_synchronize_rcu_expedited();
rcu_test_sync_prims();
}
#include "tree_stall.h"

View file

@ -11,6 +11,7 @@
static void rcu_exp_handler(void *unused);
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp);
/*
* Record the start of an expedited grace period.
@ -667,8 +668,11 @@ static void synchronize_rcu_expedited_wait(void)
mask = leaf_node_cpu_bit(rnp, cpu);
if (!(READ_ONCE(rnp->expmask) & mask))
continue;
preempt_disable(); // For smp_processor_id() in dump_cpu_task().
dump_cpu_task(cpu);
preempt_enable();
}
rcu_exp_print_detail_task_stall_rnp(rnp);
}
jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
panic_on_rcu_stall();
@ -811,6 +815,36 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
return ndetected;
}
/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, dumping the stack of each that is blocking the current
 * expedited grace period.
 *
 * Does nothing unless the rcu_exp_stall_task_details module parameter
 * is set.  The scan runs with @rnp's ->lock held and interrupts
 * disabled.
 */
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
unsigned long flags;
struct task_struct *t;
if (!rcu_exp_stall_task_details)
return;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
/* ->exp_tasks is NULL, so there is nothing to dump for this node. */
if (!READ_ONCE(rnp->exp_tasks)) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
/*
 * Position the cursor on the entry just before ->exp_tasks, because
 * list_for_each_entry_continue() starts with the entry following
 * the cursor.  The walk therefore begins at ->exp_tasks itself and
 * runs to the end of ->blkd_tasks.
 */
t = list_entry(rnp->exp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
/*
 * We could be printing a lot while holding a spinlock.
 * Avoid triggering hard lockup.
 */
touch_nmi_watchdog();
sched_show_task(t);
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
#else /* #ifdef CONFIG_PREEMPT_RCU */
/* Request an expedited quiescent state. */
@ -883,6 +917,15 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
return 0;
}
/*
 * Because preemptible RCU does not exist, we never have to print out
 * tasks blocked within RCU read-side critical sections that are blocking
 * the current expedited grace period.  This empty variant is the one
 * built when CONFIG_PREEMPT_RCU is not set.
 */
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
}
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
/**

View file

@ -144,8 +144,45 @@ bool rcu_gp_is_normal(void)
}
EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
static atomic_t rcu_async_hurry_nesting = ATOMIC_INIT(1);
/*
* Should call_rcu() callbacks be processed with urgency or are
* they OK being executed with arbitrary delays?
*/
bool rcu_async_should_hurry(void)
{
return !IS_ENABLED(CONFIG_RCU_LAZY) ||
atomic_read(&rcu_async_hurry_nesting);
}
EXPORT_SYMBOL_GPL(rcu_async_should_hurry);
/**
 * rcu_async_hurry - Make future async RCU callbacks not lazy.
 *
 * Once this function has been called, subsequent call_rcu() invocations
 * will be processed in a timely fashion.  This is done by incrementing
 * the rcu_async_hurry_nesting counter.  It is a no-op in kernels built
 * without CONFIG_RCU_LAZY.
 */
void rcu_async_hurry(void)
{
	if (!IS_ENABLED(CONFIG_RCU_LAZY))
		return;

	atomic_inc(&rcu_async_hurry_nesting);
}
EXPORT_SYMBOL_GPL(rcu_async_hurry);
/**
 * rcu_async_relax - Make future async RCU callbacks lazy.
 *
 * Once this function has been called, subsequent call_rcu() invocations
 * will be processed in a lazy fashion.  This is done by decrementing
 * the rcu_async_hurry_nesting counter.  It is a no-op in kernels built
 * without CONFIG_RCU_LAZY.
 */
void rcu_async_relax(void)
{
	if (!IS_ENABLED(CONFIG_RCU_LAZY))
		return;

	atomic_dec(&rcu_async_hurry_nesting);
}
EXPORT_SYMBOL_GPL(rcu_async_relax);
static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1);
/*
* Should normal grace-period primitives be expedited? Intended for
* use within RCU. Note that this function takes the rcu_expedited
@ -195,6 +232,7 @@ static bool rcu_boot_ended __read_mostly;
void rcu_end_inkernel_boot(void)
{
rcu_unexpedite_gp();
rcu_async_relax();
if (rcu_normal_after_boot)
WRITE_ONCE(rcu_normal, 1);
rcu_boot_ended = true;
@ -220,6 +258,7 @@ void rcu_test_sync_prims(void)
{
if (!IS_ENABLED(CONFIG_PROVE_RCU))
return;
pr_info("Running RCU synchronous self tests\n");
synchronize_rcu();
synchronize_rcu_expedited();
}
@ -508,6 +547,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
bool rcu_exp_stall_task_details __read_mostly;
module_param(rcu_exp_stall_task_details, bool, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
@ -555,9 +596,12 @@ struct early_boot_kfree_rcu {
static void early_boot_test_call_rcu(void)
{
static struct rcu_head head;
int idx;
static struct rcu_head shead;
struct early_boot_kfree_rcu *rhp;
idx = srcu_down_read(&early_srcu);
srcu_up_read(&early_srcu, idx);
call_rcu(&head, test_callback);
early_srcu_cookie = start_poll_synchronize_srcu(&early_srcu);
call_srcu(&early_srcu, &shead, test_callback);
@ -586,6 +630,7 @@ static int rcu_verify_early_boot_tests(void)
early_boot_test_counter++;
srcu_barrier(&early_srcu);
WARN_ON_ONCE(!poll_state_synchronize_srcu(&early_srcu, early_srcu_cookie));
cleanup_srcu_struct(&early_srcu);
}
if (rcu_self_test_counter != early_boot_test_counter) {
WARN_ON(1);

View file

@ -450,7 +450,7 @@ unsigned long
torture_random(struct torture_random_state *trsp)
{
if (--trsp->trs_count < 0) {
trsp->trs_state += (unsigned long)local_clock();
trsp->trs_state += (unsigned long)local_clock() + raw_smp_processor_id();
trsp->trs_count = TORTURE_RANDOM_REFRESH;
}
trsp->trs_state = trsp->trs_state * TORTURE_RANDOM_MULT +
@ -915,7 +915,7 @@ void torture_kthread_stopping(char *title)
VERBOSE_TOROUT_STRING(buf);
while (!kthread_should_stop()) {
torture_shutdown_absorb(title);
schedule_timeout_uninterruptible(1);
schedule_timeout_uninterruptible(HZ / 20);
}
}
EXPORT_SYMBOL_GPL(torture_kthread_stopping);

View file

@ -10,10 +10,9 @@
T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`"
trap 'rm -rf $T' 0
cat $1 > $T/.config
sed -e 's/"//g' < $1 > $T/.config
cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
grep -v '^CONFIG_INITRAMFS_SOURCE' |
sed -e 's/"//g' -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' < $2 |
awk '
{
print "if grep -q \"" $0 "\" < '"$T/.config"'";

View file

@ -10,7 +10,7 @@
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
grep -v 'ODEBUG: ' |
grep -v 'This means that this is a DEBUG kernel and it is' |
grep -v 'Warning: unable to open an initial console' |

View file

@ -44,10 +44,10 @@ fi
ncpus="`getconf _NPROCESSORS_ONLN`"
make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
retval=$?
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|Error|error:|warning:" || grep -E -q "Stop|Error|error:" < $resdir/Make.out
then
echo Kernel build error
egrep "Stop|Error|error:|warning:" < $resdir/Make.out
grep -E "Stop|Error|error:|warning:" < $resdir/Make.out
echo Run aborted.
exit 3
fi

View file

@ -32,11 +32,11 @@ for i in ${rundir}/*/Make.out
do
scenariodir="`dirname $i`"
scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`"
if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i
if grep -E -q "error:|warning:|^ld: .*undefined reference to" < $i
then
egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
grep -E "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
files="$files $i.diags $i"
elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run"
elif ! test -f ${scenariobasedir}/vmlinux && ! test -f ${scenariobasedir}/vmlinux.xz && ! test -f "${rundir}/re-run"
then
echo No ${scenariobasedir}/vmlinux file > $i.diags
files="$files $i.diags $i"

View file

@ -186,7 +186,7 @@ do
fi
;;
--kconfig|--kconfigs)
checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$'
TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
shift
;;
@ -585,7 +585,7 @@ awk < $T/cfgcpu.pack \
echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
# Extract the tests and their batches from the script.
egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" |
sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' |
awk '
/^----Start/ {
@ -622,7 +622,7 @@ then
elif test "$dryrun" = sched
then
# Extract the test run schedule from the script.
egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" |
sed -e 's/:.*$//' -e 's/^echo //'
nbuilds="`grep 'Starting build\.' $T/script |
grep -v ">>" | sed -e 's/:.*$//' -e 's/^echo //' |

View file

@ -65,7 +65,7 @@ then
fi
grep --binary-files=text 'torture:.*ver:' $file |
egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
grep -E --binary-files=text -v '\(null\)|rtc: 000000000* ' |
sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
sed -e 's/^.*ver: //' |
awk '
@ -128,17 +128,17 @@ then
then
summary="$summary Badness: $n_badness"
fi
n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | egrep -c 'WARNING:|Warn'`
n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | grep -E -c 'WARNING:|Warn'`
if test "$n_warn" -ne 0
then
summary="$summary Warnings: $n_warn"
fi
n_bugs=`egrep -c '\bBUG|Oops:' $file`
n_bugs=`grep -E -c '\bBUG|Oops:' $file`
if test "$n_bugs" -ne 0
then
summary="$summary Bugs: $n_bugs"
fi
n_kcsan=`egrep -c 'BUG: KCSAN: ' $file`
n_kcsan=`grep -E -c 'BUG: KCSAN: ' $file`
if test "$n_kcsan" -ne 0
then
if test "$n_bugs" = "$n_kcsan"
@ -158,7 +158,7 @@ then
then
summary="$summary lockdep: $n_badness"
fi
n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
n_stalls=`grep -E -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
if test "$n_stalls" -ne 0
then
summary="$summary Stalls: $n_stalls"