diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index f147a95fd84a..3704997e8b25 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -31,7 +31,6 @@ #include int gart_iommu_aperture; -EXPORT_SYMBOL_GPL(gart_iommu_aperture); int gart_iommu_aperture_disabled __initdata; int gart_iommu_aperture_allowed __initdata; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 28cba46bf32c..bd58de4d7a29 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,11 @@ #include "mce-internal.h" +#define rcu_dereference_check_mce(p) \ + rcu_dereference_check((p), \ + rcu_read_lock_sched_held() || \ + lockdep_is_held(&mce_read_mutex)) + #define CREATE_TRACE_POINTS #include @@ -158,7 +163,7 @@ void mce_log(struct mce *mce) mce->finished = 0; wmb(); for (;;) { - entry = rcu_dereference(mcelog.next); + entry = rcu_dereference_check_mce(mcelog.next); for (;;) { /* * When the buffer fills up discard new entries. @@ -1500,7 +1505,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, return -ENOMEM; mutex_lock(&mce_read_mutex); - next = rcu_dereference(mcelog.next); + next = rcu_dereference_check_mce(mcelog.next); /* Only supports full reads right now */ if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { @@ -1565,7 +1570,7 @@ timeout: static unsigned int mce_poll(struct file *file, poll_table *wait) { poll_wait(file, &mce_wait, wait); - if (rcu_dereference(mcelog.next)) + if (rcu_dereference_check_mce(mcelog.next)) return POLLIN | POLLRDNORM; return 0; } diff --git a/include/linux/cred.h b/include/linux/cred.h index 4db09f89b637..52507c3e1387 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -280,7 +280,7 @@ static inline void put_cred(const struct cred *_cred) * task or by holding tasklist_lock to prevent it from being unlinked. */ #define __task_cred(task) \ - ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)))) + ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held()))) /** * get_task_cred - Get another task's objective credentials diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a005cac5e302..3024050c82a1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -101,6 +101,11 @@ extern struct lockdep_map rcu_sched_lock_map; # define rcu_read_release_sched() \ lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) +static inline int debug_lockdep_rcu_enabled(void) +{ + return likely(rcu_scheduler_active && debug_locks); +} + /** * rcu_read_lock_held - might we be in RCU read-side critical section? * @@ -108,12 +113,14 @@ extern struct lockdep_map rcu_sched_lock_map; * an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, * this assumes we are in an RCU read-side critical section unless it can * prove otherwise. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ static inline int rcu_read_lock_held(void) { - if (debug_locks) - return lock_is_held(&rcu_lock_map); - return 1; + if (!debug_lockdep_rcu_enabled()) + return 1; + return lock_is_held(&rcu_lock_map); } /** @@ -123,12 +130,14 @@ static inline int rcu_read_lock_held(void) * an RCU-bh read-side critical section. In absence of CONFIG_PROVE_LOCKING, * this assumes we are in an RCU-bh read-side critical section unless it can * prove otherwise. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ static inline int rcu_read_lock_bh_held(void) { - if (debug_locks) - return lock_is_held(&rcu_bh_lock_map); - return 1; + if (!debug_lockdep_rcu_enabled()) + return 1; + return lock_is_held(&rcu_bh_lock_map); } /** @@ -139,15 +148,26 @@ static inline int rcu_read_lock_bh_held(void) * this assumes we are in an RCU-sched read-side critical section unless it * can prove otherwise. Note that disabling of preemption (including * disabling irqs) counts as an RCU-sched read-side critical section. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ +#ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { int lockdep_opinion = 0; + if (!debug_lockdep_rcu_enabled()) + return 1; if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); - return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active; + return lockdep_opinion || preempt_count() != 0; } +#else /* #ifdef CONFIG_PREEMPT */ +static inline int rcu_read_lock_sched_held(void) +{ + return 1; +} +#endif /* #else #ifdef CONFIG_PREEMPT */ #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -168,10 +188,17 @@ static inline int rcu_read_lock_bh_held(void) return 1; } +#ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { - return preempt_count() != 0 || !rcu_scheduler_active; + return !rcu_scheduler_active || preempt_count() != 0; } +#else /* #ifdef CONFIG_PREEMPT */ +static inline int rcu_read_lock_sched_held(void) +{ + return 1; +} +#endif /* #else #ifdef CONFIG_PREEMPT */ #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -188,7 +215,7 @@ static inline int rcu_read_lock_sched_held(void) */ #define rcu_dereference_check(p, c) \ ({ \ - if (debug_locks && !(c)) \ + if (debug_lockdep_rcu_enabled() && !(c)) \ lockdep_rcu_dereference(__FILE__, __LINE__); \ rcu_dereference_raw(p); \ }) diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 71e0b00b6f2c..bc2994ed66e1 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -29,25 +29,25 @@ do { \ #endif #ifdef CONFIG_DEBUG_SPINLOCK - extern void do_raw_read_lock(rwlock_t *lock); + extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock); #define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock) extern int do_raw_read_trylock(rwlock_t *lock); - extern void do_raw_read_unlock(rwlock_t *lock); - extern void do_raw_write_lock(rwlock_t *lock); + extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock); + extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock); #define do_raw_write_lock_flags(lock, flags) do_raw_write_lock(lock) extern int do_raw_write_trylock(rwlock_t *lock); - extern void do_raw_write_unlock(rwlock_t *lock); + extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock); #else -# define do_raw_read_lock(rwlock) arch_read_lock(&(rwlock)->raw_lock) +# define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) # define do_raw_read_lock_flags(lock, flags) \ - arch_read_lock_flags(&(lock)->raw_lock, *(flags)) + do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0) # define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock) -# define do_raw_read_unlock(rwlock) arch_read_unlock(&(rwlock)->raw_lock) -# define do_raw_write_lock(rwlock) arch_write_lock(&(rwlock)->raw_lock) +# define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) +# define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0) # define do_raw_write_lock_flags(lock, flags) \ - arch_write_lock_flags(&(lock)->raw_lock, *(flags)) + do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0) # define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock) -# define do_raw_write_unlock(rwlock) arch_write_unlock(&(rwlock)->raw_lock) +# define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) #endif #define read_can_lock(rwlock) arch_read_can_lock(&(rwlock)->raw_lock) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d70ff802da2..dad7f668ebf7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -258,6 +258,10 @@ extern spinlock_t mmlist_lock; struct task_struct; +#ifdef CONFIG_PROVE_RCU +extern int lockdep_tasklist_lock_is_held(void); +#endif /* #ifdef CONFIG_PROVE_RCU */ + extern void sched_init(void); extern void sched_init_smp(void); extern asmlinkage void schedule_tail(struct task_struct *prev); diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 86088213334a..89fac6a3f78b 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -128,19 +128,21 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } #define raw_spin_unlock_wait(lock) arch_spin_unlock_wait(&(lock)->raw_lock) #ifdef CONFIG_DEBUG_SPINLOCK - extern void do_raw_spin_lock(raw_spinlock_t *lock); + extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); #define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock) extern int do_raw_spin_trylock(raw_spinlock_t *lock); - extern void do_raw_spin_unlock(raw_spinlock_t *lock); + extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock); #else -static inline void do_raw_spin_lock(raw_spinlock_t *lock) +static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) { + __acquire(lock); arch_spin_lock(&lock->raw_lock); } static inline void -do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) +do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock) { + __acquire(lock); arch_spin_lock_flags(&lock->raw_lock, *flags); } @@ -149,9 +151,10 @@ static inline int do_raw_spin_trylock(raw_spinlock_t *lock) return arch_spin_trylock(&(lock)->raw_lock); } -static inline void do_raw_spin_unlock(raw_spinlock_t *lock) +static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) { arch_spin_unlock(&lock->raw_lock); + __release(lock); } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0804cd594803..601ad7744247 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -699,9 +699,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * __cpu = smp_processor_id(); * * if (in_nmi()) - * trace_buf = rcu_dereference(perf_trace_buf_nmi); + * trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); * else - * trace_buf = rcu_dereference(perf_trace_buf); + * trace_buf = rcu_dereference_sched(perf_trace_buf); * * if (!trace_buf) * goto end; diff --git a/kernel/exit.c b/kernel/exit.c index ce1e48c2d93d..cce59cb5ee6a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -87,7 +87,7 @@ static void __exit_signal(struct task_struct *tsk) sighand = rcu_dereference_check(tsk->sighand, rcu_read_lock_held() || - lockdep_is_held(&tasklist_lock)); + lockdep_tasklist_lock_is_held()); spin_lock(&sighand->siglock); posix_cpu_timers_exit(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 1beb6c303c41..4799c5f0e6d0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -86,7 +86,14 @@ int max_threads; /* tunable limit on nr_threads */ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ -EXPORT_SYMBOL_GPL(tasklist_lock); + +#ifdef CONFIG_PROVE_RCU +int lockdep_tasklist_lock_is_held(void) +{ + return lockdep_is_held(&tasklist_lock); +} +EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held); +#endif /* #ifdef CONFIG_PROVE_RCU */ int nr_processes(void) { diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 0c30d0455de1..681bc2e1e187 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3822,6 +3822,7 @@ void lockdep_rcu_dereference(const char *file, const int line) printk("%s:%d invoked rcu_dereference_check() without protection!\n", file, line); printk("\nother info that might help us debug this:\n\n"); + printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); dump_stack(); diff --git a/kernel/pid.c b/kernel/pid.c index 86b296943e5f..aebb30d9c233 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -367,7 +367,9 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)); + first = rcu_dereference_check(pid->tasks[type].first, + rcu_read_lock_held() || + lockdep_tasklist_lock_is_held()); if (first) result = hlist_entry(first, struct task_struct, pids[(type)].node); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 1439eb504c22..4a525a30e08e 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -246,12 +246,21 @@ struct rcu_data { #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ #ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ -#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ - /* to take at least one */ - /* scheduling clock irq */ - /* before ratting on them. */ + +#ifdef CONFIG_PROVE_RCU +#define RCU_STALL_DELAY_DELTA (5 * HZ) +#else +#define RCU_STALL_DELAY_DELTA 0 +#endif + +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA) + /* for rsp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA) + /* for rsp->jiffies_stall */ +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ + /* to take at least one */ + /* scheduling clock irq */ + /* before ratting on them. */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 464ad2cdee00..79b53bda8943 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1010,6 +1010,10 @@ int rcu_needs_cpu(int cpu) int c = 0; int thatcpu; + /* Check for being in the holdoff period. */ + if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) + return rcu_needs_cpu_quick_check(cpu); + /* Don't bother unless we are the last non-dyntick-idle CPU. */ for_each_cpu_not(thatcpu, nohz_cpu_mask) if (thatcpu != cpu) { @@ -1041,10 +1045,8 @@ int rcu_needs_cpu(int cpu) } /* If RCU callbacks are still pending, RCU still needs this CPU. */ - if (c) { + if (c) raise_softirq(RCU_SOFTIRQ); - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; - } return c; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3e1fd96c6cf9..5a5ea2cd924f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3476,7 +3476,7 @@ static void run_rebalance_domains(struct softirq_action *h) static inline int on_null_domain(int cpu) { - return !rcu_dereference(cpu_rq(cpu)->sd); + return !rcu_dereference_sched(cpu_rq(cpu)->sd); } /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bb53edbb5c8c..d9062f5cc0c0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -84,18 +85,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; +/* + * Traverse the ftrace_list, invoking all entries. The reason that we + * can use rcu_dereference_raw() is that elements removed from this list + * are simply leaked, so there is no need to interact with a grace-period + * mechanism. The rcu_dereference_raw() calls are needed to handle + * concurrent insertions into the ftrace_list. + * + * Silly Alpha and silly pointer-speculation compiler optimizations! + */ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { - struct ftrace_ops *op = ftrace_list; - - /* in case someone actually ports this to alpha! */ - read_barrier_depends(); + struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/ while (op != &ftrace_list_end) { - /* silly alpha */ - read_barrier_depends(); op->func(ip, parent_ip); - op = op->next; + op = rcu_dereference_raw(op->next); /*see above*/ }; } @@ -150,8 +155,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) * the ops->next pointer is valid before another CPU sees * the ops pointer included into the ftrace_list. */ - smp_wmb(); - ftrace_list = ops; + rcu_assign_pointer(ftrace_list, ops); if (ftrace_enabled) { ftrace_func_t func; diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index f0d693005075..c1cc3ab633de 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -138,9 +138,9 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, cpu = smp_processor_id(); if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); + trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); else - trace_buf = rcu_dereference(perf_trace_buf); + trace_buf = rcu_dereference_sched(perf_trace_buf); if (!trace_buf) goto err; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index bda230e52acd..643f66e10187 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1756,10 +1756,12 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) if (!new) return ERR_PTR(-ENOMEM); + rcu_read_lock(); if (current_cpuset_is_being_rebound()) { nodemask_t mems = cpuset_mems_allowed(current); mpol_rebind_policy(old, &mems); } + rcu_read_unlock(); *new = *old; atomic_set(&new->refcnt, 1); return new;