mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-29 13:53:33 +00:00
The x86 MM changes in this cycle were:
- Implement concurrent TLB flushes, which overlaps the local TLB flush with the remote TLB flush. In testing this improved sysbench performance measurably by a couple of percentage points, especially if TLB-heavy security mitigations are active. - Further micro-optimizations to improve the performance of TLB flushes. Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmCKbNcRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1hjYBAAsyNUa/gOu0g6/Cx8R86w9HtHHmm5vso/ 6nJjWj2fd2qJ9JShlddxvXEMeXtPTYabVWQkiiriFMuofk6JeKnlHm1Jzl6keABX OQFwjIFeNASPRcdXvuuYPOVWAJJdr2oL9QUr6OOK1ccQJTz/Cd0zA+VQ5YqcsCon yaWbkxELwKXpgql+qt66eAZ6Q2Y1TKXyrTW7ZgxQi0yeeWqMaEOub0/oyS7Ax1Rg qEJMwm1prb76NPzeqR/G3e4KTrDZfQ/B/KnSsz36GTJpl4eye6XqWDUgm1nAGNIc 5dbc4Vx7JtZsUOuC0AmzWb3hsDyzVcN/lQvijdZ2RsYR3gvuYGaBhKqExqV0XH6P oqaWOKWCz+LqWbsgJmxCpqkt1LZl5+VUOcfJ97WkIS7DyIPtSHTzQXbBMZqKLeat mn5UcKYB2Gi7wsUPv6VC2ChKbDqN0VT8G86XbYylGo4BE46KoZKPUNY/QWKLUPd6 0UKcVeNM2HFyf1C73p/tO/z7hzu3qLuMMnsphP6/c2pKLpdgawEXgbnVKNId1B/c NrzyhTvVaMt+Um28bBRhHONIlzPJwWcnZbdY7NqMnu+LBKQ68cL/h4FOIV/RDLNb GJLgfAr8fIw/zIpqYuFHiiMNo9wWqVtZko1MvXhGceXUL69QuzTra2XR/6aDxkPf 6gQVesetTvo= =3Cyp -----END PGP SIGNATURE----- Merge tag 'x86-mm-2021-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 tlb updates from Ingo Molnar: "The x86 MM changes in this cycle were: - Implement concurrent TLB flushes, which overlaps the local TLB flush with the remote TLB flush. In testing this improved sysbench performance measurably by a couple of percentage points, especially if TLB-heavy security mitigations are active. 
- Further micro-optimizations to improve the performance of TLB flushes" * tag 'x86-mm-2021-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: smp: Micro-optimize smp_call_function_many_cond() smp: Inline on_each_cpu_cond() and on_each_cpu() x86/mm/tlb: Remove unnecessary uses of the inline keyword cpumask: Mark functions as pure x86/mm/tlb: Do not make is_lazy dirty for no reason x86/mm/tlb: Privatize cpu_tlbstate x86/mm/tlb: Flush remote and local TLBs concurrently x86/mm/tlb: Open-code on_each_cpu_cond_mask() for tlb_is_not_lazy() x86/mm/tlb: Unify flush_tlb_func_local() and flush_tlb_func_remote() smp: Run functions concurrently in smp_call_function_many_cond()
This commit is contained in:
commit
635de956a7
16 changed files with 294 additions and 302 deletions
|
@ -52,7 +52,7 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
|
||||||
return gva_n - offset;
|
return gva_n - offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hyperv_flush_tlb_others(const struct cpumask *cpus,
|
static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
|
||||||
const struct flush_tlb_info *info)
|
const struct flush_tlb_info *info)
|
||||||
{
|
{
|
||||||
int cpu, vcpu, gva_n, max_gvas;
|
int cpu, vcpu, gva_n, max_gvas;
|
||||||
|
@ -61,7 +61,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
|
||||||
u64 status;
|
u64 status;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
trace_hyperv_mmu_flush_tlb_others(cpus, info);
|
trace_hyperv_mmu_flush_tlb_multi(cpus, info);
|
||||||
|
|
||||||
if (!hv_hypercall_pg)
|
if (!hv_hypercall_pg)
|
||||||
goto do_native;
|
goto do_native;
|
||||||
|
@ -164,7 +164,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
|
||||||
if (hv_result_success(status))
|
if (hv_result_success(status))
|
||||||
return;
|
return;
|
||||||
do_native:
|
do_native:
|
||||||
native_flush_tlb_others(cpus, info);
|
native_flush_tlb_multi(cpus, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
|
static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
|
||||||
|
@ -239,6 +239,6 @@ void hyperv_setup_mmu_ops(void)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
pr_info("Using hypercall for remote TLB flush\n");
|
pr_info("Using hypercall for remote TLB flush\n");
|
||||||
pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
|
pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
|
||||||
pv_ops.mmu.tlb_remove_table = tlb_remove_table;
|
pv_ops.mmu.tlb_remove_table = tlb_remove_table;
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,7 +63,7 @@ static inline void slow_down_io(void)
|
||||||
void native_flush_tlb_local(void);
|
void native_flush_tlb_local(void);
|
||||||
void native_flush_tlb_global(void);
|
void native_flush_tlb_global(void);
|
||||||
void native_flush_tlb_one_user(unsigned long addr);
|
void native_flush_tlb_one_user(unsigned long addr);
|
||||||
void native_flush_tlb_others(const struct cpumask *cpumask,
|
void native_flush_tlb_multi(const struct cpumask *cpumask,
|
||||||
const struct flush_tlb_info *info);
|
const struct flush_tlb_info *info);
|
||||||
|
|
||||||
static inline void __flush_tlb_local(void)
|
static inline void __flush_tlb_local(void)
|
||||||
|
@ -81,10 +81,10 @@ static inline void __flush_tlb_one_user(unsigned long addr)
|
||||||
PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
|
PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __flush_tlb_others(const struct cpumask *cpumask,
|
static inline void __flush_tlb_multi(const struct cpumask *cpumask,
|
||||||
const struct flush_tlb_info *info)
|
const struct flush_tlb_info *info)
|
||||||
{
|
{
|
||||||
PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info);
|
PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
|
static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
|
||||||
|
|
|
@ -161,7 +161,7 @@ struct pv_mmu_ops {
|
||||||
void (*flush_tlb_user)(void);
|
void (*flush_tlb_user)(void);
|
||||||
void (*flush_tlb_kernel)(void);
|
void (*flush_tlb_kernel)(void);
|
||||||
void (*flush_tlb_one_user)(unsigned long addr);
|
void (*flush_tlb_one_user)(unsigned long addr);
|
||||||
void (*flush_tlb_others)(const struct cpumask *cpus,
|
void (*flush_tlb_multi)(const struct cpumask *cpus,
|
||||||
const struct flush_tlb_info *info);
|
const struct flush_tlb_info *info);
|
||||||
|
|
||||||
void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
|
void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
|
||||||
|
|
|
@ -89,23 +89,6 @@ struct tlb_state {
|
||||||
u16 loaded_mm_asid;
|
u16 loaded_mm_asid;
|
||||||
u16 next_asid;
|
u16 next_asid;
|
||||||
|
|
||||||
/*
|
|
||||||
* We can be in one of several states:
|
|
||||||
*
|
|
||||||
* - Actively using an mm. Our CPU's bit will be set in
|
|
||||||
* mm_cpumask(loaded_mm) and is_lazy == false;
|
|
||||||
*
|
|
||||||
* - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
|
|
||||||
* will not be set in mm_cpumask(&init_mm) and is_lazy == false.
|
|
||||||
*
|
|
||||||
* - Lazily using a real mm. loaded_mm != &init_mm, our bit
|
|
||||||
* is set in mm_cpumask(loaded_mm), but is_lazy == true.
|
|
||||||
* We're heuristically guessing that the CR3 load we
|
|
||||||
* skipped more than makes up for the overhead added by
|
|
||||||
* lazy mode.
|
|
||||||
*/
|
|
||||||
bool is_lazy;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If set we changed the page tables in such a way that we
|
* If set we changed the page tables in such a way that we
|
||||||
* needed an invalidation of all contexts (aka. PCIDs / ASIDs).
|
* needed an invalidation of all contexts (aka. PCIDs / ASIDs).
|
||||||
|
@ -151,7 +134,27 @@ struct tlb_state {
|
||||||
*/
|
*/
|
||||||
struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
|
struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
|
||||||
};
|
};
|
||||||
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
|
DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);
|
||||||
|
|
||||||
|
struct tlb_state_shared {
|
||||||
|
/*
|
||||||
|
* We can be in one of several states:
|
||||||
|
*
|
||||||
|
* - Actively using an mm. Our CPU's bit will be set in
|
||||||
|
* mm_cpumask(loaded_mm) and is_lazy == false;
|
||||||
|
*
|
||||||
|
* - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
|
||||||
|
* will not be set in mm_cpumask(&init_mm) and is_lazy == false.
|
||||||
|
*
|
||||||
|
* - Lazily using a real mm. loaded_mm != &init_mm, our bit
|
||||||
|
* is set in mm_cpumask(loaded_mm), but is_lazy == true.
|
||||||
|
* We're heuristically guessing that the CR3 load we
|
||||||
|
* skipped more than makes up for the overhead added by
|
||||||
|
* lazy mode.
|
||||||
|
*/
|
||||||
|
bool is_lazy;
|
||||||
|
};
|
||||||
|
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
|
||||||
|
|
||||||
bool nmi_uaccess_okay(void);
|
bool nmi_uaccess_okay(void);
|
||||||
#define nmi_uaccess_okay nmi_uaccess_okay
|
#define nmi_uaccess_okay nmi_uaccess_okay
|
||||||
|
@ -175,7 +178,7 @@ extern void initialize_tlbstate_and_flush(void);
|
||||||
* - flush_tlb_page(vma, vmaddr) flushes one page
|
* - flush_tlb_page(vma, vmaddr) flushes one page
|
||||||
* - flush_tlb_range(vma, start, end) flushes a range of pages
|
* - flush_tlb_range(vma, start, end) flushes a range of pages
|
||||||
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
|
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
|
||||||
* - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
|
* - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
|
||||||
*
|
*
|
||||||
* ..but the i386 has somewhat limited tlb flushing capabilities,
|
* ..but the i386 has somewhat limited tlb flushing capabilities,
|
||||||
* and page-granular flushes are available only on i486 and up.
|
* and page-granular flushes are available only on i486 and up.
|
||||||
|
@ -201,14 +204,15 @@ struct flush_tlb_info {
|
||||||
unsigned long start;
|
unsigned long start;
|
||||||
unsigned long end;
|
unsigned long end;
|
||||||
u64 new_tlb_gen;
|
u64 new_tlb_gen;
|
||||||
unsigned int stride_shift;
|
unsigned int initiating_cpu;
|
||||||
bool freed_tables;
|
u8 stride_shift;
|
||||||
|
u8 freed_tables;
|
||||||
};
|
};
|
||||||
|
|
||||||
void flush_tlb_local(void);
|
void flush_tlb_local(void);
|
||||||
void flush_tlb_one_user(unsigned long addr);
|
void flush_tlb_one_user(unsigned long addr);
|
||||||
void flush_tlb_one_kernel(unsigned long addr);
|
void flush_tlb_one_kernel(unsigned long addr);
|
||||||
void flush_tlb_others(const struct cpumask *cpumask,
|
void flush_tlb_multi(const struct cpumask *cpumask,
|
||||||
const struct flush_tlb_info *info);
|
const struct flush_tlb_info *info);
|
||||||
|
|
||||||
#ifdef CONFIG_PARAVIRT
|
#ifdef CONFIG_PARAVIRT
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_HYPERV)
|
#if IS_ENABLED(CONFIG_HYPERV)
|
||||||
|
|
||||||
TRACE_EVENT(hyperv_mmu_flush_tlb_others,
|
TRACE_EVENT(hyperv_mmu_flush_tlb_multi,
|
||||||
TP_PROTO(const struct cpumask *cpus,
|
TP_PROTO(const struct cpumask *cpus,
|
||||||
const struct flush_tlb_info *info),
|
const struct flush_tlb_info *info),
|
||||||
TP_ARGS(cpus, info),
|
TP_ARGS(cpus, info),
|
||||||
|
|
|
@ -706,7 +706,7 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
|
||||||
* with a stale address space WITHOUT being in lazy mode after
|
* with a stale address space WITHOUT being in lazy mode after
|
||||||
* restoring the previous mm.
|
* restoring the previous mm.
|
||||||
*/
|
*/
|
||||||
if (this_cpu_read(cpu_tlbstate.is_lazy))
|
if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
|
||||||
leave_mm(smp_processor_id());
|
leave_mm(smp_processor_id());
|
||||||
|
|
||||||
temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
|
temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
|
||||||
|
|
|
@ -613,7 +613,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void kvm_flush_tlb_others(const struct cpumask *cpumask,
|
static void kvm_flush_tlb_multi(const struct cpumask *cpumask,
|
||||||
const struct flush_tlb_info *info)
|
const struct flush_tlb_info *info)
|
||||||
{
|
{
|
||||||
u8 state;
|
u8 state;
|
||||||
|
@ -627,6 +627,11 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
|
||||||
* queue flush_on_enter for pre-empted vCPUs
|
* queue flush_on_enter for pre-empted vCPUs
|
||||||
*/
|
*/
|
||||||
for_each_cpu(cpu, flushmask) {
|
for_each_cpu(cpu, flushmask) {
|
||||||
|
/*
|
||||||
|
* The local vCPU is never preempted, so we do not explicitly
|
||||||
|
* skip the check for the local vCPU - it will never be cleared from
|
||||||
|
* flushmask.
|
||||||
|
*/
|
||||||
src = &per_cpu(steal_time, cpu);
|
src = &per_cpu(steal_time, cpu);
|
||||||
state = READ_ONCE(src->preempted);
|
state = READ_ONCE(src->preempted);
|
||||||
if ((state & KVM_VCPU_PREEMPTED)) {
|
if ((state & KVM_VCPU_PREEMPTED)) {
|
||||||
|
@ -636,7 +641,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
native_flush_tlb_others(flushmask, info);
|
native_flush_tlb_multi(flushmask, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init kvm_guest_init(void)
|
static void __init kvm_guest_init(void)
|
||||||
|
@ -654,7 +659,7 @@ static void __init kvm_guest_init(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pv_tlb_flush_supported()) {
|
if (pv_tlb_flush_supported()) {
|
||||||
pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
|
pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi;
|
||||||
pv_ops.mmu.tlb_remove_table = tlb_remove_table;
|
pv_ops.mmu.tlb_remove_table = tlb_remove_table;
|
||||||
pr_info("KVM setup pv remote TLB flush\n");
|
pr_info("KVM setup pv remote TLB flush\n");
|
||||||
}
|
}
|
||||||
|
|
|
@ -291,7 +291,7 @@ struct paravirt_patch_template pv_ops = {
|
||||||
.mmu.flush_tlb_user = native_flush_tlb_local,
|
.mmu.flush_tlb_user = native_flush_tlb_local,
|
||||||
.mmu.flush_tlb_kernel = native_flush_tlb_global,
|
.mmu.flush_tlb_kernel = native_flush_tlb_global,
|
||||||
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
|
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
|
||||||
.mmu.flush_tlb_others = native_flush_tlb_others,
|
.mmu.flush_tlb_multi = native_flush_tlb_multi,
|
||||||
.mmu.tlb_remove_table =
|
.mmu.tlb_remove_table =
|
||||||
(void (*)(struct mmu_gather *, void *))tlb_remove_page,
|
(void (*)(struct mmu_gather *, void *))tlb_remove_page,
|
||||||
|
|
||||||
|
|
|
@ -1017,7 +1017,7 @@ void __init zone_sizes_init(void)
|
||||||
free_area_init(max_zone_pfns);
|
free_area_init(max_zone_pfns);
|
||||||
}
|
}
|
||||||
|
|
||||||
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
|
__visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = {
|
||||||
.loaded_mm = &init_mm,
|
.loaded_mm = &init_mm,
|
||||||
.next_asid = 1,
|
.next_asid = 1,
|
||||||
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
|
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
# define __flush_tlb_local native_flush_tlb_local
|
# define __flush_tlb_local native_flush_tlb_local
|
||||||
# define __flush_tlb_global native_flush_tlb_global
|
# define __flush_tlb_global native_flush_tlb_global
|
||||||
# define __flush_tlb_one_user(addr) native_flush_tlb_one_user(addr)
|
# define __flush_tlb_one_user(addr) native_flush_tlb_one_user(addr)
|
||||||
# define __flush_tlb_others(msk, info) native_flush_tlb_others(msk, info)
|
# define __flush_tlb_multi(msk, info) native_flush_tlb_multi(msk, info)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -300,7 +300,7 @@ void leave_mm(int cpu)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Warn if we're not lazy. */
|
/* Warn if we're not lazy. */
|
||||||
WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
|
WARN_ON(!this_cpu_read(cpu_tlbstate_shared.is_lazy));
|
||||||
|
|
||||||
switch_mm(NULL, &init_mm, NULL);
|
switch_mm(NULL, &init_mm, NULL);
|
||||||
}
|
}
|
||||||
|
@ -316,7 +316,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
|
static unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
|
||||||
{
|
{
|
||||||
unsigned long next_tif = task_thread_info(next)->flags;
|
unsigned long next_tif = task_thread_info(next)->flags;
|
||||||
unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
|
unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
|
||||||
|
@ -424,7 +424,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||||
{
|
{
|
||||||
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
|
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
|
||||||
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
|
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
|
||||||
bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
|
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
|
||||||
unsigned cpu = smp_processor_id();
|
unsigned cpu = smp_processor_id();
|
||||||
u64 next_tlb_gen;
|
u64 next_tlb_gen;
|
||||||
bool need_flush;
|
bool need_flush;
|
||||||
|
@ -439,7 +439,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||||
* NB: leave_mm() calls us with prev == NULL and tsk == NULL.
|
* NB: leave_mm() calls us with prev == NULL and tsk == NULL.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* We don't want flush_tlb_func_* to run concurrently with us. */
|
/* We don't want flush_tlb_func() to run concurrently with us. */
|
||||||
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
|
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
|
||||||
WARN_ON_ONCE(!irqs_disabled());
|
WARN_ON_ONCE(!irqs_disabled());
|
||||||
|
|
||||||
|
@ -469,7 +469,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||||
__flush_tlb_all();
|
__flush_tlb_all();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
this_cpu_write(cpu_tlbstate.is_lazy, false);
|
if (was_lazy)
|
||||||
|
this_cpu_write(cpu_tlbstate_shared.is_lazy, false);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The membarrier system call requires a full memory barrier and
|
* The membarrier system call requires a full memory barrier and
|
||||||
|
@ -490,7 +491,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||||
/*
|
/*
|
||||||
* Even in lazy TLB mode, the CPU should stay set in the
|
* Even in lazy TLB mode, the CPU should stay set in the
|
||||||
* mm_cpumask. The TLB shootdown code can figure out from
|
* mm_cpumask. The TLB shootdown code can figure out from
|
||||||
* from cpu_tlbstate.is_lazy whether or not to send an IPI.
|
* cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
|
||||||
*/
|
*/
|
||||||
if (WARN_ON_ONCE(real_prev != &init_mm &&
|
if (WARN_ON_ONCE(real_prev != &init_mm &&
|
||||||
!cpumask_test_cpu(cpu, mm_cpumask(next))))
|
!cpumask_test_cpu(cpu, mm_cpumask(next))))
|
||||||
|
@ -598,7 +599,7 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
||||||
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
|
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
this_cpu_write(cpu_tlbstate.is_lazy, true);
|
this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -647,14 +648,13 @@ void initialize_tlbstate_and_flush(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* flush_tlb_func_common()'s memory ordering requirement is that any
|
* flush_tlb_func()'s memory ordering requirement is that any
|
||||||
* TLB fills that happen after we flush the TLB are ordered after we
|
* TLB fills that happen after we flush the TLB are ordered after we
|
||||||
* read active_mm's tlb_gen. We don't need any explicit barriers
|
* read active_mm's tlb_gen. We don't need any explicit barriers
|
||||||
* because all x86 flush operations are serializing and the
|
* because all x86 flush operations are serializing and the
|
||||||
* atomic64_read operation won't be reordered by the compiler.
|
* atomic64_read operation won't be reordered by the compiler.
|
||||||
*/
|
*/
|
||||||
static void flush_tlb_func_common(const struct flush_tlb_info *f,
|
static void flush_tlb_func(void *info)
|
||||||
bool local, enum tlb_flush_reason reason)
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* We have three different tlb_gen values in here. They are:
|
* We have three different tlb_gen values in here. They are:
|
||||||
|
@ -665,28 +665,40 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
|
||||||
* - f->new_tlb_gen: the generation that the requester of the flush
|
* - f->new_tlb_gen: the generation that the requester of the flush
|
||||||
* wants us to catch up to.
|
* wants us to catch up to.
|
||||||
*/
|
*/
|
||||||
|
const struct flush_tlb_info *f = info;
|
||||||
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
|
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
|
||||||
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
|
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
|
||||||
u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
|
u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
|
||||||
u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
|
u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
|
||||||
|
bool local = smp_processor_id() == f->initiating_cpu;
|
||||||
|
unsigned long nr_invalidate = 0;
|
||||||
|
|
||||||
/* This code cannot presently handle being reentered. */
|
/* This code cannot presently handle being reentered. */
|
||||||
VM_WARN_ON(!irqs_disabled());
|
VM_WARN_ON(!irqs_disabled());
|
||||||
|
|
||||||
|
if (!local) {
|
||||||
|
inc_irq_stat(irq_tlb_count);
|
||||||
|
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
|
||||||
|
|
||||||
|
/* Can only happen on remote CPUs */
|
||||||
|
if (f->mm && f->mm != loaded_mm)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (unlikely(loaded_mm == &init_mm))
|
if (unlikely(loaded_mm == &init_mm))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
|
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
|
||||||
loaded_mm->context.ctx_id);
|
loaded_mm->context.ctx_id);
|
||||||
|
|
||||||
if (this_cpu_read(cpu_tlbstate.is_lazy)) {
|
if (this_cpu_read(cpu_tlbstate_shared.is_lazy)) {
|
||||||
/*
|
/*
|
||||||
* We're in lazy mode. We need to at least flush our
|
* We're in lazy mode. We need to at least flush our
|
||||||
* paging-structure cache to avoid speculatively reading
|
* paging-structure cache to avoid speculatively reading
|
||||||
* garbage into our TLB. Since switching to init_mm is barely
|
* garbage into our TLB. Since switching to init_mm is barely
|
||||||
* slower than a minimal flush, just switch to init_mm.
|
* slower than a minimal flush, just switch to init_mm.
|
||||||
*
|
*
|
||||||
* This should be rare, with native_flush_tlb_others skipping
|
* This should be rare, with native_flush_tlb_multi() skipping
|
||||||
* IPIs to lazy TLB mode CPUs.
|
* IPIs to lazy TLB mode CPUs.
|
||||||
*/
|
*/
|
||||||
switch_mm_irqs_off(NULL, &init_mm, NULL);
|
switch_mm_irqs_off(NULL, &init_mm, NULL);
|
||||||
|
@ -700,8 +712,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
|
||||||
* be handled can catch us all the way up, leaving no work for
|
* be handled can catch us all the way up, leaving no work for
|
||||||
* the second flush.
|
* the second flush.
|
||||||
*/
|
*/
|
||||||
trace_tlb_flush(reason, 0);
|
goto done;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
|
WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
|
||||||
|
@ -748,56 +759,54 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
|
||||||
f->new_tlb_gen == local_tlb_gen + 1 &&
|
f->new_tlb_gen == local_tlb_gen + 1 &&
|
||||||
f->new_tlb_gen == mm_tlb_gen) {
|
f->new_tlb_gen == mm_tlb_gen) {
|
||||||
/* Partial flush */
|
/* Partial flush */
|
||||||
unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
|
|
||||||
unsigned long addr = f->start;
|
unsigned long addr = f->start;
|
||||||
|
|
||||||
|
nr_invalidate = (f->end - f->start) >> f->stride_shift;
|
||||||
|
|
||||||
while (addr < f->end) {
|
while (addr < f->end) {
|
||||||
flush_tlb_one_user(addr);
|
flush_tlb_one_user(addr);
|
||||||
addr += 1UL << f->stride_shift;
|
addr += 1UL << f->stride_shift;
|
||||||
}
|
}
|
||||||
if (local)
|
if (local)
|
||||||
count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
|
count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
|
||||||
trace_tlb_flush(reason, nr_invalidate);
|
|
||||||
} else {
|
} else {
|
||||||
/* Full flush. */
|
/* Full flush. */
|
||||||
|
nr_invalidate = TLB_FLUSH_ALL;
|
||||||
|
|
||||||
flush_tlb_local();
|
flush_tlb_local();
|
||||||
if (local)
|
if (local)
|
||||||
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
|
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
|
||||||
trace_tlb_flush(reason, TLB_FLUSH_ALL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Both paths above update our state to mm_tlb_gen. */
|
/* Both paths above update our state to mm_tlb_gen. */
|
||||||
this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
|
this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
|
||||||
|
|
||||||
|
/* Tracing is done in a unified manner to reduce the code size */
|
||||||
|
done:
|
||||||
|
trace_tlb_flush(!local ? TLB_REMOTE_SHOOTDOWN :
|
||||||
|
(f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN :
|
||||||
|
TLB_LOCAL_MM_SHOOTDOWN,
|
||||||
|
nr_invalidate);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason)
|
static bool tlb_is_not_lazy(int cpu)
|
||||||
{
|
{
|
||||||
const struct flush_tlb_info *f = info;
|
return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
|
||||||
|
|
||||||
flush_tlb_func_common(f, true, reason);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void flush_tlb_func_remote(void *info)
|
static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
|
||||||
{
|
|
||||||
const struct flush_tlb_info *f = info;
|
|
||||||
|
|
||||||
inc_irq_stat(irq_tlb_count);
|
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
|
||||||
|
EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared);
|
||||||
|
|
||||||
if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
|
STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
|
||||||
return;
|
|
||||||
|
|
||||||
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
|
|
||||||
flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool tlb_is_not_lazy(int cpu, void *data)
|
|
||||||
{
|
|
||||||
return !per_cpu(cpu_tlbstate.is_lazy, cpu);
|
|
||||||
}
|
|
||||||
|
|
||||||
STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
|
|
||||||
const struct flush_tlb_info *info)
|
const struct flush_tlb_info *info)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Do accounting and tracing. Note that there are (and have always been)
|
||||||
|
* cases in which a remote TLB flush will be traced, but eventually
|
||||||
|
* would not happen.
|
||||||
|
*/
|
||||||
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
|
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
|
||||||
if (info->end == TLB_FLUSH_ALL)
|
if (info->end == TLB_FLUSH_ALL)
|
||||||
trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
|
trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
|
||||||
|
@ -815,18 +824,42 @@ STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
|
||||||
* up on the new contents of what used to be page tables, while
|
* up on the new contents of what used to be page tables, while
|
||||||
* doing a speculative memory access.
|
* doing a speculative memory access.
|
||||||
*/
|
*/
|
||||||
if (info->freed_tables)
|
if (info->freed_tables) {
|
||||||
smp_call_function_many(cpumask, flush_tlb_func_remote,
|
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
|
||||||
(void *)info, 1);
|
} else {
|
||||||
else
|
/*
|
||||||
on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
|
* Although we could have used on_each_cpu_cond_mask(),
|
||||||
(void *)info, 1, cpumask);
|
* open-coding it has performance advantages, as it eliminates
|
||||||
|
* the need for indirect calls or retpolines. In addition, it
|
||||||
|
* allows using a designated cpumask for evaluating the
|
||||||
|
* condition, instead of allocating one.
|
||||||
|
*
|
||||||
|
* This code works under the assumption that there are no nested
|
||||||
|
* TLB flushes, an assumption that is already made in
|
||||||
|
* flush_tlb_mm_range().
|
||||||
|
*
|
||||||
|
* cond_cpumask is logically a stack-local variable, but it is
|
||||||
|
* more efficient to have it off the stack and not to allocate
|
||||||
|
* it on demand. Preemption is disabled and this code is
|
||||||
|
* non-reentrant.
|
||||||
|
*/
|
||||||
|
struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask);
|
||||||
|
int cpu;
|
||||||
|
|
||||||
|
cpumask_clear(cond_cpumask);
|
||||||
|
|
||||||
|
for_each_cpu(cpu, cpumask) {
|
||||||
|
if (tlb_is_not_lazy(cpu))
|
||||||
|
__cpumask_set_cpu(cpu, cond_cpumask);
|
||||||
|
}
|
||||||
|
on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void flush_tlb_others(const struct cpumask *cpumask,
|
void flush_tlb_multi(const struct cpumask *cpumask,
|
||||||
const struct flush_tlb_info *info)
|
const struct flush_tlb_info *info)
|
||||||
{
|
{
|
||||||
__flush_tlb_others(cpumask, info);
|
__flush_tlb_multi(cpumask, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -847,7 +880,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);
|
||||||
static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
|
static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
|
static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
|
||||||
unsigned long start, unsigned long end,
|
unsigned long start, unsigned long end,
|
||||||
unsigned int stride_shift, bool freed_tables,
|
unsigned int stride_shift, bool freed_tables,
|
||||||
u64 new_tlb_gen)
|
u64 new_tlb_gen)
|
||||||
|
@ -869,11 +902,12 @@ static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
|
||||||
info->stride_shift = stride_shift;
|
info->stride_shift = stride_shift;
|
||||||
info->freed_tables = freed_tables;
|
info->freed_tables = freed_tables;
|
||||||
info->new_tlb_gen = new_tlb_gen;
|
info->new_tlb_gen = new_tlb_gen;
|
||||||
|
info->initiating_cpu = smp_processor_id();
|
||||||
|
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void put_flush_tlb_info(void)
|
static void put_flush_tlb_info(void)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_DEBUG_VM
|
#ifdef CONFIG_DEBUG_VM
|
||||||
/* Complete reentrancy prevention checks */
|
/* Complete reentrancy prevention checks */
|
||||||
|
@ -905,16 +939,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
|
||||||
info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
|
info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
|
||||||
new_tlb_gen);
|
new_tlb_gen);
|
||||||
|
|
||||||
if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
|
/*
|
||||||
|
* flush_tlb_multi() is not optimized for the common case in which only
|
||||||
|
* a local TLB flush is needed. Optimize this use-case by calling
|
||||||
|
* flush_tlb_func_local() directly in this case.
|
||||||
|
*/
|
||||||
|
if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
|
||||||
|
flush_tlb_multi(mm_cpumask(mm), info);
|
||||||
|
} else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
|
||||||
lockdep_assert_irqs_enabled();
|
lockdep_assert_irqs_enabled();
|
||||||
local_irq_disable();
|
local_irq_disable();
|
||||||
flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
|
flush_tlb_func(info);
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
|
|
||||||
flush_tlb_others(mm_cpumask(mm), info);
|
|
||||||
|
|
||||||
put_flush_tlb_info();
|
put_flush_tlb_info();
|
||||||
put_cpu();
|
put_cpu();
|
||||||
}
|
}
|
||||||
|
@ -1119,34 +1157,30 @@ void __flush_tlb_all(void)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(__flush_tlb_all);
|
EXPORT_SYMBOL_GPL(__flush_tlb_all);
|
||||||
|
|
||||||
/*
|
|
||||||
* arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm.
|
|
||||||
* This means that the 'struct flush_tlb_info' that describes which mappings to
|
|
||||||
* flush is actually fixed. We therefore set a single fixed struct and use it in
|
|
||||||
* arch_tlbbatch_flush().
|
|
||||||
*/
|
|
||||||
static const struct flush_tlb_info full_flush_tlb_info = {
|
|
||||||
.mm = NULL,
|
|
||||||
.start = 0,
|
|
||||||
.end = TLB_FLUSH_ALL,
|
|
||||||
};
|
|
||||||
|
|
||||||
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
|
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
|
||||||
{
|
{
|
||||||
|
struct flush_tlb_info *info;
|
||||||
|
|
||||||
int cpu = get_cpu();
|
int cpu = get_cpu();
|
||||||
|
|
||||||
if (cpumask_test_cpu(cpu, &batch->cpumask)) {
|
info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
|
||||||
|
/*
|
||||||
|
* flush_tlb_multi() is not optimized for the common case in which only
|
||||||
|
* a local TLB flush is needed. Optimize this use-case by calling
|
||||||
|
* flush_tlb_func_local() directly in this case.
|
||||||
|
*/
|
||||||
|
if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
|
||||||
|
flush_tlb_multi(&batch->cpumask, info);
|
||||||
|
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
|
||||||
lockdep_assert_irqs_enabled();
|
lockdep_assert_irqs_enabled();
|
||||||
local_irq_disable();
|
local_irq_disable();
|
||||||
flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN);
|
flush_tlb_func(info);
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
|
|
||||||
flush_tlb_others(&batch->cpumask, &full_flush_tlb_info);
|
|
||||||
|
|
||||||
cpumask_clear(&batch->cpumask);
|
cpumask_clear(&batch->cpumask);
|
||||||
|
|
||||||
|
put_flush_tlb_info();
|
||||||
put_cpu();
|
put_cpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1247,7 +1247,7 @@ static void xen_flush_tlb_one_user(unsigned long addr)
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void xen_flush_tlb_others(const struct cpumask *cpus,
|
static void xen_flush_tlb_multi(const struct cpumask *cpus,
|
||||||
const struct flush_tlb_info *info)
|
const struct flush_tlb_info *info)
|
||||||
{
|
{
|
||||||
struct {
|
struct {
|
||||||
|
@ -1258,7 +1258,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
|
||||||
const size_t mc_entry_size = sizeof(args->op) +
|
const size_t mc_entry_size = sizeof(args->op) +
|
||||||
sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
|
sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
|
||||||
|
|
||||||
trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
|
trace_xen_mmu_flush_tlb_multi(cpus, info->mm, info->start, info->end);
|
||||||
|
|
||||||
if (cpumask_empty(cpus))
|
if (cpumask_empty(cpus))
|
||||||
return; /* nothing to do */
|
return; /* nothing to do */
|
||||||
|
@ -1267,9 +1267,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
|
||||||
args = mcs.args;
|
args = mcs.args;
|
||||||
args->op.arg2.vcpumask = to_cpumask(args->mask);
|
args->op.arg2.vcpumask = to_cpumask(args->mask);
|
||||||
|
|
||||||
/* Remove us, and any offline CPUS. */
|
/* Remove any offline CPUs */
|
||||||
cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
|
cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
|
||||||
cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
|
|
||||||
|
|
||||||
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
|
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
|
||||||
if (info->end != TLB_FLUSH_ALL &&
|
if (info->end != TLB_FLUSH_ALL &&
|
||||||
|
@ -2086,7 +2085,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
|
||||||
.flush_tlb_user = xen_flush_tlb,
|
.flush_tlb_user = xen_flush_tlb,
|
||||||
.flush_tlb_kernel = xen_flush_tlb,
|
.flush_tlb_kernel = xen_flush_tlb,
|
||||||
.flush_tlb_one_user = xen_flush_tlb_one_user,
|
.flush_tlb_one_user = xen_flush_tlb_one_user,
|
||||||
.flush_tlb_others = xen_flush_tlb_others,
|
.flush_tlb_multi = xen_flush_tlb_multi,
|
||||||
.tlb_remove_table = tlb_remove_table,
|
.tlb_remove_table = tlb_remove_table,
|
||||||
|
|
||||||
.pgd_alloc = xen_pgd_alloc,
|
.pgd_alloc = xen_pgd_alloc,
|
||||||
|
|
|
@ -206,7 +206,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp)
|
||||||
return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits);
|
return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int cpumask_next(int n, const struct cpumask *srcp);
|
unsigned int __pure cpumask_next(int n, const struct cpumask *srcp);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* cpumask_next_zero - get the next unset cpu in a cpumask
|
* cpumask_next_zero - get the next unset cpu in a cpumask
|
||||||
|
@ -223,8 +223,8 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
|
||||||
return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
|
return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
|
int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
|
||||||
int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
|
int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
|
||||||
unsigned int cpumask_local_spread(unsigned int i, int node);
|
unsigned int cpumask_local_spread(unsigned int i, int node);
|
||||||
int cpumask_any_and_distribute(const struct cpumask *src1p,
|
int cpumask_any_and_distribute(const struct cpumask *src1p,
|
||||||
const struct cpumask *src2p);
|
const struct cpumask *src2p);
|
||||||
|
|
|
@ -50,31 +50,53 @@ extern unsigned int total_cpus;
|
||||||
int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
|
int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
|
||||||
int wait);
|
int wait);
|
||||||
|
|
||||||
/*
|
|
||||||
* Call a function on all processors
|
|
||||||
*/
|
|
||||||
void on_each_cpu(smp_call_func_t func, void *info, int wait);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Call a function on processors specified by mask, which might include
|
|
||||||
* the local one.
|
|
||||||
*/
|
|
||||||
void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
|
|
||||||
void *info, bool wait);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Call a function on each processor for which the supplied function
|
|
||||||
* cond_func returns a positive value. This may include the local
|
|
||||||
* processor.
|
|
||||||
*/
|
|
||||||
void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
|
|
||||||
void *info, bool wait);
|
|
||||||
|
|
||||||
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||||
void *info, bool wait, const struct cpumask *mask);
|
void *info, bool wait, const struct cpumask *mask);
|
||||||
|
|
||||||
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
|
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Call a function on all processors
|
||||||
|
*/
|
||||||
|
static inline void on_each_cpu(smp_call_func_t func, void *info, int wait)
|
||||||
|
{
|
||||||
|
on_each_cpu_cond_mask(NULL, func, info, wait, cpu_online_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* on_each_cpu_mask(): Run a function on processors specified by
|
||||||
|
* cpumask, which may include the local processor.
|
||||||
|
* @mask: The set of cpus to run on (only runs on online subset).
|
||||||
|
* @func: The function to run. This must be fast and non-blocking.
|
||||||
|
* @info: An arbitrary pointer to pass to the function.
|
||||||
|
* @wait: If true, wait (atomically) until function has completed
|
||||||
|
* on other CPUs.
|
||||||
|
*
|
||||||
|
* If @wait is true, then returns once @func has returned.
|
||||||
|
*
|
||||||
|
* You must not call this function with disabled interrupts or from a
|
||||||
|
* hardware interrupt handler or from a bottom half handler. The
|
||||||
|
* exception is that it may be used during early boot while
|
||||||
|
* early_boot_irqs_disabled is set.
|
||||||
|
*/
|
||||||
|
static inline void on_each_cpu_mask(const struct cpumask *mask,
|
||||||
|
smp_call_func_t func, void *info, bool wait)
|
||||||
|
{
|
||||||
|
on_each_cpu_cond_mask(NULL, func, info, wait, mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Call a function on each processor for which the supplied function
|
||||||
|
* cond_func returns a positive value. This may include the local
|
||||||
|
* processor. May be used during early boot while early_boot_irqs_disabled is
|
||||||
|
* set. Use local_irq_save/restore() instead of local_irq_disable/enable().
|
||||||
|
*/
|
||||||
|
static inline void on_each_cpu_cond(smp_cond_func_t cond_func,
|
||||||
|
smp_call_func_t func, void *info, bool wait)
|
||||||
|
{
|
||||||
|
on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
|
||||||
#include <linux/preempt.h>
|
#include <linux/preempt.h>
|
||||||
|
|
|
@ -346,7 +346,7 @@ TRACE_EVENT(xen_mmu_flush_tlb_one_user,
|
||||||
TP_printk("addr %lx", __entry->addr)
|
TP_printk("addr %lx", __entry->addr)
|
||||||
);
|
);
|
||||||
|
|
||||||
TRACE_EVENT(xen_mmu_flush_tlb_others,
|
TRACE_EVENT(xen_mmu_flush_tlb_multi,
|
||||||
TP_PROTO(const struct cpumask *cpus, struct mm_struct *mm,
|
TP_PROTO(const struct cpumask *cpus, struct mm_struct *mm,
|
||||||
unsigned long addr, unsigned long end),
|
unsigned long addr, unsigned long end),
|
||||||
TP_ARGS(cpus, mm, addr, end),
|
TP_ARGS(cpus, mm, addr, end),
|
||||||
|
|
170
kernel/smp.c
170
kernel/smp.c
|
@ -850,12 +850,28 @@ int smp_call_function_any(const struct cpumask *mask,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(smp_call_function_any);
|
EXPORT_SYMBOL_GPL(smp_call_function_any);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Flags to be used as scf_flags argument of smp_call_function_many_cond().
|
||||||
|
*
|
||||||
|
* %SCF_WAIT: Wait until function execution is completed
|
||||||
|
* %SCF_RUN_LOCAL: Run also locally if local cpu is set in cpumask
|
||||||
|
*/
|
||||||
|
#define SCF_WAIT (1U << 0)
|
||||||
|
#define SCF_RUN_LOCAL (1U << 1)
|
||||||
|
|
||||||
static void smp_call_function_many_cond(const struct cpumask *mask,
|
static void smp_call_function_many_cond(const struct cpumask *mask,
|
||||||
smp_call_func_t func, void *info,
|
smp_call_func_t func, void *info,
|
||||||
bool wait, smp_cond_func_t cond_func)
|
unsigned int scf_flags,
|
||||||
|
smp_cond_func_t cond_func)
|
||||||
{
|
{
|
||||||
|
int cpu, last_cpu, this_cpu = smp_processor_id();
|
||||||
struct call_function_data *cfd;
|
struct call_function_data *cfd;
|
||||||
int cpu, next_cpu, this_cpu = smp_processor_id();
|
bool wait = scf_flags & SCF_WAIT;
|
||||||
|
bool run_remote = false;
|
||||||
|
bool run_local = false;
|
||||||
|
int nr_cpus = 0;
|
||||||
|
|
||||||
|
lockdep_assert_preemption_disabled();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Can deadlock when called with interrupts disabled.
|
* Can deadlock when called with interrupts disabled.
|
||||||
|
@ -863,8 +879,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
|
||||||
* send smp call function interrupt to this cpu and as such deadlocks
|
* send smp call function interrupt to this cpu and as such deadlocks
|
||||||
* can't happen.
|
* can't happen.
|
||||||
*/
|
*/
|
||||||
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
|
if (cpu_online(this_cpu) && !oops_in_progress &&
|
||||||
&& !oops_in_progress && !early_boot_irqs_disabled);
|
!early_boot_irqs_disabled)
|
||||||
|
lockdep_assert_irqs_enabled();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When @wait we can deadlock when we interrupt between llist_add() and
|
* When @wait we can deadlock when we interrupt between llist_add() and
|
||||||
|
@ -874,36 +891,22 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
|
||||||
*/
|
*/
|
||||||
WARN_ON_ONCE(!in_task());
|
WARN_ON_ONCE(!in_task());
|
||||||
|
|
||||||
/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
|
/* Check if we need local execution. */
|
||||||
|
if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
|
||||||
|
run_local = true;
|
||||||
|
|
||||||
|
/* Check if we need remote execution, i.e., any CPU excluding this one. */
|
||||||
cpu = cpumask_first_and(mask, cpu_online_mask);
|
cpu = cpumask_first_and(mask, cpu_online_mask);
|
||||||
if (cpu == this_cpu)
|
if (cpu == this_cpu)
|
||||||
cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
|
cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
|
||||||
|
if (cpu < nr_cpu_ids)
|
||||||
|
run_remote = true;
|
||||||
|
|
||||||
/* No online cpus? We're done. */
|
if (run_remote) {
|
||||||
if (cpu >= nr_cpu_ids)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Do we have another CPU which isn't us? */
|
|
||||||
next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
|
|
||||||
if (next_cpu == this_cpu)
|
|
||||||
next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
|
|
||||||
|
|
||||||
/* Fastpath: do that cpu by itself. */
|
|
||||||
if (next_cpu >= nr_cpu_ids) {
|
|
||||||
if (!cond_func || cond_func(cpu, info))
|
|
||||||
smp_call_function_single(cpu, func, info, wait);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
cfd = this_cpu_ptr(&cfd_data);
|
cfd = this_cpu_ptr(&cfd_data);
|
||||||
|
|
||||||
cpumask_and(cfd->cpumask, mask, cpu_online_mask);
|
cpumask_and(cfd->cpumask, mask, cpu_online_mask);
|
||||||
__cpumask_clear_cpu(this_cpu, cfd->cpumask);
|
__cpumask_clear_cpu(this_cpu, cfd->cpumask);
|
||||||
|
|
||||||
/* Some callers race with other cpus changing the passed mask */
|
|
||||||
if (unlikely(!cpumask_weight(cfd->cpumask)))
|
|
||||||
return;
|
|
||||||
|
|
||||||
cpumask_clear(cfd->cpumask_ipi);
|
cpumask_clear(cfd->cpumask_ipi);
|
||||||
for_each_cpu(cpu, cfd->cpumask) {
|
for_each_cpu(cpu, cfd->cpumask) {
|
||||||
struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
|
struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
|
||||||
|
@ -924,20 +927,39 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
|
||||||
cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
|
cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
|
||||||
if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
|
if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
|
||||||
__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
|
__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
|
||||||
|
nr_cpus++;
|
||||||
|
last_cpu = cpu;
|
||||||
|
|
||||||
cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
|
cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
|
||||||
} else {
|
} else {
|
||||||
cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
|
cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Send a message to all CPUs in the map */
|
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PING);
|
||||||
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu,
|
|
||||||
CFD_SEQ_NOCPU, CFD_SEQ_PING);
|
|
||||||
arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
|
|
||||||
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu,
|
|
||||||
CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
|
|
||||||
|
|
||||||
if (wait) {
|
/*
|
||||||
|
* Choose the most efficient way to send an IPI. Note that the
|
||||||
|
* number of CPUs might be zero due to concurrent changes to the
|
||||||
|
* provided mask.
|
||||||
|
*/
|
||||||
|
if (nr_cpus == 1)
|
||||||
|
send_call_function_single_ipi(last_cpu);
|
||||||
|
else if (likely(nr_cpus > 1))
|
||||||
|
arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
|
||||||
|
|
||||||
|
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (run_local && (!cond_func || cond_func(this_cpu, info))) {
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
local_irq_save(flags);
|
||||||
|
func(info);
|
||||||
|
local_irq_restore(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (run_remote && wait) {
|
||||||
for_each_cpu(cpu, cfd->cpumask) {
|
for_each_cpu(cpu, cfd->cpumask) {
|
||||||
call_single_data_t *csd;
|
call_single_data_t *csd;
|
||||||
|
|
||||||
|
@ -948,12 +970,14 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* smp_call_function_many(): Run a function on a set of other CPUs.
|
* smp_call_function_many(): Run a function on a set of CPUs.
|
||||||
* @mask: The set of cpus to run on (only runs on online subset).
|
* @mask: The set of cpus to run on (only runs on online subset).
|
||||||
* @func: The function to run. This must be fast and non-blocking.
|
* @func: The function to run. This must be fast and non-blocking.
|
||||||
* @info: An arbitrary pointer to pass to the function.
|
* @info: An arbitrary pointer to pass to the function.
|
||||||
* @wait: If true, wait (atomically) until function has completed
|
* @flags: Bitmask that controls the operation. If %SCF_WAIT is set, wait
|
||||||
* on other CPUs.
|
* (atomically) until function has completed on other CPUs. If
|
||||||
|
* %SCF_RUN_LOCAL is set, the function will also be run locally
|
||||||
|
* if the local CPU is set in the @cpumask.
|
||||||
*
|
*
|
||||||
* If @wait is true, then returns once @func has returned.
|
* If @wait is true, then returns once @func has returned.
|
||||||
*
|
*
|
||||||
|
@ -964,7 +988,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
|
||||||
void smp_call_function_many(const struct cpumask *mask,
|
void smp_call_function_many(const struct cpumask *mask,
|
||||||
smp_call_func_t func, void *info, bool wait)
|
smp_call_func_t func, void *info, bool wait)
|
||||||
{
|
{
|
||||||
smp_call_function_many_cond(mask, func, info, wait, NULL);
|
smp_call_function_many_cond(mask, func, info, wait * SCF_WAIT, NULL);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(smp_call_function_many);
|
EXPORT_SYMBOL(smp_call_function_many);
|
||||||
|
|
||||||
|
@ -1075,56 +1099,6 @@ void __init smp_init(void)
|
||||||
smp_cpus_done(setup_max_cpus);
|
smp_cpus_done(setup_max_cpus);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Call a function on all processors. May be used during early boot while
|
|
||||||
* early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
|
|
||||||
* of local_irq_disable/enable().
|
|
||||||
*/
|
|
||||||
void on_each_cpu(smp_call_func_t func, void *info, int wait)
|
|
||||||
{
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
preempt_disable();
|
|
||||||
smp_call_function(func, info, wait);
|
|
||||||
local_irq_save(flags);
|
|
||||||
func(info);
|
|
||||||
local_irq_restore(flags);
|
|
||||||
preempt_enable();
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(on_each_cpu);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* on_each_cpu_mask(): Run a function on processors specified by
|
|
||||||
* cpumask, which may include the local processor.
|
|
||||||
* @mask: The set of cpus to run on (only runs on online subset).
|
|
||||||
* @func: The function to run. This must be fast and non-blocking.
|
|
||||||
* @info: An arbitrary pointer to pass to the function.
|
|
||||||
* @wait: If true, wait (atomically) until function has completed
|
|
||||||
* on other CPUs.
|
|
||||||
*
|
|
||||||
* If @wait is true, then returns once @func has returned.
|
|
||||||
*
|
|
||||||
* You must not call this function with disabled interrupts or from a
|
|
||||||
* hardware interrupt handler or from a bottom half handler. The
|
|
||||||
* exception is that it may be used during early boot while
|
|
||||||
* early_boot_irqs_disabled is set.
|
|
||||||
*/
|
|
||||||
void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
|
|
||||||
void *info, bool wait)
|
|
||||||
{
|
|
||||||
int cpu = get_cpu();
|
|
||||||
|
|
||||||
smp_call_function_many(mask, func, info, wait);
|
|
||||||
if (cpumask_test_cpu(cpu, mask)) {
|
|
||||||
unsigned long flags;
|
|
||||||
local_irq_save(flags);
|
|
||||||
func(info);
|
|
||||||
local_irq_restore(flags);
|
|
||||||
}
|
|
||||||
put_cpu();
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(on_each_cpu_mask);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* on_each_cpu_cond(): Call a function on each processor for which
|
* on_each_cpu_cond(): Call a function on each processor for which
|
||||||
* the supplied function cond_func returns true, optionally waiting
|
* the supplied function cond_func returns true, optionally waiting
|
||||||
|
@ -1150,27 +1124,17 @@ EXPORT_SYMBOL(on_each_cpu_mask);
|
||||||
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||||
void *info, bool wait, const struct cpumask *mask)
|
void *info, bool wait, const struct cpumask *mask)
|
||||||
{
|
{
|
||||||
int cpu = get_cpu();
|
unsigned int scf_flags = SCF_RUN_LOCAL;
|
||||||
|
|
||||||
smp_call_function_many_cond(mask, func, info, wait, cond_func);
|
if (wait)
|
||||||
if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) {
|
scf_flags |= SCF_WAIT;
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
local_irq_save(flags);
|
preempt_disable();
|
||||||
func(info);
|
smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
|
||||||
local_irq_restore(flags);
|
preempt_enable();
|
||||||
}
|
|
||||||
put_cpu();
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(on_each_cpu_cond_mask);
|
EXPORT_SYMBOL(on_each_cpu_cond_mask);
|
||||||
|
|
||||||
void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
|
|
||||||
void *info, bool wait)
|
|
||||||
{
|
|
||||||
on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(on_each_cpu_cond);
|
|
||||||
|
|
||||||
static void do_nothing(void *unused)
|
static void do_nothing(void *unused)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
38
kernel/up.c
38
kernel/up.c
|
@ -36,35 +36,6 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(smp_call_function_single_async);
|
EXPORT_SYMBOL(smp_call_function_single_async);
|
||||||
|
|
||||||
void on_each_cpu(smp_call_func_t func, void *info, int wait)
|
|
||||||
{
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
local_irq_save(flags);
|
|
||||||
func(info);
|
|
||||||
local_irq_restore(flags);
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(on_each_cpu);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Note we still need to test the mask even for UP
|
|
||||||
* because we actually can get an empty mask from
|
|
||||||
* code that on SMP might call us without the local
|
|
||||||
* CPU in the mask.
|
|
||||||
*/
|
|
||||||
void on_each_cpu_mask(const struct cpumask *mask,
|
|
||||||
smp_call_func_t func, void *info, bool wait)
|
|
||||||
{
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
if (cpumask_test_cpu(0, mask)) {
|
|
||||||
local_irq_save(flags);
|
|
||||||
func(info);
|
|
||||||
local_irq_restore(flags);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(on_each_cpu_mask);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Preemption is disabled here to make sure the cond_func is called under the
|
* Preemption is disabled here to make sure the cond_func is called under the
|
||||||
* same condtions in UP and SMP.
|
* same condtions in UP and SMP.
|
||||||
|
@ -75,7 +46,7 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
if (cond_func(0, info)) {
|
if ((!cond_func || cond_func(0, info)) && cpumask_test_cpu(0, mask)) {
|
||||||
local_irq_save(flags);
|
local_irq_save(flags);
|
||||||
func(info);
|
func(info);
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
|
@ -84,13 +55,6 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(on_each_cpu_cond_mask);
|
EXPORT_SYMBOL(on_each_cpu_cond_mask);
|
||||||
|
|
||||||
void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
|
|
||||||
void *info, bool wait)
|
|
||||||
{
|
|
||||||
on_each_cpu_cond_mask(cond_func, func, info, wait, NULL);
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(on_each_cpu_cond);
|
|
||||||
|
|
||||||
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
|
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
Loading…
Reference in a new issue