mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-25 11:55:37 +00:00
Changes in this cycle were:
- Cleanups for SCHED_DEADLINE - Tracing updates/fixes - CPU Accounting fixes - First wave of changes to optimize the overhead of the scheduler build, from the fast-headers tree - including placeholder *_api.h headers for later header split-ups. - Preempt-dynamic using static_branch() for ARM64 - Isolation housekeeping mask rework; preperatory for further changes - NUMA-balancing: deal with CPU-less nodes - NUMA-balancing: tune systems that have multiple LLC cache domains per node (eg. AMD) - Updates to RSEQ UAPI in preparation for glibc usage - Lots of RSEQ/selftests, for same - Add Suren as PSI co-maintainer Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmI5rg8RHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1hGrw/+M3QOk6fH7G48wjlNnBvcOife6ls+Ni4k ixOAcF4JKoixO8HieU5vv0A7yf/83tAa6fpeXeMf1hkCGc0NSlmLtuIux+WOmoAL LzCyDEYfiP8KnVh0A1Tui/lK0+AkGo21O6ADhQE2gh8o2LpslOHQMzvtyekSzeeb mVxMYQN+QH0m518xdO2D8IQv9ctOYK0eGjmkqdNfntOlytypPZHeNel/tCzwklP/ dElJUjNiSKDlUgTBPtL3DfpoLOI/0mHF2p6NEXvNyULxSOqJTu8pv9Z2ADb2kKo1 0D56iXBDngMi9MHIJLgvzsA8gKzHLFSuPbpODDqkTZCa28vaMB9NYGhJ643NtEie IXTJEvF1rmNkcLcZlZxo0yjL0fjvPkczjw4Vj27gbrUQeEBfb4mfuI4BRmij63Ep qEkgQTJhduCqqrQP1rVyhwWZRk1JNcVug+F6N42qWW3fg1xhj0YSrLai2c9nPez6 3Zt98H8YGS1Z/JQomSw48iGXVqfTp/ETI7uU7jqHK8QcjzQ4lFK5H4GZpwuqGBZi NJJ1l97XMEas+rPHiwMEN7Z1DVhzJLCp8omEj12QU+tGLofxxwAuuOVat3CQWLRk f80Oya3TLEgd22hGIKDRmHa22vdWnNQyS0S15wJotawBzQf+n3auS9Q3/rh979+t ES/qvlGxTIs= =Z8uT -----END PGP SIGNATURE----- Merge tag 'sched-core-2022-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull scheduler updates from Ingo Molnar: - Cleanups for SCHED_DEADLINE - Tracing updates/fixes - CPU Accounting fixes - First wave of changes to optimize the overhead of the scheduler build, from the fast-headers tree - including placeholder *_api.h headers for later header split-ups. 
- Preempt-dynamic using static_branch() for ARM64 - Isolation housekeeping mask rework; preparatory for further changes - NUMA-balancing: deal with CPU-less nodes - NUMA-balancing: tune systems that have multiple LLC cache domains per node (e.g. AMD) - Updates to RSEQ UAPI in preparation for glibc usage - Lots of RSEQ/selftests, for same - Add Suren as PSI co-maintainer * tag 'sched-core-2022-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (81 commits) sched/headers: ARM needs asm/paravirt_api_clock.h too sched/numa: Fix boot crash on arm64 systems headers/prep: Fix header to build standalone: <linux/psi.h> sched/headers: Only include <linux/entry-common.h> when CONFIG_GENERIC_ENTRY=y cgroup: Fix suspicious rcu_dereference_check() usage warning sched/preempt: Tell about PREEMPT_DYNAMIC on kernel headers sched/topology: Remove redundant variable and fix incorrect type in build_sched_domains sched/deadline,rt: Remove unused parameter from pick_next_[rt|dl]_entity() sched/deadline,rt: Remove unused functions for !CONFIG_SMP sched/deadline: Use __node_2_[pdl|dle]() and rb_first_cached() consistently sched/deadline: Merge dl_task_can_attach() and dl_cpu_busy() sched/deadline: Move bandwidth mgmt and reclaim functions into sched class source file sched/deadline: Remove unused def_dl_bandwidth sched/tracing: Report TASK_RTLOCK_WAIT tasks as TASK_UNINTERRUPTIBLE sched/tracing: Don't re-read p->state when emitting sched_switch event sched/rt: Plug rt_mutex_setprio() vs push_rt_task() race sched/cpuacct: Remove redundant RCU read lock sched/cpuacct: Optimize away RCU read lock sched/cpuacct: Fix charge percpu cpuusage sched/headers: Reorganize, clean up and optimize kernel/sched/sched.h dependencies ...
This commit is contained in:
commit
3fe2f7446f
135 changed files with 2350 additions and 1312 deletions
|
@ -609,51 +609,7 @@ be migrated to a local memory node.
|
||||||
The unmapping of pages and trapping faults incur additional overhead that
|
The unmapping of pages and trapping faults incur additional overhead that
|
||||||
ideally is offset by improved memory locality but there is no universal
|
ideally is offset by improved memory locality but there is no universal
|
||||||
guarantee. If the target workload is already bound to NUMA nodes then this
|
guarantee. If the target workload is already bound to NUMA nodes then this
|
||||||
feature should be disabled. Otherwise, if the system overhead from the
|
feature should be disabled.
|
||||||
feature is too high then the rate the kernel samples for NUMA hinting
|
|
||||||
faults may be controlled by the `numa_balancing_scan_period_min_ms,
|
|
||||||
numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
|
|
||||||
numa_balancing_scan_size_mb`_, and numa_balancing_settle_count sysctls.
|
|
||||||
|
|
||||||
|
|
||||||
numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
|
|
||||||
===============================================================================================================================
|
|
||||||
|
|
||||||
|
|
||||||
Automatic NUMA balancing scans tasks address space and unmaps pages to
|
|
||||||
detect if pages are properly placed or if the data should be migrated to a
|
|
||||||
memory node local to where the task is running. Every "scan delay" the task
|
|
||||||
scans the next "scan size" number of pages in its address space. When the
|
|
||||||
end of the address space is reached the scanner restarts from the beginning.
|
|
||||||
|
|
||||||
In combination, the "scan delay" and "scan size" determine the scan rate.
|
|
||||||
When "scan delay" decreases, the scan rate increases. The scan delay and
|
|
||||||
hence the scan rate of every task is adaptive and depends on historical
|
|
||||||
behaviour. If pages are properly placed then the scan delay increases,
|
|
||||||
otherwise the scan delay decreases. The "scan size" is not adaptive but
|
|
||||||
the higher the "scan size", the higher the scan rate.
|
|
||||||
|
|
||||||
Higher scan rates incur higher system overhead as page faults must be
|
|
||||||
trapped and potentially data must be migrated. However, the higher the scan
|
|
||||||
rate, the more quickly a tasks memory is migrated to a local node if the
|
|
||||||
workload pattern changes and minimises performance impact due to remote
|
|
||||||
memory accesses. These sysctls control the thresholds for scan delays and
|
|
||||||
the number of pages scanned.
|
|
||||||
|
|
||||||
``numa_balancing_scan_period_min_ms`` is the minimum time in milliseconds to
|
|
||||||
scan a tasks virtual memory. It effectively controls the maximum scanning
|
|
||||||
rate for each task.
|
|
||||||
|
|
||||||
``numa_balancing_scan_delay_ms`` is the starting "scan delay" used for a task
|
|
||||||
when it initially forks.
|
|
||||||
|
|
||||||
``numa_balancing_scan_period_max_ms`` is the maximum time in milliseconds to
|
|
||||||
scan a tasks virtual memory. It effectively controls the minimum scanning
|
|
||||||
rate for each task.
|
|
||||||
|
|
||||||
``numa_balancing_scan_size_mb`` is how many megabytes worth of pages are
|
|
||||||
scanned for a given scan.
|
|
||||||
|
|
||||||
|
|
||||||
oops_all_cpu_backtrace
|
oops_all_cpu_backtrace
|
||||||
======================
|
======================
|
||||||
|
|
|
@ -18,6 +18,7 @@ Linux Scheduler
|
||||||
sched-nice-design
|
sched-nice-design
|
||||||
sched-rt-group
|
sched-rt-group
|
||||||
sched-stats
|
sched-stats
|
||||||
|
sched-debug
|
||||||
|
|
||||||
text_files
|
text_files
|
||||||
|
|
||||||
|
|
54
Documentation/scheduler/sched-debug.rst
Normal file
54
Documentation/scheduler/sched-debug.rst
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
=================
|
||||||
|
Scheduler debugfs
|
||||||
|
=================
|
||||||
|
|
||||||
|
Booting a kernel with CONFIG_SCHED_DEBUG=y will give access to
|
||||||
|
scheduler specific debug files under /sys/kernel/debug/sched. Some of
|
||||||
|
those files are described below.
|
||||||
|
|
||||||
|
numa_balancing
|
||||||
|
==============
|
||||||
|
|
||||||
|
The `numa_balancing` directory is used to hold files to control NUMA
|
||||||
|
balancing feature. If the system overhead from the feature is too
|
||||||
|
high then the rate the kernel samples for NUMA hinting faults may be
|
||||||
|
controlled by the `scan_period_min_ms, scan_delay_ms,
|
||||||
|
scan_period_max_ms, scan_size_mb` files.
|
||||||
|
|
||||||
|
|
||||||
|
scan_period_min_ms, scan_delay_ms, scan_period_max_ms, scan_size_mb
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
|
||||||
|
Automatic NUMA balancing scans a task's address space and unmaps pages to
|
||||||
|
detect if pages are properly placed or if the data should be migrated to a
|
||||||
|
memory node local to where the task is running. Every "scan delay" the task
|
||||||
|
scans the next "scan size" number of pages in its address space. When the
|
||||||
|
end of the address space is reached the scanner restarts from the beginning.
|
||||||
|
|
||||||
|
In combination, the "scan delay" and "scan size" determine the scan rate.
|
||||||
|
When "scan delay" decreases, the scan rate increases. The scan delay and
|
||||||
|
hence the scan rate of every task is adaptive and depends on historical
|
||||||
|
behaviour. If pages are properly placed then the scan delay increases,
|
||||||
|
otherwise the scan delay decreases. The "scan size" is not adaptive but
|
||||||
|
the higher the "scan size", the higher the scan rate.
|
||||||
|
|
||||||
|
Higher scan rates incur higher system overhead as page faults must be
|
||||||
|
trapped and potentially data must be migrated. However, the higher the scan
|
||||||
|
rate, the more quickly a task's memory is migrated to a local node if the
|
||||||
|
workload pattern changes and minimises performance impact due to remote
|
||||||
|
memory accesses. These files control the thresholds for scan delays and
|
||||||
|
the number of pages scanned.
|
||||||
|
|
||||||
|
``scan_period_min_ms`` is the minimum time in milliseconds to scan a
|
||||||
|
task's virtual memory. It effectively controls the maximum scanning
|
||||||
|
rate for each task.
|
||||||
|
|
||||||
|
``scan_delay_ms`` is the starting "scan delay" used for a task when it
|
||||||
|
initially forks.
|
||||||
|
|
||||||
|
``scan_period_max_ms`` is the maximum time in milliseconds to scan a
|
||||||
|
task's virtual memory. It effectively controls the minimum scanning
|
||||||
|
rate for each task.
|
||||||
|
|
||||||
|
``scan_size_mb`` is how many megabytes worth of pages are scanned for
|
||||||
|
a given scan.
|
|
@ -15566,6 +15566,7 @@ F: drivers/net/ppp/pptp.c
|
||||||
|
|
||||||
PRESSURE STALL INFORMATION (PSI)
|
PRESSURE STALL INFORMATION (PSI)
|
||||||
M: Johannes Weiner <hannes@cmpxchg.org>
|
M: Johannes Weiner <hannes@cmpxchg.org>
|
||||||
|
M: Suren Baghdasaryan <surenb@google.com>
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: include/linux/psi*
|
F: include/linux/psi*
|
||||||
F: kernel/sched/psi.c
|
F: kernel/sched/psi.c
|
||||||
|
|
37
arch/Kconfig
37
arch/Kconfig
|
@ -1293,12 +1293,41 @@ config HAVE_STATIC_CALL_INLINE
|
||||||
|
|
||||||
config HAVE_PREEMPT_DYNAMIC
|
config HAVE_PREEMPT_DYNAMIC
|
||||||
bool
|
bool
|
||||||
|
|
||||||
|
config HAVE_PREEMPT_DYNAMIC_CALL
|
||||||
|
bool
|
||||||
depends on HAVE_STATIC_CALL
|
depends on HAVE_STATIC_CALL
|
||||||
depends on GENERIC_ENTRY
|
select HAVE_PREEMPT_DYNAMIC
|
||||||
help
|
help
|
||||||
Select this if the architecture support boot time preempt setting
|
An architecture should select this if it can handle the preemption
|
||||||
on top of static calls. It is strongly advised to support inline
|
model being selected at boot time using static calls.
|
||||||
static call to avoid any overhead.
|
|
||||||
|
Where an architecture selects HAVE_STATIC_CALL_INLINE, any call to a
|
||||||
|
preemption function will be patched directly.
|
||||||
|
|
||||||
|
Where an architecture does not select HAVE_STATIC_CALL_INLINE, any
|
||||||
|
call to a preemption function will go through a trampoline, and the
|
||||||
|
trampoline will be patched.
|
||||||
|
|
||||||
|
It is strongly advised to support inline static call to avoid any
|
||||||
|
overhead.
|
||||||
|
|
||||||
|
config HAVE_PREEMPT_DYNAMIC_KEY
|
||||||
|
bool
|
||||||
|
depends on HAVE_ARCH_JUMP_LABEL && CC_HAS_ASM_GOTO
|
||||||
|
select HAVE_PREEMPT_DYNAMIC
|
||||||
|
help
|
||||||
|
An architecture should select this if it can handle the preemption
|
||||||
|
model being selected at boot time using static keys.
|
||||||
|
|
||||||
|
Each preemption function will be given an early return based on a
|
||||||
|
static key. This should have slightly lower overhead than non-inline
|
||||||
|
static calls, as this effectively inlines each trampoline into the
|
||||||
|
start of its callee. This may avoid redundant work, and may
|
||||||
|
integrate better with CFI schemes.
|
||||||
|
|
||||||
|
This will have greater overhead than using inline static calls as
|
||||||
|
the call to the preemption function cannot be entirely elided.
|
||||||
|
|
||||||
config ARCH_WANT_LD_ORPHAN_WARN
|
config ARCH_WANT_LD_ORPHAN_WARN
|
||||||
bool
|
bool
|
||||||
|
|
1
arch/arm/include/asm/paravirt_api_clock.h
Normal file
1
arch/arm/include/asm/paravirt_api_clock.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <asm/paravirt.h>
|
|
@ -194,6 +194,7 @@ config ARM64
|
||||||
select HAVE_PERF_EVENTS
|
select HAVE_PERF_EVENTS
|
||||||
select HAVE_PERF_REGS
|
select HAVE_PERF_REGS
|
||||||
select HAVE_PERF_USER_STACK_DUMP
|
select HAVE_PERF_USER_STACK_DUMP
|
||||||
|
select HAVE_PREEMPT_DYNAMIC_KEY
|
||||||
select HAVE_REGS_AND_STACK_ACCESS_API
|
select HAVE_REGS_AND_STACK_ACCESS_API
|
||||||
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
|
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
|
||||||
select HAVE_FUNCTION_ARG_ACCESS_API
|
select HAVE_FUNCTION_ARG_ACCESS_API
|
||||||
|
|
1
arch/arm64/include/asm/paravirt_api_clock.h
Normal file
1
arch/arm64/include/asm/paravirt_api_clock.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <asm/paravirt.h>
|
|
@ -2,6 +2,7 @@
|
||||||
#ifndef __ASM_PREEMPT_H
|
#ifndef __ASM_PREEMPT_H
|
||||||
#define __ASM_PREEMPT_H
|
#define __ASM_PREEMPT_H
|
||||||
|
|
||||||
|
#include <linux/jump_label.h>
|
||||||
#include <linux/thread_info.h>
|
#include <linux/thread_info.h>
|
||||||
|
|
||||||
#define PREEMPT_NEED_RESCHED BIT(32)
|
#define PREEMPT_NEED_RESCHED BIT(32)
|
||||||
|
@ -80,10 +81,24 @@ static inline bool should_resched(int preempt_offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPTION
|
#ifdef CONFIG_PREEMPTION
|
||||||
|
|
||||||
void preempt_schedule(void);
|
void preempt_schedule(void);
|
||||||
#define __preempt_schedule() preempt_schedule()
|
|
||||||
void preempt_schedule_notrace(void);
|
void preempt_schedule_notrace(void);
|
||||||
#define __preempt_schedule_notrace() preempt_schedule_notrace()
|
|
||||||
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
|
|
||||||
|
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
||||||
|
void dynamic_preempt_schedule(void);
|
||||||
|
#define __preempt_schedule() dynamic_preempt_schedule()
|
||||||
|
void dynamic_preempt_schedule_notrace(void);
|
||||||
|
#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace()
|
||||||
|
|
||||||
|
#else /* CONFIG_PREEMPT_DYNAMIC */
|
||||||
|
|
||||||
|
#define __preempt_schedule() preempt_schedule()
|
||||||
|
#define __preempt_schedule_notrace() preempt_schedule_notrace()
|
||||||
|
|
||||||
|
#endif /* CONFIG_PREEMPT_DYNAMIC */
|
||||||
#endif /* CONFIG_PREEMPTION */
|
#endif /* CONFIG_PREEMPTION */
|
||||||
|
|
||||||
#endif /* __ASM_PREEMPT_H */
|
#endif /* __ASM_PREEMPT_H */
|
||||||
|
|
|
@ -223,9 +223,26 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
|
||||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
|
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
||||||
|
#define need_irq_preemption() \
|
||||||
|
(static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
|
||||||
|
#else
|
||||||
|
#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
|
||||||
|
#endif
|
||||||
|
|
||||||
static void __sched arm64_preempt_schedule_irq(void)
|
static void __sched arm64_preempt_schedule_irq(void)
|
||||||
{
|
{
|
||||||
lockdep_assert_irqs_disabled();
|
if (!need_irq_preemption())
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: thread_info::preempt_count includes both thread_info::count
|
||||||
|
* and thread_info::need_resched, and is not equivalent to
|
||||||
|
* preempt_count().
|
||||||
|
*/
|
||||||
|
if (READ_ONCE(current_thread_info()->preempt_count) != 0)
|
||||||
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
|
* DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
|
||||||
|
@ -441,14 +458,7 @@ static __always_inline void __el1_irq(struct pt_regs *regs,
|
||||||
do_interrupt_handler(regs, handler);
|
do_interrupt_handler(regs, handler);
|
||||||
irq_exit_rcu();
|
irq_exit_rcu();
|
||||||
|
|
||||||
/*
|
arm64_preempt_schedule_irq();
|
||||||
* Note: thread_info::preempt_count includes both thread_info::count
|
|
||||||
* and thread_info::need_resched, and is not equivalent to
|
|
||||||
* preempt_count().
|
|
||||||
*/
|
|
||||||
if (IS_ENABLED(CONFIG_PREEMPTION) &&
|
|
||||||
READ_ONCE(current_thread_info()->preempt_count) == 0)
|
|
||||||
arm64_preempt_schedule_irq();
|
|
||||||
|
|
||||||
exit_to_kernel_mode(regs);
|
exit_to_kernel_mode(regs);
|
||||||
}
|
}
|
||||||
|
|
|
@ -248,7 +248,7 @@ config X86
|
||||||
select HAVE_STACK_VALIDATION if X86_64
|
select HAVE_STACK_VALIDATION if X86_64
|
||||||
select HAVE_STATIC_CALL
|
select HAVE_STATIC_CALL
|
||||||
select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION
|
select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION
|
||||||
select HAVE_PREEMPT_DYNAMIC
|
select HAVE_PREEMPT_DYNAMIC_CALL
|
||||||
select HAVE_RSEQ
|
select HAVE_RSEQ
|
||||||
select HAVE_SYSCALL_TRACEPOINTS
|
select HAVE_SYSCALL_TRACEPOINTS
|
||||||
select HAVE_UNSTABLE_SCHED_CLOCK
|
select HAVE_UNSTABLE_SCHED_CLOCK
|
||||||
|
|
1
arch/x86/include/asm/paravirt_api_clock.h
Normal file
1
arch/x86/include/asm/paravirt_api_clock.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <asm/paravirt.h>
|
|
@ -108,16 +108,18 @@ static __always_inline bool should_resched(int preempt_offset)
|
||||||
extern asmlinkage void preempt_schedule(void);
|
extern asmlinkage void preempt_schedule(void);
|
||||||
extern asmlinkage void preempt_schedule_thunk(void);
|
extern asmlinkage void preempt_schedule_thunk(void);
|
||||||
|
|
||||||
#define __preempt_schedule_func preempt_schedule_thunk
|
#define preempt_schedule_dynamic_enabled preempt_schedule_thunk
|
||||||
|
#define preempt_schedule_dynamic_disabled NULL
|
||||||
|
|
||||||
extern asmlinkage void preempt_schedule_notrace(void);
|
extern asmlinkage void preempt_schedule_notrace(void);
|
||||||
extern asmlinkage void preempt_schedule_notrace_thunk(void);
|
extern asmlinkage void preempt_schedule_notrace_thunk(void);
|
||||||
|
|
||||||
#define __preempt_schedule_notrace_func preempt_schedule_notrace_thunk
|
#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk
|
||||||
|
#define preempt_schedule_notrace_dynamic_disabled NULL
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
|
|
||||||
DECLARE_STATIC_CALL(preempt_schedule, __preempt_schedule_func);
|
DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
|
||||||
|
|
||||||
#define __preempt_schedule() \
|
#define __preempt_schedule() \
|
||||||
do { \
|
do { \
|
||||||
|
@ -125,7 +127,7 @@ do { \
|
||||||
asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
|
asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
DECLARE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func);
|
DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
|
||||||
|
|
||||||
#define __preempt_schedule_notrace() \
|
#define __preempt_schedule_notrace() \
|
||||||
do { \
|
do { \
|
||||||
|
|
|
@ -91,7 +91,7 @@ unsigned int aperfmperf_get_khz(int cpu)
|
||||||
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
|
if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (rcu_is_idle_cpu(cpu))
|
if (rcu_is_idle_cpu(cpu))
|
||||||
|
@ -114,7 +114,7 @@ void arch_freq_prepare_all(void)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for_each_online_cpu(cpu) {
|
for_each_online_cpu(cpu) {
|
||||||
if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
|
if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
|
||||||
continue;
|
continue;
|
||||||
if (rcu_is_idle_cpu(cpu))
|
if (rcu_is_idle_cpu(cpu))
|
||||||
continue; /* Idle CPUs are completely uninteresting. */
|
continue; /* Idle CPUs are completely uninteresting. */
|
||||||
|
@ -136,7 +136,7 @@ unsigned int arch_freq_get_on_cpu(int cpu)
|
||||||
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
|
if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
|
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
|
||||||
|
|
|
@ -8853,7 +8853,7 @@ int kvm_arch_init(void *opaque)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pi_inject_timer == -1)
|
if (pi_inject_timer == -1)
|
||||||
pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
|
pi_inject_timer = housekeeping_enabled(HK_TYPE_TIMER);
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
|
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
|
||||||
|
|
||||||
|
|
|
@ -275,7 +275,7 @@ static ssize_t print_cpus_isolated(struct device *dev,
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
cpumask_andnot(isolated, cpu_possible_mask,
|
cpumask_andnot(isolated, cpu_possible_mask,
|
||||||
housekeeping_cpumask(HK_FLAG_DOMAIN));
|
housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||||
len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated));
|
len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated));
|
||||||
|
|
||||||
free_cpumask_var(isolated);
|
free_cpumask_var(isolated);
|
||||||
|
|
|
@ -350,7 +350,6 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
|
||||||
const struct pci_device_id *id)
|
const struct pci_device_id *id)
|
||||||
{
|
{
|
||||||
int error, node, cpu;
|
int error, node, cpu;
|
||||||
int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
|
|
||||||
struct drv_dev_and_id ddi = { drv, dev, id };
|
struct drv_dev_and_id ddi = { drv, dev, id };
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -368,17 +367,29 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
|
||||||
* device is probed from work_on_cpu() of the Physical device.
|
* device is probed from work_on_cpu() of the Physical device.
|
||||||
*/
|
*/
|
||||||
if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
|
if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
|
||||||
pci_physfn_is_probed(dev))
|
pci_physfn_is_probed(dev)) {
|
||||||
cpu = nr_cpu_ids;
|
cpu = nr_cpu_ids;
|
||||||
else
|
} else {
|
||||||
|
cpumask_var_t wq_domain_mask;
|
||||||
|
|
||||||
|
if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) {
|
||||||
|
error = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
cpumask_and(wq_domain_mask,
|
||||||
|
housekeeping_cpumask(HK_TYPE_WQ),
|
||||||
|
housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||||
|
|
||||||
cpu = cpumask_any_and(cpumask_of_node(node),
|
cpu = cpumask_any_and(cpumask_of_node(node),
|
||||||
housekeeping_cpumask(hk_flags));
|
wq_domain_mask);
|
||||||
|
free_cpumask_var(wq_domain_mask);
|
||||||
|
}
|
||||||
|
|
||||||
if (cpu < nr_cpu_ids)
|
if (cpu < nr_cpu_ids)
|
||||||
error = work_on_cpu(cpu, local_pci_probe, &ddi);
|
error = work_on_cpu(cpu, local_pci_probe, &ddi);
|
||||||
else
|
else
|
||||||
error = local_pci_probe(&ddi);
|
error = local_pci_probe(&ddi);
|
||||||
|
out:
|
||||||
dev->is_probed = 0;
|
dev->is_probed = 0;
|
||||||
cpu_hotplug_enable();
|
cpu_hotplug_enable();
|
||||||
return error;
|
return error;
|
||||||
|
|
|
@ -450,6 +450,7 @@ extern struct mutex cgroup_mutex;
|
||||||
extern spinlock_t css_set_lock;
|
extern spinlock_t css_set_lock;
|
||||||
#define task_css_set_check(task, __c) \
|
#define task_css_set_check(task, __c) \
|
||||||
rcu_dereference_check((task)->cgroups, \
|
rcu_dereference_check((task)->cgroups, \
|
||||||
|
rcu_read_lock_sched_held() || \
|
||||||
lockdep_is_held(&cgroup_mutex) || \
|
lockdep_is_held(&cgroup_mutex) || \
|
||||||
lockdep_is_held(&css_set_lock) || \
|
lockdep_is_held(&css_set_lock) || \
|
||||||
((task)->flags & PF_EXITING) || (__c))
|
((task)->flags & PF_EXITING) || (__c))
|
||||||
|
@ -791,11 +792,9 @@ static inline void cgroup_account_cputime(struct task_struct *task,
|
||||||
|
|
||||||
cpuacct_charge(task, delta_exec);
|
cpuacct_charge(task, delta_exec);
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
cgrp = task_dfl_cgroup(task);
|
cgrp = task_dfl_cgroup(task);
|
||||||
if (cgroup_parent(cgrp))
|
if (cgroup_parent(cgrp))
|
||||||
__cgroup_account_cputime(cgrp, delta_exec);
|
__cgroup_account_cputime(cgrp, delta_exec);
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void cgroup_account_cputime_field(struct task_struct *task,
|
static inline void cgroup_account_cputime_field(struct task_struct *task,
|
||||||
|
@ -806,11 +805,9 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
|
||||||
|
|
||||||
cpuacct_account_field(task, index, delta_exec);
|
cpuacct_account_field(task, index, delta_exec);
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
cgrp = task_dfl_cgroup(task);
|
cgrp = task_dfl_cgroup(task);
|
||||||
if (cgroup_parent(cgrp))
|
if (cgroup_parent(cgrp))
|
||||||
__cgroup_account_cputime_field(cgrp, index, delta_exec);
|
__cgroup_account_cputime_field(cgrp, index, delta_exec);
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#else /* CONFIG_CGROUPS */
|
#else /* CONFIG_CGROUPS */
|
||||||
|
|
1
include/linux/cgroup_api.h
Normal file
1
include/linux/cgroup_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/cgroup.h>
|
1
include/linux/cpumask_api.h
Normal file
1
include/linux/cpumask_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/cpumask.h>
|
|
@ -454,10 +454,21 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
|
||||||
*
|
*
|
||||||
* Conditional reschedule with additional sanity checks.
|
* Conditional reschedule with additional sanity checks.
|
||||||
*/
|
*/
|
||||||
void irqentry_exit_cond_resched(void);
|
void raw_irqentry_exit_cond_resched(void);
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
|
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||||
|
#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched
|
||||||
|
#define irqentry_exit_cond_resched_dynamic_disabled NULL
|
||||||
|
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
|
||||||
|
#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)()
|
||||||
|
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||||
|
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
||||||
|
void dynamic_irqentry_exit_cond_resched(void);
|
||||||
|
#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched()
|
||||||
#endif
|
#endif
|
||||||
|
#else /* CONFIG_PREEMPT_DYNAMIC */
|
||||||
|
#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched()
|
||||||
|
#endif /* CONFIG_PREEMPT_DYNAMIC */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* irqentry_exit - Handle return from exception that used irqentry_enter()
|
* irqentry_exit - Handle return from exception that used irqentry_enter()
|
||||||
|
|
1
include/linux/fs_api.h
Normal file
1
include/linux/fs_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/fs.h>
|
1
include/linux/gfp_api.h
Normal file
1
include/linux/gfp_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/gfp.h>
|
1
include/linux/hashtable_api.h
Normal file
1
include/linux/hashtable_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/hashtable.h>
|
1
include/linux/hrtimer_api.h
Normal file
1
include/linux/hrtimer_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/hrtimer.h>
|
|
@ -99,7 +99,7 @@ struct user;
|
||||||
extern int __cond_resched(void);
|
extern int __cond_resched(void);
|
||||||
# define might_resched() __cond_resched()
|
# define might_resched() __cond_resched()
|
||||||
|
|
||||||
#elif defined(CONFIG_PREEMPT_DYNAMIC)
|
#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||||
|
|
||||||
extern int __cond_resched(void);
|
extern int __cond_resched(void);
|
||||||
|
|
||||||
|
@ -110,6 +110,11 @@ static __always_inline void might_resched(void)
|
||||||
static_call_mod(might_resched)();
|
static_call_mod(might_resched)();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||||
|
|
||||||
|
extern int dynamic_might_resched(void);
|
||||||
|
# define might_resched() dynamic_might_resched()
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
# define might_resched() do { } while (0)
|
# define might_resched() do { } while (0)
|
||||||
|
|
1
include/linux/kobject_api.h
Normal file
1
include/linux/kobject_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/kobject.h>
|
1
include/linux/kref_api.h
Normal file
1
include/linux/kref_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/kref.h>
|
1
include/linux/ktime_api.h
Normal file
1
include/linux/ktime_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/ktime.h>
|
1
include/linux/llist_api.h
Normal file
1
include/linux/llist_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/llist.h>
|
1
include/linux/lockdep_api.h
Normal file
1
include/linux/lockdep_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/lockdep.h>
|
1
include/linux/mm_api.h
Normal file
1
include/linux/mm_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/mm.h>
|
1
include/linux/mutex_api.h
Normal file
1
include/linux/mutex_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/mutex.h>
|
1
include/linux/perf_event_api.h
Normal file
1
include/linux/perf_event_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/perf_event.h>
|
1
include/linux/pgtable_api.h
Normal file
1
include/linux/pgtable_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/pgtable.h>
|
|
@ -6,6 +6,7 @@
|
||||||
#include <linux/psi_types.h>
|
#include <linux/psi_types.h>
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/poll.h>
|
#include <linux/poll.h>
|
||||||
|
#include <linux/cgroup-defs.h>
|
||||||
|
|
||||||
struct seq_file;
|
struct seq_file;
|
||||||
struct css_set;
|
struct css_set;
|
||||||
|
|
|
@ -141,6 +141,9 @@ struct psi_trigger {
|
||||||
* events to one per window
|
* events to one per window
|
||||||
*/
|
*/
|
||||||
u64 last_event_time;
|
u64 last_event_time;
|
||||||
|
|
||||||
|
/* Deferred event(s) from previous ratelimit window */
|
||||||
|
bool pending_event;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct psi_group {
|
struct psi_group {
|
||||||
|
|
1
include/linux/ptrace_api.h
Normal file
1
include/linux/ptrace_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/ptrace.h>
|
1
include/linux/rcuwait_api.h
Normal file
1
include/linux/rcuwait_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/rcuwait.h>
|
1
include/linux/refcount_api.h
Normal file
1
include/linux/refcount_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/refcount.h>
|
|
@ -1626,19 +1626,32 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
|
||||||
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
|
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
|
||||||
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
|
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
|
||||||
|
|
||||||
static inline unsigned int task_state_index(struct task_struct *tsk)
|
static inline unsigned int __task_state_index(unsigned int tsk_state,
|
||||||
|
unsigned int tsk_exit_state)
|
||||||
{
|
{
|
||||||
unsigned int tsk_state = READ_ONCE(tsk->__state);
|
unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT;
|
||||||
unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
|
|
||||||
|
|
||||||
BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
|
BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
|
||||||
|
|
||||||
if (tsk_state == TASK_IDLE)
|
if (tsk_state == TASK_IDLE)
|
||||||
state = TASK_REPORT_IDLE;
|
state = TASK_REPORT_IDLE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We're lying here, but rather than expose a completely new task state
|
||||||
|
* to userspace, we can make this appear as if the task has gone through
|
||||||
|
* a regular rt_mutex_lock() call.
|
||||||
|
*/
|
||||||
|
if (tsk_state == TASK_RTLOCK_WAIT)
|
||||||
|
state = TASK_UNINTERRUPTIBLE;
|
||||||
|
|
||||||
return fls(state);
|
return fls(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline unsigned int task_state_index(struct task_struct *tsk)
|
||||||
|
{
|
||||||
|
return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state);
|
||||||
|
}
|
||||||
|
|
||||||
static inline char task_index_to_char(unsigned int state)
|
static inline char task_index_to_char(unsigned int state)
|
||||||
{
|
{
|
||||||
static const char state_char[] = "RSDTtXZPI";
|
static const char state_char[] = "RSDTtXZPI";
|
||||||
|
@ -2021,7 +2034,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
|
||||||
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
|
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
|
||||||
extern int __cond_resched(void);
|
extern int __cond_resched(void);
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||||
|
|
||||||
DECLARE_STATIC_CALL(cond_resched, __cond_resched);
|
DECLARE_STATIC_CALL(cond_resched, __cond_resched);
|
||||||
|
|
||||||
|
@ -2030,6 +2043,14 @@ static __always_inline int _cond_resched(void)
|
||||||
return static_call_mod(cond_resched)();
|
return static_call_mod(cond_resched)();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||||
|
extern int dynamic_cond_resched(void);
|
||||||
|
|
||||||
|
static __always_inline int _cond_resched(void)
|
||||||
|
{
|
||||||
|
return dynamic_cond_resched();
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static inline int _cond_resched(void)
|
static inline int _cond_resched(void)
|
||||||
|
|
1
include/linux/sched/affinity.h
Normal file
1
include/linux/sched/affinity.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/sched.h>
|
1
include/linux/sched/cond_resched.h
Normal file
1
include/linux/sched/cond_resched.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/sched.h>
|
|
@ -6,6 +6,8 @@
|
||||||
* NORMAL/BATCH tasks.
|
* NORMAL/BATCH tasks.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <linux/sched.h>
|
||||||
|
|
||||||
#define MAX_DL_PRIO 0
|
#define MAX_DL_PRIO 0
|
||||||
|
|
||||||
static inline int dl_prio(int prio)
|
static inline int dl_prio(int prio)
|
||||||
|
|
|
@ -5,54 +5,55 @@
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/tick.h>
|
#include <linux/tick.h>
|
||||||
|
|
||||||
enum hk_flags {
|
enum hk_type {
|
||||||
HK_FLAG_TIMER = 1,
|
HK_TYPE_TIMER,
|
||||||
HK_FLAG_RCU = (1 << 1),
|
HK_TYPE_RCU,
|
||||||
HK_FLAG_MISC = (1 << 2),
|
HK_TYPE_MISC,
|
||||||
HK_FLAG_SCHED = (1 << 3),
|
HK_TYPE_SCHED,
|
||||||
HK_FLAG_TICK = (1 << 4),
|
HK_TYPE_TICK,
|
||||||
HK_FLAG_DOMAIN = (1 << 5),
|
HK_TYPE_DOMAIN,
|
||||||
HK_FLAG_WQ = (1 << 6),
|
HK_TYPE_WQ,
|
||||||
HK_FLAG_MANAGED_IRQ = (1 << 7),
|
HK_TYPE_MANAGED_IRQ,
|
||||||
HK_FLAG_KTHREAD = (1 << 8),
|
HK_TYPE_KTHREAD,
|
||||||
|
HK_TYPE_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef CONFIG_CPU_ISOLATION
|
#ifdef CONFIG_CPU_ISOLATION
|
||||||
DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
|
DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
|
||||||
extern int housekeeping_any_cpu(enum hk_flags flags);
|
extern int housekeeping_any_cpu(enum hk_type type);
|
||||||
extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags);
|
extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
|
||||||
extern bool housekeeping_enabled(enum hk_flags flags);
|
extern bool housekeeping_enabled(enum hk_type type);
|
||||||
extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags);
|
extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
|
||||||
extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags);
|
extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
|
||||||
extern void __init housekeeping_init(void);
|
extern void __init housekeeping_init(void);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static inline int housekeeping_any_cpu(enum hk_flags flags)
|
static inline int housekeeping_any_cpu(enum hk_type type)
|
||||||
{
|
{
|
||||||
return smp_processor_id();
|
return smp_processor_id();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
|
static inline const struct cpumask *housekeeping_cpumask(enum hk_type type)
|
||||||
{
|
{
|
||||||
return cpu_possible_mask;
|
return cpu_possible_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool housekeeping_enabled(enum hk_flags flags)
|
static inline bool housekeeping_enabled(enum hk_type type)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void housekeeping_affine(struct task_struct *t,
|
static inline void housekeeping_affine(struct task_struct *t,
|
||||||
enum hk_flags flags) { }
|
enum hk_type type) { }
|
||||||
static inline void housekeeping_init(void) { }
|
static inline void housekeeping_init(void) { }
|
||||||
#endif /* CONFIG_CPU_ISOLATION */
|
#endif /* CONFIG_CPU_ISOLATION */
|
||||||
|
|
||||||
static inline bool housekeeping_cpu(int cpu, enum hk_flags flags)
|
static inline bool housekeeping_cpu(int cpu, enum hk_type type)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_CPU_ISOLATION
|
#ifdef CONFIG_CPU_ISOLATION
|
||||||
if (static_branch_unlikely(&housekeeping_overridden))
|
if (static_branch_unlikely(&housekeeping_overridden))
|
||||||
return housekeeping_test_cpu(cpu, flags);
|
return housekeeping_test_cpu(cpu, type);
|
||||||
#endif
|
#endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
1
include/linux/sched/posix-timers.h
Normal file
1
include/linux/sched/posix-timers.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/posix-timers.h>
|
1
include/linux/sched/rseq_api.h
Normal file
1
include/linux/sched/rseq_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/rseq.h>
|
|
@ -45,10 +45,6 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default;
|
||||||
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
|
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_SCHED_AUTOGROUP
|
|
||||||
extern unsigned int sysctl_sched_autogroup_enabled;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern int sysctl_sched_rr_timeslice;
|
extern int sysctl_sched_rr_timeslice;
|
||||||
extern int sched_rr_timeslice;
|
extern int sched_rr_timeslice;
|
||||||
|
|
||||||
|
|
1
include/linux/sched/task_flags.h
Normal file
1
include/linux/sched/task_flags.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/sched.h>
|
1
include/linux/sched/thread_info_api.h
Normal file
1
include/linux/sched/thread_info_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/thread_info.h>
|
|
@ -93,6 +93,7 @@ struct sched_domain {
|
||||||
unsigned int busy_factor; /* less balancing by factor if busy */
|
unsigned int busy_factor; /* less balancing by factor if busy */
|
||||||
unsigned int imbalance_pct; /* No balance until over watermark */
|
unsigned int imbalance_pct; /* No balance until over watermark */
|
||||||
unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
|
unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
|
||||||
|
unsigned int imb_numa_nr; /* Nr running tasks that allows a NUMA imbalance */
|
||||||
|
|
||||||
int nohz_idle; /* NOHZ IDLE status */
|
int nohz_idle; /* NOHZ IDLE status */
|
||||||
int flags; /* See SD_* */
|
int flags; /* See SD_* */
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
#ifndef LINUX_SCHED_CLOCK
|
#ifndef LINUX_SCHED_CLOCK
|
||||||
#define LINUX_SCHED_CLOCK
|
#define LINUX_SCHED_CLOCK
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
#ifdef CONFIG_GENERIC_SCHED_CLOCK
|
#ifdef CONFIG_GENERIC_SCHED_CLOCK
|
||||||
/**
|
/**
|
||||||
* struct clock_read_data - data required to read from sched_clock()
|
* struct clock_read_data - data required to read from sched_clock()
|
||||||
|
|
1
include/linux/seqlock_api.h
Normal file
1
include/linux/seqlock_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/seqlock.h>
|
1
include/linux/softirq.h
Normal file
1
include/linux/softirq.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/interrupt.h>
|
1
include/linux/spinlock_api.h
Normal file
1
include/linux/spinlock_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/spinlock.h>
|
1
include/linux/swait_api.h
Normal file
1
include/linux/swait_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/swait.h>
|
1
include/linux/syscalls_api.h
Normal file
1
include/linux/syscalls_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/syscalls.h>
|
1
include/linux/u64_stats_sync_api.h
Normal file
1
include/linux/u64_stats_sync_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/u64_stats_sync.h>
|
1
include/linux/wait_api.h
Normal file
1
include/linux/wait_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/wait.h>
|
1
include/linux/workqueue_api.h
Normal file
1
include/linux/workqueue_api.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/workqueue.h>
|
|
@ -187,7 +187,9 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
|
||||||
TP_ARGS(p));
|
TP_ARGS(p));
|
||||||
|
|
||||||
#ifdef CREATE_TRACE_POINTS
|
#ifdef CREATE_TRACE_POINTS
|
||||||
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
|
static inline long __trace_sched_switch_state(bool preempt,
|
||||||
|
unsigned int prev_state,
|
||||||
|
struct task_struct *p)
|
||||||
{
|
{
|
||||||
unsigned int state;
|
unsigned int state;
|
||||||
|
|
||||||
|
@ -208,7 +210,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
|
||||||
* it for left shift operation to get the correct task->state
|
* it for left shift operation to get the correct task->state
|
||||||
* mapping.
|
* mapping.
|
||||||
*/
|
*/
|
||||||
state = task_state_index(p);
|
state = __task_state_index(prev_state, p->exit_state);
|
||||||
|
|
||||||
return state ? (1 << (state - 1)) : state;
|
return state ? (1 << (state - 1)) : state;
|
||||||
}
|
}
|
||||||
|
@ -220,10 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
|
||||||
TRACE_EVENT(sched_switch,
|
TRACE_EVENT(sched_switch,
|
||||||
|
|
||||||
TP_PROTO(bool preempt,
|
TP_PROTO(bool preempt,
|
||||||
|
unsigned int prev_state,
|
||||||
struct task_struct *prev,
|
struct task_struct *prev,
|
||||||
struct task_struct *next),
|
struct task_struct *next),
|
||||||
|
|
||||||
TP_ARGS(preempt, prev, next),
|
TP_ARGS(preempt, prev_state, prev, next),
|
||||||
|
|
||||||
TP_STRUCT__entry(
|
TP_STRUCT__entry(
|
||||||
__array( char, prev_comm, TASK_COMM_LEN )
|
__array( char, prev_comm, TASK_COMM_LEN )
|
||||||
|
@ -239,7 +242,7 @@ TRACE_EVENT(sched_switch,
|
||||||
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
|
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
|
||||||
__entry->prev_pid = prev->pid;
|
__entry->prev_pid = prev->pid;
|
||||||
__entry->prev_prio = prev->prio;
|
__entry->prev_prio = prev->prio;
|
||||||
__entry->prev_state = __trace_sched_switch_state(preempt, prev);
|
__entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev);
|
||||||
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
|
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
|
||||||
__entry->next_pid = next->pid;
|
__entry->next_pid = next->pid;
|
||||||
__entry->next_prio = next->prio;
|
__entry->next_prio = next->prio;
|
||||||
|
|
|
@ -105,23 +105,11 @@ struct rseq {
|
||||||
* Read and set by the kernel. Set by user-space with single-copy
|
* Read and set by the kernel. Set by user-space with single-copy
|
||||||
* atomicity semantics. This field should only be updated by the
|
* atomicity semantics. This field should only be updated by the
|
||||||
* thread which registered this data structure. Aligned on 64-bit.
|
* thread which registered this data structure. Aligned on 64-bit.
|
||||||
|
*
|
||||||
|
* 32-bit architectures should update the low order bits of the
|
||||||
|
* rseq_cs field, leaving the high order bits initialized to 0.
|
||||||
*/
|
*/
|
||||||
union {
|
__u64 rseq_cs;
|
||||||
__u64 ptr64;
|
|
||||||
#ifdef __LP64__
|
|
||||||
__u64 ptr;
|
|
||||||
#else
|
|
||||||
struct {
|
|
||||||
#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN)
|
|
||||||
__u32 padding; /* Initialized to zero. */
|
|
||||||
__u32 ptr32;
|
|
||||||
#else /* LITTLE */
|
|
||||||
__u32 ptr32;
|
|
||||||
__u32 padding; /* Initialized to zero. */
|
|
||||||
#endif /* ENDIAN */
|
|
||||||
} ptr;
|
|
||||||
#endif
|
|
||||||
} rseq_cs;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Restartable sequences flags field.
|
* Restartable sequences flags field.
|
||||||
|
|
|
@ -31,7 +31,8 @@ quiet_cmd_compile.h = CHK $@
|
||||||
cmd_compile.h = \
|
cmd_compile.h = \
|
||||||
$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
|
$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
|
||||||
"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)" \
|
"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)" \
|
||||||
"$(CONFIG_PREEMPT_RT)" "$(CONFIG_CC_VERSION_TEXT)" "$(LD)"
|
"$(CONFIG_PREEMPT_DYNAMIC)" "$(CONFIG_PREEMPT_RT)" \
|
||||||
|
"$(CONFIG_CC_VERSION_TEXT)" "$(LD)"
|
||||||
|
|
||||||
include/generated/compile.h: FORCE
|
include/generated/compile.h: FORCE
|
||||||
$(call cmd,compile.h)
|
$(call cmd,compile.h)
|
||||||
|
|
|
@ -96,8 +96,9 @@ config PREEMPTION
|
||||||
config PREEMPT_DYNAMIC
|
config PREEMPT_DYNAMIC
|
||||||
bool "Preemption behaviour defined on boot"
|
bool "Preemption behaviour defined on boot"
|
||||||
depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
|
depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
|
||||||
|
select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
|
||||||
select PREEMPT_BUILD
|
select PREEMPT_BUILD
|
||||||
default y
|
default y if HAVE_PREEMPT_DYNAMIC_CALL
|
||||||
help
|
help
|
||||||
This option allows to define the preemption model on the kernel
|
This option allows to define the preemption model on the kernel
|
||||||
command line parameter and thus override the default preemption
|
command line parameter and thus override the default preemption
|
||||||
|
|
|
@ -833,7 +833,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
|
||||||
update_domain_attr_tree(dattr, &top_cpuset);
|
update_domain_attr_tree(dattr, &top_cpuset);
|
||||||
}
|
}
|
||||||
cpumask_and(doms[0], top_cpuset.effective_cpus,
|
cpumask_and(doms[0], top_cpuset.effective_cpus,
|
||||||
housekeeping_cpumask(HK_FLAG_DOMAIN));
|
housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||||
|
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
@ -863,7 +863,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
|
||||||
if (!cpumask_empty(cp->cpus_allowed) &&
|
if (!cpumask_empty(cp->cpus_allowed) &&
|
||||||
!(is_sched_load_balance(cp) &&
|
!(is_sched_load_balance(cp) &&
|
||||||
cpumask_intersects(cp->cpus_allowed,
|
cpumask_intersects(cp->cpus_allowed,
|
||||||
housekeeping_cpumask(HK_FLAG_DOMAIN))))
|
housekeeping_cpumask(HK_TYPE_DOMAIN))))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (root_load_balance &&
|
if (root_load_balance &&
|
||||||
|
@ -952,7 +952,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
|
||||||
|
|
||||||
if (apn == b->pn) {
|
if (apn == b->pn) {
|
||||||
cpumask_or(dp, dp, b->effective_cpus);
|
cpumask_or(dp, dp, b->effective_cpus);
|
||||||
cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
|
cpumask_and(dp, dp, housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||||
if (dattr)
|
if (dattr)
|
||||||
update_domain_attr_tree(dattr + nslot, b);
|
update_domain_attr_tree(dattr + nslot, b);
|
||||||
|
|
||||||
|
|
|
@ -1489,8 +1489,8 @@ int freeze_secondary_cpus(int primary)
|
||||||
cpu_maps_update_begin();
|
cpu_maps_update_begin();
|
||||||
if (primary == -1) {
|
if (primary == -1) {
|
||||||
primary = cpumask_first(cpu_online_mask);
|
primary = cpumask_first(cpu_online_mask);
|
||||||
if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
|
if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
|
||||||
primary = housekeeping_any_cpu(HK_FLAG_TIMER);
|
primary = housekeeping_any_cpu(HK_TYPE_TIMER);
|
||||||
} else {
|
} else {
|
||||||
if (!cpu_online(primary))
|
if (!cpu_online(primary))
|
||||||
primary = cpumask_first(cpu_online_mask);
|
primary = cpumask_first(cpu_online_mask);
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#include <linux/context_tracking.h>
|
#include <linux/context_tracking.h>
|
||||||
#include <linux/entry-common.h>
|
#include <linux/entry-common.h>
|
||||||
#include <linux/highmem.h>
|
#include <linux/highmem.h>
|
||||||
|
#include <linux/jump_label.h>
|
||||||
#include <linux/livepatch.h>
|
#include <linux/livepatch.h>
|
||||||
#include <linux/audit.h>
|
#include <linux/audit.h>
|
||||||
#include <linux/tick.h>
|
#include <linux/tick.h>
|
||||||
|
@ -394,7 +395,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void irqentry_exit_cond_resched(void)
|
void raw_irqentry_exit_cond_resched(void)
|
||||||
{
|
{
|
||||||
if (!preempt_count()) {
|
if (!preempt_count()) {
|
||||||
/* Sanity check RCU and thread stack */
|
/* Sanity check RCU and thread stack */
|
||||||
|
@ -406,7 +407,17 @@ void irqentry_exit_cond_resched(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
|
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||||
|
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
|
||||||
|
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||||
|
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
||||||
|
void dynamic_irqentry_exit_cond_resched(void)
|
||||||
|
{
|
||||||
|
if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
|
||||||
|
return;
|
||||||
|
raw_irqentry_exit_cond_resched();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
|
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
|
||||||
|
@ -434,13 +445,9 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
|
||||||
}
|
}
|
||||||
|
|
||||||
instrumentation_begin();
|
instrumentation_begin();
|
||||||
if (IS_ENABLED(CONFIG_PREEMPTION)) {
|
if (IS_ENABLED(CONFIG_PREEMPTION))
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
|
||||||
static_call(irqentry_exit_cond_resched)();
|
|
||||||
#else
|
|
||||||
irqentry_exit_cond_resched();
|
irqentry_exit_cond_resched();
|
||||||
#endif
|
|
||||||
}
|
|
||||||
/* Covers both tracing and lockdep */
|
/* Covers both tracing and lockdep */
|
||||||
trace_hardirqs_on();
|
trace_hardirqs_on();
|
||||||
instrumentation_end();
|
instrumentation_end();
|
||||||
|
|
|
@ -176,10 +176,10 @@ static bool hk_should_isolate(struct irq_data *data, unsigned int cpu)
|
||||||
{
|
{
|
||||||
const struct cpumask *hk_mask;
|
const struct cpumask *hk_mask;
|
||||||
|
|
||||||
if (!housekeeping_enabled(HK_FLAG_MANAGED_IRQ))
|
if (!housekeeping_enabled(HK_TYPE_MANAGED_IRQ))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
|
hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
|
||||||
if (cpumask_subset(irq_data_get_effective_affinity_mask(data), hk_mask))
|
if (cpumask_subset(irq_data_get_effective_affinity_mask(data), hk_mask))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
|
@ -247,13 +247,13 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
|
||||||
* online.
|
* online.
|
||||||
*/
|
*/
|
||||||
if (irqd_affinity_is_managed(data) &&
|
if (irqd_affinity_is_managed(data) &&
|
||||||
housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) {
|
housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) {
|
||||||
const struct cpumask *hk_mask, *prog_mask;
|
const struct cpumask *hk_mask, *prog_mask;
|
||||||
|
|
||||||
static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
|
static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
|
||||||
static struct cpumask tmp_mask;
|
static struct cpumask tmp_mask;
|
||||||
|
|
||||||
hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
|
hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
|
||||||
|
|
||||||
raw_spin_lock(&tmp_mask_lock);
|
raw_spin_lock(&tmp_mask_lock);
|
||||||
cpumask_and(&tmp_mask, mask, hk_mask);
|
cpumask_and(&tmp_mask, mask, hk_mask);
|
||||||
|
|
|
@ -356,7 +356,7 @@ static int kthread(void *_create)
|
||||||
* back to default in case they have been changed.
|
* back to default in case they have been changed.
|
||||||
*/
|
*/
|
||||||
sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
|
sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
|
||||||
set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD));
|
set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
|
||||||
|
|
||||||
/* OK, tell user we're spawned, wait for stop or wakeup */
|
/* OK, tell user we're spawned, wait for stop or wakeup */
|
||||||
__set_current_state(TASK_UNINTERRUPTIBLE);
|
__set_current_state(TASK_UNINTERRUPTIBLE);
|
||||||
|
@ -722,7 +722,7 @@ int kthreadd(void *unused)
|
||||||
/* Setup a clean context for our children to inherit. */
|
/* Setup a clean context for our children to inherit. */
|
||||||
set_task_comm(tsk, "kthreadd");
|
set_task_comm(tsk, "kthreadd");
|
||||||
ignore_signals(tsk);
|
ignore_signals(tsk);
|
||||||
set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_FLAG_KTHREAD));
|
set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
|
||||||
set_mems_allowed(node_states[N_MEMORY]);
|
set_mems_allowed(node_states[N_MEMORY]);
|
||||||
|
|
||||||
current->flags |= PF_NOFREEZE;
|
current->flags |= PF_NOFREEZE;
|
||||||
|
|
|
@ -496,7 +496,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
|
||||||
struct rcu_tasks *rtp = arg;
|
struct rcu_tasks *rtp = arg;
|
||||||
|
|
||||||
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
|
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
|
||||||
housekeeping_affine(current, HK_FLAG_RCU);
|
housekeeping_affine(current, HK_TYPE_RCU);
|
||||||
WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
|
WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1218,9 +1218,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
|
||||||
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
|
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
|
||||||
cpu != outgoingcpu)
|
cpu != outgoingcpu)
|
||||||
cpumask_set_cpu(cpu, cm);
|
cpumask_set_cpu(cpu, cm);
|
||||||
cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU));
|
cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
|
||||||
if (cpumask_empty(cm))
|
if (cpumask_empty(cm))
|
||||||
cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
|
cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
|
||||||
set_cpus_allowed_ptr(t, cm);
|
set_cpus_allowed_ptr(t, cm);
|
||||||
mutex_unlock(&rnp->boost_kthread_mutex);
|
mutex_unlock(&rnp->boost_kthread_mutex);
|
||||||
free_cpumask_var(cm);
|
free_cpumask_var(cm);
|
||||||
|
@ -1296,7 +1296,7 @@ static void rcu_bind_gp_kthread(void)
|
||||||
{
|
{
|
||||||
if (!tick_nohz_full_enabled())
|
if (!tick_nohz_full_enabled())
|
||||||
return;
|
return;
|
||||||
housekeeping_affine(current, HK_FLAG_RCU);
|
housekeeping_affine(current, HK_TYPE_RCU);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Record the current task on dyntick-idle entry. */
|
/* Record the current task on dyntick-idle entry. */
|
||||||
|
|
|
@ -128,10 +128,10 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
#ifdef CONFIG_64BIT
|
#ifdef CONFIG_64BIT
|
||||||
if (get_user(ptr, &t->rseq->rseq_cs.ptr64))
|
if (get_user(ptr, &t->rseq->rseq_cs))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
#else
|
#else
|
||||||
if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
|
if (copy_from_user(&ptr, &t->rseq->rseq_cs, sizeof(ptr)))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
#endif
|
#endif
|
||||||
if (!ptr) {
|
if (!ptr) {
|
||||||
|
@ -217,9 +217,9 @@ static int clear_rseq_cs(struct task_struct *t)
|
||||||
* Set rseq_cs to NULL.
|
* Set rseq_cs to NULL.
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_64BIT
|
#ifdef CONFIG_64BIT
|
||||||
return put_user(0UL, &t->rseq->rseq_cs.ptr64);
|
return put_user(0UL, &t->rseq->rseq_cs);
|
||||||
#else
|
#else
|
||||||
if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
|
if (clear_user(&t->rseq->rseq_cs, sizeof(t->rseq->rseq_cs)))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,7 +1,4 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0
|
# SPDX-License-Identifier: GPL-2.0
|
||||||
ifdef CONFIG_FUNCTION_TRACER
|
|
||||||
CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# The compilers are complaining about unused variables inside an if(0) scope
|
# The compilers are complaining about unused variables inside an if(0) scope
|
||||||
# block. This is daft, shut them up.
|
# block. This is daft, shut them up.
|
||||||
|
@ -25,18 +22,13 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
|
||||||
CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
|
CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
|
||||||
endif
|
endif
|
||||||
|
|
||||||
obj-y += core.o loadavg.o clock.o cputime.o
|
#
|
||||||
obj-y += idle.o fair.o rt.o deadline.o
|
# Build efficiency:
|
||||||
obj-y += wait.o wait_bit.o swait.o completion.o
|
#
|
||||||
|
# These compilation units have roughly the same size and complexity - so their
|
||||||
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
|
# build parallelizes well and finishes roughly at once:
|
||||||
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
|
#
|
||||||
obj-$(CONFIG_SCHEDSTATS) += stats.o
|
obj-y += core.o
|
||||||
obj-$(CONFIG_SCHED_DEBUG) += debug.o
|
obj-y += fair.o
|
||||||
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
|
obj-y += build_policy.o
|
||||||
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
|
obj-y += build_utility.o
|
||||||
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
|
|
||||||
obj-$(CONFIG_MEMBARRIER) += membarrier.o
|
|
||||||
obj-$(CONFIG_CPU_ISOLATION) += isolation.o
|
|
||||||
obj-$(CONFIG_PSI) += psi.o
|
|
||||||
obj-$(CONFIG_SCHED_CORE) += core_sched.o
|
|
||||||
|
|
|
@ -1,14 +1,35 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Auto-group scheduling implementation:
|
* Auto-group scheduling implementation:
|
||||||
*/
|
*/
|
||||||
#include <linux/nospec.h>
|
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
|
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
|
||||||
static struct autogroup autogroup_default;
|
static struct autogroup autogroup_default;
|
||||||
static atomic_t autogroup_seq_nr;
|
static atomic_t autogroup_seq_nr;
|
||||||
|
|
||||||
|
#ifdef CONFIG_SYSCTL
|
||||||
|
static struct ctl_table sched_autogroup_sysctls[] = {
|
||||||
|
{
|
||||||
|
.procname = "sched_autogroup_enabled",
|
||||||
|
.data = &sysctl_sched_autogroup_enabled,
|
||||||
|
.maxlen = sizeof(unsigned int),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec_minmax,
|
||||||
|
.extra1 = SYSCTL_ZERO,
|
||||||
|
.extra2 = SYSCTL_ONE,
|
||||||
|
},
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
static void __init sched_autogroup_sysctl_init(void)
|
||||||
|
{
|
||||||
|
register_sysctl_init("kernel", sched_autogroup_sysctls);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define sched_autogroup_sysctl_init() do { } while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
void __init autogroup_init(struct task_struct *init_task)
|
void __init autogroup_init(struct task_struct *init_task)
|
||||||
{
|
{
|
||||||
autogroup_default.tg = &root_task_group;
|
autogroup_default.tg = &root_task_group;
|
||||||
|
@ -198,6 +219,7 @@ void sched_autogroup_exit(struct signal_struct *sig)
|
||||||
static int __init setup_autogroup(char *str)
|
static int __init setup_autogroup(char *str)
|
||||||
{
|
{
|
||||||
sysctl_sched_autogroup_enabled = 0;
|
sysctl_sched_autogroup_enabled = 0;
|
||||||
|
sched_autogroup_sysctl_init();
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
#ifndef _KERNEL_SCHED_AUTOGROUP_H
|
||||||
|
#define _KERNEL_SCHED_AUTOGROUP_H
|
||||||
|
|
||||||
#ifdef CONFIG_SCHED_AUTOGROUP
|
#ifdef CONFIG_SCHED_AUTOGROUP
|
||||||
|
|
||||||
struct autogroup {
|
struct autogroup {
|
||||||
|
@ -27,6 +30,7 @@ extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
|
||||||
static inline struct task_group *
|
static inline struct task_group *
|
||||||
autogroup_task_group(struct task_struct *p, struct task_group *tg)
|
autogroup_task_group(struct task_struct *p, struct task_group *tg)
|
||||||
{
|
{
|
||||||
|
extern unsigned int sysctl_sched_autogroup_enabled;
|
||||||
int enabled = READ_ONCE(sysctl_sched_autogroup_enabled);
|
int enabled = READ_ONCE(sysctl_sched_autogroup_enabled);
|
||||||
|
|
||||||
if (enabled && task_wants_autogroup(p, tg))
|
if (enabled && task_wants_autogroup(p, tg))
|
||||||
|
@ -58,3 +62,5 @@ static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* CONFIG_SCHED_AUTOGROUP */
|
#endif /* CONFIG_SCHED_AUTOGROUP */
|
||||||
|
|
||||||
|
#endif /* _KERNEL_SCHED_AUTOGROUP_H */
|
||||||
|
|
52
kernel/sched/build_policy.c
Normal file
52
kernel/sched/build_policy.c
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
/*
|
||||||
|
* These are the scheduling policy related scheduler files, built
|
||||||
|
* in a single compilation unit for build efficiency reasons.
|
||||||
|
*
|
||||||
|
* ( Incidentally, the size of the compilation unit is roughly
|
||||||
|
* comparable to core.c and fair.c, the other two big
|
||||||
|
* compilation units. This helps balance build time, while
|
||||||
|
* coalescing source files to amortize header inclusion
|
||||||
|
* cost. )
|
||||||
|
*
|
||||||
|
* core.c and fair.c are built separately.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Headers: */
|
||||||
|
#include <linux/sched/clock.h>
|
||||||
|
#include <linux/sched/cputime.h>
|
||||||
|
#include <linux/sched/posix-timers.h>
|
||||||
|
#include <linux/sched/rt.h>
|
||||||
|
|
||||||
|
#include <linux/cpuidle.h>
|
||||||
|
#include <linux/jiffies.h>
|
||||||
|
#include <linux/livepatch.h>
|
||||||
|
#include <linux/psi.h>
|
||||||
|
#include <linux/seqlock_api.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/suspend.h>
|
||||||
|
#include <linux/tsacct_kern.h>
|
||||||
|
#include <linux/vtime.h>
|
||||||
|
|
||||||
|
#include <uapi/linux/sched/types.h>
|
||||||
|
|
||||||
|
#include "sched.h"
|
||||||
|
|
||||||
|
#include "autogroup.h"
|
||||||
|
#include "stats.h"
|
||||||
|
#include "pelt.h"
|
||||||
|
|
||||||
|
/* Source code modules: */
|
||||||
|
|
||||||
|
#include "idle.c"
|
||||||
|
|
||||||
|
#include "rt.c"
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
# include "cpudeadline.c"
|
||||||
|
# include "pelt.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "cputime.c"
|
||||||
|
#include "deadline.c"
|
||||||
|
|
109
kernel/sched/build_utility.c
Normal file
109
kernel/sched/build_utility.c
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
/*
|
||||||
|
* These are various utility functions of the scheduler,
|
||||||
|
* built in a single compilation unit for build efficiency reasons.
|
||||||
|
*
|
||||||
|
* ( Incidentally, the size of the compilation unit is roughly
|
||||||
|
* comparable to core.c, fair.c, smp.c and policy.c, the other
|
||||||
|
* big compilation units. This helps balance build time, while
|
||||||
|
* coalescing source files to amortize header inclusion
|
||||||
|
* cost. )
|
||||||
|
*/
|
||||||
|
#include <linux/sched/clock.h>
|
||||||
|
#include <linux/sched/cputime.h>
|
||||||
|
#include <linux/sched/debug.h>
|
||||||
|
#include <linux/sched/isolation.h>
|
||||||
|
#include <linux/sched/loadavg.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
|
#include <linux/sched/rseq_api.h>
|
||||||
|
#include <linux/sched/task_stack.h>
|
||||||
|
|
||||||
|
#include <linux/cpufreq.h>
|
||||||
|
#include <linux/cpumask_api.h>
|
||||||
|
#include <linux/cpuset.h>
|
||||||
|
#include <linux/ctype.h>
|
||||||
|
#include <linux/debugfs.h>
|
||||||
|
#include <linux/energy_model.h>
|
||||||
|
#include <linux/hashtable_api.h>
|
||||||
|
#include <linux/irq.h>
|
||||||
|
#include <linux/kobject_api.h>
|
||||||
|
#include <linux/membarrier.h>
|
||||||
|
#include <linux/mempolicy.h>
|
||||||
|
#include <linux/nmi.h>
|
||||||
|
#include <linux/nospec.h>
|
||||||
|
#include <linux/proc_fs.h>
|
||||||
|
#include <linux/psi.h>
|
||||||
|
#include <linux/psi.h>
|
||||||
|
#include <linux/ptrace_api.h>
|
||||||
|
#include <linux/sched_clock.h>
|
||||||
|
#include <linux/security.h>
|
||||||
|
#include <linux/spinlock_api.h>
|
||||||
|
#include <linux/swait_api.h>
|
||||||
|
#include <linux/timex.h>
|
||||||
|
#include <linux/utsname.h>
|
||||||
|
#include <linux/wait_api.h>
|
||||||
|
#include <linux/workqueue_api.h>
|
||||||
|
|
||||||
|
#include <uapi/linux/prctl.h>
|
||||||
|
#include <uapi/linux/sched/types.h>
|
||||||
|
|
||||||
|
#include <asm/switch_to.h>
|
||||||
|
|
||||||
|
#include "sched.h"
|
||||||
|
#include "sched-pelt.h"
|
||||||
|
#include "stats.h"
|
||||||
|
#include "autogroup.h"
|
||||||
|
|
||||||
|
#include "clock.c"
|
||||||
|
|
||||||
|
#ifdef CONFIG_CGROUP_CPUACCT
|
||||||
|
# include "cpuacct.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_CPU_FREQ
|
||||||
|
# include "cpufreq.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
|
||||||
|
# include "cpufreq_schedutil.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_SCHED_DEBUG
|
||||||
|
# include "debug.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_SCHEDSTATS
|
||||||
|
# include "stats.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "loadavg.c"
|
||||||
|
#include "completion.c"
|
||||||
|
#include "swait.c"
|
||||||
|
#include "wait_bit.c"
|
||||||
|
#include "wait.c"
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
# include "cpupri.c"
|
||||||
|
# include "stop_task.c"
|
||||||
|
# include "topology.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_SCHED_CORE
|
||||||
|
# include "core_sched.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_PSI
|
||||||
|
# include "psi.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_MEMBARRIER
|
||||||
|
# include "membarrier.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_CPU_ISOLATION
|
||||||
|
# include "isolation.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_SCHED_AUTOGROUP
|
||||||
|
# include "autogroup.c"
|
||||||
|
#endif
|
|
@ -53,15 +53,13 @@
|
||||||
* that is otherwise invisible (TSC gets stopped).
|
* that is otherwise invisible (TSC gets stopped).
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
#include <linux/sched_clock.h>
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Scheduler clock - returns current time in nanosec units.
|
* Scheduler clock - returns current time in nanosec units.
|
||||||
* This is the default implementation.
|
* This is the default implementation.
|
||||||
* Architectures and sub-architectures can override this.
|
* Architectures and sub-architectures can override this.
|
||||||
*/
|
*/
|
||||||
unsigned long long __weak sched_clock(void)
|
notrace unsigned long long __weak sched_clock(void)
|
||||||
{
|
{
|
||||||
return (unsigned long long)(jiffies - INITIAL_JIFFIES)
|
return (unsigned long long)(jiffies - INITIAL_JIFFIES)
|
||||||
* (NSEC_PER_SEC / HZ);
|
* (NSEC_PER_SEC / HZ);
|
||||||
|
@ -95,28 +93,28 @@ struct sched_clock_data {
|
||||||
|
|
||||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
|
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
|
||||||
|
|
||||||
static inline struct sched_clock_data *this_scd(void)
|
notrace static inline struct sched_clock_data *this_scd(void)
|
||||||
{
|
{
|
||||||
return this_cpu_ptr(&sched_clock_data);
|
return this_cpu_ptr(&sched_clock_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline struct sched_clock_data *cpu_sdc(int cpu)
|
notrace static inline struct sched_clock_data *cpu_sdc(int cpu)
|
||||||
{
|
{
|
||||||
return &per_cpu(sched_clock_data, cpu);
|
return &per_cpu(sched_clock_data, cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
int sched_clock_stable(void)
|
notrace int sched_clock_stable(void)
|
||||||
{
|
{
|
||||||
return static_branch_likely(&__sched_clock_stable);
|
return static_branch_likely(&__sched_clock_stable);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __scd_stamp(struct sched_clock_data *scd)
|
notrace static void __scd_stamp(struct sched_clock_data *scd)
|
||||||
{
|
{
|
||||||
scd->tick_gtod = ktime_get_ns();
|
scd->tick_gtod = ktime_get_ns();
|
||||||
scd->tick_raw = sched_clock();
|
scd->tick_raw = sched_clock();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __set_sched_clock_stable(void)
|
notrace static void __set_sched_clock_stable(void)
|
||||||
{
|
{
|
||||||
struct sched_clock_data *scd;
|
struct sched_clock_data *scd;
|
||||||
|
|
||||||
|
@ -151,7 +149,7 @@ static void __set_sched_clock_stable(void)
|
||||||
* The only way to fully avoid random clock jumps is to boot with:
|
* The only way to fully avoid random clock jumps is to boot with:
|
||||||
* "tsc=unstable".
|
* "tsc=unstable".
|
||||||
*/
|
*/
|
||||||
static void __sched_clock_work(struct work_struct *work)
|
notrace static void __sched_clock_work(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct sched_clock_data *scd;
|
struct sched_clock_data *scd;
|
||||||
int cpu;
|
int cpu;
|
||||||
|
@ -177,7 +175,7 @@ static void __sched_clock_work(struct work_struct *work)
|
||||||
|
|
||||||
static DECLARE_WORK(sched_clock_work, __sched_clock_work);
|
static DECLARE_WORK(sched_clock_work, __sched_clock_work);
|
||||||
|
|
||||||
static void __clear_sched_clock_stable(void)
|
notrace static void __clear_sched_clock_stable(void)
|
||||||
{
|
{
|
||||||
if (!sched_clock_stable())
|
if (!sched_clock_stable())
|
||||||
return;
|
return;
|
||||||
|
@ -186,7 +184,7 @@ static void __clear_sched_clock_stable(void)
|
||||||
schedule_work(&sched_clock_work);
|
schedule_work(&sched_clock_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear_sched_clock_stable(void)
|
notrace void clear_sched_clock_stable(void)
|
||||||
{
|
{
|
||||||
__sched_clock_stable_early = 0;
|
__sched_clock_stable_early = 0;
|
||||||
|
|
||||||
|
@ -196,7 +194,7 @@ void clear_sched_clock_stable(void)
|
||||||
__clear_sched_clock_stable();
|
__clear_sched_clock_stable();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __sched_clock_gtod_offset(void)
|
notrace static void __sched_clock_gtod_offset(void)
|
||||||
{
|
{
|
||||||
struct sched_clock_data *scd = this_scd();
|
struct sched_clock_data *scd = this_scd();
|
||||||
|
|
||||||
|
@ -246,12 +244,12 @@ late_initcall(sched_clock_init_late);
|
||||||
* min, max except they take wrapping into account
|
* min, max except they take wrapping into account
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static inline u64 wrap_min(u64 x, u64 y)
|
notrace static inline u64 wrap_min(u64 x, u64 y)
|
||||||
{
|
{
|
||||||
return (s64)(x - y) < 0 ? x : y;
|
return (s64)(x - y) < 0 ? x : y;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline u64 wrap_max(u64 x, u64 y)
|
notrace static inline u64 wrap_max(u64 x, u64 y)
|
||||||
{
|
{
|
||||||
return (s64)(x - y) > 0 ? x : y;
|
return (s64)(x - y) > 0 ? x : y;
|
||||||
}
|
}
|
||||||
|
@ -262,7 +260,7 @@ static inline u64 wrap_max(u64 x, u64 y)
|
||||||
* - filter out backward motion
|
* - filter out backward motion
|
||||||
* - use the GTOD tick value to create a window to filter crazy TSC values
|
* - use the GTOD tick value to create a window to filter crazy TSC values
|
||||||
*/
|
*/
|
||||||
static u64 sched_clock_local(struct sched_clock_data *scd)
|
notrace static u64 sched_clock_local(struct sched_clock_data *scd)
|
||||||
{
|
{
|
||||||
u64 now, clock, old_clock, min_clock, max_clock, gtod;
|
u64 now, clock, old_clock, min_clock, max_clock, gtod;
|
||||||
s64 delta;
|
s64 delta;
|
||||||
|
@ -295,7 +293,7 @@ static u64 sched_clock_local(struct sched_clock_data *scd)
|
||||||
return clock;
|
return clock;
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 sched_clock_remote(struct sched_clock_data *scd)
|
notrace static u64 sched_clock_remote(struct sched_clock_data *scd)
|
||||||
{
|
{
|
||||||
struct sched_clock_data *my_scd = this_scd();
|
struct sched_clock_data *my_scd = this_scd();
|
||||||
u64 this_clock, remote_clock;
|
u64 this_clock, remote_clock;
|
||||||
|
@ -362,7 +360,7 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
|
||||||
*
|
*
|
||||||
* See cpu_clock().
|
* See cpu_clock().
|
||||||
*/
|
*/
|
||||||
u64 sched_clock_cpu(int cpu)
|
notrace u64 sched_clock_cpu(int cpu)
|
||||||
{
|
{
|
||||||
struct sched_clock_data *scd;
|
struct sched_clock_data *scd;
|
||||||
u64 clock;
|
u64 clock;
|
||||||
|
@ -386,7 +384,7 @@ u64 sched_clock_cpu(int cpu)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(sched_clock_cpu);
|
EXPORT_SYMBOL_GPL(sched_clock_cpu);
|
||||||
|
|
||||||
void sched_clock_tick(void)
|
notrace void sched_clock_tick(void)
|
||||||
{
|
{
|
||||||
struct sched_clock_data *scd;
|
struct sched_clock_data *scd;
|
||||||
|
|
||||||
|
@ -403,7 +401,7 @@ void sched_clock_tick(void)
|
||||||
sched_clock_local(scd);
|
sched_clock_local(scd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sched_clock_tick_stable(void)
|
notrace void sched_clock_tick_stable(void)
|
||||||
{
|
{
|
||||||
if (!sched_clock_stable())
|
if (!sched_clock_stable())
|
||||||
return;
|
return;
|
||||||
|
@ -423,7 +421,7 @@ void sched_clock_tick_stable(void)
|
||||||
/*
|
/*
|
||||||
* We are going deep-idle (irqs are disabled):
|
* We are going deep-idle (irqs are disabled):
|
||||||
*/
|
*/
|
||||||
void sched_clock_idle_sleep_event(void)
|
notrace void sched_clock_idle_sleep_event(void)
|
||||||
{
|
{
|
||||||
sched_clock_cpu(smp_processor_id());
|
sched_clock_cpu(smp_processor_id());
|
||||||
}
|
}
|
||||||
|
@ -432,7 +430,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
|
||||||
/*
|
/*
|
||||||
* We just idled; resync with ktime.
|
* We just idled; resync with ktime.
|
||||||
*/
|
*/
|
||||||
void sched_clock_idle_wakeup_event(void)
|
notrace void sched_clock_idle_wakeup_event(void)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
|
@ -458,7 +456,7 @@ void __init sched_clock_init(void)
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 sched_clock_cpu(int cpu)
|
notrace u64 sched_clock_cpu(int cpu)
|
||||||
{
|
{
|
||||||
if (!static_branch_likely(&sched_clock_running))
|
if (!static_branch_likely(&sched_clock_running))
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -476,7 +474,7 @@ u64 sched_clock_cpu(int cpu)
|
||||||
* On bare metal this function should return the same as local_clock.
|
* On bare metal this function should return the same as local_clock.
|
||||||
* Architectures and sub-architectures can override this.
|
* Architectures and sub-architectures can override this.
|
||||||
*/
|
*/
|
||||||
u64 __weak running_clock(void)
|
notrace u64 __weak running_clock(void)
|
||||||
{
|
{
|
||||||
return local_clock();
|
return local_clock();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generic wait-for-completion handler;
|
* Generic wait-for-completion handler;
|
||||||
*
|
*
|
||||||
|
@ -11,7 +12,6 @@
|
||||||
* typically be used for exclusion which gives rise to priority inversion.
|
* typically be used for exclusion which gives rise to priority inversion.
|
||||||
* Waiting for completion is typically a sync point, but not an exclusion point.
|
* Waiting for completion is typically a sync point, but not an exclusion point.
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* complete: - signals a single thread waiting on this completion
|
* complete: - signals a single thread waiting on this completion
|
||||||
|
|
|
@ -6,27 +6,91 @@
|
||||||
*
|
*
|
||||||
* Copyright (C) 1991-2002 Linus Torvalds
|
* Copyright (C) 1991-2002 Linus Torvalds
|
||||||
*/
|
*/
|
||||||
#define CREATE_TRACE_POINTS
|
#include <linux/highmem.h>
|
||||||
#include <trace/events/sched.h>
|
#include <linux/hrtimer_api.h>
|
||||||
#undef CREATE_TRACE_POINTS
|
#include <linux/ktime_api.h>
|
||||||
|
#include <linux/sched/signal.h>
|
||||||
|
#include <linux/syscalls_api.h>
|
||||||
|
#include <linux/debug_locks.h>
|
||||||
|
#include <linux/prefetch.h>
|
||||||
|
#include <linux/capability.h>
|
||||||
|
#include <linux/pgtable_api.h>
|
||||||
|
#include <linux/wait_bit.h>
|
||||||
|
#include <linux/jiffies.h>
|
||||||
|
#include <linux/spinlock_api.h>
|
||||||
|
#include <linux/cpumask_api.h>
|
||||||
|
#include <linux/lockdep_api.h>
|
||||||
|
#include <linux/hardirq.h>
|
||||||
|
#include <linux/softirq.h>
|
||||||
|
#include <linux/refcount_api.h>
|
||||||
|
#include <linux/topology.h>
|
||||||
|
#include <linux/sched/clock.h>
|
||||||
|
#include <linux/sched/cond_resched.h>
|
||||||
|
#include <linux/sched/debug.h>
|
||||||
|
#include <linux/sched/isolation.h>
|
||||||
|
#include <linux/sched/loadavg.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
|
#include <linux/sched/nohz.h>
|
||||||
|
#include <linux/sched/rseq_api.h>
|
||||||
|
#include <linux/sched/rt.h>
|
||||||
|
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
#include <linux/nospec.h>
|
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
|
#include <linux/context_tracking.h>
|
||||||
|
#include <linux/cpuset.h>
|
||||||
|
#include <linux/delayacct.h>
|
||||||
|
#include <linux/init_task.h>
|
||||||
|
#include <linux/interrupt.h>
|
||||||
|
#include <linux/ioprio.h>
|
||||||
|
#include <linux/kallsyms.h>
|
||||||
#include <linux/kcov.h>
|
#include <linux/kcov.h>
|
||||||
|
#include <linux/kprobes.h>
|
||||||
|
#include <linux/llist_api.h>
|
||||||
|
#include <linux/mmu_context.h>
|
||||||
|
#include <linux/mmzone.h>
|
||||||
|
#include <linux/mutex_api.h>
|
||||||
|
#include <linux/nmi.h>
|
||||||
|
#include <linux/nospec.h>
|
||||||
|
#include <linux/perf_event_api.h>
|
||||||
|
#include <linux/profile.h>
|
||||||
|
#include <linux/psi.h>
|
||||||
|
#include <linux/rcuwait_api.h>
|
||||||
|
#include <linux/sched/wake_q.h>
|
||||||
#include <linux/scs.h>
|
#include <linux/scs.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/syscalls.h>
|
||||||
|
#include <linux/vtime.h>
|
||||||
|
#include <linux/wait_api.h>
|
||||||
|
#include <linux/workqueue_api.h>
|
||||||
|
|
||||||
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
|
# ifdef CONFIG_GENERIC_ENTRY
|
||||||
|
# include <linux/entry-common.h>
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <uapi/linux/sched/types.h>
|
||||||
|
|
||||||
#include <asm/switch_to.h>
|
#include <asm/switch_to.h>
|
||||||
#include <asm/tlb.h>
|
#include <asm/tlb.h>
|
||||||
|
|
||||||
|
#define CREATE_TRACE_POINTS
|
||||||
|
#include <linux/sched/rseq_api.h>
|
||||||
|
#include <trace/events/sched.h>
|
||||||
|
#undef CREATE_TRACE_POINTS
|
||||||
|
|
||||||
|
#include "sched.h"
|
||||||
|
#include "stats.h"
|
||||||
|
#include "autogroup.h"
|
||||||
|
|
||||||
|
#include "autogroup.h"
|
||||||
|
#include "pelt.h"
|
||||||
|
#include "smp.h"
|
||||||
|
#include "stats.h"
|
||||||
|
|
||||||
#include "../workqueue_internal.h"
|
#include "../workqueue_internal.h"
|
||||||
#include "../../fs/io-wq.h"
|
#include "../../fs/io-wq.h"
|
||||||
#include "../smpboot.h"
|
#include "../smpboot.h"
|
||||||
|
|
||||||
#include "pelt.h"
|
|
||||||
#include "smp.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Export tracepoints that act as a bare tracehook (ie: have no trace event
|
* Export tracepoints that act as a bare tracehook (ie: have no trace event
|
||||||
* associated with them) to allow external modules to probe them.
|
* associated with them) to allow external modules to probe them.
|
||||||
|
@ -36,6 +100,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
|
||||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
|
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
|
||||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
|
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
|
||||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
|
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
|
||||||
|
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp);
|
||||||
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp);
|
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp);
|
||||||
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
|
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
|
||||||
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
|
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
|
||||||
|
@ -1024,13 +1089,13 @@ int get_nohz_timer_target(void)
|
||||||
struct sched_domain *sd;
|
struct sched_domain *sd;
|
||||||
const struct cpumask *hk_mask;
|
const struct cpumask *hk_mask;
|
||||||
|
|
||||||
if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
|
if (housekeeping_cpu(cpu, HK_TYPE_TIMER)) {
|
||||||
if (!idle_cpu(cpu))
|
if (!idle_cpu(cpu))
|
||||||
return cpu;
|
return cpu;
|
||||||
default_cpu = cpu;
|
default_cpu = cpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
hk_mask = housekeeping_cpumask(HK_FLAG_TIMER);
|
hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
for_each_domain(cpu, sd) {
|
for_each_domain(cpu, sd) {
|
||||||
|
@ -1046,7 +1111,7 @@ int get_nohz_timer_target(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (default_cpu == -1)
|
if (default_cpu == -1)
|
||||||
default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
|
default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
|
||||||
cpu = default_cpu;
|
cpu = default_cpu;
|
||||||
unlock:
|
unlock:
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
@ -4834,7 +4899,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
|
||||||
{
|
{
|
||||||
struct rq *rq = this_rq();
|
struct rq *rq = this_rq();
|
||||||
struct mm_struct *mm = rq->prev_mm;
|
struct mm_struct *mm = rq->prev_mm;
|
||||||
long prev_state;
|
unsigned int prev_state;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The previous task will have left us with a preempt_count of 2
|
* The previous task will have left us with a preempt_count of 2
|
||||||
|
@ -5379,7 +5444,7 @@ static void sched_tick_start(int cpu)
|
||||||
int os;
|
int os;
|
||||||
struct tick_work *twork;
|
struct tick_work *twork;
|
||||||
|
|
||||||
if (housekeeping_cpu(cpu, HK_FLAG_TICK))
|
if (housekeeping_cpu(cpu, HK_TYPE_TICK))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
WARN_ON_ONCE(!tick_work_cpu);
|
WARN_ON_ONCE(!tick_work_cpu);
|
||||||
|
@ -5400,7 +5465,7 @@ static void sched_tick_stop(int cpu)
|
||||||
struct tick_work *twork;
|
struct tick_work *twork;
|
||||||
int os;
|
int os;
|
||||||
|
|
||||||
if (housekeeping_cpu(cpu, HK_FLAG_TICK))
|
if (housekeeping_cpu(cpu, HK_TYPE_TICK))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
WARN_ON_ONCE(!tick_work_cpu);
|
WARN_ON_ONCE(!tick_work_cpu);
|
||||||
|
@ -6298,7 +6363,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
|
||||||
migrate_disable_switch(rq, prev);
|
migrate_disable_switch(rq, prev);
|
||||||
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
|
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
|
||||||
|
|
||||||
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next);
|
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
|
||||||
|
|
||||||
/* Also unlocks the rq: */
|
/* Also unlocks the rq: */
|
||||||
rq = context_switch(rq, prev, next, &rf);
|
rq = context_switch(rq, prev, next, &rf);
|
||||||
|
@ -6490,17 +6555,31 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
|
||||||
*/
|
*/
|
||||||
if (likely(!preemptible()))
|
if (likely(!preemptible()))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
preempt_schedule_common();
|
preempt_schedule_common();
|
||||||
}
|
}
|
||||||
NOKPROBE_SYMBOL(preempt_schedule);
|
NOKPROBE_SYMBOL(preempt_schedule);
|
||||||
EXPORT_SYMBOL(preempt_schedule);
|
EXPORT_SYMBOL(preempt_schedule);
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func);
|
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||||
EXPORT_STATIC_CALL_TRAMP(preempt_schedule);
|
#ifndef preempt_schedule_dynamic_enabled
|
||||||
|
#define preempt_schedule_dynamic_enabled preempt_schedule
|
||||||
|
#define preempt_schedule_dynamic_disabled NULL
|
||||||
|
#endif
|
||||||
|
DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
|
||||||
|
EXPORT_STATIC_CALL_TRAMP(preempt_schedule);
|
||||||
|
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||||
|
static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule);
|
||||||
|
void __sched notrace dynamic_preempt_schedule(void)
|
||||||
|
{
|
||||||
|
if (!static_branch_unlikely(&sk_dynamic_preempt_schedule))
|
||||||
|
return;
|
||||||
|
preempt_schedule();
|
||||||
|
}
|
||||||
|
NOKPROBE_SYMBOL(dynamic_preempt_schedule);
|
||||||
|
EXPORT_SYMBOL(dynamic_preempt_schedule);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* preempt_schedule_notrace - preempt_schedule called by tracing
|
* preempt_schedule_notrace - preempt_schedule called by tracing
|
||||||
|
@ -6555,148 +6634,28 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
|
||||||
EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
|
EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func);
|
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||||
|
#ifndef preempt_schedule_notrace_dynamic_enabled
|
||||||
|
#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace
|
||||||
|
#define preempt_schedule_notrace_dynamic_disabled NULL
|
||||||
|
#endif
|
||||||
|
DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
|
||||||
EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace);
|
EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace);
|
||||||
|
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||||
|
static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace);
|
||||||
|
void __sched notrace dynamic_preempt_schedule_notrace(void)
|
||||||
|
{
|
||||||
|
if (!static_branch_unlikely(&sk_dynamic_preempt_schedule_notrace))
|
||||||
|
return;
|
||||||
|
preempt_schedule_notrace();
|
||||||
|
}
|
||||||
|
NOKPROBE_SYMBOL(dynamic_preempt_schedule_notrace);
|
||||||
|
EXPORT_SYMBOL(dynamic_preempt_schedule_notrace);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* CONFIG_PREEMPTION */
|
#endif /* CONFIG_PREEMPTION */
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
|
||||||
|
|
||||||
#include <linux/entry-common.h>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SC:cond_resched
|
|
||||||
* SC:might_resched
|
|
||||||
* SC:preempt_schedule
|
|
||||||
* SC:preempt_schedule_notrace
|
|
||||||
* SC:irqentry_exit_cond_resched
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* NONE:
|
|
||||||
* cond_resched <- __cond_resched
|
|
||||||
* might_resched <- RET0
|
|
||||||
* preempt_schedule <- NOP
|
|
||||||
* preempt_schedule_notrace <- NOP
|
|
||||||
* irqentry_exit_cond_resched <- NOP
|
|
||||||
*
|
|
||||||
* VOLUNTARY:
|
|
||||||
* cond_resched <- __cond_resched
|
|
||||||
* might_resched <- __cond_resched
|
|
||||||
* preempt_schedule <- NOP
|
|
||||||
* preempt_schedule_notrace <- NOP
|
|
||||||
* irqentry_exit_cond_resched <- NOP
|
|
||||||
*
|
|
||||||
* FULL:
|
|
||||||
* cond_resched <- RET0
|
|
||||||
* might_resched <- RET0
|
|
||||||
* preempt_schedule <- preempt_schedule
|
|
||||||
* preempt_schedule_notrace <- preempt_schedule_notrace
|
|
||||||
* irqentry_exit_cond_resched <- irqentry_exit_cond_resched
|
|
||||||
*/
|
|
||||||
|
|
||||||
enum {
|
|
||||||
preempt_dynamic_undefined = -1,
|
|
||||||
preempt_dynamic_none,
|
|
||||||
preempt_dynamic_voluntary,
|
|
||||||
preempt_dynamic_full,
|
|
||||||
};
|
|
||||||
|
|
||||||
int preempt_dynamic_mode = preempt_dynamic_undefined;
|
|
||||||
|
|
||||||
int sched_dynamic_mode(const char *str)
|
|
||||||
{
|
|
||||||
if (!strcmp(str, "none"))
|
|
||||||
return preempt_dynamic_none;
|
|
||||||
|
|
||||||
if (!strcmp(str, "voluntary"))
|
|
||||||
return preempt_dynamic_voluntary;
|
|
||||||
|
|
||||||
if (!strcmp(str, "full"))
|
|
||||||
return preempt_dynamic_full;
|
|
||||||
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
void sched_dynamic_update(int mode)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
|
|
||||||
* the ZERO state, which is invalid.
|
|
||||||
*/
|
|
||||||
static_call_update(cond_resched, __cond_resched);
|
|
||||||
static_call_update(might_resched, __cond_resched);
|
|
||||||
static_call_update(preempt_schedule, __preempt_schedule_func);
|
|
||||||
static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func);
|
|
||||||
static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
|
|
||||||
|
|
||||||
switch (mode) {
|
|
||||||
case preempt_dynamic_none:
|
|
||||||
static_call_update(cond_resched, __cond_resched);
|
|
||||||
static_call_update(might_resched, (void *)&__static_call_return0);
|
|
||||||
static_call_update(preempt_schedule, NULL);
|
|
||||||
static_call_update(preempt_schedule_notrace, NULL);
|
|
||||||
static_call_update(irqentry_exit_cond_resched, NULL);
|
|
||||||
pr_info("Dynamic Preempt: none\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case preempt_dynamic_voluntary:
|
|
||||||
static_call_update(cond_resched, __cond_resched);
|
|
||||||
static_call_update(might_resched, __cond_resched);
|
|
||||||
static_call_update(preempt_schedule, NULL);
|
|
||||||
static_call_update(preempt_schedule_notrace, NULL);
|
|
||||||
static_call_update(irqentry_exit_cond_resched, NULL);
|
|
||||||
pr_info("Dynamic Preempt: voluntary\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case preempt_dynamic_full:
|
|
||||||
static_call_update(cond_resched, (void *)&__static_call_return0);
|
|
||||||
static_call_update(might_resched, (void *)&__static_call_return0);
|
|
||||||
static_call_update(preempt_schedule, __preempt_schedule_func);
|
|
||||||
static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func);
|
|
||||||
static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
|
|
||||||
pr_info("Dynamic Preempt: full\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
preempt_dynamic_mode = mode;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __init setup_preempt_mode(char *str)
|
|
||||||
{
|
|
||||||
int mode = sched_dynamic_mode(str);
|
|
||||||
if (mode < 0) {
|
|
||||||
pr_warn("Dynamic Preempt: unsupported mode: %s\n", str);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
sched_dynamic_update(mode);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
__setup("preempt=", setup_preempt_mode);
|
|
||||||
|
|
||||||
static void __init preempt_dynamic_init(void)
|
|
||||||
{
|
|
||||||
if (preempt_dynamic_mode == preempt_dynamic_undefined) {
|
|
||||||
if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
|
|
||||||
sched_dynamic_update(preempt_dynamic_none);
|
|
||||||
} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
|
|
||||||
sched_dynamic_update(preempt_dynamic_voluntary);
|
|
||||||
} else {
|
|
||||||
/* Default static call setting, nothing to do */
|
|
||||||
WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
|
|
||||||
preempt_dynamic_mode = preempt_dynamic_full;
|
|
||||||
pr_info("Dynamic Preempt: full\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else /* !CONFIG_PREEMPT_DYNAMIC */
|
|
||||||
|
|
||||||
static inline void preempt_dynamic_init(void) { }
|
|
||||||
|
|
||||||
#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is the entry point to schedule() from kernel preemption
|
* This is the entry point to schedule() from kernel preemption
|
||||||
* off of irq context.
|
* off of irq context.
|
||||||
|
@ -8202,11 +8161,35 @@ EXPORT_SYMBOL(__cond_resched);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
|
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||||
|
#define cond_resched_dynamic_enabled __cond_resched
|
||||||
|
#define cond_resched_dynamic_disabled ((void *)&__static_call_return0)
|
||||||
DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched);
|
DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched);
|
||||||
EXPORT_STATIC_CALL_TRAMP(cond_resched);
|
EXPORT_STATIC_CALL_TRAMP(cond_resched);
|
||||||
|
|
||||||
|
#define might_resched_dynamic_enabled __cond_resched
|
||||||
|
#define might_resched_dynamic_disabled ((void *)&__static_call_return0)
|
||||||
DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched);
|
DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched);
|
||||||
EXPORT_STATIC_CALL_TRAMP(might_resched);
|
EXPORT_STATIC_CALL_TRAMP(might_resched);
|
||||||
|
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||||
|
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
|
||||||
|
/*
 * Static-key variant of cond_resched().
 *
 * While sk_dynamic_cond_resched is off this returns 0 without doing
 * anything; once the key is enabled the call is forwarded to
 * __cond_resched() and its result is returned.
 */
int __sched dynamic_cond_resched(void)
{
	if (static_branch_unlikely(&sk_dynamic_cond_resched))
		return __cond_resched();

	return 0;
}
|
||||||
|
EXPORT_SYMBOL(dynamic_cond_resched);
|
||||||
|
|
||||||
|
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_might_resched);
|
||||||
|
/*
 * Static-key variant of might_resched().
 *
 * While sk_dynamic_might_resched is off this returns 0 without doing
 * anything; once the key is enabled the call is forwarded to
 * __cond_resched() and its result is returned.
 */
int __sched dynamic_might_resched(void)
{
	if (static_branch_unlikely(&sk_dynamic_might_resched))
		return __cond_resched();

	return 0;
}
|
||||||
|
EXPORT_SYMBOL(dynamic_might_resched);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -8271,6 +8254,154 @@ int __cond_resched_rwlock_write(rwlock_t *lock)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__cond_resched_rwlock_write);
|
EXPORT_SYMBOL(__cond_resched_rwlock_write);
|
||||||
|
|
||||||
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
|
|
||||||
|
#ifdef CONFIG_GENERIC_ENTRY
|
||||||
|
#include <linux/entry-common.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SC:cond_resched
|
||||||
|
* SC:might_resched
|
||||||
|
* SC:preempt_schedule
|
||||||
|
* SC:preempt_schedule_notrace
|
||||||
|
* SC:irqentry_exit_cond_resched
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* NONE:
|
||||||
|
* cond_resched <- __cond_resched
|
||||||
|
* might_resched <- RET0
|
||||||
|
* preempt_schedule <- NOP
|
||||||
|
* preempt_schedule_notrace <- NOP
|
||||||
|
* irqentry_exit_cond_resched <- NOP
|
||||||
|
*
|
||||||
|
* VOLUNTARY:
|
||||||
|
* cond_resched <- __cond_resched
|
||||||
|
* might_resched <- __cond_resched
|
||||||
|
* preempt_schedule <- NOP
|
||||||
|
* preempt_schedule_notrace <- NOP
|
||||||
|
* irqentry_exit_cond_resched <- NOP
|
||||||
|
*
|
||||||
|
* FULL:
|
||||||
|
* cond_resched <- RET0
|
||||||
|
* might_resched <- RET0
|
||||||
|
* preempt_schedule <- preempt_schedule
|
||||||
|
* preempt_schedule_notrace <- preempt_schedule_notrace
|
||||||
|
* irqentry_exit_cond_resched <- irqentry_exit_cond_resched
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Dynamic preemption modes; sched_dynamic_mode() maps the names to these. */
enum {
	preempt_dynamic_undefined	= -1,	/* no mode selected yet */
	preempt_dynamic_none		= 0,	/* "none" */
	preempt_dynamic_voluntary	= 1,	/* "voluntary" */
	preempt_dynamic_full		= 2,	/* "full" */
};
|
||||||
|
|
||||||
|
int preempt_dynamic_mode = preempt_dynamic_undefined;
|
||||||
|
|
||||||
|
int sched_dynamic_mode(const char *str)
|
||||||
|
{
|
||||||
|
if (!strcmp(str, "none"))
|
||||||
|
return preempt_dynamic_none;
|
||||||
|
|
||||||
|
if (!strcmp(str, "voluntary"))
|
||||||
|
return preempt_dynamic_voluntary;
|
||||||
|
|
||||||
|
if (!strcmp(str, "full"))
|
||||||
|
return preempt_dynamic_full;
|
||||||
|
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||||
|
#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled)
|
||||||
|
#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled)
|
||||||
|
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||||
|
#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key)
|
||||||
|
#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key)
|
||||||
|
#else
|
||||||
|
#error "Unsupported PREEMPT_DYNAMIC mechanism"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void sched_dynamic_update(int mode)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
|
||||||
|
* the ZERO state, which is invalid.
|
||||||
|
*/
|
||||||
|
preempt_dynamic_enable(cond_resched);
|
||||||
|
preempt_dynamic_enable(might_resched);
|
||||||
|
preempt_dynamic_enable(preempt_schedule);
|
||||||
|
preempt_dynamic_enable(preempt_schedule_notrace);
|
||||||
|
preempt_dynamic_enable(irqentry_exit_cond_resched);
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
case preempt_dynamic_none:
|
||||||
|
preempt_dynamic_enable(cond_resched);
|
||||||
|
preempt_dynamic_disable(might_resched);
|
||||||
|
preempt_dynamic_disable(preempt_schedule);
|
||||||
|
preempt_dynamic_disable(preempt_schedule_notrace);
|
||||||
|
preempt_dynamic_disable(irqentry_exit_cond_resched);
|
||||||
|
pr_info("Dynamic Preempt: none\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case preempt_dynamic_voluntary:
|
||||||
|
preempt_dynamic_enable(cond_resched);
|
||||||
|
preempt_dynamic_enable(might_resched);
|
||||||
|
preempt_dynamic_disable(preempt_schedule);
|
||||||
|
preempt_dynamic_disable(preempt_schedule_notrace);
|
||||||
|
preempt_dynamic_disable(irqentry_exit_cond_resched);
|
||||||
|
pr_info("Dynamic Preempt: voluntary\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case preempt_dynamic_full:
|
||||||
|
preempt_dynamic_disable(cond_resched);
|
||||||
|
preempt_dynamic_disable(might_resched);
|
||||||
|
preempt_dynamic_enable(preempt_schedule);
|
||||||
|
preempt_dynamic_enable(preempt_schedule_notrace);
|
||||||
|
preempt_dynamic_enable(irqentry_exit_cond_resched);
|
||||||
|
pr_info("Dynamic Preempt: full\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
preempt_dynamic_mode = mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Handler for the "preempt=" option.
 *
 * @str: mode name as given after "preempt=".
 *
 * Applies the mode through sched_dynamic_update() and returns 1 when @str
 * names a known mode; otherwise logs a warning and returns 0.
 */
static int __init setup_preempt_mode(char *str)
{
	const int mode = sched_dynamic_mode(str);

	if (mode >= 0) {
		sched_dynamic_update(mode);
		return 1;
	}

	pr_warn("Dynamic Preempt: unsupported mode: %s\n", str);
	return 0;
}
|
||||||
|
__setup("preempt=", setup_preempt_mode);
|
||||||
|
|
||||||
|
/*
 * Pick the initial dynamic preemption mode from the build configuration.
 *
 * Does nothing when preempt_dynamic_mode has already been set to something
 * other than preempt_dynamic_undefined (sched_dynamic_update() stores the
 * mode it applies).
 */
static void __init preempt_dynamic_init(void)
{
	if (preempt_dynamic_mode != preempt_dynamic_undefined)
		return;

	if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
		sched_dynamic_update(preempt_dynamic_none);
		return;
	}

	if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
		sched_dynamic_update(preempt_dynamic_voluntary);
		return;
	}

	/* Default static call setting, nothing to do */
	WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
	preempt_dynamic_mode = preempt_dynamic_full;
	pr_info("Dynamic Preempt: full\n");
}
|
||||||
|
|
||||||
|
#else /* !CONFIG_PREEMPT_DYNAMIC */
|
||||||
|
|
||||||
|
/* No-op stub for builds without CONFIG_PREEMPT_DYNAMIC. */
static inline void preempt_dynamic_init(void) { }
|
||||||
|
|
||||||
|
#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* yield - yield the current processor to other threads.
|
* yield - yield the current processor to other threads.
|
||||||
*
|
*
|
||||||
|
@ -8706,7 +8837,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
||||||
{
|
{
|
||||||
int ret = 1;
|
int ret = 1;
|
||||||
|
|
||||||
if (!cpumask_weight(cur))
|
if (cpumask_empty(cur))
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
ret = dl_cpuset_cpumask_can_shrink(cur, trial);
|
ret = dl_cpuset_cpumask_can_shrink(cur, trial);
|
||||||
|
@ -8734,8 +8865,11 @@ int task_can_attach(struct task_struct *p,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
|
if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
|
||||||
cs_cpus_allowed))
|
cs_cpus_allowed)) {
|
||||||
ret = dl_task_can_attach(p, cs_cpus_allowed);
|
int cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
|
||||||
|
|
||||||
|
ret = dl_cpu_busy(cpu, p);
|
||||||
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -9019,8 +9153,10 @@ static void cpuset_cpu_active(void)
|
||||||
static int cpuset_cpu_inactive(unsigned int cpu)
|
static int cpuset_cpu_inactive(unsigned int cpu)
|
||||||
{
|
{
|
||||||
if (!cpuhp_tasks_frozen) {
|
if (!cpuhp_tasks_frozen) {
|
||||||
if (dl_cpu_busy(cpu))
|
int ret = dl_cpu_busy(cpu, NULL);
|
||||||
return -EBUSY;
|
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
cpuset_update_active_cpus();
|
cpuset_update_active_cpus();
|
||||||
} else {
|
} else {
|
||||||
num_cpus_frozen++;
|
num_cpus_frozen++;
|
||||||
|
@ -9050,6 +9186,7 @@ int sched_cpu_activate(unsigned int cpu)
|
||||||
set_cpu_active(cpu, true);
|
set_cpu_active(cpu, true);
|
||||||
|
|
||||||
if (sched_smp_initialized) {
|
if (sched_smp_initialized) {
|
||||||
|
sched_update_numa(cpu, true);
|
||||||
sched_domains_numa_masks_set(cpu);
|
sched_domains_numa_masks_set(cpu);
|
||||||
cpuset_cpu_active();
|
cpuset_cpu_active();
|
||||||
}
|
}
|
||||||
|
@ -9128,10 +9265,12 @@ int sched_cpu_deactivate(unsigned int cpu)
|
||||||
if (!sched_smp_initialized)
|
if (!sched_smp_initialized)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
sched_update_numa(cpu, false);
|
||||||
ret = cpuset_cpu_inactive(cpu);
|
ret = cpuset_cpu_inactive(cpu);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
balance_push_set(cpu, false);
|
balance_push_set(cpu, false);
|
||||||
set_cpu_active(cpu, true);
|
set_cpu_active(cpu, true);
|
||||||
|
sched_update_numa(cpu, true);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
sched_domains_numa_masks_clear(cpu);
|
sched_domains_numa_masks_clear(cpu);
|
||||||
|
@ -9234,7 +9373,7 @@ int sched_cpu_dying(unsigned int cpu)
|
||||||
|
|
||||||
void __init sched_init_smp(void)
|
void __init sched_init_smp(void)
|
||||||
{
|
{
|
||||||
sched_init_numa();
|
sched_init_numa(NUMA_NO_NODE);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* There's no userspace yet to cause hotplug operations; hence all the
|
* There's no userspace yet to cause hotplug operations; hence all the
|
||||||
|
@ -9246,7 +9385,7 @@ void __init sched_init_smp(void)
|
||||||
mutex_unlock(&sched_domains_mutex);
|
mutex_unlock(&sched_domains_mutex);
|
||||||
|
|
||||||
/* Move init over to a non-isolated CPU */
|
/* Move init over to a non-isolated CPU */
|
||||||
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
|
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0)
|
||||||
BUG();
|
BUG();
|
||||||
current->flags &= ~PF_NO_SETAFFINITY;
|
current->flags &= ~PF_NO_SETAFFINITY;
|
||||||
sched_init_granularity();
|
sched_init_granularity();
|
||||||
|
@ -9346,7 +9485,6 @@ void __init sched_init(void)
|
||||||
#endif /* CONFIG_CPUMASK_OFFSTACK */
|
#endif /* CONFIG_CPUMASK_OFFSTACK */
|
||||||
|
|
||||||
init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
|
init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
|
||||||
init_dl_bandwidth(&def_dl_bandwidth, global_rt_period(), global_rt_runtime());
|
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
init_defrootdomain();
|
init_defrootdomain();
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-only
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
|
||||||
#include <linux/prctl.h>
|
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A simple wrapper around refcount. An allocated sched_core_cookie's
|
* A simple wrapper around refcount. An allocated sched_core_cookie's
|
||||||
* address is used to compute the cookie of the task.
|
* address is used to compute the cookie of the task.
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CPU accounting code for task groups.
|
* CPU accounting code for task groups.
|
||||||
*
|
*
|
||||||
* Based on the work by Paul Menage (menage@google.com) and Balbir Singh
|
* Based on the work by Paul Menage (menage@google.com) and Balbir Singh
|
||||||
* (balbir@in.ibm.com).
|
* (balbir@in.ibm.com).
|
||||||
*/
|
*/
|
||||||
#include <asm/irq_regs.h>
|
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
/* Time spent by the tasks of the CPU accounting group executing in ... */
|
/* Time spent by the tasks of the CPU accounting group executing in ... */
|
||||||
enum cpuacct_stat_index {
|
enum cpuacct_stat_index {
|
||||||
|
@ -334,14 +333,13 @@ static struct cftype files[] = {
|
||||||
*/
|
*/
|
||||||
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
||||||
{
|
{
|
||||||
|
unsigned int cpu = task_cpu(tsk);
|
||||||
struct cpuacct *ca;
|
struct cpuacct *ca;
|
||||||
|
|
||||||
rcu_read_lock();
|
lockdep_assert_rq_held(cpu_rq(cpu));
|
||||||
|
|
||||||
for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
|
for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
|
||||||
__this_cpu_add(*ca->cpuusage, cputime);
|
*per_cpu_ptr(ca->cpuusage, cpu) += cputime;
|
||||||
|
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -353,10 +351,8 @@ void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
|
||||||
{
|
{
|
||||||
struct cpuacct *ca;
|
struct cpuacct *ca;
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
|
for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
|
||||||
__this_cpu_add(ca->cpustat->cpustat[index], val);
|
__this_cpu_add(ca->cpustat->cpustat[index], val);
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct cgroup_subsys cpuacct_cgrp_subsys = {
|
struct cgroup_subsys cpuacct_cgrp_subsys = {
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-only
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
/*
|
/*
|
||||||
* kernel/sched/cpudl.c
|
* kernel/sched/cpudeadline.c
|
||||||
*
|
*
|
||||||
* Global CPU deadline management
|
* Global CPU deadline management
|
||||||
*
|
*
|
||||||
* Author: Juri Lelli <j.lelli@sssup.it>
|
* Author: Juri Lelli <j.lelli@sssup.it>
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
static inline int parent(int i)
|
static inline int parent(int i)
|
||||||
{
|
{
|
||||||
|
|
|
@ -5,9 +5,6 @@
|
||||||
* Copyright (C) 2016, Intel Corporation
|
* Copyright (C) 2016, Intel Corporation
|
||||||
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||||
*/
|
*/
|
||||||
#include <linux/cpufreq.h>
|
|
||||||
|
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
DEFINE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
|
DEFINE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
|
||||||
|
|
||||||
|
|
|
@ -6,13 +6,6 @@
|
||||||
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
||||||
|
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
#include <linux/sched/cpufreq.h>
|
|
||||||
#include <trace/events/power.h>
|
|
||||||
|
|
||||||
#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)
|
#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)
|
||||||
|
|
||||||
struct sugov_tunables {
|
struct sugov_tunables {
|
||||||
|
@ -289,6 +282,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
|
||||||
* into the same scale so we can compare.
|
* into the same scale so we can compare.
|
||||||
*/
|
*/
|
||||||
boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
|
boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
|
||||||
|
boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
|
||||||
if (sg_cpu->util < boost)
|
if (sg_cpu->util < boost)
|
||||||
sg_cpu->util = boost;
|
sg_cpu->util = boost;
|
||||||
}
|
}
|
||||||
|
@ -348,8 +342,11 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
|
||||||
/*
|
/*
|
||||||
* Do not reduce the frequency if the CPU has not been idle
|
* Do not reduce the frequency if the CPU has not been idle
|
||||||
* recently, as the reduction is likely to be premature then.
|
* recently, as the reduction is likely to be premature then.
|
||||||
|
*
|
||||||
|
* Except when the rq is capped by uclamp_max.
|
||||||
*/
|
*/
|
||||||
if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
|
if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
|
||||||
|
sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
|
||||||
next_f = sg_policy->next_freq;
|
next_f = sg_policy->next_freq;
|
||||||
|
|
||||||
/* Restore cached freq as next_freq has changed */
|
/* Restore cached freq as next_freq has changed */
|
||||||
|
@ -395,8 +392,11 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
|
||||||
/*
|
/*
|
||||||
* Do not reduce the target performance level if the CPU has not been
|
* Do not reduce the target performance level if the CPU has not been
|
||||||
* idle recently, as the reduction is likely to be premature then.
|
* idle recently, as the reduction is likely to be premature then.
|
||||||
|
*
|
||||||
|
* Except when the rq is capped by uclamp_max.
|
||||||
*/
|
*/
|
||||||
if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
|
if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
|
||||||
|
sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
|
||||||
sg_cpu->util = prev_util;
|
sg_cpu->util = prev_util;
|
||||||
|
|
||||||
cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
|
cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
|
||||||
|
|
|
@ -22,7 +22,6 @@
|
||||||
* worst case complexity of O(min(101, nr_domcpus)), though the scenario that
|
* worst case complexity of O(min(101, nr_domcpus)), though the scenario that
|
||||||
* yields the worst case search is fairly contrived.
|
* yields the worst case search is fairly contrived.
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* p->rt_priority p->prio newpri cpupri
|
* p->rt_priority p->prio newpri cpupri
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
/*
|
/*
|
||||||
* Simple CPU accounting cgroup controller
|
* Simple CPU accounting cgroup controller
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||||
|
|
||||||
|
|
|
@ -15,10 +15,6 @@
|
||||||
* Michael Trimarchi <michael@amarulasolutions.com>,
|
* Michael Trimarchi <michael@amarulasolutions.com>,
|
||||||
* Fabio Checconi <fchecconi@gmail.com>
|
* Fabio Checconi <fchecconi@gmail.com>
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
#include "pelt.h"
|
|
||||||
|
|
||||||
struct dl_bandwidth def_dl_bandwidth;
|
|
||||||
|
|
||||||
static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
|
static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
|
||||||
{
|
{
|
||||||
|
@ -130,6 +126,21 @@ static inline bool dl_bw_visited(int cpu, u64 gen)
|
||||||
rd->visit_gen = gen;
|
rd->visit_gen = gen;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
void __dl_update(struct dl_bw *dl_b, s64 bw)
|
||||||
|
{
|
||||||
|
struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
|
||||||
|
int i;
|
||||||
|
|
||||||
|
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
|
||||||
|
"sched RCU must be held");
|
||||||
|
for_each_cpu_and(i, rd->span, cpu_active_mask) {
|
||||||
|
struct rq *rq = cpu_rq(i);
|
||||||
|
|
||||||
|
rq->dl.extra_bw += bw;
|
||||||
|
}
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
static inline struct dl_bw *dl_bw_of(int i)
|
static inline struct dl_bw *dl_bw_of(int i)
|
||||||
{
|
{
|
||||||
|
@ -150,8 +161,37 @@ static inline bool dl_bw_visited(int cpu, u64 gen)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
void __dl_update(struct dl_bw *dl_b, s64 bw)
|
||||||
|
{
|
||||||
|
struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
|
||||||
|
|
||||||
|
dl->extra_bw += bw;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static inline
|
||||||
|
void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
|
||||||
|
{
|
||||||
|
dl_b->total_bw -= tsk_bw;
|
||||||
|
__dl_update(dl_b, (s32)tsk_bw / cpus);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
|
||||||
|
{
|
||||||
|
dl_b->total_bw += tsk_bw;
|
||||||
|
__dl_update(dl_b, -((s32)tsk_bw / cpus));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
__dl_overflow(struct dl_bw *dl_b, unsigned long cap, u64 old_bw, u64 new_bw)
|
||||||
|
{
|
||||||
|
return dl_b->bw != -1 &&
|
||||||
|
cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
|
||||||
|
}
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
|
void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
|
||||||
{
|
{
|
||||||
|
@ -408,7 +448,7 @@ static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
|
||||||
{
|
{
|
||||||
struct sched_dl_entity *dl_se = &p->dl;
|
struct sched_dl_entity *dl_se = &p->dl;
|
||||||
|
|
||||||
return dl_rq->root.rb_leftmost == &dl_se->rb_node;
|
return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
|
static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
|
||||||
|
@ -423,12 +463,10 @@ void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
|
||||||
void init_dl_bw(struct dl_bw *dl_b)
|
void init_dl_bw(struct dl_bw *dl_b)
|
||||||
{
|
{
|
||||||
raw_spin_lock_init(&dl_b->lock);
|
raw_spin_lock_init(&dl_b->lock);
|
||||||
raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
|
|
||||||
if (global_rt_runtime() == RUNTIME_INF)
|
if (global_rt_runtime() == RUNTIME_INF)
|
||||||
dl_b->bw = -1;
|
dl_b->bw = -1;
|
||||||
else
|
else
|
||||||
dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
|
dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
|
||||||
raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
|
|
||||||
dl_b->total_bw = 0;
|
dl_b->total_bw = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -683,15 +721,6 @@ void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void pull_dl_task(struct rq *rq)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void deadline_queue_push_tasks(struct rq *rq)
|
static inline void deadline_queue_push_tasks(struct rq *rq)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -1393,6 +1422,9 @@ void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
|
||||||
timer->function = inactive_task_timer;
|
timer->function = inactive_task_timer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define __node_2_dle(node) \
|
||||||
|
rb_entry((node), struct sched_dl_entity, rb_node)
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
|
||||||
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
|
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
|
||||||
|
@ -1422,10 +1454,9 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
|
||||||
cpudl_clear(&rq->rd->cpudl, rq->cpu);
|
cpudl_clear(&rq->rd->cpudl, rq->cpu);
|
||||||
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
|
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
|
||||||
} else {
|
} else {
|
||||||
struct rb_node *leftmost = dl_rq->root.rb_leftmost;
|
struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
|
||||||
struct sched_dl_entity *entry;
|
struct sched_dl_entity *entry = __node_2_dle(leftmost);
|
||||||
|
|
||||||
entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
|
|
||||||
dl_rq->earliest_dl.curr = entry->deadline;
|
dl_rq->earliest_dl.curr = entry->deadline;
|
||||||
cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
|
cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
|
||||||
}
|
}
|
||||||
|
@ -1466,9 +1497,6 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
|
||||||
dec_dl_migration(dl_se, dl_rq);
|
dec_dl_migration(dl_se, dl_rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define __node_2_dle(node) \
|
|
||||||
rb_entry((node), struct sched_dl_entity, rb_node)
|
|
||||||
|
|
||||||
static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
|
static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
|
||||||
{
|
{
|
||||||
return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
|
return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
|
||||||
|
@ -1931,15 +1959,14 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
|
||||||
deadline_queue_push_tasks(rq);
|
deadline_queue_push_tasks(rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
|
static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
|
||||||
struct dl_rq *dl_rq)
|
|
||||||
{
|
{
|
||||||
struct rb_node *left = rb_first_cached(&dl_rq->root);
|
struct rb_node *left = rb_first_cached(&dl_rq->root);
|
||||||
|
|
||||||
if (!left)
|
if (!left)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
return rb_entry(left, struct sched_dl_entity, rb_node);
|
return __node_2_dle(left);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct task_struct *pick_task_dl(struct rq *rq)
|
static struct task_struct *pick_task_dl(struct rq *rq)
|
||||||
|
@ -1951,7 +1978,7 @@ static struct task_struct *pick_task_dl(struct rq *rq)
|
||||||
if (!sched_dl_runnable(rq))
|
if (!sched_dl_runnable(rq))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
dl_se = pick_next_dl_entity(rq, dl_rq);
|
dl_se = pick_next_dl_entity(dl_rq);
|
||||||
BUG_ON(!dl_se);
|
BUG_ON(!dl_se);
|
||||||
p = dl_task_of(dl_se);
|
p = dl_task_of(dl_se);
|
||||||
|
|
||||||
|
@ -2034,15 +2061,17 @@ static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||||
*/
|
*/
|
||||||
static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
|
static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
|
||||||
{
|
{
|
||||||
struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost;
|
|
||||||
struct task_struct *p = NULL;
|
struct task_struct *p = NULL;
|
||||||
|
struct rb_node *next_node;
|
||||||
|
|
||||||
if (!has_pushable_dl_tasks(rq))
|
if (!has_pushable_dl_tasks(rq))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
|
||||||
|
|
||||||
next_node:
|
next_node:
|
||||||
if (next_node) {
|
if (next_node) {
|
||||||
p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
|
p = __node_2_pdl(next_node);
|
||||||
|
|
||||||
if (pick_dl_task(rq, p, cpu))
|
if (pick_dl_task(rq, p, cpu))
|
||||||
return p;
|
return p;
|
||||||
|
@ -2208,8 +2237,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
|
||||||
if (!has_pushable_dl_tasks(rq))
|
if (!has_pushable_dl_tasks(rq))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost,
|
p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
|
||||||
struct task_struct, pushable_dl_tasks);
|
|
||||||
|
|
||||||
BUG_ON(rq->cpu != task_cpu(p));
|
BUG_ON(rq->cpu != task_cpu(p));
|
||||||
BUG_ON(task_current(rq, p));
|
BUG_ON(task_current(rq, p));
|
||||||
|
@ -2240,12 +2268,6 @@ static int push_dl_task(struct rq *rq)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
if (is_migration_disabled(next_task))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (WARN_ON(next_task == rq->curr))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If next_task preempts rq->curr, and rq->curr
|
* If next_task preempts rq->curr, and rq->curr
|
||||||
* can move away, it makes sense to just reschedule
|
* can move away, it makes sense to just reschedule
|
||||||
|
@ -2258,6 +2280,12 @@ static int push_dl_task(struct rq *rq)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_migration_disabled(next_task))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (WARN_ON(next_task == rq->curr))
|
||||||
|
return 0;
|
||||||
|
|
||||||
/* We might release rq lock */
|
/* We might release rq lock */
|
||||||
get_task_struct(next_task);
|
get_task_struct(next_task);
|
||||||
|
|
||||||
|
@ -2731,9 +2759,6 @@ void sched_dl_do_global(void)
|
||||||
int cpu;
|
int cpu;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
def_dl_bandwidth.dl_period = global_rt_period();
|
|
||||||
def_dl_bandwidth.dl_runtime = global_rt_runtime();
|
|
||||||
|
|
||||||
if (global_rt_runtime() != RUNTIME_INF)
|
if (global_rt_runtime() != RUNTIME_INF)
|
||||||
new_bw = to_ratio(global_rt_period(), global_rt_runtime());
|
new_bw = to_ratio(global_rt_period(), global_rt_runtime());
|
||||||
|
|
||||||
|
@ -2955,41 +2980,6 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
|
|
||||||
{
|
|
||||||
unsigned long flags, cap;
|
|
||||||
unsigned int dest_cpu;
|
|
||||||
struct dl_bw *dl_b;
|
|
||||||
bool overflow;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
|
|
||||||
|
|
||||||
rcu_read_lock_sched();
|
|
||||||
dl_b = dl_bw_of(dest_cpu);
|
|
||||||
raw_spin_lock_irqsave(&dl_b->lock, flags);
|
|
||||||
cap = dl_bw_capacity(dest_cpu);
|
|
||||||
overflow = __dl_overflow(dl_b, cap, 0, p->dl.dl_bw);
|
|
||||||
if (overflow) {
|
|
||||||
ret = -EBUSY;
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* We reserve space for this task in the destination
|
|
||||||
* root_domain, as we can't fail after this point.
|
|
||||||
* We will free resources in the source root_domain
|
|
||||||
* later on (see set_cpus_allowed_dl()).
|
|
||||||
*/
|
|
||||||
int cpus = dl_bw_cpus(dest_cpu);
|
|
||||||
|
|
||||||
__dl_add(dl_b, p->dl.dl_bw, cpus);
|
|
||||||
ret = 0;
|
|
||||||
}
|
|
||||||
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
|
|
||||||
rcu_read_unlock_sched();
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
||||||
const struct cpumask *trial)
|
const struct cpumask *trial)
|
||||||
{
|
{
|
||||||
|
@ -3011,7 +3001,7 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool dl_cpu_busy(unsigned int cpu)
|
int dl_cpu_busy(int cpu, struct task_struct *p)
|
||||||
{
|
{
|
||||||
unsigned long flags, cap;
|
unsigned long flags, cap;
|
||||||
struct dl_bw *dl_b;
|
struct dl_bw *dl_b;
|
||||||
|
@ -3021,11 +3011,22 @@ bool dl_cpu_busy(unsigned int cpu)
|
||||||
dl_b = dl_bw_of(cpu);
|
dl_b = dl_bw_of(cpu);
|
||||||
raw_spin_lock_irqsave(&dl_b->lock, flags);
|
raw_spin_lock_irqsave(&dl_b->lock, flags);
|
||||||
cap = dl_bw_capacity(cpu);
|
cap = dl_bw_capacity(cpu);
|
||||||
overflow = __dl_overflow(dl_b, cap, 0, 0);
|
overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0);
|
||||||
|
|
||||||
|
if (!overflow && p) {
|
||||||
|
/*
|
||||||
|
* We reserve space for this task in the destination
|
||||||
|
* root_domain, as we can't fail after this point.
|
||||||
|
* We will free resources in the source root_domain
|
||||||
|
* later on (see set_cpus_allowed_dl()).
|
||||||
|
*/
|
||||||
|
__dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu));
|
||||||
|
}
|
||||||
|
|
||||||
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
|
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
|
||||||
rcu_read_unlock_sched();
|
rcu_read_unlock_sched();
|
||||||
|
|
||||||
return overflow;
|
return overflow ? -EBUSY : 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
*
|
*
|
||||||
* Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
|
* Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This allows printing both to /proc/sched_debug and
|
* This allows printing both to /proc/sched_debug and
|
||||||
|
@ -931,25 +930,15 @@ void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
|
||||||
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
|
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_NUMA_BALANCING
|
#ifdef CONFIG_NUMA_BALANCING
|
||||||
struct mempolicy *pol;
|
|
||||||
|
|
||||||
if (p->mm)
|
if (p->mm)
|
||||||
P(mm->numa_scan_seq);
|
P(mm->numa_scan_seq);
|
||||||
|
|
||||||
task_lock(p);
|
|
||||||
pol = p->mempolicy;
|
|
||||||
if (pol && !(pol->flags & MPOL_F_MORON))
|
|
||||||
pol = NULL;
|
|
||||||
mpol_get(pol);
|
|
||||||
task_unlock(p);
|
|
||||||
|
|
||||||
P(numa_pages_migrated);
|
P(numa_pages_migrated);
|
||||||
P(numa_preferred_nid);
|
P(numa_preferred_nid);
|
||||||
P(total_numa_faults);
|
P(total_numa_faults);
|
||||||
SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
|
SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
|
||||||
task_node(p), task_numa_group_id(p));
|
task_node(p), task_numa_group_id(p));
|
||||||
show_numa_stats(p, m);
|
show_numa_stats(p, m);
|
||||||
mpol_put(pol);
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,38 @@
|
||||||
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
|
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
|
||||||
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
|
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
|
||||||
*/
|
*/
|
||||||
|
#include <linux/energy_model.h>
|
||||||
|
#include <linux/mmap_lock.h>
|
||||||
|
#include <linux/hugetlb_inline.h>
|
||||||
|
#include <linux/jiffies.h>
|
||||||
|
#include <linux/mm_api.h>
|
||||||
|
#include <linux/highmem.h>
|
||||||
|
#include <linux/spinlock_api.h>
|
||||||
|
#include <linux/cpumask_api.h>
|
||||||
|
#include <linux/lockdep_api.h>
|
||||||
|
#include <linux/softirq.h>
|
||||||
|
#include <linux/refcount_api.h>
|
||||||
|
#include <linux/topology.h>
|
||||||
|
#include <linux/sched/clock.h>
|
||||||
|
#include <linux/sched/cond_resched.h>
|
||||||
|
#include <linux/sched/cputime.h>
|
||||||
|
#include <linux/sched/isolation.h>
|
||||||
|
|
||||||
|
#include <linux/cpuidle.h>
|
||||||
|
#include <linux/interrupt.h>
|
||||||
|
#include <linux/mempolicy.h>
|
||||||
|
#include <linux/mutex_api.h>
|
||||||
|
#include <linux/profile.h>
|
||||||
|
#include <linux/psi.h>
|
||||||
|
#include <linux/ratelimit.h>
|
||||||
|
|
||||||
|
#include <asm/switch_to.h>
|
||||||
|
|
||||||
|
#include <linux/sched/cond_resched.h>
|
||||||
|
|
||||||
#include "sched.h"
|
#include "sched.h"
|
||||||
|
#include "stats.h"
|
||||||
|
#include "autogroup.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Targeted preemption latency for CPU-bound tasks:
|
* Targeted preemption latency for CPU-bound tasks:
|
||||||
|
@ -1259,10 +1290,10 @@ static bool numa_is_active_node(int nid, struct numa_group *ng)
|
||||||
|
|
||||||
/* Handle placement on systems where not all nodes are directly connected. */
|
/* Handle placement on systems where not all nodes are directly connected. */
|
||||||
static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||||
int maxdist, bool task)
|
int lim_dist, bool task)
|
||||||
{
|
{
|
||||||
unsigned long score = 0;
|
unsigned long score = 0;
|
||||||
int node;
|
int node, max_dist;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* All nodes are directly connected, and the same distance
|
* All nodes are directly connected, and the same distance
|
||||||
|
@ -1271,6 +1302,8 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||||
if (sched_numa_topology_type == NUMA_DIRECT)
|
if (sched_numa_topology_type == NUMA_DIRECT)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/* sched_max_numa_distance may be changed in parallel. */
|
||||||
|
max_dist = READ_ONCE(sched_max_numa_distance);
|
||||||
/*
|
/*
|
||||||
* This code is called for each node, introducing N^2 complexity,
|
* This code is called for each node, introducing N^2 complexity,
|
||||||
* which should be ok given the number of nodes rarely exceeds 8.
|
* which should be ok given the number of nodes rarely exceeds 8.
|
||||||
|
@ -1283,7 +1316,7 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||||
* The furthest away nodes in the system are not interesting
|
* The furthest away nodes in the system are not interesting
|
||||||
* for placement; nid was already counted.
|
* for placement; nid was already counted.
|
||||||
*/
|
*/
|
||||||
if (dist == sched_max_numa_distance || node == nid)
|
if (dist >= max_dist || node == nid)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1293,8 +1326,7 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||||
* "hoplimit", only nodes closer by than "hoplimit" are part
|
* "hoplimit", only nodes closer by than "hoplimit" are part
|
||||||
* of each group. Skip other nodes.
|
* of each group. Skip other nodes.
|
||||||
*/
|
*/
|
||||||
if (sched_numa_topology_type == NUMA_BACKPLANE &&
|
if (sched_numa_topology_type == NUMA_BACKPLANE && dist >= lim_dist)
|
||||||
dist >= maxdist)
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Add up the faults from nearby nodes. */
|
/* Add up the faults from nearby nodes. */
|
||||||
|
@ -1312,8 +1344,8 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||||
* This seems to result in good task placement.
|
* This seems to result in good task placement.
|
||||||
*/
|
*/
|
||||||
if (sched_numa_topology_type == NUMA_GLUELESS_MESH) {
|
if (sched_numa_topology_type == NUMA_GLUELESS_MESH) {
|
||||||
faults *= (sched_max_numa_distance - dist);
|
faults *= (max_dist - dist);
|
||||||
faults /= (sched_max_numa_distance - LOCAL_DISTANCE);
|
faults /= (max_dist - LOCAL_DISTANCE);
|
||||||
}
|
}
|
||||||
|
|
||||||
score += faults;
|
score += faults;
|
||||||
|
@ -1489,6 +1521,7 @@ struct task_numa_env {
|
||||||
|
|
||||||
int src_cpu, src_nid;
|
int src_cpu, src_nid;
|
||||||
int dst_cpu, dst_nid;
|
int dst_cpu, dst_nid;
|
||||||
|
int imb_numa_nr;
|
||||||
|
|
||||||
struct numa_stats src_stats, dst_stats;
|
struct numa_stats src_stats, dst_stats;
|
||||||
|
|
||||||
|
@ -1503,7 +1536,7 @@ struct task_numa_env {
|
||||||
static unsigned long cpu_load(struct rq *rq);
|
static unsigned long cpu_load(struct rq *rq);
|
||||||
static unsigned long cpu_runnable(struct rq *rq);
|
static unsigned long cpu_runnable(struct rq *rq);
|
||||||
static inline long adjust_numa_imbalance(int imbalance,
|
static inline long adjust_numa_imbalance(int imbalance,
|
||||||
int dst_running, int dst_weight);
|
int dst_running, int imb_numa_nr);
|
||||||
|
|
||||||
static inline enum
|
static inline enum
|
||||||
numa_type numa_classify(unsigned int imbalance_pct,
|
numa_type numa_classify(unsigned int imbalance_pct,
|
||||||
|
@ -1884,7 +1917,7 @@ static void task_numa_find_cpu(struct task_numa_env *env,
|
||||||
dst_running = env->dst_stats.nr_running + 1;
|
dst_running = env->dst_stats.nr_running + 1;
|
||||||
imbalance = max(0, dst_running - src_running);
|
imbalance = max(0, dst_running - src_running);
|
||||||
imbalance = adjust_numa_imbalance(imbalance, dst_running,
|
imbalance = adjust_numa_imbalance(imbalance, dst_running,
|
||||||
env->dst_stats.weight);
|
env->imb_numa_nr);
|
||||||
|
|
||||||
/* Use idle CPU if there is no imbalance */
|
/* Use idle CPU if there is no imbalance */
|
||||||
if (!imbalance) {
|
if (!imbalance) {
|
||||||
|
@ -1949,8 +1982,10 @@ static int task_numa_migrate(struct task_struct *p)
|
||||||
*/
|
*/
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
|
sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
|
||||||
if (sd)
|
if (sd) {
|
||||||
env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
|
env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
|
||||||
|
env.imb_numa_nr = sd->imb_numa_nr;
|
||||||
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1985,7 +2020,7 @@ static int task_numa_migrate(struct task_struct *p)
|
||||||
*/
|
*/
|
||||||
ng = deref_curr_numa_group(p);
|
ng = deref_curr_numa_group(p);
|
||||||
if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
|
if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
|
||||||
for_each_online_node(nid) {
|
for_each_node_state(nid, N_CPU) {
|
||||||
if (nid == env.src_nid || nid == p->numa_preferred_nid)
|
if (nid == env.src_nid || nid == p->numa_preferred_nid)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -2083,13 +2118,13 @@ static void numa_group_count_active_nodes(struct numa_group *numa_group)
|
||||||
unsigned long faults, max_faults = 0;
|
unsigned long faults, max_faults = 0;
|
||||||
int nid, active_nodes = 0;
|
int nid, active_nodes = 0;
|
||||||
|
|
||||||
for_each_online_node(nid) {
|
for_each_node_state(nid, N_CPU) {
|
||||||
faults = group_faults_cpu(numa_group, nid);
|
faults = group_faults_cpu(numa_group, nid);
|
||||||
if (faults > max_faults)
|
if (faults > max_faults)
|
||||||
max_faults = faults;
|
max_faults = faults;
|
||||||
}
|
}
|
||||||
|
|
||||||
for_each_online_node(nid) {
|
for_each_node_state(nid, N_CPU) {
|
||||||
faults = group_faults_cpu(numa_group, nid);
|
faults = group_faults_cpu(numa_group, nid);
|
||||||
if (faults * ACTIVE_NODE_FRACTION > max_faults)
|
if (faults * ACTIVE_NODE_FRACTION > max_faults)
|
||||||
active_nodes++;
|
active_nodes++;
|
||||||
|
@ -2243,7 +2278,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
|
||||||
|
|
||||||
dist = sched_max_numa_distance;
|
dist = sched_max_numa_distance;
|
||||||
|
|
||||||
for_each_online_node(node) {
|
for_each_node_state(node, N_CPU) {
|
||||||
score = group_weight(p, node, dist);
|
score = group_weight(p, node, dist);
|
||||||
if (score > max_score) {
|
if (score > max_score) {
|
||||||
max_score = score;
|
max_score = score;
|
||||||
|
@ -2262,7 +2297,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
|
||||||
* inside the highest scoring group of nodes. The nodemask tricks
|
* inside the highest scoring group of nodes. The nodemask tricks
|
||||||
* keep the complexity of the search down.
|
* keep the complexity of the search down.
|
||||||
*/
|
*/
|
||||||
nodes = node_online_map;
|
nodes = node_states[N_CPU];
|
||||||
for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) {
|
for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) {
|
||||||
unsigned long max_faults = 0;
|
unsigned long max_faults = 0;
|
||||||
nodemask_t max_group = NODE_MASK_NONE;
|
nodemask_t max_group = NODE_MASK_NONE;
|
||||||
|
@ -2401,6 +2436,21 @@ static void task_numa_placement(struct task_struct *p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Cannot migrate task to CPU-less node */
|
||||||
|
if (max_nid != NUMA_NO_NODE && !node_state(max_nid, N_CPU)) {
|
||||||
|
int near_nid = max_nid;
|
||||||
|
int distance, near_distance = INT_MAX;
|
||||||
|
|
||||||
|
for_each_node_state(nid, N_CPU) {
|
||||||
|
distance = node_distance(max_nid, nid);
|
||||||
|
if (distance < near_distance) {
|
||||||
|
near_nid = nid;
|
||||||
|
near_distance = distance;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
max_nid = near_nid;
|
||||||
|
}
|
||||||
|
|
||||||
if (ng) {
|
if (ng) {
|
||||||
numa_group_count_active_nodes(ng);
|
numa_group_count_active_nodes(ng);
|
||||||
spin_unlock_irq(group_lock);
|
spin_unlock_irq(group_lock);
|
||||||
|
@ -2825,6 +2875,8 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
|
||||||
/* Protect against double add, see task_tick_numa and task_numa_work */
|
/* Protect against double add, see task_tick_numa and task_numa_work */
|
||||||
p->numa_work.next = &p->numa_work;
|
p->numa_work.next = &p->numa_work;
|
||||||
p->numa_faults = NULL;
|
p->numa_faults = NULL;
|
||||||
|
p->numa_pages_migrated = 0;
|
||||||
|
p->total_numa_faults = 0;
|
||||||
RCU_INIT_POINTER(p->numa_group, NULL);
|
RCU_INIT_POINTER(p->numa_group, NULL);
|
||||||
p->last_task_numa_placement = 0;
|
p->last_task_numa_placement = 0;
|
||||||
p->last_sum_exec_runtime = 0;
|
p->last_sum_exec_runtime = 0;
|
||||||
|
@ -9040,9 +9092,9 @@ static bool update_pick_idlest(struct sched_group *idlest,
|
||||||
* This is an approximation as the number of running tasks may not be
|
* This is an approximation as the number of running tasks may not be
|
||||||
* related to the number of busy CPUs due to sched_setaffinity.
|
* related to the number of busy CPUs due to sched_setaffinity.
|
||||||
*/
|
*/
|
||||||
static inline bool allow_numa_imbalance(int dst_running, int dst_weight)
|
static inline bool allow_numa_imbalance(int running, int imb_numa_nr)
|
||||||
{
|
{
|
||||||
return (dst_running < (dst_weight >> 2));
|
return running <= imb_numa_nr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -9176,12 +9228,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
|
||||||
return idlest;
|
return idlest;
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* Otherwise, keep the task on this node to stay close
|
* Otherwise, keep the task close to the wakeup source
|
||||||
* its wakeup source and improve locality. If there is
|
* and improve locality if the number of running tasks
|
||||||
* a real need of migration, periodic load balance will
|
* would remain below threshold where an imbalance is
|
||||||
* take care of it.
|
* allowed. If there is a real need of migration,
|
||||||
|
* periodic load balance will take care of it.
|
||||||
*/
|
*/
|
||||||
if (allow_numa_imbalance(local_sgs.sum_nr_running, sd->span_weight))
|
if (allow_numa_imbalance(local_sgs.sum_nr_running + 1, sd->imb_numa_nr))
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9273,9 +9326,9 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
|
||||||
#define NUMA_IMBALANCE_MIN 2
|
#define NUMA_IMBALANCE_MIN 2
|
||||||
|
|
||||||
static inline long adjust_numa_imbalance(int imbalance,
|
static inline long adjust_numa_imbalance(int imbalance,
|
||||||
int dst_running, int dst_weight)
|
int dst_running, int imb_numa_nr)
|
||||||
{
|
{
|
||||||
if (!allow_numa_imbalance(dst_running, dst_weight))
|
if (!allow_numa_imbalance(dst_running, imb_numa_nr))
|
||||||
return imbalance;
|
return imbalance;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -9387,7 +9440,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||||
/* Consider allowing a small imbalance between NUMA groups */
|
/* Consider allowing a small imbalance between NUMA groups */
|
||||||
if (env->sd->flags & SD_NUMA) {
|
if (env->sd->flags & SD_NUMA) {
|
||||||
env->imbalance = adjust_numa_imbalance(env->imbalance,
|
env->imbalance = adjust_numa_imbalance(env->imbalance,
|
||||||
busiest->sum_nr_running, busiest->group_weight);
|
local->sum_nr_running + 1, env->sd->imb_numa_nr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
@ -10351,7 +10404,7 @@ static inline int on_null_domain(struct rq *rq)
|
||||||
* - When one of the busy CPUs notice that there may be an idle rebalancing
|
* - When one of the busy CPUs notice that there may be an idle rebalancing
|
||||||
* needed, they will kick the idle load balancer, which then does idle
|
* needed, they will kick the idle load balancer, which then does idle
|
||||||
* load balancing for all the idle CPUs.
|
* load balancing for all the idle CPUs.
|
||||||
* - HK_FLAG_MISC CPUs are used for this task, because HK_FLAG_SCHED not set
|
* - HK_TYPE_MISC CPUs are used for this task, because HK_TYPE_SCHED not set
|
||||||
* anywhere yet.
|
* anywhere yet.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -10360,7 +10413,7 @@ static inline int find_new_ilb(void)
|
||||||
int ilb;
|
int ilb;
|
||||||
const struct cpumask *hk_mask;
|
const struct cpumask *hk_mask;
|
||||||
|
|
||||||
hk_mask = housekeeping_cpumask(HK_FLAG_MISC);
|
hk_mask = housekeeping_cpumask(HK_TYPE_MISC);
|
||||||
|
|
||||||
for_each_cpu_and(ilb, nohz.idle_cpus_mask, hk_mask) {
|
for_each_cpu_and(ilb, nohz.idle_cpus_mask, hk_mask) {
|
||||||
|
|
||||||
|
@ -10376,7 +10429,7 @@ static inline int find_new_ilb(void)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Kick a CPU to do the nohz balancing, if it is time for it. We pick any
|
* Kick a CPU to do the nohz balancing, if it is time for it. We pick any
|
||||||
* idle CPU in the HK_FLAG_MISC housekeeping set (if there is one).
|
* idle CPU in the HK_TYPE_MISC housekeeping set (if there is one).
|
||||||
*/
|
*/
|
||||||
static void kick_ilb(unsigned int flags)
|
static void kick_ilb(unsigned int flags)
|
||||||
{
|
{
|
||||||
|
@ -10589,7 +10642,7 @@ void nohz_balance_enter_idle(int cpu)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Spare idle load balancing on CPUs that don't want to be disturbed: */
|
/* Spare idle load balancing on CPUs that don't want to be disturbed: */
|
||||||
if (!housekeeping_cpu(cpu, HK_FLAG_SCHED))
|
if (!housekeeping_cpu(cpu, HK_TYPE_SCHED))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -10805,7 +10858,7 @@ static void nohz_newidle_balance(struct rq *this_rq)
|
||||||
* This CPU doesn't want to be disturbed by scheduler
|
* This CPU doesn't want to be disturbed by scheduler
|
||||||
* housekeeping
|
* housekeeping
|
||||||
*/
|
*/
|
||||||
if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED))
|
if (!housekeeping_cpu(this_cpu, HK_TYPE_SCHED))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Will wake up very soon. No time for doing anything else*/
|
/* Will wake up very soon. No time for doing anything else*/
|
||||||
|
|
|
@ -6,9 +6,6 @@
|
||||||
* (NOTE: these are not related to SCHED_IDLE batch scheduled
|
* (NOTE: these are not related to SCHED_IDLE batch scheduled
|
||||||
* tasks which are handled in sched/fair.c )
|
* tasks which are handled in sched/fair.c )
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
#include <trace/events/power.h>
|
|
||||||
|
|
||||||
/* Linker adds these: start and end of __cpuidle functions */
|
/* Linker adds these: start and end of __cpuidle functions */
|
||||||
extern char __cpuidle_text_start[], __cpuidle_text_end[];
|
extern char __cpuidle_text_start[], __cpuidle_text_end[];
|
||||||
|
|
|
@ -7,136 +7,179 @@
|
||||||
* Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
|
* Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
enum hk_flags {
|
||||||
|
HK_FLAG_TIMER = BIT(HK_TYPE_TIMER),
|
||||||
|
HK_FLAG_RCU = BIT(HK_TYPE_RCU),
|
||||||
|
HK_FLAG_MISC = BIT(HK_TYPE_MISC),
|
||||||
|
HK_FLAG_SCHED = BIT(HK_TYPE_SCHED),
|
||||||
|
HK_FLAG_TICK = BIT(HK_TYPE_TICK),
|
||||||
|
HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN),
|
||||||
|
HK_FLAG_WQ = BIT(HK_TYPE_WQ),
|
||||||
|
HK_FLAG_MANAGED_IRQ = BIT(HK_TYPE_MANAGED_IRQ),
|
||||||
|
HK_FLAG_KTHREAD = BIT(HK_TYPE_KTHREAD),
|
||||||
|
};
|
||||||
|
|
||||||
DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
|
DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
|
||||||
EXPORT_SYMBOL_GPL(housekeeping_overridden);
|
EXPORT_SYMBOL_GPL(housekeeping_overridden);
|
||||||
static cpumask_var_t housekeeping_mask;
|
|
||||||
static unsigned int housekeeping_flags;
|
|
||||||
|
|
||||||
bool housekeeping_enabled(enum hk_flags flags)
|
struct housekeeping {
|
||||||
|
cpumask_var_t cpumasks[HK_TYPE_MAX];
|
||||||
|
unsigned long flags;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct housekeeping housekeeping;
|
||||||
|
|
||||||
|
bool housekeeping_enabled(enum hk_type type)
|
||||||
{
|
{
|
||||||
return !!(housekeeping_flags & flags);
|
return !!(housekeeping.flags & BIT(type));
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(housekeeping_enabled);
|
EXPORT_SYMBOL_GPL(housekeeping_enabled);
|
||||||
|
|
||||||
int housekeeping_any_cpu(enum hk_flags flags)
|
int housekeeping_any_cpu(enum hk_type type)
|
||||||
{
|
{
|
||||||
int cpu;
|
int cpu;
|
||||||
|
|
||||||
if (static_branch_unlikely(&housekeeping_overridden)) {
|
if (static_branch_unlikely(&housekeeping_overridden)) {
|
||||||
if (housekeeping_flags & flags) {
|
if (housekeeping.flags & BIT(type)) {
|
||||||
cpu = sched_numa_find_closest(housekeeping_mask, smp_processor_id());
|
cpu = sched_numa_find_closest(housekeeping.cpumasks[type], smp_processor_id());
|
||||||
if (cpu < nr_cpu_ids)
|
if (cpu < nr_cpu_ids)
|
||||||
return cpu;
|
return cpu;
|
||||||
|
|
||||||
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
|
return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return smp_processor_id();
|
return smp_processor_id();
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(housekeeping_any_cpu);
|
EXPORT_SYMBOL_GPL(housekeeping_any_cpu);
|
||||||
|
|
||||||
const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
|
const struct cpumask *housekeeping_cpumask(enum hk_type type)
|
||||||
{
|
{
|
||||||
if (static_branch_unlikely(&housekeeping_overridden))
|
if (static_branch_unlikely(&housekeeping_overridden))
|
||||||
if (housekeeping_flags & flags)
|
if (housekeeping.flags & BIT(type))
|
||||||
return housekeeping_mask;
|
return housekeeping.cpumasks[type];
|
||||||
return cpu_possible_mask;
|
return cpu_possible_mask;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
|
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
|
||||||
|
|
||||||
void housekeeping_affine(struct task_struct *t, enum hk_flags flags)
|
void housekeeping_affine(struct task_struct *t, enum hk_type type)
|
||||||
{
|
{
|
||||||
if (static_branch_unlikely(&housekeeping_overridden))
|
if (static_branch_unlikely(&housekeeping_overridden))
|
||||||
if (housekeeping_flags & flags)
|
if (housekeeping.flags & BIT(type))
|
||||||
set_cpus_allowed_ptr(t, housekeeping_mask);
|
set_cpus_allowed_ptr(t, housekeeping.cpumasks[type]);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(housekeeping_affine);
|
EXPORT_SYMBOL_GPL(housekeeping_affine);
|
||||||
|
|
||||||
bool housekeeping_test_cpu(int cpu, enum hk_flags flags)
|
bool housekeeping_test_cpu(int cpu, enum hk_type type)
|
||||||
{
|
{
|
||||||
if (static_branch_unlikely(&housekeeping_overridden))
|
if (static_branch_unlikely(&housekeeping_overridden))
|
||||||
if (housekeeping_flags & flags)
|
if (housekeeping.flags & BIT(type))
|
||||||
return cpumask_test_cpu(cpu, housekeeping_mask);
|
return cpumask_test_cpu(cpu, housekeeping.cpumasks[type]);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
|
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
|
||||||
|
|
||||||
void __init housekeeping_init(void)
|
void __init housekeeping_init(void)
|
||||||
{
|
{
|
||||||
if (!housekeeping_flags)
|
enum hk_type type;
|
||||||
|
|
||||||
|
if (!housekeeping.flags)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
static_branch_enable(&housekeeping_overridden);
|
static_branch_enable(&housekeeping_overridden);
|
||||||
|
|
||||||
if (housekeeping_flags & HK_FLAG_TICK)
|
if (housekeeping.flags & HK_FLAG_TICK)
|
||||||
sched_tick_offload_init();
|
sched_tick_offload_init();
|
||||||
|
|
||||||
/* We need at least one CPU to handle housekeeping work */
|
for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) {
|
||||||
WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
|
/* We need at least one CPU to handle housekeeping work */
|
||||||
|
WARN_ON_ONCE(cpumask_empty(housekeeping.cpumasks[type]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __init housekeeping_setup(char *str, enum hk_flags flags)
|
static void __init housekeeping_setup_type(enum hk_type type,
|
||||||
|
cpumask_var_t housekeeping_staging)
|
||||||
{
|
{
|
||||||
cpumask_var_t non_housekeeping_mask;
|
|
||||||
cpumask_var_t tmp;
|
alloc_bootmem_cpumask_var(&housekeeping.cpumasks[type]);
|
||||||
|
cpumask_copy(housekeeping.cpumasks[type],
|
||||||
|
housekeeping_staging);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __init housekeeping_setup(char *str, unsigned long flags)
|
||||||
|
{
|
||||||
|
cpumask_var_t non_housekeeping_mask, housekeeping_staging;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
|
||||||
|
if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) {
|
||||||
|
pr_warn("Housekeeping: nohz unsupported."
|
||||||
|
" Build with CONFIG_NO_HZ_FULL\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
alloc_bootmem_cpumask_var(&non_housekeeping_mask);
|
alloc_bootmem_cpumask_var(&non_housekeeping_mask);
|
||||||
if (cpulist_parse(str, non_housekeeping_mask) < 0) {
|
if (cpulist_parse(str, non_housekeeping_mask) < 0) {
|
||||||
pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
|
pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
|
||||||
free_bootmem_cpumask_var(non_housekeeping_mask);
|
goto free_non_housekeeping_mask;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
alloc_bootmem_cpumask_var(&tmp);
|
alloc_bootmem_cpumask_var(&housekeeping_staging);
|
||||||
if (!housekeeping_flags) {
|
cpumask_andnot(housekeeping_staging,
|
||||||
alloc_bootmem_cpumask_var(&housekeeping_mask);
|
cpu_possible_mask, non_housekeeping_mask);
|
||||||
cpumask_andnot(housekeeping_mask,
|
|
||||||
cpu_possible_mask, non_housekeeping_mask);
|
|
||||||
|
|
||||||
cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask);
|
if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) {
|
||||||
if (cpumask_empty(tmp)) {
|
__cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
|
||||||
|
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
|
||||||
|
if (!housekeeping.flags) {
|
||||||
pr_warn("Housekeeping: must include one present CPU, "
|
pr_warn("Housekeeping: must include one present CPU, "
|
||||||
"using boot CPU:%d\n", smp_processor_id());
|
"using boot CPU:%d\n", smp_processor_id());
|
||||||
__cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
|
|
||||||
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!housekeeping.flags) {
|
||||||
|
/* First setup call ("nohz_full=" or "isolcpus=") */
|
||||||
|
enum hk_type type;
|
||||||
|
|
||||||
|
for_each_set_bit(type, &flags, HK_TYPE_MAX)
|
||||||
|
housekeeping_setup_type(type, housekeeping_staging);
|
||||||
} else {
|
} else {
|
||||||
cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask);
|
/* Second setup call ("nohz_full=" after "isolcpus=" or the reverse) */
|
||||||
if (cpumask_empty(tmp))
|
enum hk_type type;
|
||||||
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
|
unsigned long iter_flags = flags & housekeeping.flags;
|
||||||
cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask);
|
|
||||||
if (!cpumask_equal(tmp, housekeeping_mask)) {
|
|
||||||
pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
|
|
||||||
free_bootmem_cpumask_var(tmp);
|
|
||||||
free_bootmem_cpumask_var(non_housekeeping_mask);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
free_bootmem_cpumask_var(tmp);
|
|
||||||
|
|
||||||
if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) {
|
for_each_set_bit(type, &iter_flags, HK_TYPE_MAX) {
|
||||||
if (IS_ENABLED(CONFIG_NO_HZ_FULL)) {
|
if (!cpumask_equal(housekeeping_staging,
|
||||||
tick_nohz_full_setup(non_housekeeping_mask);
|
housekeeping.cpumasks[type])) {
|
||||||
} else {
|
pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
|
||||||
pr_warn("Housekeeping: nohz unsupported."
|
goto free_housekeeping_staging;
|
||||||
" Build with CONFIG_NO_HZ_FULL\n");
|
}
|
||||||
free_bootmem_cpumask_var(non_housekeeping_mask);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
iter_flags = flags & ~housekeeping.flags;
|
||||||
|
|
||||||
|
for_each_set_bit(type, &iter_flags, HK_TYPE_MAX)
|
||||||
|
housekeeping_setup_type(type, housekeeping_staging);
|
||||||
}
|
}
|
||||||
|
|
||||||
housekeeping_flags |= flags;
|
if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK))
|
||||||
|
tick_nohz_full_setup(non_housekeeping_mask);
|
||||||
|
|
||||||
|
housekeeping.flags |= flags;
|
||||||
|
err = 1;
|
||||||
|
|
||||||
|
free_housekeeping_staging:
|
||||||
|
free_bootmem_cpumask_var(housekeeping_staging);
|
||||||
|
free_non_housekeeping_mask:
|
||||||
free_bootmem_cpumask_var(non_housekeeping_mask);
|
free_bootmem_cpumask_var(non_housekeeping_mask);
|
||||||
|
|
||||||
return 1;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __init housekeeping_nohz_full_setup(char *str)
|
static int __init housekeeping_nohz_full_setup(char *str)
|
||||||
{
|
{
|
||||||
unsigned int flags;
|
unsigned long flags;
|
||||||
|
|
||||||
flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU |
|
flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU |
|
||||||
HK_FLAG_MISC | HK_FLAG_KTHREAD;
|
HK_FLAG_MISC | HK_FLAG_KTHREAD;
|
||||||
|
@ -147,7 +190,7 @@ __setup("nohz_full=", housekeeping_nohz_full_setup);
|
||||||
|
|
||||||
static int __init housekeeping_isolcpus_setup(char *str)
|
static int __init housekeeping_isolcpus_setup(char *str)
|
||||||
{
|
{
|
||||||
unsigned int flags = 0;
|
unsigned long flags = 0;
|
||||||
bool illegal = false;
|
bool illegal = false;
|
||||||
char *par;
|
char *par;
|
||||||
int len;
|
int len;
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
* figure. Its a silly number but people think its important. We go through
|
* figure. Its a silly number but people think its important. We go through
|
||||||
* great pains to make it work on big machines and tickless kernels.
|
* great pains to make it work on big machines and tickless kernels.
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Global load-average calculations
|
* Global load-average calculations
|
||||||
|
|
|
@ -4,7 +4,6 @@
|
||||||
*
|
*
|
||||||
* membarrier system call
|
* membarrier system call
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For documentation purposes, here are some membarrier ordering
|
* For documentation purposes, here are some membarrier ordering
|
||||||
|
|
|
@ -24,10 +24,6 @@
|
||||||
* Author: Vincent Guittot <vincent.guittot@linaro.org>
|
* Author: Vincent Guittot <vincent.guittot@linaro.org>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/sched.h>
|
|
||||||
#include "sched.h"
|
|
||||||
#include "pelt.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Approximate:
|
* Approximate:
|
||||||
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
|
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
|
||||||
|
|
|
@ -137,21 +137,6 @@
|
||||||
* sampling of the aggregate task states would be.
|
* sampling of the aggregate task states would be.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "../workqueue_internal.h"
|
|
||||||
#include <linux/sched/loadavg.h>
|
|
||||||
#include <linux/seq_file.h>
|
|
||||||
#include <linux/proc_fs.h>
|
|
||||||
#include <linux/seqlock.h>
|
|
||||||
#include <linux/uaccess.h>
|
|
||||||
#include <linux/cgroup.h>
|
|
||||||
#include <linux/module.h>
|
|
||||||
#include <linux/sched.h>
|
|
||||||
#include <linux/ctype.h>
|
|
||||||
#include <linux/file.h>
|
|
||||||
#include <linux/poll.h>
|
|
||||||
#include <linux/psi.h>
|
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
static int psi_bug __read_mostly;
|
static int psi_bug __read_mostly;
|
||||||
|
|
||||||
DEFINE_STATIC_KEY_FALSE(psi_disabled);
|
DEFINE_STATIC_KEY_FALSE(psi_disabled);
|
||||||
|
@ -523,7 +508,7 @@ static void init_triggers(struct psi_group *group, u64 now)
|
||||||
static u64 update_triggers(struct psi_group *group, u64 now)
|
static u64 update_triggers(struct psi_group *group, u64 now)
|
||||||
{
|
{
|
||||||
struct psi_trigger *t;
|
struct psi_trigger *t;
|
||||||
bool new_stall = false;
|
bool update_total = false;
|
||||||
u64 *total = group->total[PSI_POLL];
|
u64 *total = group->total[PSI_POLL];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -532,24 +517,35 @@ static u64 update_triggers(struct psi_group *group, u64 now)
|
||||||
*/
|
*/
|
||||||
list_for_each_entry(t, &group->triggers, node) {
|
list_for_each_entry(t, &group->triggers, node) {
|
||||||
u64 growth;
|
u64 growth;
|
||||||
|
bool new_stall;
|
||||||
|
|
||||||
/* Check for stall activity */
|
new_stall = group->polling_total[t->state] != total[t->state];
|
||||||
if (group->polling_total[t->state] == total[t->state])
|
|
||||||
|
/* Check for stall activity or a previous threshold breach */
|
||||||
|
if (!new_stall && !t->pending_event)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Multiple triggers might be looking at the same state,
|
* Check for new stall activity, as well as deferred
|
||||||
* remember to update group->polling_total[] once we've
|
* events that occurred in the last window after the
|
||||||
* been through all of them. Also remember to extend the
|
* trigger had already fired (we want to ratelimit
|
||||||
* polling time if we see new stall activity.
|
* events without dropping any).
|
||||||
*/
|
*/
|
||||||
new_stall = true;
|
if (new_stall) {
|
||||||
|
/*
|
||||||
|
* Multiple triggers might be looking at the same state,
|
||||||
|
* remember to update group->polling_total[] once we've
|
||||||
|
* been through all of them. Also remember to extend the
|
||||||
|
* polling time if we see new stall activity.
|
||||||
|
*/
|
||||||
|
update_total = true;
|
||||||
|
|
||||||
/* Calculate growth since last update */
|
/* Calculate growth since last update */
|
||||||
growth = window_update(&t->win, now, total[t->state]);
|
growth = window_update(&t->win, now, total[t->state]);
|
||||||
if (growth < t->threshold)
|
if (growth < t->threshold)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
t->pending_event = true;
|
||||||
|
}
|
||||||
/* Limit event signaling to once per window */
|
/* Limit event signaling to once per window */
|
||||||
if (now < t->last_event_time + t->win.size)
|
if (now < t->last_event_time + t->win.size)
|
||||||
continue;
|
continue;
|
||||||
|
@ -558,9 +554,11 @@ static u64 update_triggers(struct psi_group *group, u64 now)
|
||||||
if (cmpxchg(&t->event, 0, 1) == 0)
|
if (cmpxchg(&t->event, 0, 1) == 0)
|
||||||
wake_up_interruptible(&t->event_wait);
|
wake_up_interruptible(&t->event_wait);
|
||||||
t->last_event_time = now;
|
t->last_event_time = now;
|
||||||
|
/* Reset threshold breach flag once event got generated */
|
||||||
|
t->pending_event = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_stall)
|
if (update_total)
|
||||||
memcpy(group->polling_total, total,
|
memcpy(group->polling_total, total,
|
||||||
sizeof(group->polling_total));
|
sizeof(group->polling_total));
|
||||||
|
|
||||||
|
@ -1124,6 +1122,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
||||||
t->event = 0;
|
t->event = 0;
|
||||||
t->last_event_time = 0;
|
t->last_event_time = 0;
|
||||||
init_waitqueue_head(&t->event_wait);
|
init_waitqueue_head(&t->event_wait);
|
||||||
|
t->pending_event = false;
|
||||||
|
|
||||||
mutex_lock(&group->trigger_lock);
|
mutex_lock(&group->trigger_lock);
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,6 @@
|
||||||
* Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
|
* Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
|
||||||
* policies)
|
* policies)
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
#include "pelt.h"
|
|
||||||
|
|
||||||
int sched_rr_timeslice = RR_TIMESLICE;
|
int sched_rr_timeslice = RR_TIMESLICE;
|
||||||
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
|
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
|
||||||
|
@ -271,8 +268,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
|
||||||
static void pull_rt_task(struct rq *this_rq);
|
|
||||||
|
|
||||||
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
|
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
|
||||||
{
|
{
|
||||||
/* Try to pull RT tasks here if we lower this rq's prio */
|
/* Try to pull RT tasks here if we lower this rq's prio */
|
||||||
|
@ -429,15 +424,6 @@ void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void pull_rt_task(struct rq *this_rq)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void rt_queue_push_tasks(struct rq *rq)
|
static inline void rt_queue_push_tasks(struct rq *rq)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -1730,8 +1716,7 @@ static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool f
|
||||||
rt_queue_push_tasks(rq);
|
rt_queue_push_tasks(rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
|
static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
|
||||||
struct rt_rq *rt_rq)
|
|
||||||
{
|
{
|
||||||
struct rt_prio_array *array = &rt_rq->active;
|
struct rt_prio_array *array = &rt_rq->active;
|
||||||
struct sched_rt_entity *next = NULL;
|
struct sched_rt_entity *next = NULL;
|
||||||
|
@ -1753,7 +1738,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
|
||||||
struct rt_rq *rt_rq = &rq->rt;
|
struct rt_rq *rt_rq = &rq->rt;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
rt_se = pick_next_rt_entity(rq, rt_rq);
|
rt_se = pick_next_rt_entity(rt_rq);
|
||||||
BUG_ON(!rt_se);
|
BUG_ON(!rt_se);
|
||||||
rt_rq = group_rt_rq(rt_se);
|
rt_rq = group_rt_rq(rt_se);
|
||||||
} while (rt_rq);
|
} while (rt_rq);
|
||||||
|
@ -2026,6 +2011,16 @@ static int push_rt_task(struct rq *rq, bool pull)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
|
/*
|
||||||
|
* It's possible that the next_task slipped in of
|
||||||
|
* higher priority than current. If that's the case
|
||||||
|
* just reschedule current.
|
||||||
|
*/
|
||||||
|
if (unlikely(next_task->prio < rq->curr->prio)) {
|
||||||
|
resched_curr(rq);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (is_migration_disabled(next_task)) {
|
if (is_migration_disabled(next_task)) {
|
||||||
struct task_struct *push_task = NULL;
|
struct task_struct *push_task = NULL;
|
||||||
int cpu;
|
int cpu;
|
||||||
|
@ -2033,6 +2028,18 @@ static int push_rt_task(struct rq *rq, bool pull)
|
||||||
if (!pull || rq->push_busy)
|
if (!pull || rq->push_busy)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Invoking find_lowest_rq() on anything but an RT task doesn't
|
||||||
|
* make sense. Per the above priority check, curr has to
|
||||||
|
* be of higher priority than next_task, so no need to
|
||||||
|
* reschedule when bailing out.
|
||||||
|
*
|
||||||
|
* Note that the stoppers are masqueraded as SCHED_FIFO
|
||||||
|
* (cf. sched_set_stop_task()), so we can't rely on rt_task().
|
||||||
|
*/
|
||||||
|
if (rq->curr->sched_class != &rt_sched_class)
|
||||||
|
return 0;
|
||||||
|
|
||||||
cpu = find_lowest_rq(rq->curr);
|
cpu = find_lowest_rq(rq->curr);
|
||||||
if (cpu == -1 || cpu == rq->cpu)
|
if (cpu == -1 || cpu == rq->cpu)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -2057,16 +2064,6 @@ static int push_rt_task(struct rq *rq, bool pull)
|
||||||
if (WARN_ON(next_task == rq->curr))
|
if (WARN_ON(next_task == rq->curr))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/*
|
|
||||||
* It's possible that the next_task slipped in of
|
|
||||||
* higher priority than current. If that's the case
|
|
||||||
* just reschedule current.
|
|
||||||
*/
|
|
||||||
if (unlikely(next_task->prio < rq->curr->prio)) {
|
|
||||||
resched_curr(rq);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We might release rq lock */
|
/* We might release rq lock */
|
||||||
get_task_struct(next_task);
|
get_task_struct(next_task);
|
||||||
|
|
||||||
|
|
|
@ -2,86 +2,98 @@
|
||||||
/*
|
/*
|
||||||
* Scheduler internal types and methods:
|
* Scheduler internal types and methods:
|
||||||
*/
|
*/
|
||||||
#include <linux/sched.h>
|
#ifndef _KERNEL_SCHED_SCHED_H
|
||||||
|
#define _KERNEL_SCHED_SCHED_H
|
||||||
|
|
||||||
|
#include <linux/sched/affinity.h>
|
||||||
#include <linux/sched/autogroup.h>
|
#include <linux/sched/autogroup.h>
|
||||||
#include <linux/sched/clock.h>
|
|
||||||
#include <linux/sched/coredump.h>
|
|
||||||
#include <linux/sched/cpufreq.h>
|
#include <linux/sched/cpufreq.h>
|
||||||
#include <linux/sched/cputime.h>
|
|
||||||
#include <linux/sched/deadline.h>
|
#include <linux/sched/deadline.h>
|
||||||
#include <linux/sched/debug.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/sched/hotplug.h>
|
|
||||||
#include <linux/sched/idle.h>
|
|
||||||
#include <linux/sched/init.h>
|
|
||||||
#include <linux/sched/isolation.h>
|
|
||||||
#include <linux/sched/jobctl.h>
|
|
||||||
#include <linux/sched/loadavg.h>
|
#include <linux/sched/loadavg.h>
|
||||||
#include <linux/sched/mm.h>
|
#include <linux/sched/mm.h>
|
||||||
#include <linux/sched/nohz.h>
|
#include <linux/sched/rseq_api.h>
|
||||||
#include <linux/sched/numa_balancing.h>
|
|
||||||
#include <linux/sched/prio.h>
|
|
||||||
#include <linux/sched/rt.h>
|
|
||||||
#include <linux/sched/signal.h>
|
#include <linux/sched/signal.h>
|
||||||
#include <linux/sched/smt.h>
|
#include <linux/sched/smt.h>
|
||||||
#include <linux/sched/stat.h>
|
#include <linux/sched/stat.h>
|
||||||
#include <linux/sched/sysctl.h>
|
#include <linux/sched/sysctl.h>
|
||||||
|
#include <linux/sched/task_flags.h>
|
||||||
#include <linux/sched/task.h>
|
#include <linux/sched/task.h>
|
||||||
#include <linux/sched/task_stack.h>
|
|
||||||
#include <linux/sched/topology.h>
|
#include <linux/sched/topology.h>
|
||||||
#include <linux/sched/user.h>
|
|
||||||
#include <linux/sched/wake_q.h>
|
|
||||||
#include <linux/sched/xacct.h>
|
|
||||||
|
|
||||||
#include <uapi/linux/sched/types.h>
|
#include <linux/atomic.h>
|
||||||
|
#include <linux/bitmap.h>
|
||||||
#include <linux/binfmts.h>
|
#include <linux/bug.h>
|
||||||
#include <linux/bitops.h>
|
#include <linux/capability.h>
|
||||||
#include <linux/compat.h>
|
#include <linux/cgroup_api.h>
|
||||||
#include <linux/context_tracking.h>
|
#include <linux/cgroup.h>
|
||||||
#include <linux/cpufreq.h>
|
#include <linux/cpufreq.h>
|
||||||
#include <linux/cpuidle.h>
|
#include <linux/cpumask_api.h>
|
||||||
#include <linux/cpuset.h>
|
|
||||||
#include <linux/ctype.h>
|
#include <linux/ctype.h>
|
||||||
#include <linux/debugfs.h>
|
#include <linux/file.h>
|
||||||
#include <linux/delayacct.h>
|
#include <linux/fs_api.h>
|
||||||
#include <linux/energy_model.h>
|
#include <linux/hrtimer_api.h>
|
||||||
#include <linux/init_task.h>
|
#include <linux/interrupt.h>
|
||||||
#include <linux/kprobes.h>
|
#include <linux/irq_work.h>
|
||||||
|
#include <linux/jiffies.h>
|
||||||
|
#include <linux/kref_api.h>
|
||||||
#include <linux/kthread.h>
|
#include <linux/kthread.h>
|
||||||
#include <linux/membarrier.h>
|
#include <linux/ktime_api.h>
|
||||||
#include <linux/migrate.h>
|
#include <linux/lockdep_api.h>
|
||||||
#include <linux/mmu_context.h>
|
#include <linux/lockdep.h>
|
||||||
#include <linux/nmi.h>
|
#include <linux/minmax.h>
|
||||||
|
#include <linux/mm.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/mutex_api.h>
|
||||||
|
#include <linux/plist.h>
|
||||||
|
#include <linux/poll.h>
|
||||||
#include <linux/proc_fs.h>
|
#include <linux/proc_fs.h>
|
||||||
#include <linux/prefetch.h>
|
|
||||||
#include <linux/profile.h>
|
#include <linux/profile.h>
|
||||||
#include <linux/psi.h>
|
#include <linux/psi.h>
|
||||||
#include <linux/ratelimit.h>
|
#include <linux/rcupdate.h>
|
||||||
#include <linux/rcupdate_wait.h>
|
#include <linux/seq_file.h>
|
||||||
#include <linux/security.h>
|
#include <linux/seqlock.h>
|
||||||
|
#include <linux/softirq.h>
|
||||||
|
#include <linux/spinlock_api.h>
|
||||||
|
#include <linux/static_key.h>
|
||||||
#include <linux/stop_machine.h>
|
#include <linux/stop_machine.h>
|
||||||
#include <linux/suspend.h>
|
#include <linux/syscalls_api.h>
|
||||||
#include <linux/swait.h>
|
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
#include <linux/task_work.h>
|
#include <linux/tick.h>
|
||||||
#include <linux/tsacct_kern.h>
|
#include <linux/topology.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/u64_stats_sync_api.h>
|
||||||
|
#include <linux/uaccess.h>
|
||||||
|
#include <linux/wait_api.h>
|
||||||
|
#include <linux/wait_bit.h>
|
||||||
|
#include <linux/workqueue_api.h>
|
||||||
|
|
||||||
#include <asm/tlb.h>
|
#include <trace/events/power.h>
|
||||||
|
#include <trace/events/sched.h>
|
||||||
|
|
||||||
|
#include "../workqueue_internal.h"
|
||||||
|
|
||||||
|
#ifdef CONFIG_CGROUP_SCHED
|
||||||
|
#include <linux/cgroup.h>
|
||||||
|
#include <linux/psi.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_SCHED_DEBUG
|
||||||
|
# include <linux/static_key.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_PARAVIRT
|
#ifdef CONFIG_PARAVIRT
|
||||||
# include <asm/paravirt.h>
|
# include <asm/paravirt.h>
|
||||||
|
# include <asm/paravirt_api_clock.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "cpupri.h"
|
#include "cpupri.h"
|
||||||
#include "cpudeadline.h"
|
#include "cpudeadline.h"
|
||||||
|
|
||||||
#include <trace/events/sched.h>
|
|
||||||
|
|
||||||
#ifdef CONFIG_SCHED_DEBUG
|
#ifdef CONFIG_SCHED_DEBUG
|
||||||
# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
|
# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
|
||||||
#else
|
#else
|
||||||
# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
|
# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct rq;
|
struct rq;
|
||||||
|
@ -301,29 +313,6 @@ struct dl_bw {
|
||||||
u64 total_bw;
|
u64 total_bw;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
|
|
||||||
|
|
||||||
static inline
|
|
||||||
void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
|
|
||||||
{
|
|
||||||
dl_b->total_bw -= tsk_bw;
|
|
||||||
__dl_update(dl_b, (s32)tsk_bw / cpus);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline
|
|
||||||
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
|
|
||||||
{
|
|
||||||
dl_b->total_bw += tsk_bw;
|
|
||||||
__dl_update(dl_b, -((s32)tsk_bw / cpus));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool __dl_overflow(struct dl_bw *dl_b, unsigned long cap,
|
|
||||||
u64 old_bw, u64 new_bw)
|
|
||||||
{
|
|
||||||
return dl_b->bw != -1 &&
|
|
||||||
cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Verify the fitness of task @p to run on @cpu taking into account the
|
* Verify the fitness of task @p to run on @cpu taking into account the
|
||||||
* CPU original capacity and the runtime/deadline ratio of the task.
|
* CPU original capacity and the runtime/deadline ratio of the task.
|
||||||
|
@ -347,15 +336,11 @@ extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
|
||||||
extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
|
extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
|
||||||
extern bool __checkparam_dl(const struct sched_attr *attr);
|
extern bool __checkparam_dl(const struct sched_attr *attr);
|
||||||
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
|
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
|
||||||
extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
|
|
||||||
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
|
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
|
||||||
extern bool dl_cpu_busy(unsigned int cpu);
|
extern int dl_cpu_busy(int cpu, struct task_struct *p);
|
||||||
|
|
||||||
#ifdef CONFIG_CGROUP_SCHED
|
#ifdef CONFIG_CGROUP_SCHED
|
||||||
|
|
||||||
#include <linux/cgroup.h>
|
|
||||||
#include <linux/psi.h>
|
|
||||||
|
|
||||||
struct cfs_rq;
|
struct cfs_rq;
|
||||||
struct rt_rq;
|
struct rt_rq;
|
||||||
|
|
||||||
|
@ -1662,12 +1647,14 @@ enum numa_topology_type {
|
||||||
extern enum numa_topology_type sched_numa_topology_type;
|
extern enum numa_topology_type sched_numa_topology_type;
|
||||||
extern int sched_max_numa_distance;
|
extern int sched_max_numa_distance;
|
||||||
extern bool find_numa_distance(int distance);
|
extern bool find_numa_distance(int distance);
|
||||||
extern void sched_init_numa(void);
|
extern void sched_init_numa(int offline_node);
|
||||||
|
extern void sched_update_numa(int cpu, bool online);
|
||||||
extern void sched_domains_numa_masks_set(unsigned int cpu);
|
extern void sched_domains_numa_masks_set(unsigned int cpu);
|
||||||
extern void sched_domains_numa_masks_clear(unsigned int cpu);
|
extern void sched_domains_numa_masks_clear(unsigned int cpu);
|
||||||
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
|
extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
|
||||||
#else
|
#else
|
||||||
static inline void sched_init_numa(void) { }
|
static inline void sched_init_numa(int offline_node) { }
|
||||||
|
static inline void sched_update_numa(int cpu, bool online) { }
|
||||||
static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
|
static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
|
||||||
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
|
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
|
||||||
static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
|
static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
|
||||||
|
@ -1854,7 +1841,6 @@ static inline void flush_smp_call_function_from_idle(void) { }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "stats.h"
|
#include "stats.h"
|
||||||
#include "autogroup.h"
|
|
||||||
|
|
||||||
#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
|
#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
|
||||||
|
|
||||||
|
@ -1950,7 +1936,6 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
|
||||||
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
|
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_SCHED_DEBUG
|
#ifdef CONFIG_SCHED_DEBUG
|
||||||
# include <linux/static_key.h>
|
|
||||||
# define const_debug __read_mostly
|
# define const_debug __read_mostly
|
||||||
#else
|
#else
|
||||||
# define const_debug const
|
# define const_debug const
|
||||||
|
@ -2331,7 +2316,6 @@ extern void resched_cpu(int cpu);
|
||||||
extern struct rt_bandwidth def_rt_bandwidth;
|
extern struct rt_bandwidth def_rt_bandwidth;
|
||||||
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
|
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
|
||||||
|
|
||||||
extern struct dl_bandwidth def_dl_bandwidth;
|
|
||||||
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
|
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
|
||||||
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
|
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
|
||||||
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
|
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
|
||||||
|
@ -2747,32 +2731,6 @@ extern void nohz_run_idle_balance(int cpu);
|
||||||
static inline void nohz_run_idle_balance(int cpu) { }
|
static inline void nohz_run_idle_balance(int cpu) { }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
static inline
|
|
||||||
void __dl_update(struct dl_bw *dl_b, s64 bw)
|
|
||||||
{
|
|
||||||
struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
|
|
||||||
int i;
|
|
||||||
|
|
||||||
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
|
|
||||||
"sched RCU must be held");
|
|
||||||
for_each_cpu_and(i, rd->span, cpu_active_mask) {
|
|
||||||
struct rq *rq = cpu_rq(i);
|
|
||||||
|
|
||||||
rq->dl.extra_bw += bw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static inline
|
|
||||||
void __dl_update(struct dl_bw *dl_b, s64 bw)
|
|
||||||
{
|
|
||||||
struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
|
|
||||||
|
|
||||||
dl->extra_bw += bw;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||||
struct irqtime {
|
struct irqtime {
|
||||||
u64 total;
|
u64 total;
|
||||||
|
@ -2841,88 +2799,6 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
|
||||||
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
|
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
|
||||||
#endif /* CONFIG_CPU_FREQ */
|
#endif /* CONFIG_CPU_FREQ */
|
||||||
|
|
||||||
#ifdef CONFIG_UCLAMP_TASK
|
|
||||||
unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
|
|
||||||
* @rq: The rq to clamp against. Must not be NULL.
|
|
||||||
* @util: The util value to clamp.
|
|
||||||
* @p: The task to clamp against. Can be NULL if you want to clamp
|
|
||||||
* against @rq only.
|
|
||||||
*
|
|
||||||
* Clamps the passed @util to the max(@rq, @p) effective uclamp values.
|
|
||||||
*
|
|
||||||
* If sched_uclamp_used static key is disabled, then just return the util
|
|
||||||
* without any clamping since uclamp aggregation at the rq level in the fast
|
|
||||||
* path is disabled, rendering this operation a NOP.
|
|
||||||
*
|
|
||||||
* Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
|
|
||||||
* will return the correct effective uclamp value of the task even if the
|
|
||||||
* static key is disabled.
|
|
||||||
*/
|
|
||||||
static __always_inline
|
|
||||||
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
|
|
||||||
struct task_struct *p)
|
|
||||||
{
|
|
||||||
unsigned long min_util = 0;
|
|
||||||
unsigned long max_util = 0;
|
|
||||||
|
|
||||||
if (!static_branch_likely(&sched_uclamp_used))
|
|
||||||
return util;
|
|
||||||
|
|
||||||
if (p) {
|
|
||||||
min_util = uclamp_eff_value(p, UCLAMP_MIN);
|
|
||||||
max_util = uclamp_eff_value(p, UCLAMP_MAX);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Ignore last runnable task's max clamp, as this task will
|
|
||||||
* reset it. Similarly, no need to read the rq's min clamp.
|
|
||||||
*/
|
|
||||||
if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
|
|
||||||
max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
|
|
||||||
out:
|
|
||||||
/*
|
|
||||||
* Since CPU's {min,max}_util clamps are MAX aggregated considering
|
|
||||||
* RUNNABLE tasks with _different_ clamps, we can end up with an
|
|
||||||
* inversion. Fix it now when the clamps are applied.
|
|
||||||
*/
|
|
||||||
if (unlikely(min_util >= max_util))
|
|
||||||
return min_util;
|
|
||||||
|
|
||||||
return clamp(util, min_util, max_util);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When uclamp is compiled in, the aggregation at rq level is 'turned off'
|
|
||||||
* by default in the fast path and only gets turned on once userspace performs
|
|
||||||
* an operation that requires it.
|
|
||||||
*
|
|
||||||
* Returns true if userspace opted-in to use uclamp and aggregation at rq level
|
|
||||||
* hence is active.
|
|
||||||
*/
|
|
||||||
static inline bool uclamp_is_used(void)
|
|
||||||
{
|
|
||||||
return static_branch_likely(&sched_uclamp_used);
|
|
||||||
}
|
|
||||||
#else /* CONFIG_UCLAMP_TASK */
|
|
||||||
static inline
|
|
||||||
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
|
|
||||||
struct task_struct *p)
|
|
||||||
{
|
|
||||||
return util;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool uclamp_is_used(void)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_UCLAMP_TASK */
|
|
||||||
|
|
||||||
#ifdef arch_scale_freq_capacity
|
#ifdef arch_scale_freq_capacity
|
||||||
# ifndef arch_scale_freq_invariant
|
# ifndef arch_scale_freq_invariant
|
||||||
# define arch_scale_freq_invariant() true
|
# define arch_scale_freq_invariant() true
|
||||||
|
@ -3020,6 +2896,105 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_UCLAMP_TASK
|
||||||
|
unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
|
||||||
|
* @rq: The rq to clamp against. Must not be NULL.
|
||||||
|
* @util: The util value to clamp.
|
||||||
|
* @p: The task to clamp against. Can be NULL if you want to clamp
|
||||||
|
* against @rq only.
|
||||||
|
*
|
||||||
|
* Clamps the passed @util to the max(@rq, @p) effective uclamp values.
|
||||||
|
*
|
||||||
|
* If sched_uclamp_used static key is disabled, then just return the util
|
||||||
|
* without any clamping since uclamp aggregation at the rq level in the fast
|
||||||
|
* path is disabled, rendering this operation a NOP.
|
||||||
|
*
|
||||||
|
* Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
|
||||||
|
* will return the correct effective uclamp value of the task even if the
|
||||||
|
* static key is disabled.
|
||||||
|
*/
|
||||||
|
static __always_inline
|
||||||
|
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
|
||||||
|
struct task_struct *p)
|
||||||
|
{
|
||||||
|
unsigned long min_util = 0;
|
||||||
|
unsigned long max_util = 0;
|
||||||
|
|
||||||
|
if (!static_branch_likely(&sched_uclamp_used))
|
||||||
|
return util;
|
||||||
|
|
||||||
|
if (p) {
|
||||||
|
min_util = uclamp_eff_value(p, UCLAMP_MIN);
|
||||||
|
max_util = uclamp_eff_value(p, UCLAMP_MAX);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ignore last runnable task's max clamp, as this task will
|
||||||
|
* reset it. Similarly, no need to read the rq's min clamp.
|
||||||
|
*/
|
||||||
|
if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
|
||||||
|
max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
|
||||||
|
out:
|
||||||
|
/*
|
||||||
|
* Since CPU's {min,max}_util clamps are MAX aggregated considering
|
||||||
|
* RUNNABLE tasks with _different_ clamps, we can end up with an
|
||||||
|
* inversion. Fix it now when the clamps are applied.
|
||||||
|
*/
|
||||||
|
if (unlikely(min_util >= max_util))
|
||||||
|
return min_util;
|
||||||
|
|
||||||
|
return clamp(util, min_util, max_util);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Is the rq being capped/throttled by uclamp_max? */
|
||||||
|
static inline bool uclamp_rq_is_capped(struct rq *rq)
|
||||||
|
{
|
||||||
|
unsigned long rq_util;
|
||||||
|
unsigned long max_util;
|
||||||
|
|
||||||
|
if (!static_branch_likely(&sched_uclamp_used))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq);
|
||||||
|
max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
|
||||||
|
|
||||||
|
return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When uclamp is compiled in, the aggregation at rq level is 'turned off'
|
||||||
|
* by default in the fast path and only gets turned on once userspace performs
|
||||||
|
* an operation that requires it.
|
||||||
|
*
|
||||||
|
* Returns true if userspace opted-in to use uclamp and aggregation at rq level
|
||||||
|
* hence is active.
|
||||||
|
*/
|
||||||
|
static inline bool uclamp_is_used(void)
|
||||||
|
{
|
||||||
|
return static_branch_likely(&sched_uclamp_used);
|
||||||
|
}
|
||||||
|
#else /* CONFIG_UCLAMP_TASK */
|
||||||
|
static inline
|
||||||
|
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
|
||||||
|
struct task_struct *p)
|
||||||
|
{
|
||||||
|
return util;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; }
|
||||||
|
|
||||||
|
static inline bool uclamp_is_used(void)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_UCLAMP_TASK */
|
||||||
|
|
||||||
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
|
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
|
||||||
static inline unsigned long cpu_util_irq(struct rq *rq)
|
static inline unsigned long cpu_util_irq(struct rq *rq)
|
||||||
{
|
{
|
||||||
|
@ -3118,3 +3093,4 @@ extern int sched_dynamic_mode(const char *str);
|
||||||
extern void sched_dynamic_update(int mode);
|
extern void sched_dynamic_update(int mode);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif /* _KERNEL_SCHED_SCHED_H */
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
/*
|
/*
|
||||||
* /proc/schedstat implementation
|
* /proc/schedstat implementation
|
||||||
*/
|
*/
|
||||||
#include "sched.h"
|
|
||||||
|
|
||||||
void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
|
void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
|
||||||
struct sched_statistics *stats)
|
struct sched_statistics *stats)
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue