Scheduler changes for v6.1:

- Debuggability:
 
      - Change most occurances of BUG_ON() to WARN_ON_ONCE()
 
      - Reorganize & fix TASK_ state comparisons, turn it into a bitmap
 
      - Update/fix misc scheduler debugging facilities
 
  - Load-balancing & regular scheduling:
 
      - Improve the behavior of the scheduler in presence of lot of
        SCHED_IDLE tasks - in particular they should not impact other
        scheduling classes.
 
      - Optimize task load tracking, cleanups & fixes
 
      - Clean up & simplify misc load-balancing code
 
  - Freezer:
 
      - Rewrite the core freezer to behave better wrt thawing and be simpler
        in general, by replacing PF_FROZEN with TASK_FROZEN & fixing/adjusting
        all the fallout.
 
  - Deadline scheduler:
 
      - Fix the DL capacity-aware code
 
      - Factor out dl_task_is_earliest_deadline() & replenish_dl_new_period()
 
      - Relax/optimize locking in task_non_contending()
 
  - Cleanups:
 
      - Factor out the update_current_exec_runtime() helper
 
      - Various cleanups, simplifications
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmM/01cRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1geZA/+PB4KC1T9aVxzaTHI36R03YgJYZmIdtxw
 wTf02MixePmz+gQCbepJbempGOh5ST28aOcI0xhdYOql5B63MaUBBMlB0HvGUyDG
 IU3zETqLMRtAbnSTdQFv8m++ECUtZYp8/x1FCel4WO7ya4ETkRu1NRfCoUepEhpZ
 aVAlae9LH3NBaF9t7s0PT2lTjf3pIzMFRkddJ0ywJhbFR3VnWat05fAK+J6fGY8+
 LS54coefNlJD4oDh5TY8uniL1j5SmWmmwbk9Cdj7bLU5P3dFSS0/+5FJNHJPVGDE
 srGT7wstRUcDrN0CnZo48VIUBiApJCCDqTfJYi9wNYd0NAHvwY6MIJJgEIY8mKsI
 L/qH26H81Wt+ezSZ/5JIlGlZ/LIeNaa6OO/fbWEYABBQogvvx3nxsRNUYKSQzumH
 CnSBasBjLnjWyLlK4qARM9cI7NFSEK6NUigrEx/7h8JFu/8T4DlSy6LsF1HUyKgq
 4+FJLAqG6cL0tcwB/fHYd0oRESN8dStnQhGxSojgufwLc7dlFULvCYF5JM/dX+/V
 IKwbOfIOeOn6ViMtSOXAEGdII+IQ2/ZFPwr+8Z5JC7NzvTVL6xlu/3JXkLZR3L7o
 yaXTSaz06h1vil7Z+GRf7RHc+wUeGkEpXh5vnarGZKXivhFdWsBdROIJANK+xR0i
 TeSLCxQxXlU=
 =KjMD
 -----END PGP SIGNATURE-----

Merge tag 'sched-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "Debuggability:

   - Change most occurances of BUG_ON() to WARN_ON_ONCE()

   - Reorganize & fix TASK_ state comparisons, turn it into a bitmap

   - Update/fix misc scheduler debugging facilities

  Load-balancing & regular scheduling:

   - Improve the behavior of the scheduler in presence of lot of
     SCHED_IDLE tasks - in particular they should not impact other
     scheduling classes.

   - Optimize task load tracking, cleanups & fixes

   - Clean up & simplify misc load-balancing code

  Freezer:

   - Rewrite the core freezer to behave better wrt thawing and be
     simpler in general, by replacing PF_FROZEN with TASK_FROZEN &
     fixing/adjusting all the fallout.

  Deadline scheduler:

   - Fix the DL capacity-aware code

   - Factor out dl_task_is_earliest_deadline() &
     replenish_dl_new_period()

   - Relax/optimize locking in task_non_contending()

  Cleanups:

   - Factor out the update_current_exec_runtime() helper

   - Various cleanups, simplifications"

* tag 'sched-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits)
  sched: Fix more TASK_state comparisons
  sched: Fix TASK_state comparisons
  sched/fair: Move call to list_last_entry() in detach_tasks
  sched/fair: Cleanup loop_max and loop_break
  sched/fair: Make sure to try to detach at least one movable task
  sched: Show PF_flag holes
  freezer,sched: Rewrite core freezer logic
  sched: Widen TAKS_state literals
  sched/wait: Add wait_event_state()
  sched/completion: Add wait_for_completion_state()
  sched: Add TASK_ANY for wait_task_inactive()
  sched: Change wait_task_inactive()s match_state
  freezer,umh: Clean up freezer/initrd interaction
  freezer: Have {,un}lock_system_sleep() save/restore flags
  sched: Rename task_running() to task_on_cpu()
  sched/fair: Cleanup for SIS_PROP
  sched/fair: Default to false in test_idle_cores()
  sched/fair: Remove useless check in select_idle_core()
  sched/fair: Avoid double search on same cpu
  sched/fair: Remove redundant check in select_idle_smt()
  ...
This commit is contained in:
Linus Torvalds 2022-10-10 09:10:28 -07:00
commit 30c999937f
52 changed files with 618 additions and 762 deletions

View File

@ -654,12 +654,14 @@ void __init acpi_s2idle_setup(void)
int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg)
{
unsigned int sleep_flags;
if (!lps0_device_handle || sleep_no_lps0)
return -ENODEV;
lock_system_sleep();
sleep_flags = lock_system_sleep();
list_add(&arg->list_node, &lps0_s2idle_devops_head);
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
return 0;
}
@ -667,12 +669,14 @@ EXPORT_SYMBOL_GPL(acpi_register_lps0_dev);
void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg)
{
unsigned int sleep_flags;
if (!lps0_device_handle || sleep_no_lps0)
return;
lock_system_sleep();
sleep_flags = lock_system_sleep();
list_del(&arg->list_node);
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
}
EXPORT_SYMBOL_GPL(acpi_unregister_lps0_dev);

View File

@ -4259,10 +4259,9 @@ static int binder_wait_for_work(struct binder_thread *thread,
struct binder_proc *proc = thread->proc;
int ret = 0;
freezer_do_not_count();
binder_inner_proc_lock(proc);
for (;;) {
prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE);
prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE);
if (binder_has_work_ilocked(thread, do_proc_work))
break;
if (do_proc_work)
@ -4279,7 +4278,6 @@ static int binder_wait_for_work(struct binder_thread *thread,
}
finish_wait(&thread->wait, &wait);
binder_inner_proc_unlock(proc);
freezer_count();
return ret;
}

View File

@ -445,8 +445,8 @@ static int pt3_fetch_thread(void *data)
pt3_proc_dma(adap);
delay = ktime_set(0, PT3_FETCH_DELAY * NSEC_PER_MSEC);
set_current_state(TASK_UNINTERRUPTIBLE);
freezable_schedule_hrtimeout_range(&delay,
set_current_state(TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
schedule_hrtimeout_range(&delay,
PT3_FETCH_DELAY_DELTA * NSEC_PER_MSEC,
HRTIMER_MODE_REL);
}

View File

@ -254,7 +254,7 @@ void idle_inject_stop(struct idle_inject_device *ii_dev)
iit = per_cpu_ptr(&idle_inject_thread, cpu);
iit->should_run = 0;
wait_task_inactive(iit->tsk, 0);
wait_task_inactive(iit->tsk, TASK_ANY);
}
cpu_hotplug_enable();

View File

@ -998,8 +998,9 @@ void
spi_dv_device(struct scsi_device *sdev)
{
struct scsi_target *starget = sdev->sdev_target;
u8 *buffer;
const int len = SPI_MAX_ECHO_BUFFER_SIZE*2;
unsigned int sleep_flags;
u8 *buffer;
/*
* Because this function and the power management code both call
@ -1007,7 +1008,7 @@ spi_dv_device(struct scsi_device *sdev)
* while suspend or resume is in progress. Hence the
* lock/unlock_system_sleep() calls.
*/
lock_system_sleep();
sleep_flags = lock_system_sleep();
if (scsi_autopm_get_device(sdev))
goto unlock_system_sleep;
@ -1058,7 +1059,7 @@ put_autopm:
scsi_autopm_put_device(sdev);
unlock_system_sleep:
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
}
EXPORT_SYMBOL(spi_dv_device);

View File

@ -2327,7 +2327,7 @@ cifs_invalidate_mapping(struct inode *inode)
static int
cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
freezable_schedule_unsafe();
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
@ -2345,7 +2345,7 @@ cifs_revalidate_mapping(struct inode *inode)
return 0;
rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
TASK_KILLABLE);
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (rc)
return rc;

View File

@ -753,8 +753,9 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
{
int error;
error = wait_event_freezekillable_unsafe(server->response_q,
midQ->mid_state != MID_REQUEST_SUBMITTED);
error = wait_event_state(server->response_q,
midQ->mid_state != MID_REQUEST_SUBMITTED,
(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE));
if (error < 0)
return -ERESTARTSYS;

View File

@ -402,9 +402,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
if (core_waiters > 0) {
struct core_thread *ptr;
freezer_do_not_count();
wait_for_completion(&core_state->startup);
freezer_count();
wait_for_completion_state(&core_state->startup,
TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
/*
* Wait for all the threads to become inactive, so that
* all the thread context (extended register state, like
@ -412,7 +411,7 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
*/
ptr = core_state->dumper.next;
while (ptr != NULL) {
wait_task_inactive(ptr->task, 0);
wait_task_inactive(ptr->task, TASK_ANY);
ptr = ptr->next;
}
}

View File

@ -567,7 +567,8 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
}
wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
nfs_wait_bit_killable, TASK_KILLABLE);
nfs_wait_bit_killable,
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
lock_page(page);
mapping = page_file_mapping(page);

View File

@ -72,18 +72,13 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
return nfs_fileid_to_ino_t(fattr->fileid);
}
static int nfs_wait_killable(int mode)
int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
freezable_schedule_unsafe();
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
}
int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
return nfs_wait_killable(mode);
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
/**
@ -1332,7 +1327,8 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
*/
for (;;) {
ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
nfs_wait_bit_killable, TASK_KILLABLE);
nfs_wait_bit_killable,
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (ret)
goto out;
spin_lock(&inode->i_lock);

View File

@ -36,7 +36,8 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!fatal_signal_pending(current));
return res;

View File

@ -416,8 +416,8 @@ static int nfs4_delay_killable(long *timeout)
{
might_sleep();
freezable_schedule_timeout_killable_unsafe(
nfs4_update_delay(timeout));
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!__fatal_signal_pending(current))
return 0;
return -EINTR;
@ -427,7 +427,8 @@ static int nfs4_delay_interruptible(long *timeout)
{
might_sleep();
freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout));
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!signal_pending(current))
return 0;
return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS;
@ -7406,7 +7407,8 @@ nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd,
status = nfs4_proc_setlk(state, cmd, request);
if ((status != -EAGAIN) || IS_SETLK(cmd))
break;
freezable_schedule_timeout_interruptible(timeout);
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
schedule_timeout(timeout);
timeout *= 2;
timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout);
status = -ERESTARTSYS;
@ -7474,10 +7476,8 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
break;
status = -ERESTARTSYS;
freezer_do_not_count();
wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
wait_woken(&waiter.wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE,
NFS4_LOCK_MAXTIMEOUT);
freezer_count();
} while (!signalled());
remove_wait_queue(q, &waiter.wait);

View File

@ -1314,7 +1314,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
refcount_inc(&clp->cl_count);
res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
nfs_wait_bit_killable, TASK_KILLABLE);
nfs_wait_bit_killable,
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (res)
goto out;
if (clp->cl_cons_state < 0)

View File

@ -1908,7 +1908,7 @@ static int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
pnfs_layoutcommit_inode(lo->plh_inode, false);
return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
nfs_wait_bit_killable,
TASK_KILLABLE);
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
}
static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
@ -3192,7 +3192,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
status = wait_on_bit_lock_action(&nfsi->flags,
NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable,
TASK_KILLABLE);
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (status)
goto out;
}

View File

@ -602,9 +602,9 @@ xfsaild(
while (1) {
if (tout && tout <= 20)
set_current_state(TASK_KILLABLE);
set_current_state(TASK_KILLABLE|TASK_FREEZABLE);
else
set_current_state(TASK_INTERRUPTIBLE);
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
/*
* Check kthread_should_stop() after we set the task state to
@ -653,14 +653,14 @@ xfsaild(
ailp->ail_target == ailp->ail_target_prev &&
list_empty(&ailp->ail_buf_list)) {
spin_unlock(&ailp->ail_lock);
freezable_schedule();
schedule();
tout = 0;
continue;
}
spin_unlock(&ailp->ail_lock);
if (tout)
freezable_schedule_timeout(msecs_to_jiffies(tout));
schedule_timeout(msecs_to_jiffies(tout));
__set_current_state(TASK_RUNNING);

View File

@ -103,6 +103,7 @@ extern void wait_for_completion(struct completion *);
extern void wait_for_completion_io(struct completion *);
extern int wait_for_completion_interruptible(struct completion *x);
extern int wait_for_completion_killable(struct completion *x);
extern int wait_for_completion_state(struct completion *x, unsigned int state);
extern unsigned long wait_for_completion_timeout(struct completion *x,
unsigned long timeout);
extern unsigned long wait_for_completion_io_timeout(struct completion *x,

View File

@ -8,9 +8,11 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#ifdef CONFIG_FREEZER
extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */
DECLARE_STATIC_KEY_FALSE(freezer_active);
extern bool pm_freezing; /* PM freezing in effect */
extern bool pm_nosig_freezing; /* PM nosig freezing in effect */
@ -22,10 +24,7 @@ extern unsigned int freeze_timeout_msecs;
/*
* Check if a process has been frozen
*/
static inline bool frozen(struct task_struct *p)
{
return p->flags & PF_FROZEN;
}
extern bool frozen(struct task_struct *p);
extern bool freezing_slow_path(struct task_struct *p);
@ -34,9 +33,10 @@ extern bool freezing_slow_path(struct task_struct *p);
*/
static inline bool freezing(struct task_struct *p)
{
if (likely(!atomic_read(&system_freezing_cnt)))
return false;
return freezing_slow_path(p);
if (static_branch_unlikely(&freezer_active))
return freezing_slow_path(p);
return false;
}
/* Takes and releases task alloc lock using task_lock() */
@ -48,23 +48,14 @@ extern int freeze_kernel_threads(void);
extern void thaw_processes(void);
extern void thaw_kernel_threads(void);
/*
* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION
* If try_to_freeze causes a lockdep warning it means the caller may deadlock
*/
static inline bool try_to_freeze_unsafe(void)
static inline bool try_to_freeze(void)
{
might_sleep();
if (likely(!freezing(current)))
return false;
return __refrigerator(false);
}
static inline bool try_to_freeze(void)
{
if (!(current->flags & PF_NOFREEZE))
debug_check_no_locks_held();
return try_to_freeze_unsafe();
return __refrigerator(false);
}
extern bool freeze_task(struct task_struct *p);
@ -79,195 +70,6 @@ static inline bool cgroup_freezing(struct task_struct *task)
}
#endif /* !CONFIG_CGROUP_FREEZER */
/*
* The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
* calls wait_for_completion(&vfork) and reset right after it returns from this
* function. Next, the parent should call try_to_freeze() to freeze itself
* appropriately in case the child has exited before the freezing of tasks is
* complete. However, we don't want kernel threads to be frozen in unexpected
* places, so we allow them to block freeze_processes() instead or to set
* PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the
* parent won't really block freeze_processes(), since ____call_usermodehelper()
* (the child) does a little before exec/exit and it can't be frozen before
* waking up the parent.
*/
/**
* freezer_do_not_count - tell freezer to ignore %current
*
* Tell freezers to ignore the current task when determining whether the
* target frozen state is reached. IOW, the current task will be
* considered frozen enough by freezers.
*
* The caller shouldn't do anything which isn't allowed for a frozen task
* until freezer_cont() is called. Usually, freezer[_do_not]_count() pair
* wrap a scheduling operation and nothing much else.
*/
static inline void freezer_do_not_count(void)
{
current->flags |= PF_FREEZER_SKIP;
}
/**
* freezer_count - tell freezer to stop ignoring %current
*
* Undo freezer_do_not_count(). It tells freezers that %current should be
* considered again and tries to freeze if freezing condition is already in
* effect.
*/
static inline void freezer_count(void)
{
current->flags &= ~PF_FREEZER_SKIP;
/*
* If freezing is in progress, the following paired with smp_mb()
* in freezer_should_skip() ensures that either we see %true
* freezing() or freezer_should_skip() sees !PF_FREEZER_SKIP.
*/
smp_mb();
try_to_freeze();
}
/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
static inline void freezer_count_unsafe(void)
{
current->flags &= ~PF_FREEZER_SKIP;
smp_mb();
try_to_freeze_unsafe();
}
/**
* freezer_should_skip - whether to skip a task when determining frozen
* state is reached
* @p: task in quesion
*
* This function is used by freezers after establishing %true freezing() to
* test whether a task should be skipped when determining the target frozen
* state is reached. IOW, if this function returns %true, @p is considered
* frozen enough.
*/
static inline bool freezer_should_skip(struct task_struct *p)
{
/*
* The following smp_mb() paired with the one in freezer_count()
* ensures that either freezer_count() sees %true freezing() or we
* see cleared %PF_FREEZER_SKIP and return %false. This makes it
* impossible for a task to slip frozen state testing after
* clearing %PF_FREEZER_SKIP.
*/
smp_mb();
return p->flags & PF_FREEZER_SKIP;
}
/*
* These functions are intended to be used whenever you want allow a sleeping
* task to be frozen. Note that neither return any clear indication of
* whether a freeze event happened while in this function.
*/
/* Like schedule(), but should not block the freezer. */
static inline void freezable_schedule(void)
{
freezer_do_not_count();
schedule();
freezer_count();
}
/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
static inline void freezable_schedule_unsafe(void)
{
freezer_do_not_count();
schedule();
freezer_count_unsafe();
}
/*
* Like schedule_timeout(), but should not block the freezer. Do not
* call this with locks held.
*/
static inline long freezable_schedule_timeout(long timeout)
{
long __retval;
freezer_do_not_count();
__retval = schedule_timeout(timeout);
freezer_count();
return __retval;
}
/*
* Like schedule_timeout_interruptible(), but should not block the freezer. Do not
* call this with locks held.
*/
static inline long freezable_schedule_timeout_interruptible(long timeout)
{
long __retval;
freezer_do_not_count();
__retval = schedule_timeout_interruptible(timeout);
freezer_count();
return __retval;
}
/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout)
{
long __retval;
freezer_do_not_count();
__retval = schedule_timeout_interruptible(timeout);
freezer_count_unsafe();
return __retval;
}
/* Like schedule_timeout_killable(), but should not block the freezer. */
static inline long freezable_schedule_timeout_killable(long timeout)
{
long __retval;
freezer_do_not_count();
__retval = schedule_timeout_killable(timeout);
freezer_count();
return __retval;
}
/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
static inline long freezable_schedule_timeout_killable_unsafe(long timeout)
{
long __retval;
freezer_do_not_count();
__retval = schedule_timeout_killable(timeout);
freezer_count_unsafe();
return __retval;
}
/*
* Like schedule_hrtimeout_range(), but should not block the freezer. Do not
* call this with locks held.
*/
static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
u64 delta, const enum hrtimer_mode mode)
{
int __retval;
freezer_do_not_count();
__retval = schedule_hrtimeout_range(expires, delta, mode);
freezer_count();
return __retval;
}
/*
* Freezer-friendly wrappers around wait_event_interruptible(),
* wait_event_killable() and wait_event_interruptible_timeout(), originally
* defined in <linux/wait.h>
*/
/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
#define wait_event_freezekillable_unsafe(wq, condition) \
({ \
int __retval; \
freezer_do_not_count(); \
__retval = wait_event_killable(wq, (condition)); \
freezer_count_unsafe(); \
__retval; \
})
#else /* !CONFIG_FREEZER */
static inline bool frozen(struct task_struct *p) { return false; }
static inline bool freezing(struct task_struct *p) { return false; }
@ -281,35 +83,8 @@ static inline void thaw_kernel_threads(void) {}
static inline bool try_to_freeze(void) { return false; }
static inline void freezer_do_not_count(void) {}
static inline void freezer_count(void) {}
static inline int freezer_should_skip(struct task_struct *p) { return 0; }
static inline void set_freezable(void) {}
#define freezable_schedule() schedule()
#define freezable_schedule_unsafe() schedule()
#define freezable_schedule_timeout(timeout) schedule_timeout(timeout)
#define freezable_schedule_timeout_interruptible(timeout) \
schedule_timeout_interruptible(timeout)
#define freezable_schedule_timeout_interruptible_unsafe(timeout) \
schedule_timeout_interruptible(timeout)
#define freezable_schedule_timeout_killable(timeout) \
schedule_timeout_killable(timeout)
#define freezable_schedule_timeout_killable_unsafe(timeout) \
schedule_timeout_killable(timeout)
#define freezable_schedule_hrtimeout_range(expires, delta, mode) \
schedule_hrtimeout_range(expires, delta, mode)
#define wait_event_freezekillable_unsafe(wq, condition) \
wait_event_killable(wq, condition)
#endif /* !CONFIG_FREEZER */
#endif /* FREEZER_H_INCLUDED */

View File

@ -81,25 +81,34 @@ struct task_group;
*/
/* Used in tsk->state: */
#define TASK_RUNNING 0x0000
#define TASK_INTERRUPTIBLE 0x0001
#define TASK_UNINTERRUPTIBLE 0x0002
#define __TASK_STOPPED 0x0004
#define __TASK_TRACED 0x0008
#define TASK_RUNNING 0x00000000
#define TASK_INTERRUPTIBLE 0x00000001
#define TASK_UNINTERRUPTIBLE 0x00000002
#define __TASK_STOPPED 0x00000004
#define __TASK_TRACED 0x00000008
/* Used in tsk->exit_state: */
#define EXIT_DEAD 0x0010
#define EXIT_ZOMBIE 0x0020
#define EXIT_DEAD 0x00000010
#define EXIT_ZOMBIE 0x00000020
#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
/* Used in tsk->state again: */
#define TASK_PARKED 0x0040
#define TASK_DEAD 0x0080
#define TASK_WAKEKILL 0x0100
#define TASK_WAKING 0x0200
#define TASK_NOLOAD 0x0400
#define TASK_NEW 0x0800
/* RT specific auxilliary flag to mark RT lock waiters */
#define TASK_RTLOCK_WAIT 0x1000
#define TASK_STATE_MAX 0x2000
#define TASK_PARKED 0x00000040
#define TASK_DEAD 0x00000080
#define TASK_WAKEKILL 0x00000100
#define TASK_WAKING 0x00000200
#define TASK_NOLOAD 0x00000400
#define TASK_NEW 0x00000800
#define TASK_RTLOCK_WAIT 0x00001000
#define TASK_FREEZABLE 0x00002000
#define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP))
#define TASK_FROZEN 0x00008000
#define TASK_STATE_MAX 0x00010000
#define TASK_ANY (TASK_STATE_MAX-1)
/*
* DO NOT ADD ANY NEW USERS !
*/
#define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE)
/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@ -1713,8 +1722,9 @@ extern struct pid *cad_pid;
#define PF_MEMALLOC 0x00000800 /* Allocating memory */
#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
#define PF__HOLE__00004000 0x00004000
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
#define PF_FROZEN 0x00010000 /* Frozen for system suspend */
#define PF__HOLE__00010000 0x00010000
#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */
@ -1722,10 +1732,14 @@ extern struct pid *cad_pid;
* I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF__HOLE__00800000 0x00800000
#define PF__HOLE__01000000 0x01000000
#define PF__HOLE__02000000 0x02000000
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF__HOLE__20000000 0x20000000
#define PF__HOLE__40000000 0x40000000
#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
/*

View File

@ -252,7 +252,7 @@ int rpc_malloc(struct rpc_task *);
void rpc_free(struct rpc_task *);
int rpciod_up(void);
void rpciod_down(void);
int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *);
int rpc_wait_for_completion_task(struct rpc_task *task);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct net;
void rpc_show_tasks(struct net *);
@ -264,11 +264,6 @@ extern struct workqueue_struct *xprtiod_workqueue;
void rpc_prepare_task(struct rpc_task *task);
gfp_t rpc_task_gfp_mask(void);
static inline int rpc_wait_for_completion_task(struct rpc_task *task)
{
return __rpc_wait_for_completion_task(task, NULL);
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
static inline const char * rpc_qname(const struct rpc_wait_queue *q)
{

View File

@ -511,8 +511,8 @@ extern bool pm_save_wakeup_count(unsigned int count);
extern void pm_wakep_autosleep_enabled(bool set);
extern void pm_print_active_wakeup_sources(void);
extern void lock_system_sleep(void);
extern void unlock_system_sleep(void);
extern unsigned int lock_system_sleep(void);
extern void unlock_system_sleep(unsigned int);
#else /* !CONFIG_PM_SLEEP */
@ -535,8 +535,8 @@ static inline void pm_system_wakeup(void) {}
static inline void pm_wakeup_clear(bool reset) {}
static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
static inline void lock_system_sleep(void) {}
static inline void unlock_system_sleep(void) {}
static inline unsigned int lock_system_sleep(void) { return 0; }
static inline void unlock_system_sleep(unsigned int flags) {}
#endif /* !CONFIG_PM_SLEEP */

View File

@ -11,10 +11,11 @@
struct cred;
struct file;
#define UMH_NO_WAIT 0 /* don't wait at all */
#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */
#define UMH_WAIT_PROC 2 /* wait for the process to complete */
#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */
#define UMH_NO_WAIT 0x00 /* don't wait at all */
#define UMH_WAIT_EXEC 0x01 /* wait for the exec, but not the process */
#define UMH_WAIT_PROC 0x02 /* wait for the process to complete */
#define UMH_KILLABLE 0x04 /* wait for EXEC/PROC killable */
#define UMH_FREEZABLE 0x08 /* wait for EXEC/PROC freezable */
struct subprocess_info {
struct work_struct work;

View File

@ -281,7 +281,7 @@ static inline void wake_up_pollfree(struct wait_queue_head *wq_head)
#define ___wait_is_interruptible(state) \
(!__builtin_constant_p(state) || \
state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \
(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
@ -361,8 +361,8 @@ do { \
} while (0)
#define __wait_event_freezable(wq_head, condition) \
___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \
freezable_schedule())
___wait_event(wq_head, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), \
0, 0, schedule())
/**
* wait_event_freezable - sleep (or freeze) until a condition gets true
@ -420,8 +420,8 @@ do { \
#define __wait_event_freezable_timeout(wq_head, condition, timeout) \
___wait_event(wq_head, ___wait_cond_timeout(condition), \
TASK_INTERRUPTIBLE, 0, timeout, \
__ret = freezable_schedule_timeout(__ret))
(TASK_INTERRUPTIBLE|TASK_FREEZABLE), 0, timeout, \
__ret = schedule_timeout(__ret))
/*
* like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
@ -642,8 +642,8 @@ do { \
#define __wait_event_freezable_exclusive(wq, condition) \
___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
freezable_schedule())
___wait_event(wq, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 1, 0,\
schedule())
#define wait_event_freezable_exclusive(wq, condition) \
({ \
@ -932,6 +932,34 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
__ret; \
})
#define __wait_event_state(wq, condition, state) \
___wait_event(wq, condition, state, 0, 0, schedule())
/**
* wait_event_state - sleep until a condition gets true
* @wq_head: the waitqueue to wait on
* @condition: a C expression for the event to wait for
* @state: state to sleep in
*
* The process is put to sleep (@state) until the @condition evaluates to true
* or a signal is received (when allowed by @state). The @condition is checked
* each time the waitqueue @wq_head is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function will return -ERESTARTSYS if it was interrupted by a signal
* (when allowed by @state) and 0 if @condition evaluated to true.
*/
#define wait_event_state(wq_head, condition, state) \
({ \
int __ret = 0; \
might_sleep(); \
if (!(condition)) \
__ret = __wait_event_state(wq_head, condition, state); \
__ret; \
})
#define __wait_event_killable_timeout(wq_head, condition, timeout) \
___wait_event(wq_head, ___wait_cond_timeout(condition), \
TASK_KILLABLE, 0, timeout, \

View File

@ -99,19 +99,11 @@ static void __init handle_initrd(void)
init_mkdir("/old", 0700);
init_chdir("/old");
/*
* In case that a resume from disk is carried out by linuxrc or one of
* its children, we need to tell the freezer not to wait for us.
*/
current->flags |= PF_FREEZER_SKIP;
info = call_usermodehelper_setup("/linuxrc", argv, envp_init,
GFP_KERNEL, init_linuxrc, NULL, NULL);
if (!info)
return;
call_usermodehelper_exec(info, UMH_WAIT_PROC);
current->flags &= ~PF_FREEZER_SKIP;
call_usermodehelper_exec(info, UMH_WAIT_PROC|UMH_FREEZABLE);
/* move initrd to rootfs' /old */
init_mount("..", ".", NULL, MS_MOVE, NULL);

View File

@ -113,7 +113,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
if (parent && (parent->state & CGROUP_FREEZING)) {
freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
atomic_inc(&system_freezing_cnt);
static_branch_inc(&freezer_active);
}
mutex_unlock(&freezer_mutex);
@ -134,7 +134,7 @@ static void freezer_css_offline(struct cgroup_subsys_state *css)
mutex_lock(&freezer_mutex);
if (freezer->state & CGROUP_FREEZING)
atomic_dec(&system_freezing_cnt);
static_branch_dec(&freezer_active);
freezer->state = 0;
@ -179,6 +179,7 @@ static void freezer_attach(struct cgroup_taskset *tset)
__thaw_task(task);
} else {
freeze_task(task);
/* clear FROZEN and propagate upwards */
while (freezer && (freezer->state & CGROUP_FROZEN)) {
freezer->state &= ~CGROUP_FROZEN;
@ -271,16 +272,8 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
css_task_iter_start(css, 0, &it);
while ((task = css_task_iter_next(&it))) {
if (freezing(task)) {
/*
* freezer_should_skip() indicates that the task
* should be skipped when determining freezing
* completion. Consider it frozen in addition to
* the usual frozen condition.
*/
if (!frozen(task) && !freezer_should_skip(task))
goto out_iter_end;
}
if (freezing(task) && !frozen(task))
goto out_iter_end;
}
freezer->state |= CGROUP_FROZEN;
@ -357,7 +350,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
if (freeze) {
if (!(freezer->state & CGROUP_FREEZING))
atomic_inc(&system_freezing_cnt);
static_branch_inc(&freezer_active);
freezer->state |= state;
freeze_cgroup(freezer);
} else {
@ -366,9 +359,9 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
freezer->state &= ~state;
if (!(freezer->state & CGROUP_FREEZING)) {
if (was_freezing)
atomic_dec(&system_freezing_cnt);
freezer->state &= ~CGROUP_FROZEN;
if (was_freezing)
static_branch_dec(&freezer_active);
unfreeze_cgroup(freezer);
}
}

View File

@ -374,10 +374,10 @@ static void coredump_task_exit(struct task_struct *tsk)
complete(&core_state->startup);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
set_current_state(TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
if (!self.task) /* see coredump_finish() */
break;
freezable_schedule();
schedule();
}
__set_current_state(TASK_RUNNING);
}

View File

@ -1421,13 +1421,12 @@ static void complete_vfork_done(struct task_struct *tsk)
static int wait_for_vfork_done(struct task_struct *child,
struct completion *vfork)
{
unsigned int state = TASK_UNINTERRUPTIBLE|TASK_KILLABLE|TASK_FREEZABLE;
int killed;
freezer_do_not_count();
cgroup_enter_frozen();
killed = wait_for_completion_killable(vfork);
killed = wait_for_completion_state(vfork, state);
cgroup_leave_frozen(false);
freezer_count();
if (killed) {
task_lock(child);

View File

@ -13,10 +13,11 @@
#include <linux/kthread.h>
/* total number of freezing conditions in effect */
atomic_t system_freezing_cnt = ATOMIC_INIT(0);
EXPORT_SYMBOL(system_freezing_cnt);
DEFINE_STATIC_KEY_FALSE(freezer_active);
EXPORT_SYMBOL(freezer_active);
/* indicate whether PM freezing is in effect, protected by
/*
* indicate whether PM freezing is in effect, protected by
* system_transition_mutex
*/
bool pm_freezing;
@ -29,7 +30,7 @@ static DEFINE_SPINLOCK(freezer_lock);
* freezing_slow_path - slow path for testing whether a task needs to be frozen
* @p: task to be tested
*
* This function is called by freezing() if system_freezing_cnt isn't zero
* This function is called by freezing() if freezer_active isn't zero
* and tests whether @p needs to enter and stay in frozen state. Can be
* called under any context. The freezers are responsible for ensuring the
* target tasks see the updated state.
@ -52,41 +53,40 @@ bool freezing_slow_path(struct task_struct *p)
}
EXPORT_SYMBOL(freezing_slow_path);
bool frozen(struct task_struct *p)
{
return READ_ONCE(p->__state) & TASK_FROZEN;
}
/* Refrigerator is place where frozen processes are stored :-). */
bool __refrigerator(bool check_kthr_stop)
{
/* Hmm, should we be allowed to suspend when there are realtime
processes around? */
unsigned int state = get_current_state();
bool was_frozen = false;
unsigned int save = get_current_state();
pr_debug("%s entered refrigerator\n", current->comm);
WARN_ON_ONCE(state && !(state & TASK_NORMAL));
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
bool freeze;
set_current_state(TASK_FROZEN);
spin_lock_irq(&freezer_lock);
current->flags |= PF_FROZEN;
if (!freezing(current) ||
(check_kthr_stop && kthread_should_stop()))
current->flags &= ~PF_FROZEN;
freeze = freezing(current) && !(check_kthr_stop && kthread_should_stop());
spin_unlock_irq(&freezer_lock);
if (!(current->flags & PF_FROZEN))
if (!freeze)
break;
was_frozen = true;
schedule();
}
__set_current_state(TASK_RUNNING);
pr_debug("%s left refrigerator\n", current->comm);
/*
* Restore saved task state before returning. The mb'd version
* needs to be used; otherwise, it might silently break
* synchronization which depends on ordered task state change.
*/
set_current_state(save);
return was_frozen;
}
EXPORT_SYMBOL(__refrigerator);
@ -101,6 +101,44 @@ static void fake_signal_wake_up(struct task_struct *p)
}
}
static int __set_task_frozen(struct task_struct *p, void *arg)
{
unsigned int state = READ_ONCE(p->__state);
if (p->on_rq)
return 0;
if (p != current && task_curr(p))
return 0;
if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
return 0;
/*
* Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
* can suffer spurious wakeups.
*/
if (state & TASK_FREEZABLE)
WARN_ON_ONCE(!(state & TASK_NORMAL));
#ifdef CONFIG_LOCKDEP
/*
* It's dangerous to freeze with locks held; there be dragons there.
*/
if (!(state & __TASK_FREEZABLE_UNSAFE))
WARN_ON_ONCE(debug_locks && p->lockdep_depth);
#endif
WRITE_ONCE(p->__state, TASK_FROZEN);
return TASK_FROZEN;
}
static bool __freeze_task(struct task_struct *p)
{
/* TASK_FREEZABLE|TASK_STOPPED|TASK_TRACED -> TASK_FROZEN */
return task_call_func(p, __set_task_frozen, NULL);
}
/**
* freeze_task - send a freeze request to given task
* @p: task to send the request to
@ -116,20 +154,8 @@ bool freeze_task(struct task_struct *p)
{
unsigned long flags;
/*
* This check can race with freezer_do_not_count, but worst case that
* will result in an extra wakeup being sent to the task. It does not
* race with freezer_count(), the barriers in freezer_count() and
* freezer_should_skip() ensure that either freezer_count() sees
* freezing == true in try_to_freeze() and freezes, or
* freezer_should_skip() sees !PF_FREEZE_SKIP and freezes the task
* normally.
*/
if (freezer_should_skip(p))
return false;
spin_lock_irqsave(&freezer_lock, flags);
if (!freezing(p) || frozen(p)) {
if (!freezing(p) || frozen(p) || __freeze_task(p)) {
spin_unlock_irqrestore(&freezer_lock, flags);
return false;
}
@ -137,19 +163,52 @@ bool freeze_task(struct task_struct *p)
if (!(p->flags & PF_KTHREAD))
fake_signal_wake_up(p);
else
wake_up_state(p, TASK_INTERRUPTIBLE);
wake_up_state(p, TASK_NORMAL);
spin_unlock_irqrestore(&freezer_lock, flags);
return true;
}
/*
* The special task states (TASK_STOPPED, TASK_TRACED) keep their canonical
* state in p->jobctl. If either of them got a wakeup that was missed because
* TASK_FROZEN, then their canonical state reflects that and the below will
* refuse to restore the special state and instead issue the wakeup.
*/
static int __set_task_special(struct task_struct *p, void *arg)
{
unsigned int state = 0;
if (p->jobctl & JOBCTL_TRACED)
state = TASK_TRACED;
else if (p->jobctl & JOBCTL_STOPPED)
state = TASK_STOPPED;
if (state)
WRITE_ONCE(p->__state, state);
return state;
}
void __thaw_task(struct task_struct *p)
{
unsigned long flags;
unsigned long flags, flags2;
spin_lock_irqsave(&freezer_lock, flags);
if (frozen(p))
wake_up_process(p);
if (WARN_ON_ONCE(freezing(p)))
goto unlock;
if (lock_task_sighand(p, &flags2)) {
/* TASK_FROZEN -> TASK_{STOPPED,TRACED} */
bool ret = task_call_func(p, __set_task_special, NULL);
unlock_task_sighand(p, &flags2);
if (ret)
goto unlock;
}
wake_up_state(p, TASK_FROZEN);
unlock:
spin_unlock_irqrestore(&freezer_lock, flags);
}

View File

@ -334,7 +334,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
* futex_queue() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
set_current_state(TASK_INTERRUPTIBLE);
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
futex_queue(q, hb);
/* Arm the timer */
@ -352,7 +352,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
* is no timeout, or if it has yet to expire.
*/
if (!timeout || timeout->task)
freezable_schedule();
schedule();
}
__set_current_state(TASK_RUNNING);
}
@ -430,7 +430,7 @@ retry:
return ret;
}
set_current_state(TASK_INTERRUPTIBLE);
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
for (i = 0; i < count; i++) {
u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
@ -504,7 +504,7 @@ static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
return;
}
freezable_schedule();
schedule();
}
/**

View File

@ -95,8 +95,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
* Ensure the task is not frozen.
* Also, skip vfork and any other user process that freezer should skip.
*/
if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
return;
if (unlikely(READ_ONCE(t->__state) & TASK_FROZEN))
return;
/*
* When a freshly created task is scheduled once, changes its state to
@ -191,6 +191,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
unsigned int state;
if (!max_count--)
goto unlock;
if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
@ -198,8 +200,14 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
goto unlock;
last_break = jiffies;
}
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (READ_ONCE(t->__state) == TASK_UNINTERRUPTIBLE)
/*
* skip the TASK_KILLABLE tasks -- these can be killed
* skip the TASK_IDLE tasks -- those are genuinely idle
*/
state = READ_ONCE(t->__state);
if ((state & TASK_UNINTERRUPTIBLE) &&
!(state & TASK_WAKEKILL) &&
!(state & TASK_NOLOAD))
check_hung_task(t, timeout);
}
unlock:

View File

@ -92,20 +92,24 @@ bool hibernation_available(void)
*/
void hibernation_set_ops(const struct platform_hibernation_ops *ops)
{
unsigned int sleep_flags;
if (ops && !(ops->begin && ops->end && ops->pre_snapshot
&& ops->prepare && ops->finish && ops->enter && ops->pre_restore
&& ops->restore_cleanup && ops->leave)) {
WARN_ON(1);
return;
}
lock_system_sleep();
sleep_flags = lock_system_sleep();
hibernation_ops = ops;
if (ops)
hibernation_mode = HIBERNATION_PLATFORM;
else if (hibernation_mode == HIBERNATION_PLATFORM)
hibernation_mode = HIBERNATION_SHUTDOWN;
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
}
EXPORT_SYMBOL_GPL(hibernation_set_ops);
@ -713,6 +717,7 @@ static int load_image_and_restore(void)
int hibernate(void)
{
bool snapshot_test = false;
unsigned int sleep_flags;
int error;
if (!hibernation_available()) {
@ -720,7 +725,7 @@ int hibernate(void)
return -EPERM;
}
lock_system_sleep();
sleep_flags = lock_system_sleep();
/* The snapshot device should not be opened while we're running */
if (!hibernate_acquire()) {
error = -EBUSY;
@ -794,7 +799,7 @@ int hibernate(void)
pm_restore_console();
hibernate_release();
Unlock:
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
pr_info("hibernation exit\n");
return error;
@ -809,9 +814,10 @@ int hibernate(void)
*/
int hibernate_quiet_exec(int (*func)(void *data), void *data)
{
unsigned int sleep_flags;
int error;
lock_system_sleep();
sleep_flags = lock_system_sleep();
if (!hibernate_acquire()) {
error = -EBUSY;
@ -891,7 +897,7 @@ restore:
hibernate_release();
unlock:
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
return error;
}
@ -1100,11 +1106,12 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
int mode = HIBERNATION_INVALID;
unsigned int sleep_flags;
int error = 0;
int i;
int len;
char *p;
int mode = HIBERNATION_INVALID;
int i;
if (!hibernation_available())
return -EPERM;
@ -1112,7 +1119,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
lock_system_sleep();
sleep_flags = lock_system_sleep();
for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
if (len == strlen(hibernation_modes[i])
&& !strncmp(buf, hibernation_modes[i], len)) {
@ -1142,7 +1149,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
if (!error)
pm_pr_dbg("Hibernation mode set to '%s'\n",
hibernation_modes[mode]);
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
return error ? error : n;
}
@ -1158,9 +1165,10 @@ static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr,
static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
dev_t res;
unsigned int sleep_flags;
int len = n;
char *name;
dev_t res;
if (len && buf[len-1] == '\n')
len--;
@ -1173,9 +1181,10 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
if (!res)
return -EINVAL;
lock_system_sleep();
sleep_flags = lock_system_sleep();
swsusp_resume_device = res;
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
pm_pr_dbg("Configured hibernation resume from disk to %u\n",
swsusp_resume_device);
noresume = 0;

View File

@ -21,14 +21,16 @@
#ifdef CONFIG_PM_SLEEP
void lock_system_sleep(void)
unsigned int lock_system_sleep(void)
{
current->flags |= PF_FREEZER_SKIP;
unsigned int flags = current->flags;
current->flags |= PF_NOFREEZE;
mutex_lock(&system_transition_mutex);
return flags;
}
EXPORT_SYMBOL_GPL(lock_system_sleep);
void unlock_system_sleep(void)
void unlock_system_sleep(unsigned int flags)
{
/*
* Don't use freezer_count() because we don't want the call to
@ -46,7 +48,8 @@ void unlock_system_sleep(void)
* Which means, if we use try_to_freeze() here, it would make them
* enter the refrigerator, thus causing hibernation to lockup.
*/
current->flags &= ~PF_FREEZER_SKIP;
if (!(flags & PF_NOFREEZE))
current->flags &= ~PF_NOFREEZE;
mutex_unlock(&system_transition_mutex);
}
EXPORT_SYMBOL_GPL(unlock_system_sleep);
@ -263,16 +266,17 @@ static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr,
static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
unsigned int sleep_flags;
const char * const *s;
int error = -EINVAL;
int level;
char *p;
int len;
int error = -EINVAL;
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
lock_system_sleep();
sleep_flags = lock_system_sleep();
level = TEST_FIRST;
for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
@ -282,7 +286,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
break;
}
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
return error ? error : n;
}

View File

@ -50,8 +50,7 @@ static int try_to_freeze_tasks(bool user_only)
if (p == current || !freeze_task(p))
continue;
if (!freezer_should_skip(p))
todo++;
todo++;
}
read_unlock(&tasklist_lock);
@ -96,8 +95,7 @@ static int try_to_freeze_tasks(bool user_only)
if (!wakeup || pm_debug_messages_on) {
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
if (p != current && !freezer_should_skip(p)
&& freezing(p) && !frozen(p))
if (p != current && freezing(p) && !frozen(p))
sched_show_task(p);
}
read_unlock(&tasklist_lock);
@ -129,7 +127,7 @@ int freeze_processes(void)
current->flags |= PF_SUSPEND_TASK;
if (!pm_freezing)
atomic_inc(&system_freezing_cnt);
static_branch_inc(&freezer_active);
pm_wakeup_clear(0);
pr_info("Freezing user space processes ... ");
@ -190,7 +188,7 @@ void thaw_processes(void)
trace_suspend_resume(TPS("thaw_processes"), 0, true);
if (pm_freezing)
atomic_dec(&system_freezing_cnt);
static_branch_dec(&freezer_active);
pm_freezing = false;
pm_nosig_freezing = false;

View File

@ -75,9 +75,11 @@ EXPORT_SYMBOL_GPL(pm_suspend_default_s2idle);
void s2idle_set_ops(const struct platform_s2idle_ops *ops)
{
lock_system_sleep();
unsigned int sleep_flags;
sleep_flags = lock_system_sleep();
s2idle_ops = ops;
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
}
static void s2idle_begin(void)
@ -203,7 +205,9 @@ __setup("mem_sleep_default=", mem_sleep_default_setup);
*/
void suspend_set_ops(const struct platform_suspend_ops *ops)
{
lock_system_sleep();
unsigned int sleep_flags;
sleep_flags = lock_system_sleep();
suspend_ops = ops;
@ -219,7 +223,7 @@ void suspend_set_ops(const struct platform_suspend_ops *ops)
mem_sleep_current = PM_SUSPEND_MEM;
}
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
}
EXPORT_SYMBOL_GPL(suspend_set_ops);

View File

@ -47,12 +47,13 @@ int is_hibernate_resume_dev(dev_t dev)
static int snapshot_open(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
unsigned int sleep_flags;
int error;
if (!hibernation_available())
return -EPERM;
lock_system_sleep();
sleep_flags = lock_system_sleep();
if (!hibernate_acquire()) {
error = -EBUSY;
@ -98,7 +99,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
data->dev = 0;
Unlock:
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
return error;
}
@ -106,8 +107,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
static int snapshot_release(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
unsigned int sleep_flags;
lock_system_sleep();
sleep_flags = lock_system_sleep();
swsusp_free();
data = filp->private_data;
@ -124,7 +126,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
PM_POST_HIBERNATION : PM_POST_RESTORE);
hibernate_release();
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
return 0;
}
@ -132,11 +134,12 @@ static int snapshot_release(struct inode *inode, struct file *filp)
static ssize_t snapshot_read(struct file *filp, char __user *buf,
size_t count, loff_t *offp)
{
struct snapshot_data *data;
ssize_t res;
loff_t pg_offp = *offp & ~PAGE_MASK;
struct snapshot_data *data;
unsigned int sleep_flags;
ssize_t res;
lock_system_sleep();
sleep_flags = lock_system_sleep();
data = filp->private_data;
if (!data->ready) {
@ -157,7 +160,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
*offp += res;
Unlock:
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
return res;
}
@ -165,16 +168,17 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
static ssize_t snapshot_write(struct file *filp, const char __user *buf,
size_t count, loff_t *offp)
{
struct snapshot_data *data;
ssize_t res;
loff_t pg_offp = *offp & ~PAGE_MASK;
struct snapshot_data *data;
unsigned long sleep_flags;
ssize_t res;
if (need_wait) {
wait_for_device_probe();
need_wait = false;
}
lock_system_sleep();
sleep_flags = lock_system_sleep();
data = filp->private_data;
@ -196,7 +200,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
if (res > 0)
*offp += res;
unlock:
unlock_system_sleep();
unlock_system_sleep(sleep_flags);
return res;
}

View File

@ -269,7 +269,7 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
read_unlock(&tasklist_lock);
if (!ret && !ignore_state &&
WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED)))
WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED|TASK_FROZEN)))
ret = -ESRCH;
return ret;

View File

@ -161,7 +161,8 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
struct task_struct *t;
unsigned long flags;
BUG_ON(!lock_task_sighand(p, &flags));
if (WARN_ON_ONCE(!lock_task_sighand(p, &flags)))
return;
prev = p->signal->autogroup;
if (prev == ag) {

View File

@ -204,6 +204,7 @@ EXPORT_SYMBOL(wait_for_completion_io_timeout);
int __sched wait_for_completion_interruptible(struct completion *x)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
if (t == -ERESTARTSYS)
return t;
return 0;
@ -241,12 +242,23 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
int __sched wait_for_completion_killable(struct completion *x)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
if (t == -ERESTARTSYS)
return t;
return 0;
}
EXPORT_SYMBOL(wait_for_completion_killable);
int __sched wait_for_completion_state(struct completion *x, unsigned int state)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, state);
if (t == -ERESTARTSYS)
return t;
return 0;
}
EXPORT_SYMBOL(wait_for_completion_state);
/**
* wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
* @x: holds the state of this particular completion

View File

@ -143,11 +143,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
* Number of tasks to iterate in a single balance run.
* Limited because this is done with IRQs disabled.
*/
#ifdef CONFIG_PREEMPT_RT
const_debug unsigned int sysctl_sched_nr_migrate = 8;
#else
const_debug unsigned int sysctl_sched_nr_migrate = 32;
#endif
const_debug unsigned int sysctl_sched_nr_migrate = SCHED_NR_MIGRATE_BREAK;
__read_mostly int scheduler_running;
@ -482,8 +478,7 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
* p->se.load, p->rt_priority,
* p->dl.dl_{runtime, deadline, period, flags, bw, density}
* - sched_setnuma(): p->numa_preferred_nid
* - sched_move_task()/
* cpu_cgroup_fork(): p->sched_task_group
* - sched_move_task(): p->sched_task_group
* - uclamp_update_active() p->uclamp*
*
* p->state <- TASK_*:
@ -2329,7 +2324,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
rq = cpu_rq(new_cpu);
rq_lock(rq, rf);
BUG_ON(task_cpu(p) != new_cpu);
WARN_ON_ONCE(task_cpu(p) != new_cpu);
activate_task(rq, p, 0);
check_preempt_curr(rq, p, 0);
@ -2779,7 +2774,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
return -EINVAL;
}
if (task_running(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) {
if (task_on_cpu(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) {
/*
* MIGRATE_ENABLE gets here because 'p == current', but for
* anything else we cannot do is_migration_disabled(), punt
@ -3255,12 +3250,12 @@ out:
/*
* wait_task_inactive - wait for a thread to unschedule.
*
* If @match_state is nonzero, it's the @p->state value just checked and
* not expected to change. If it changes, i.e. @p might have woken up,
* then return zero. When we succeed in waiting for @p to be off its CPU,
* we return a positive number (its total switch count). If a second call
* a short while later returns the same number, the caller can be sure that
* @p has remained unscheduled the whole time.
* Wait for the thread to block in any of the states set in @match_state.
* If it changes, i.e. @p might have woken up, then return zero. When we
* succeed in waiting for @p to be off its CPU, we return a positive number
* (its total switch count). If a second call a short while later returns the
* same number, the caller can be sure that @p has remained unscheduled the
* whole time.
*
* The caller must ensure that the task *will* unschedule sometime soon,
* else this function might spin for a *long* time. This function can't
@ -3291,12 +3286,12 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
*
* NOTE! Since we don't hold any locks, it's not
* even sure that "rq" stays as the right runqueue!
* But we don't care, since "task_running()" will
* But we don't care, since "task_on_cpu()" will
* return false if the runqueue has changed and p
* is actually now running somewhere else!
*/
while (task_running(rq, p)) {
if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
while (task_on_cpu(rq, p)) {
if (!(READ_ONCE(p->__state) & match_state))
return 0;
cpu_relax();
}
@ -3308,10 +3303,10 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
*/
rq = task_rq_lock(p, &rf);
trace_sched_wait_task(p);
running = task_running(rq, p);
running = task_on_cpu(rq, p);
queued = task_on_rq_queued(p);
ncsw = 0;
if (!match_state || READ_ONCE(p->__state) == match_state)
if (READ_ONCE(p->__state) & match_state)
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
task_rq_unlock(rq, p, &rf);
@ -6430,7 +6425,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
prev->sched_contributes_to_load =
(prev_state & TASK_UNINTERRUPTIBLE) &&
!(prev_state & TASK_NOLOAD) &&
!(prev->flags & PF_FROZEN);
!(prev_state & TASK_FROZEN);
if (prev->sched_contributes_to_load)
rq->nr_uninterruptible++;
@ -8650,7 +8645,7 @@ again:
if (curr->sched_class != p->sched_class)
goto out_unlock;
if (task_running(p_rq, p) || !task_is_running(p))
if (task_on_cpu(p_rq, p) || !task_is_running(p))
goto out_unlock;
yielded = curr->sched_class->yield_to_task(rq, p);
@ -8862,7 +8857,7 @@ void sched_show_task(struct task_struct *p)
if (pid_alive(p))
ppid = task_pid_nr(rcu_dereference(p->real_parent));
rcu_read_unlock();
pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n",
pr_cont(" stack:%-5lu pid:%-5d ppid:%-6d flags:0x%08lx\n",
free, task_pid_nr(p), ppid,
read_task_thread_flags(p));
@ -8890,7 +8885,7 @@ state_filter_match(unsigned long state_filter, struct task_struct *p)
* When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
* TASK_KILLABLE).
*/
if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE)
if (state_filter == TASK_UNINTERRUPTIBLE && (state & TASK_NOLOAD))
return false;
return true;
@ -9602,9 +9597,6 @@ LIST_HEAD(task_groups);
static struct kmem_cache *task_group_cache __read_mostly;
#endif
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
DECLARE_PER_CPU(cpumask_var_t, select_rq_mask);
void __init sched_init(void)
{
unsigned long ptr = 0;
@ -9648,14 +9640,6 @@ void __init sched_init(void)
#endif /* CONFIG_RT_GROUP_SCHED */
}
#ifdef CONFIG_CPUMASK_OFFSTACK
for_each_possible_cpu(i) {
per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
cpumask_size(), GFP_KERNEL, cpu_to_node(i));
per_cpu(select_rq_mask, i) = (cpumask_var_t)kzalloc_node(
cpumask_size(), GFP_KERNEL, cpu_to_node(i));
}
#endif /* CONFIG_CPUMASK_OFFSTACK */
init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
@ -10164,7 +10148,7 @@ void sched_release_group(struct task_group *tg)
spin_unlock_irqrestore(&task_group_lock, flags);
}
static void sched_change_group(struct task_struct *tsk, int type)
static void sched_change_group(struct task_struct *tsk)
{
struct task_group *tg;
@ -10180,7 +10164,7 @@ static void sched_change_group(struct task_struct *tsk, int type)
#ifdef CONFIG_FAIR_GROUP_SCHED
if (tsk->sched_class->task_change_group)
tsk->sched_class->task_change_group(tsk, type);
tsk->sched_class->task_change_group(tsk);
else
#endif
set_task_rq(tsk, task_cpu(tsk));
@ -10211,7 +10195,7 @@ void sched_move_task(struct task_struct *tsk)
if (running)
put_prev_task(rq, tsk);
sched_change_group(tsk, TASK_MOVE_GROUP);
sched_change_group(tsk);
if (queued)
enqueue_task(rq, tsk, queue_flags);
@ -10289,53 +10273,19 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
sched_unregister_group(tg);
}
/*
* This is called before wake_up_new_task(), therefore we really only
* have to set its group bits, all the other stuff does not apply.
*/
static void cpu_cgroup_fork(struct task_struct *task)
{
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(task, &rf);
update_rq_clock(rq);
sched_change_group(task, TASK_SET_GROUP);
task_rq_unlock(rq, task, &rf);
}
#ifdef CONFIG_RT_GROUP_SCHED
static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
int ret = 0;
cgroup_taskset_for_each(task, css, tset) {
#ifdef CONFIG_RT_GROUP_SCHED
if (!sched_rt_can_attach(css_tg(css), task))
return -EINVAL;
#endif
/*
* Serialize against wake_up_new_task() such that if it's
* running, we're sure to observe its full state.
*/
raw_spin_lock_irq(&task->pi_lock);
/*
* Avoid calling sched_move_task() before wake_up_new_task()
* has happened. This would lead to problems with PELT, due to
* move wanting to detach+attach while we're not attached yet.
*/
if (READ_ONCE(task->__state) == TASK_NEW)
ret = -EINVAL;
raw_spin_unlock_irq(&task->pi_lock);
if (ret)
break;
}
return ret;
return 0;
}
#endif
static void cpu_cgroup_attach(struct cgroup_taskset *tset)
{
@ -11171,8 +11121,9 @@ struct cgroup_subsys cpu_cgrp_subsys = {
.css_released = cpu_cgroup_css_released,
.css_free = cpu_cgroup_css_free,
.css_extra_stat_show = cpu_extra_stat_show,
.fork = cpu_cgroup_fork,
#ifdef CONFIG_RT_GROUP_SCHED
.can_attach = cpu_cgroup_can_attach,
#endif
.attach = cpu_cgroup_attach,
.legacy_cftypes = cpu_legacy_files,
.dfl_cftypes = cpu_files,

View File

@ -88,7 +88,7 @@ static unsigned long sched_core_update_cookie(struct task_struct *p,
* core has now entered/left forced idle state. Defer accounting to the
* next scheduling edge, rather than always forcing a reschedule here.
*/
if (task_running(rq, p))
if (task_on_cpu(rq, p))
resched_curr(rq);
task_rq_unlock(rq, p, &rf);
@ -205,7 +205,7 @@ int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
default:
err = -EINVAL;
goto out;
};
}
if (type == PIDTYPE_PID) {
__sched_core_set(task, cookie);

View File

@ -123,7 +123,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
unsigned long cap, max_cap = 0;
int cpu, max_cpu = -1;
if (!static_branch_unlikely(&sched_asym_cpucapacity))
if (!sched_asym_cpucap_active())
return 1;
/* Ensure the capacity of the CPUs fits the task. */

View File

@ -147,7 +147,7 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,
int task_pri = convert_prio(p->prio);
int idx, cpu;
BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
WARN_ON_ONCE(task_pri >= CPUPRI_NR_PRIORITIES);
for (idx = 0; idx < task_pri; idx++) {

View File

@ -124,15 +124,12 @@ static inline int dl_bw_cpus(int i)
return cpus;
}
static inline unsigned long __dl_bw_capacity(int i)
static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
{
struct root_domain *rd = cpu_rq(i)->rd;
unsigned long cap = 0;
int i;
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
"sched RCU must be held");
for_each_cpu_and(i, rd->span, cpu_active_mask)
for_each_cpu_and(i, mask, cpu_active_mask)
cap += capacity_orig_of(i);
return cap;
@ -144,11 +141,14 @@ static inline unsigned long __dl_bw_capacity(int i)
*/
static inline unsigned long dl_bw_capacity(int i)
{
if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
if (!sched_asym_cpucap_active() &&
capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
} else {
return __dl_bw_capacity(i);
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
"sched RCU must be held");
return __dl_bw_capacity(cpu_rq(i)->rd->span);
}
}
@ -310,7 +310,7 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
{
struct rq *rq;
BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
if (task_on_rq_queued(p))
return;
@ -431,8 +431,8 @@ static void task_non_contending(struct task_struct *p)
sub_rq_bw(&p->dl, &rq->dl);
raw_spin_lock(&dl_b->lock);
__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
__dl_clear_params(p);
raw_spin_unlock(&dl_b->lock);
__dl_clear_params(p);
}
return;
@ -607,7 +607,7 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
struct rb_node *leftmost;
BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
WARN_ON_ONCE(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
leftmost = rb_add_cached(&p->pushable_dl_tasks,
&rq->dl.pushable_dl_tasks_root,
@ -684,7 +684,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
* Failed to find any suitable CPU.
* The task will never come back!
*/
BUG_ON(dl_bandwidth_enabled());
WARN_ON_ONCE(dl_bandwidth_enabled());
/*
* If admission control is disabled we
@ -770,6 +770,14 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
struct rq *rq)
{
/* for non-boosted task, pi_of(dl_se) == dl_se */
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
/*
* We are being explicitly informed that a new instance is starting,
* and this means that:
@ -803,8 +811,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
* future; in fact, we must consider execution overheads (time
* spent on hardirq context, etc.).
*/
dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
dl_se->runtime = dl_se->dl_runtime;
replenish_dl_new_period(dl_se, rq);
}
/*
@ -830,16 +837,14 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
WARN_ON_ONCE(pi_of(dl_se)->dl_runtime <= 0);
/*
* This could be the case for a !-dl task that is boosted.
* Just go with full inherited parameters.
*/
if (dl_se->dl_deadline == 0) {
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
}
if (dl_se->dl_deadline == 0)
replenish_dl_new_period(dl_se, rq);
if (dl_se->dl_yielded && dl_se->runtime > 0)
dl_se->runtime = 0;
@ -866,8 +871,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
*/
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
printk_deferred_once("sched: DL replenish lagged too much\n");
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
replenish_dl_new_period(dl_se, rq);
}
if (dl_se->dl_yielded)
@ -1024,8 +1028,7 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
return;
}
dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
dl_se->runtime = pi_of(dl_se)->dl_runtime;
replenish_dl_new_period(dl_se, rq);
}
}
@ -1333,11 +1336,7 @@ static void update_curr_dl(struct rq *rq)
trace_sched_stat_runtime(curr, delta_exec, 0);
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
update_current_exec_runtime(curr, now, delta_exec);
if (dl_entity_is_special(dl_se))
return;
@ -1616,7 +1615,7 @@ static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));
rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less);
@ -1640,7 +1639,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
static void
enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
{
BUG_ON(on_dl_rq(dl_se));
WARN_ON_ONCE(on_dl_rq(dl_se));
update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);
@ -1814,6 +1813,14 @@ static void yield_task_dl(struct rq *rq)
#ifdef CONFIG_SMP
static inline bool dl_task_is_earliest_deadline(struct task_struct *p,
struct rq *rq)
{
return (!rq->dl.dl_nr_running ||
dl_time_before(p->dl.deadline,
rq->dl.earliest_dl.curr));
}
static int find_later_rq(struct task_struct *task);
static int
@ -1849,16 +1856,14 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
* Take the capacity of the CPU into account to
* ensure it fits the requirement of the task.
*/
if (static_branch_unlikely(&sched_asym_cpucapacity))
if (sched_asym_cpucap_active())
select_rq |= !dl_task_fits_capacity(p, cpu);
if (select_rq) {
int target = find_later_rq(p);
if (target != -1 &&
(dl_time_before(p->dl.deadline,
cpu_rq(target)->dl.earliest_dl.curr) ||
(cpu_rq(target)->dl.dl_nr_running == 0)))
dl_task_is_earliest_deadline(p, cpu_rq(target)))
cpu = target;
}
rcu_read_unlock();
@ -2017,7 +2022,7 @@ static struct task_struct *pick_task_dl(struct rq *rq)
return NULL;
dl_se = pick_next_dl_entity(dl_rq);
BUG_ON(!dl_se);
WARN_ON_ONCE(!dl_se);
p = dl_task_of(dl_se);
return p;
@ -2087,7 +2092,7 @@ static void task_fork_dl(struct task_struct *p)
static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
{
if (!task_running(rq, p) &&
if (!task_on_cpu(rq, p) &&
cpumask_test_cpu(cpu, &p->cpus_mask))
return 1;
return 0;
@ -2225,9 +2230,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
later_rq = cpu_rq(cpu);
if (later_rq->dl.dl_nr_running &&
!dl_time_before(task->dl.deadline,
later_rq->dl.earliest_dl.curr)) {
if (!dl_task_is_earliest_deadline(task, later_rq)) {
/*
* Target rq has tasks of equal or earlier deadline,
* retrying does not release any lock and is unlikely
@ -2241,7 +2244,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
if (double_lock_balance(rq, later_rq)) {
if (unlikely(task_rq(task) != rq ||
!cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
task_running(rq, task) ||
task_on_cpu(rq, task) ||
!dl_task(task) ||
!task_on_rq_queued(task))) {
double_unlock_balance(rq, later_rq);
@ -2255,9 +2258,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
* its earliest one has a later deadline than our
* task, the rq is a good one.
*/
if (!later_rq->dl.dl_nr_running ||
dl_time_before(task->dl.deadline,
later_rq->dl.earliest_dl.curr))
if (dl_task_is_earliest_deadline(task, later_rq))
break;
/* Otherwise we try again. */
@ -2277,12 +2278,12 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
BUG_ON(rq->cpu != task_cpu(p));
BUG_ON(task_current(rq, p));
BUG_ON(p->nr_cpus_allowed <= 1);
WARN_ON_ONCE(rq->cpu != task_cpu(p));
WARN_ON_ONCE(task_current(rq, p));
WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
BUG_ON(!task_on_rq_queued(p));
BUG_ON(!dl_task(p));
WARN_ON_ONCE(!task_on_rq_queued(p));
WARN_ON_ONCE(!dl_task(p));
return p;
}
@ -2428,9 +2429,7 @@ static void pull_dl_task(struct rq *this_rq)
* - it will preempt the last one we pulled (if any).
*/
if (p && dl_time_before(p->dl.deadline, dmin) &&
(!this_rq->dl.dl_nr_running ||
dl_time_before(p->dl.deadline,
this_rq->dl.earliest_dl.curr))) {
dl_task_is_earliest_deadline(p, this_rq)) {
WARN_ON(p == src_rq->curr);
WARN_ON(!task_on_rq_queued(p));
@ -2475,7 +2474,7 @@ skip:
*/
static void task_woken_dl(struct rq *rq, struct task_struct *p)
{
if (!task_running(rq, p) &&
if (!task_on_cpu(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
p->nr_cpus_allowed > 1 &&
dl_task(rq->curr) &&
@ -2492,7 +2491,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
struct root_domain *src_rd;
struct rq *rq;
BUG_ON(!dl_task(p));
WARN_ON_ONCE(!dl_task(p));
rq = task_rq(p);
src_rd = rq->rd;
@ -3007,17 +3006,15 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
const struct cpumask *trial)
{
int ret = 1, trial_cpus;
unsigned long flags, cap;
struct dl_bw *cur_dl_b;
unsigned long flags;
int ret = 1;
rcu_read_lock_sched();
cur_dl_b = dl_bw_of(cpumask_any(cur));
trial_cpus = cpumask_weight(trial);
cap = __dl_bw_capacity(trial);
raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
if (cur_dl_b->bw != -1 &&
cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
if (__dl_overflow(cur_dl_b, cap, 0, 0))
ret = 0;
raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
rcu_read_unlock_sched();

View File

@ -799,8 +799,6 @@ void init_entity_runnable_average(struct sched_entity *se)
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
}
static void attach_entity_cfs_rq(struct sched_entity *se);
/*
* With new tasks being created, their initial util_avgs are extrapolated
* based on the cfs_rq's current util_avg:
@ -835,20 +833,6 @@ void post_init_entity_util_avg(struct task_struct *p)
long cpu_scale = arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq)));
long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2;
if (cap > 0) {
if (cfs_rq->avg.util_avg != 0) {
sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
sa->util_avg /= (cfs_rq->avg.load_avg + 1);
if (sa->util_avg > cap)
sa->util_avg = cap;
} else {
sa->util_avg = cap;
}
}
sa->runnable_avg = sa->util_avg;
if (p->sched_class != &fair_sched_class) {
/*
* For !fair tasks do:
@ -864,7 +848,19 @@ void post_init_entity_util_avg(struct task_struct *p)
return;
}
attach_entity_cfs_rq(se);
if (cap > 0) {
if (cfs_rq->avg.util_avg != 0) {
sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
sa->util_avg /= (cfs_rq->avg.load_avg + 1);
if (sa->util_avg > cap)
sa->util_avg = cap;
} else {
sa->util_avg = cap;
}
}
sa->runnable_avg = sa->util_avg;
}
#else /* !CONFIG_SMP */
@ -1592,11 +1588,11 @@ numa_type numa_classify(unsigned int imbalance_pct,
#ifdef CONFIG_SCHED_SMT
/* Forward declarations of select_idle_sibling helpers */
static inline bool test_idle_cores(int cpu, bool def);
static inline bool test_idle_cores(int cpu);
static inline int numa_idle_core(int idle_core, int cpu)
{
if (!static_branch_likely(&sched_smt_present) ||
idle_core >= 0 || !test_idle_cores(cpu, false))
idle_core >= 0 || !test_idle_cores(cpu))
return idle_core;
/*
@ -2600,7 +2596,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
if (!join)
return;
BUG_ON(irqs_disabled());
WARN_ON_ONCE(irqs_disabled());
double_lock_irq(&my_grp->lock, &grp->lock);
for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) {
@ -3838,8 +3834,7 @@ static void migrate_se_pelt_lag(struct sched_entity *se) {}
* @cfs_rq: cfs_rq to update
*
* The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
* avg. The immediate corollary is that all (fair) tasks must be attached, see
* post_init_entity_util_avg().
* avg. The immediate corollary is that all (fair) tasks must be attached.
*
* cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
*
@ -4003,6 +3998,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
#define UPDATE_TG 0x1
#define SKIP_AGE_LOAD 0x2
#define DO_ATTACH 0x4
#define DO_DETACH 0x8
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@ -4032,6 +4028,13 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
attach_entity_load_avg(cfs_rq, se);
update_tg_load_avg(cfs_rq);
} else if (flags & DO_DETACH) {
/*
* DO_DETACH means we're here from dequeue_entity()
* and we are migrating task out of the CPU.
*/
detach_entity_load_avg(cfs_rq, se);
update_tg_load_avg(cfs_rq);
} else if (decayed) {
cfs_rq_util_change(cfs_rq, 0);
@ -4064,8 +4067,8 @@ static void remove_entity_load_avg(struct sched_entity *se)
/*
* tasks cannot exit without having gone through wake_up_new_task() ->
* post_init_entity_util_avg() which will have added things to the
* cfs_rq, so we can remove unconditionally.
* enqueue_task_fair() which will have added things to the cfs_rq,
* so we can remove unconditionally.
*/
sync_entity_load_avg(se);
@ -4262,7 +4265,7 @@ static inline int task_fits_capacity(struct task_struct *p,
static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
{
if (!static_branch_unlikely(&sched_asym_cpucapacity))
if (!sched_asym_cpucap_active())
return;
if (!p || p->nr_cpus_allowed == 1) {
@ -4292,6 +4295,7 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
#define UPDATE_TG 0x0
#define SKIP_AGE_LOAD 0x0
#define DO_ATTACH 0x0
#define DO_DETACH 0x0
static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int not_used1)
{
@ -4434,7 +4438,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
/*
* When enqueuing a sched_entity, we must:
* - Update loads to have both entity and cfs_rq synced with now.
* - Add its load to cfs_rq->runnable_avg
* - For group_entity, update its runnable_weight to reflect the new
* h_nr_running of its group cfs_rq.
* - For group_entity, update its weight to reflect the new share of
* its group cfs_rq
* - Add its new weight to cfs_rq->load.weight
@ -4511,6 +4516,11 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
int action = UPDATE_TG;
if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
action |= DO_DETACH;
/*
* Update run-time statistics of the 'current'.
*/
@ -4519,12 +4529,13 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
/*
* When dequeuing a sched_entity, we must:
* - Update loads to have both entity and cfs_rq synced with now.
* - Subtract its load from the cfs_rq->runnable_avg.
* - For group_entity, update its runnable_weight to reflect the new
* h_nr_running of its group cfs_rq.
* - Subtract its previous weight from cfs_rq->load.weight.
* - For group entity, update its weight to reflect the new share
* of its group cfs_rq.
*/
update_load_avg(cfs_rq, se, UPDATE_TG);
update_load_avg(cfs_rq, se, action);
se_update_runnable(se);
update_stats_dequeue_fair(cfs_rq, se, flags);
@ -5893,8 +5904,8 @@ dequeue_throttle:
#ifdef CONFIG_SMP
/* Working cpumask for: load_balance, load_balance_newidle. */
DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
#ifdef CONFIG_NO_HZ_COMMON
@ -6260,7 +6271,7 @@ static inline void set_idle_cores(int cpu, int val)
WRITE_ONCE(sds->has_idle_cores, val);
}
static inline bool test_idle_cores(int cpu, bool def)
static inline bool test_idle_cores(int cpu)
{
struct sched_domain_shared *sds;
@ -6268,7 +6279,7 @@ static inline bool test_idle_cores(int cpu, bool def)
if (sds)
return READ_ONCE(sds->has_idle_cores);
return def;
return false;
}
/*
@ -6284,7 +6295,7 @@ void __update_idle_core(struct rq *rq)
int cpu;
rcu_read_lock();
if (test_idle_cores(core, true))
if (test_idle_cores(core))
goto unlock;
for_each_cpu(cpu, cpu_smt_mask(core)) {
@ -6310,9 +6321,6 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
bool idle = true;
int cpu;
if (!static_branch_likely(&sched_smt_present))
return __select_idle_cpu(core, p);
for_each_cpu(cpu, cpu_smt_mask(core)) {
if (!available_idle_cpu(cpu)) {
idle = false;
@ -6339,13 +6347,12 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
/*
* Scan the local SMT mask for idle CPUs.
*/
static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
static int select_idle_smt(struct task_struct *p, int target)
{
int cpu;
for_each_cpu(cpu, cpu_smt_mask(target)) {
if (!cpumask_test_cpu(cpu, p->cpus_ptr) ||
!cpumask_test_cpu(cpu, sched_domain_span(sd)))
for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) {
if (cpu == target)
continue;
if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
return cpu;
@ -6360,9 +6367,9 @@ static inline void set_idle_cores(int cpu, int val)
{
}
static inline bool test_idle_cores(int cpu, bool def)
static inline bool test_idle_cores(int cpu)
{
return def;
return false;
}
static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
@ -6370,7 +6377,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
return __select_idle_cpu(core, p);
}
static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
static inline int select_idle_smt(struct task_struct *p, int target)
{
return -1;
}
@ -6389,19 +6396,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
struct sched_domain_shared *sd_share;
struct rq *this_rq = this_rq();
int this = smp_processor_id();
struct sched_domain *this_sd;
struct sched_domain *this_sd = NULL;
u64 time = 0;
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
return -1;
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
if (sched_feat(SIS_PROP) && !has_idle_core) {
u64 avg_cost, avg_idle, span_avg;
unsigned long now = jiffies;
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
return -1;
/*
* If we're busy, the assumption that the last idle period
* predicts the future is flawed; age away the remaining
@ -6455,7 +6462,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
if (has_idle_core)
set_idle_cores(target, false);
if (sched_feat(SIS_PROP) && !has_idle_core) {
if (sched_feat(SIS_PROP) && this_sd && !has_idle_core) {
time = cpu_clock(this) - time;
/*
@ -6506,7 +6513,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
static inline bool asym_fits_capacity(unsigned long task_util, int cpu)
{
if (static_branch_unlikely(&sched_asym_cpucapacity))
if (sched_asym_cpucap_active())
return fits_capacity(task_util, capacity_of(cpu));
return true;
@ -6526,7 +6533,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
* On asymmetric system, update task utilization because we will check
* that the task fits with cpu's capacity.
*/
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
if (sched_asym_cpucap_active()) {
sync_entity_load_avg(&p->se);
task_util = uclamp_task_util(p);
}
@ -6580,7 +6587,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
* For asymmetric CPU capacity systems, our domain of interest is
* sd_asym_cpucapacity rather than sd_llc.
*/
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
if (sched_asym_cpucap_active()) {
sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
/*
* On an asymmetric CPU capacity system where an exclusive
@ -6601,10 +6608,10 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
return target;
if (sched_smt_active()) {
has_idle_core = test_idle_cores(target, false);
has_idle_core = test_idle_cores(target);
if (!has_idle_core && cpus_share_cache(prev, target)) {
i = select_idle_smt(p, sd, prev);
i = select_idle_smt(p, prev);
if ((unsigned int)i < nr_cpumask_bits)
return i;
}
@ -7076,8 +7083,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
return new_cpu;
}
static void detach_entity_cfs_rq(struct sched_entity *se);
/*
* Called immediately before a task is migrated to a new CPU; task_cpu(p) and
* cfs_rq_of(p) references at time of call are still valid and identify the
@ -7099,15 +7104,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
se->vruntime -= u64_u32_load(cfs_rq->min_vruntime);
}
if (p->on_rq == TASK_ON_RQ_MIGRATING) {
/*
* In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
* rq->lock and can modify state directly.
*/
lockdep_assert_rq_held(task_rq(p));
detach_entity_cfs_rq(se);
} else {
if (!task_on_rq_migrating(p)) {
remove_entity_load_avg(se);
/*
@ -7279,7 +7276,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
find_matching_se(&se, &pse);
BUG_ON(!pse);
WARN_ON_ONCE(!pse);
cse_is_idle = se_is_idle(se);
pse_is_idle = se_is_idle(pse);
@ -7938,7 +7935,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
/* Record that we found at least one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;
if (task_running(env->src_rq, p)) {
if (task_on_cpu(env->src_rq, p)) {
schedstat_inc(p->stats.nr_failed_migrations_running);
return 0;
}
@ -8012,8 +8009,6 @@ static struct task_struct *detach_one_task(struct lb_env *env)
return NULL;
}
static const unsigned int sched_nr_migrate_break = 32;
/*
* detach_tasks() -- tries to detach up to imbalance load/util/tasks from
* busiest_rq, as part of a balancing operation within domain "sd".
@ -8049,20 +8044,24 @@ static int detach_tasks(struct lb_env *env)
if (env->idle != CPU_NOT_IDLE && env->src_rq->nr_running <= 1)
break;
p = list_last_entry(tasks, struct task_struct, se.group_node);
env->loop++;
/* We've more or less seen every task there is, call it quits */
if (env->loop > env->loop_max)
/*
* We've more or less seen every task there is, call it quits
* unless we haven't found any movable task yet.
*/
if (env->loop > env->loop_max &&
!(env->flags & LBF_ALL_PINNED))
break;
/* take a breather every nr_migrate tasks */
if (env->loop > env->loop_break) {
env->loop_break += sched_nr_migrate_break;
env->loop_break += SCHED_NR_MIGRATE_BREAK;
env->flags |= LBF_NEED_BREAK;
break;
}
p = list_last_entry(tasks, struct task_struct, se.group_node);
if (!can_migrate_task(p, env))
goto next;
@ -8159,7 +8158,7 @@ static void attach_task(struct rq *rq, struct task_struct *p)
{
lockdep_assert_rq_held(rq);
BUG_ON(task_rq(p) != rq);
WARN_ON_ONCE(task_rq(p) != rq);
activate_task(rq, p, ENQUEUE_NOCLOCK);
check_preempt_curr(rq, p, 0);
}
@ -10099,14 +10098,13 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct rq *busiest;
struct rq_flags rf;
struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
struct lb_env env = {
.sd = sd,
.dst_cpu = this_cpu,
.dst_rq = this_rq,
.dst_grpmask = sched_group_span(sd->groups),
.idle = idle,
.loop_break = sched_nr_migrate_break,
.loop_break = SCHED_NR_MIGRATE_BREAK,
.cpus = cpus,
.fbq_type = all,
.tasks = LIST_HEAD_INIT(env.tasks),
@ -10134,7 +10132,7 @@ redo:
goto out_balanced;
}
BUG_ON(busiest == env.dst_rq);
WARN_ON_ONCE(busiest == env.dst_rq);
schedstat_add(sd->lb_imbalance[idle], env.imbalance);
@ -10182,7 +10180,9 @@ more_balance:
if (env.flags & LBF_NEED_BREAK) {
env.flags &= ~LBF_NEED_BREAK;
goto more_balance;
/* Stop if we tried all running tasks */
if (env.loop < busiest->nr_running)
goto more_balance;
}
/*
@ -10213,7 +10213,7 @@ more_balance:
env.dst_cpu = env.new_dst_cpu;
env.flags &= ~LBF_DST_PINNED;
env.loop = 0;
env.loop_break = sched_nr_migrate_break;
env.loop_break = SCHED_NR_MIGRATE_BREAK;
/*
* Go back to "more_balance" rather than "redo" since we
@ -10245,7 +10245,7 @@ more_balance:
*/
if (!cpumask_subset(cpus, env.dst_grpmask)) {
env.loop = 0;
env.loop_break = sched_nr_migrate_break;
env.loop_break = SCHED_NR_MIGRATE_BREAK;
goto redo;
}
goto out_all_pinned;
@ -10430,7 +10430,7 @@ static int active_load_balance_cpu_stop(void *data)
* we need to fix it. Originally reported by
* Bjorn Helgaas on a 128-CPU setup.
*/
BUG_ON(busiest_rq == target_rq);
WARN_ON_ONCE(busiest_rq == target_rq);
/* Search for an sd spanning us and the target CPU. */
rcu_read_lock();
@ -10916,8 +10916,7 @@ static bool update_nohz_stats(struct rq *rq)
* can be a simple update of blocked load or a complete load balance with
* tasks movement depending of flags.
*/
static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
enum cpu_idle_type idle)
static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
{
/* Earliest time when we have to do rebalance again */
unsigned long now = jiffies;
@ -11032,7 +11031,7 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
if (idle != CPU_IDLE)
return false;
_nohz_idle_balance(this_rq, flags, idle);
_nohz_idle_balance(this_rq, flags);
return true;
}
@ -11052,7 +11051,7 @@ void nohz_run_idle_balance(int cpu)
* (ie NOHZ_STATS_KICK set) and will do the same.
*/
if ((flags == NOHZ_NEWILB_KICK) && !need_resched())
_nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK, CPU_IDLE);
_nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK);
}
static void nohz_newidle_balance(struct rq *this_rq)
@ -11552,6 +11551,17 @@ static void detach_entity_cfs_rq(struct sched_entity *se)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
#ifdef CONFIG_SMP
/*
* In case the task sched_avg hasn't been attached:
* - A forked task which hasn't been woken up by wake_up_new_task().
* - A task which has been woken up by try_to_wake_up() but is
* waiting for actually being woken up by sched_ttwu_pending().
*/
if (!se->avg.last_update_time)
return;
#endif
/* Catch up with the cfs_rq and remove our load when we leave */
update_load_avg(cfs_rq, se, 0);
detach_entity_load_avg(cfs_rq, se);
@ -11563,14 +11573,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
* Since the real-depth could have been changed (only FAIR
* class maintain depth value), reset depth properly.
*/
se->depth = se->parent ? se->parent->depth + 1 : 0;
#endif
/* Synchronize entity with its cfs_rq */
update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
attach_entity_load_avg(cfs_rq, se);
@ -11666,39 +11668,25 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
}
#ifdef CONFIG_FAIR_GROUP_SCHED
static void task_set_group_fair(struct task_struct *p)
static void task_change_group_fair(struct task_struct *p)
{
struct sched_entity *se = &p->se;
/*
* We couldn't detach or attach a forked task which
* hasn't been woken up by wake_up_new_task().
*/
if (READ_ONCE(p->__state) == TASK_NEW)
return;
set_task_rq(p, task_cpu(p));
se->depth = se->parent ? se->parent->depth + 1 : 0;
}
static void task_move_group_fair(struct task_struct *p)
{
detach_task_cfs_rq(p);
set_task_rq(p, task_cpu(p));
#ifdef CONFIG_SMP
/* Tell se's cfs_rq has been changed -- migrated */
p->se.avg.last_update_time = 0;
#endif
set_task_rq(p, task_cpu(p));
attach_task_cfs_rq(p);
}
static void task_change_group_fair(struct task_struct *p, int type)
{
switch (type) {
case TASK_SET_GROUP:
task_set_group_fair(p);
break;
case TASK_MOVE_GROUP:
task_move_group_fair(p);
break;
}
}
void free_fair_sched_group(struct task_group *tg)
{
int i;
@ -12075,6 +12063,13 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
__init void init_sched_fair_class(void)
{
#ifdef CONFIG_SMP
int i;
for_each_possible_cpu(i) {
zalloc_cpumask_var_node(&per_cpu(load_balance_mask, i), GFP_KERNEL, cpu_to_node(i));
zalloc_cpumask_var_node(&per_cpu(select_rq_mask, i), GFP_KERNEL, cpu_to_node(i));
}
open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
#ifdef CONFIG_NO_HZ_COMMON

View File

@ -509,7 +509,7 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
unsigned int cpu_cap;
/* Only heterogeneous systems can benefit from this check */
if (!static_branch_unlikely(&sched_asym_cpucapacity))
if (!sched_asym_cpucap_active())
return true;
min_cap = uclamp_eff_value(p, UCLAMP_MIN);
@ -843,7 +843,7 @@ static void __disable_runtime(struct rq *rq)
* We cannot be left wanting - that would mean some runtime
* leaked out of the system.
*/
BUG_ON(want);
WARN_ON_ONCE(want);
balanced:
/*
* Disable all the borrow logic by pretending we have inf
@ -1062,11 +1062,7 @@ static void update_curr_rt(struct rq *rq)
trace_sched_stat_runtime(curr, delta_exec, 0);
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
update_current_exec_runtime(curr, now, delta_exec);
if (!rt_bandwidth_enabled())
return;
@ -1849,7 +1845,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
if (!task_running(rq, p) &&
if (!task_on_cpu(rq, p) &&
cpumask_test_cpu(cpu, &p->cpus_mask))
return 1;
@ -1897,7 +1893,7 @@ static int find_lowest_rq(struct task_struct *task)
* If we're on asym system ensure we consider the different capacities
* of the CPUs when searching for the lowest_mask.
*/
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
if (sched_asym_cpucap_active()) {
ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
task, lowest_mask,
@ -2004,7 +2000,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
*/
if (unlikely(task_rq(task) != rq ||
!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
task_running(rq, task) ||
task_on_cpu(rq, task) ||
!rt_task(task) ||
!task_on_rq_queued(task))) {
@ -2462,7 +2458,7 @@ skip:
*/
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
bool need_to_push = !task_running(rq, p) &&
bool need_to_push = !task_on_cpu(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
p->nr_cpus_allowed > 1 &&
(dl_task(rq->curr) || rt_task(rq->curr)) &&

View File

@ -321,21 +321,6 @@ struct dl_bw {
u64 total_bw;
};
/*
* Verify the fitness of task @p to run on @cpu taking into account the
* CPU original capacity and the runtime/deadline ratio of the task.
*
* The function will return true if the CPU original capacity of the
* @cpu scaled by SCHED_CAPACITY_SCALE >= runtime/deadline ratio of the
* task and false otherwise.
*/
static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
{
unsigned long cap = arch_scale_cpu_capacity(cpu);
return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
}
extern void init_dl_bw(struct dl_bw *dl_b);
extern int sched_dl_global_validate(void);
extern void sched_dl_do_global(void);
@ -1815,6 +1800,11 @@ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
extern struct static_key_false sched_asym_cpucapacity;
static __always_inline bool sched_asym_cpucap_active(void)
{
return static_branch_unlikely(&sched_asym_cpucapacity);
}
struct sched_group_capacity {
atomic_t ref;
/*
@ -1942,6 +1932,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
p->se.cfs_rq = tg->cfs_rq[cpu];
p->se.parent = tg->se[cpu];
p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
@ -2060,7 +2051,7 @@ static inline int task_current(struct rq *rq, struct task_struct *p)
return rq->curr == p;
}
static inline int task_running(struct rq *rq, struct task_struct *p)
static inline int task_on_cpu(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
return p->on_cpu;
@ -2204,11 +2195,8 @@ struct sched_class {
void (*update_curr)(struct rq *rq);
#define TASK_SET_GROUP 0
#define TASK_MOVE_GROUP 1
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p, int type);
void (*task_change_group)(struct task_struct *p);
#endif
};
@ -2435,6 +2423,12 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
#ifdef CONFIG_PREEMPT_RT
#define SCHED_NR_MIGRATE_BREAK 8
#else
#define SCHED_NR_MIGRATE_BREAK 32
#endif
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;
@ -2709,8 +2703,8 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
__acquires(rq1->lock)
__acquires(rq2->lock)
{
BUG_ON(!irqs_disabled());
BUG_ON(rq1 != rq2);
WARN_ON_ONCE(!irqs_disabled());
WARN_ON_ONCE(rq1 != rq2);
raw_spin_rq_lock(rq1);
__acquire(rq2->lock); /* Fake it out ;) */
double_rq_clock_clear_update(rq1, rq2);
@ -2726,7 +2720,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__releases(rq1->lock)
__releases(rq2->lock)
{
BUG_ON(rq1 != rq2);
WARN_ON_ONCE(rq1 != rq2);
raw_spin_rq_unlock(rq1);
__release(rq2->lock);
}
@ -2896,6 +2890,21 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
enum cpu_util_type type,
struct task_struct *p);
/*
* Verify the fitness of task @p to run on @cpu taking into account the
* CPU original capacity and the runtime/deadline ratio of the task.
*
* The function will return true if the original capacity of @cpu is
* greater than or equal to task's deadline density right shifted by
* (BW_SHIFT - SCHED_CAPACITY_SHIFT) and false otherwise.
*/
static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
{
unsigned long cap = arch_scale_cpu_capacity(cpu);
return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT);
}
static inline unsigned long cpu_bw_dl(struct rq *rq)
{
return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
@ -3157,4 +3166,14 @@ extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif
static inline void update_current_exec_runtime(struct task_struct *curr,
u64 now, u64 delta_exec)
{
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
}
#endif /* _KERNEL_SCHED_SCHED_H */

View File

@ -71,20 +71,17 @@ static void yield_task_stop(struct rq *rq)
static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
{
struct task_struct *curr = rq->curr;
u64 delta_exec;
u64 now, delta_exec;
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
now = rq_clock_task(rq);
delta_exec = now - curr->se.exec_start;
if (unlikely((s64)delta_exec < 0))
delta_exec = 0;
schedstat_set(curr->stats.exec_max,
max(curr->stats.exec_max, delta_exec));
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
curr->se.exec_start = rq_clock_task(rq);
cgroup_account_cputime(curr, delta_exec);
update_current_exec_runtime(curr, now, delta_exec);
}
/*

View File

@ -2305,7 +2305,7 @@ static int ptrace_stop(int exit_code, int why, unsigned long message,
read_unlock(&tasklist_lock);
cgroup_enter_frozen();
preempt_enable_no_resched();
freezable_schedule();
schedule();
cgroup_leave_frozen(true);
/*
@ -2474,7 +2474,7 @@ static bool do_signal_stop(int signr)
/* Now we don't run again until woken by SIGCONT or SIGKILL */
cgroup_enter_frozen();
freezable_schedule();
schedule();
return true;
} else {
/*
@ -2549,11 +2549,11 @@ static void do_freezer_trap(void)
* immediately (if there is a non-fatal signal pending), and
* put the task into sleep.
*/
__set_current_state(TASK_INTERRUPTIBLE);
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
clear_thread_flag(TIF_SIGPENDING);
spin_unlock_irq(&current->sighand->siglock);
cgroup_enter_frozen();
freezable_schedule();
schedule();
}
static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
@ -3601,9 +3601,9 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
recalc_sigpending();
spin_unlock_irq(&tsk->sighand->siglock);
__set_current_state(TASK_INTERRUPTIBLE);
ret = freezable_schedule_hrtimeout_range(to, tsk->timer_slack_ns,
HRTIMER_MODE_REL);
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
ret = schedule_hrtimeout_range(to, tsk->timer_slack_ns,
HRTIMER_MODE_REL);
spin_lock_irq(&tsk->sighand->siglock);
__set_task_blocked(tsk, &tsk->real_blocked);
sigemptyset(&tsk->real_blocked);

View File

@ -2037,11 +2037,11 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
struct restart_block *restart;
do {
set_current_state(TASK_INTERRUPTIBLE);
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
hrtimer_sleeper_start_expires(t, mode);
if (likely(t->task))
freezable_schedule();
schedule();
hrtimer_cancel(&t->timer);
mode = HRTIMER_MODE_ABS;

View File

@ -28,6 +28,7 @@
#include <linux/async.h>
#include <linux/uaccess.h>
#include <linux/initrd.h>
#include <linux/freezer.h>
#include <trace/events/module.h>
@ -403,6 +404,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
*/
int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
{
unsigned int state = TASK_UNINTERRUPTIBLE;
DECLARE_COMPLETION_ONSTACK(done);
int retval = 0;
@ -436,18 +438,22 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
if (wait == UMH_NO_WAIT) /* task has freed sub_info */
goto unlock;
if (wait & UMH_KILLABLE) {
retval = wait_for_completion_killable(&done);
if (!retval)
goto wait_done;
if (wait & UMH_KILLABLE)
state |= TASK_KILLABLE;
if (wait & UMH_FREEZABLE)
state |= TASK_FREEZABLE;
retval = wait_for_completion_state(&done, state);
if (!retval)
goto wait_done;
if (wait & UMH_KILLABLE) {
/* umh_complete() will see NULL and free sub_info */
if (xchg(&sub_info->complete, NULL))
goto unlock;
/* fallthrough, umh_complete() was already called */
}
wait_for_completion(&done);
wait_done:
retval = sub_info->retval;
out:

View File

@ -730,8 +730,8 @@ static void khugepaged_alloc_sleep(void)
DEFINE_WAIT(wait);
add_wait_queue(&khugepaged_wait, &wait);
freezable_schedule_timeout_interruptible(
msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
schedule_timeout(msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
remove_wait_queue(&khugepaged_wait, &wait);
}

View File

@ -269,7 +269,7 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
freezable_schedule_unsafe();
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
@ -333,14 +333,12 @@ static int rpc_complete_task(struct rpc_task *task)
* to enforce taking of the wq->lock and hence avoid races with
* rpc_complete_task().
*/
int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
int rpc_wait_for_completion_task(struct rpc_task *task)
{
if (action == NULL)
action = rpc_wait_bit_killable;
return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
action, TASK_KILLABLE);
rpc_wait_bit_killable, TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
}
EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
EXPORT_SYMBOL_GPL(rpc_wait_for_completion_task);
/*
* Make an RPC task runnable.
@ -964,7 +962,7 @@ static void __rpc_execute(struct rpc_task *task)
trace_rpc_task_sync_sleep(task, task->tk_action);
status = out_of_line_wait_on_bit(&task->tk_runstate,
RPC_TASK_QUEUED, rpc_wait_bit_killable,
TASK_KILLABLE);
TASK_KILLABLE|TASK_FREEZABLE);
if (status < 0) {
/*
* When a sync task receives a signal, it exits with

View File

@ -2560,13 +2560,14 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
struct sk_buff *last, unsigned int last_len,
bool freezable)
{
unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
struct sk_buff *tail;
DEFINE_WAIT(wait);
unix_state_lock(sk);
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
prepare_to_wait(sk_sleep(sk), &wait, state);
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail != last ||
@ -2579,10 +2580,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
unix_state_unlock(sk);
if (freezable)
timeo = freezable_schedule_timeout(timeo);
else
timeo = schedule_timeout(timeo);
timeo = schedule_timeout(timeo);
unix_state_lock(sk);
if (sock_flag(sk, SOCK_DEAD))