cgroup changes for v6.4-rc1

 * cpuset changes including the fix for an incorrect interaction with CPU
   hotplug and an optimization.
 
 * Other doc and cosmetic changes.
 -----BEGIN PGP SIGNATURE-----
 
 iIQEABYIACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCZErfng4cdGpAa2VybmVs
 Lm9yZwAKCRCxYfJx3gVYGVVtAQCDycK4VSgc4nsFPG1vh1Oy1A723ciEUwAbKmV/
 F1n7xwEA68FiDvE29LpMJJuYP9HnX0A5zRMyNnb52kN9jmgcEQI=
 =ALol
 -----END PGP SIGNATURE-----

Merge tag 'cgroup-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:

 - cpuset changes including the fix for an incorrect interaction with
   CPU hotplug and an optimization

 - Other doc and cosmetic changes

* tag 'cgroup-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  docs: cgroup-v1/cpusets: update libcgroup project link
  cgroup/cpuset: Minor updates to test_cpuset_prs.sh
  cgroup/cpuset: Include offline CPUs when tasks' cpumasks in top_cpuset are updated
  cgroup/cpuset: Skip task update if hotplug doesn't affect current cpuset
  cpuset: Clean up cpuset_node_allowed
  cgroup: bpf: use cgroup_lock()/cgroup_unlock() wrappers
This commit is contained in:
Linus Torvalds 2023-04-29 10:05:22 -07:00
commit 86e98ed15b
10 changed files with 99 additions and 100 deletions

View File

@ -719,7 +719,7 @@ There are ways to query or modify cpusets:
cat, rmdir commands from the shell, or their equivalent from C. cat, rmdir commands from the shell, or their equivalent from C.
- via the C library libcpuset. - via the C library libcpuset.
- via the C library libcgroup. - via the C library libcgroup.
(http://sourceforge.net/projects/libcg/) (https://github.com/libcgroup/libcgroup/)
- via the python application cset. - via the python application cset.
(http://code.google.com/p/cpuset/) (http://code.google.com/p/cpuset/)

View File

@ -80,18 +80,11 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
void cpuset_init_current_mems_allowed(void); void cpuset_init_current_mems_allowed(void);
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
extern bool __cpuset_node_allowed(int node, gfp_t gfp_mask); extern bool cpuset_node_allowed(int node, gfp_t gfp_mask);
static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{
if (cpusets_enabled())
return __cpuset_node_allowed(node, gfp_mask);
return true;
}
static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{ {
return __cpuset_node_allowed(zone_to_nid(z), gfp_mask); return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
} }
static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
@ -223,11 +216,6 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
return 1; return 1;
} }
static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{
return true;
}
static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{ {
return true; return true;

View File

@ -173,11 +173,11 @@ void bpf_cgroup_atype_put(int cgroup_atype)
{ {
int i = cgroup_atype - CGROUP_LSM_START; int i = cgroup_atype - CGROUP_LSM_START;
mutex_lock(&cgroup_mutex); cgroup_lock();
if (--cgroup_lsm_atype[i].refcnt <= 0) if (--cgroup_lsm_atype[i].refcnt <= 0)
cgroup_lsm_atype[i].attach_btf_id = 0; cgroup_lsm_atype[i].attach_btf_id = 0;
WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0); WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
} }
#else #else
static enum cgroup_bpf_attach_type static enum cgroup_bpf_attach_type
@ -282,7 +282,7 @@ static void cgroup_bpf_release(struct work_struct *work)
unsigned int atype; unsigned int atype;
mutex_lock(&cgroup_mutex); cgroup_lock();
for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) { for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
struct hlist_head *progs = &cgrp->bpf.progs[atype]; struct hlist_head *progs = &cgrp->bpf.progs[atype];
@ -315,7 +315,7 @@ static void cgroup_bpf_release(struct work_struct *work)
bpf_cgroup_storage_free(storage); bpf_cgroup_storage_free(storage);
} }
mutex_unlock(&cgroup_mutex); cgroup_unlock();
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_put(p); cgroup_bpf_put(p);
@ -729,9 +729,9 @@ static int cgroup_bpf_attach(struct cgroup *cgrp,
{ {
int ret; int ret;
mutex_lock(&cgroup_mutex); cgroup_lock();
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags); ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
@ -831,7 +831,7 @@ static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
cg_link = container_of(link, struct bpf_cgroup_link, link); cg_link = container_of(link, struct bpf_cgroup_link, link);
mutex_lock(&cgroup_mutex); cgroup_lock();
/* link might have been auto-released by dying cgroup, so fail */ /* link might have been auto-released by dying cgroup, so fail */
if (!cg_link->cgroup) { if (!cg_link->cgroup) {
ret = -ENOLINK; ret = -ENOLINK;
@ -843,7 +843,7 @@ static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
} }
ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog); ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
out_unlock: out_unlock:
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
@ -1009,9 +1009,9 @@ static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
{ {
int ret; int ret;
mutex_lock(&cgroup_mutex); cgroup_lock();
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type); ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
@ -1120,9 +1120,9 @@ static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
{ {
int ret; int ret;
mutex_lock(&cgroup_mutex); cgroup_lock();
ret = __cgroup_bpf_query(cgrp, attr, uattr); ret = __cgroup_bpf_query(cgrp, attr, uattr);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
@ -1189,11 +1189,11 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
if (!cg_link->cgroup) if (!cg_link->cgroup)
return; return;
mutex_lock(&cgroup_mutex); cgroup_lock();
/* re-check cgroup under lock again */ /* re-check cgroup under lock again */
if (!cg_link->cgroup) { if (!cg_link->cgroup) {
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return; return;
} }
@ -1205,7 +1205,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
cg = cg_link->cgroup; cg = cg_link->cgroup;
cg_link->cgroup = NULL; cg_link->cgroup = NULL;
mutex_unlock(&cgroup_mutex); cgroup_unlock();
cgroup_put(cg); cgroup_put(cg);
} }
@ -1232,10 +1232,10 @@ static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
container_of(link, struct bpf_cgroup_link, link); container_of(link, struct bpf_cgroup_link, link);
u64 cg_id = 0; u64 cg_id = 0;
mutex_lock(&cgroup_mutex); cgroup_lock();
if (cg_link->cgroup) if (cg_link->cgroup)
cg_id = cgroup_id(cg_link->cgroup); cg_id = cgroup_id(cg_link->cgroup);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
seq_printf(seq, seq_printf(seq,
"cgroup_id:\t%llu\n" "cgroup_id:\t%llu\n"
@ -1251,10 +1251,10 @@ static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
container_of(link, struct bpf_cgroup_link, link); container_of(link, struct bpf_cgroup_link, link);
u64 cg_id = 0; u64 cg_id = 0;
mutex_lock(&cgroup_mutex); cgroup_lock();
if (cg_link->cgroup) if (cg_link->cgroup)
cg_id = cgroup_id(cg_link->cgroup); cg_id = cgroup_id(cg_link->cgroup);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
info->cgroup.cgroup_id = cg_id; info->cgroup.cgroup_id = cg_id;
info->cgroup.attach_type = cg_link->type; info->cgroup.attach_type = cg_link->type;

View File

@ -58,7 +58,7 @@ static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos)
{ {
struct cgroup_iter_priv *p = seq->private; struct cgroup_iter_priv *p = seq->private;
mutex_lock(&cgroup_mutex); cgroup_lock();
/* cgroup_iter doesn't support read across multiple sessions. */ /* cgroup_iter doesn't support read across multiple sessions. */
if (*pos > 0) { if (*pos > 0) {
@ -89,7 +89,7 @@ static void cgroup_iter_seq_stop(struct seq_file *seq, void *v)
{ {
struct cgroup_iter_priv *p = seq->private; struct cgroup_iter_priv *p = seq->private;
mutex_unlock(&cgroup_mutex); cgroup_unlock();
/* pass NULL to the prog for post-processing */ /* pass NULL to the prog for post-processing */
if (!v) { if (!v) {

View File

@ -333,14 +333,14 @@ static void cgroup_storage_map_free(struct bpf_map *_map)
struct list_head *storages = &map->list; struct list_head *storages = &map->list;
struct bpf_cgroup_storage *storage, *stmp; struct bpf_cgroup_storage *storage, *stmp;
mutex_lock(&cgroup_mutex); cgroup_lock();
list_for_each_entry_safe(storage, stmp, storages, list_map) { list_for_each_entry_safe(storage, stmp, storages, list_map) {
bpf_cgroup_storage_unlink(storage); bpf_cgroup_storage_unlink(storage);
bpf_cgroup_storage_free(storage); bpf_cgroup_storage_free(storage);
} }
mutex_unlock(&cgroup_mutex); cgroup_unlock();
WARN_ON(!RB_EMPTY_ROOT(&map->root)); WARN_ON(!RB_EMPTY_ROOT(&map->root));
WARN_ON(!list_empty(&map->list)); WARN_ON(!list_empty(&map->list));

View File

@ -58,7 +58,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
struct cgroup_root *root; struct cgroup_root *root;
int retval = 0; int retval = 0;
mutex_lock(&cgroup_mutex); cgroup_lock();
cgroup_attach_lock(true); cgroup_attach_lock(true);
for_each_root(root) { for_each_root(root) {
struct cgroup *from_cgrp; struct cgroup *from_cgrp;
@ -72,7 +72,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
break; break;
} }
cgroup_attach_unlock(true); cgroup_attach_unlock(true);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return retval; return retval;
} }
@ -106,7 +106,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
if (ret) if (ret)
return ret; return ret;
mutex_lock(&cgroup_mutex); cgroup_lock();
percpu_down_write(&cgroup_threadgroup_rwsem); percpu_down_write(&cgroup_threadgroup_rwsem);
@ -145,7 +145,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
out_err: out_err:
cgroup_migrate_finish(&mgctx); cgroup_migrate_finish(&mgctx);
percpu_up_write(&cgroup_threadgroup_rwsem); percpu_up_write(&cgroup_threadgroup_rwsem);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
@ -847,13 +847,13 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
kernfs_break_active_protection(new_parent); kernfs_break_active_protection(new_parent);
kernfs_break_active_protection(kn); kernfs_break_active_protection(kn);
mutex_lock(&cgroup_mutex); cgroup_lock();
ret = kernfs_rename(kn, new_parent, new_name_str); ret = kernfs_rename(kn, new_parent, new_name_str);
if (!ret) if (!ret)
TRACE_CGROUP_PATH(rename, cgrp); TRACE_CGROUP_PATH(rename, cgrp);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
kernfs_unbreak_active_protection(kn); kernfs_unbreak_active_protection(kn);
kernfs_unbreak_active_protection(new_parent); kernfs_unbreak_active_protection(new_parent);
@ -1119,7 +1119,7 @@ int cgroup1_reconfigure(struct fs_context *fc)
trace_cgroup_remount(root); trace_cgroup_remount(root);
out_unlock: out_unlock:
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
@ -1246,7 +1246,7 @@ int cgroup1_get_tree(struct fs_context *fc)
if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt)) if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt))
ret = 1; /* restart */ ret = 1; /* restart */
mutex_unlock(&cgroup_mutex); cgroup_unlock();
if (!ret) if (!ret)
ret = cgroup_do_get_tree(fc); ret = cgroup_do_get_tree(fc);

View File

@ -1391,7 +1391,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_favor_dynmods(root, false); cgroup_favor_dynmods(root, false);
cgroup_exit_root_id(root); cgroup_exit_root_id(root);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
cgroup_rstat_exit(cgrp); cgroup_rstat_exit(cgrp);
kernfs_destroy_root(root->kf_root); kernfs_destroy_root(root->kf_root);
@ -1635,7 +1635,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn)
else else
cgrp = kn->parent->priv; cgrp = kn->parent->priv;
mutex_unlock(&cgroup_mutex); cgroup_unlock();
kernfs_unbreak_active_protection(kn); kernfs_unbreak_active_protection(kn);
cgroup_put(cgrp); cgroup_put(cgrp);
@ -1680,7 +1680,7 @@ struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
if (drain_offline) if (drain_offline)
cgroup_lock_and_drain_offline(cgrp); cgroup_lock_and_drain_offline(cgrp);
else else
mutex_lock(&cgroup_mutex); cgroup_lock();
if (!cgroup_is_dead(cgrp)) if (!cgroup_is_dead(cgrp))
return cgrp; return cgrp;
@ -2177,13 +2177,13 @@ int cgroup_do_get_tree(struct fs_context *fc)
struct super_block *sb = fc->root->d_sb; struct super_block *sb = fc->root->d_sb;
struct cgroup *cgrp; struct cgroup *cgrp;
mutex_lock(&cgroup_mutex); cgroup_lock();
spin_lock_irq(&css_set_lock); spin_lock_irq(&css_set_lock);
cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root); cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
spin_unlock_irq(&css_set_lock); spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
nsdentry = kernfs_node_dentry(cgrp->kn, sb); nsdentry = kernfs_node_dentry(cgrp->kn, sb);
dput(fc->root); dput(fc->root);
@ -2366,13 +2366,13 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
{ {
int ret; int ret;
mutex_lock(&cgroup_mutex); cgroup_lock();
spin_lock_irq(&css_set_lock); spin_lock_irq(&css_set_lock);
ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns); ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
spin_unlock_irq(&css_set_lock); spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
@ -2398,7 +2398,7 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
int hierarchy_id = 1; int hierarchy_id = 1;
int ret; int ret;
mutex_lock(&cgroup_mutex); cgroup_lock();
spin_lock_irq(&css_set_lock); spin_lock_irq(&css_set_lock);
root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
@ -2412,7 +2412,7 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
} }
spin_unlock_irq(&css_set_lock); spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(task_cgroup_path); EXPORT_SYMBOL_GPL(task_cgroup_path);
@ -3121,7 +3121,7 @@ void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
int ssid; int ssid;
restart: restart:
mutex_lock(&cgroup_mutex); cgroup_lock();
cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
for_each_subsys(ss, ssid) { for_each_subsys(ss, ssid) {
@ -3135,7 +3135,7 @@ restart:
prepare_to_wait(&dsct->offline_waitq, &wait, prepare_to_wait(&dsct->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
schedule(); schedule();
finish_wait(&dsct->offline_waitq, &wait); finish_wait(&dsct->offline_waitq, &wait);
@ -4384,9 +4384,9 @@ int cgroup_rm_cftypes(struct cftype *cfts)
if (!(cfts[0].flags & __CFTYPE_ADDED)) if (!(cfts[0].flags & __CFTYPE_ADDED))
return -ENOENT; return -ENOENT;
mutex_lock(&cgroup_mutex); cgroup_lock();
ret = cgroup_rm_cftypes_locked(cfts); ret = cgroup_rm_cftypes_locked(cfts);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
@ -4418,14 +4418,14 @@ static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
if (ret) if (ret)
return ret; return ret;
mutex_lock(&cgroup_mutex); cgroup_lock();
list_add_tail(&cfts->node, &ss->cfts); list_add_tail(&cfts->node, &ss->cfts);
ret = cgroup_apply_cftypes(cfts, true); ret = cgroup_apply_cftypes(cfts, true);
if (ret) if (ret)
cgroup_rm_cftypes_locked(cfts); cgroup_rm_cftypes_locked(cfts);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
return ret; return ret;
} }
@ -5395,7 +5395,7 @@ static void css_release_work_fn(struct work_struct *work)
struct cgroup_subsys *ss = css->ss; struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup; struct cgroup *cgrp = css->cgroup;
mutex_lock(&cgroup_mutex); cgroup_lock();
css->flags |= CSS_RELEASED; css->flags |= CSS_RELEASED;
list_del_rcu(&css->sibling); list_del_rcu(&css->sibling);
@ -5436,7 +5436,7 @@ static void css_release_work_fn(struct work_struct *work)
NULL); NULL);
} }
mutex_unlock(&cgroup_mutex); cgroup_unlock();
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
@ -5784,7 +5784,7 @@ static void css_killed_work_fn(struct work_struct *work)
struct cgroup_subsys_state *css = struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work); container_of(work, struct cgroup_subsys_state, destroy_work);
mutex_lock(&cgroup_mutex); cgroup_lock();
do { do {
offline_css(css); offline_css(css);
@ -5793,7 +5793,7 @@ static void css_killed_work_fn(struct work_struct *work)
css = css->parent; css = css->parent;
} while (css && atomic_dec_and_test(&css->online_cnt)); } while (css && atomic_dec_and_test(&css->online_cnt));
mutex_unlock(&cgroup_mutex); cgroup_unlock();
} }
/* css kill confirmation processing requires process context, bounce */ /* css kill confirmation processing requires process context, bounce */
@ -5977,7 +5977,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
pr_debug("Initializing cgroup subsys %s\n", ss->name); pr_debug("Initializing cgroup subsys %s\n", ss->name);
mutex_lock(&cgroup_mutex); cgroup_lock();
idr_init(&ss->css_idr); idr_init(&ss->css_idr);
INIT_LIST_HEAD(&ss->cfts); INIT_LIST_HEAD(&ss->cfts);
@ -6021,7 +6021,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
BUG_ON(online_css(css)); BUG_ON(online_css(css));
mutex_unlock(&cgroup_mutex); cgroup_unlock();
} }
/** /**
@ -6081,7 +6081,7 @@ int __init cgroup_init(void)
get_user_ns(init_cgroup_ns.user_ns); get_user_ns(init_cgroup_ns.user_ns);
mutex_lock(&cgroup_mutex); cgroup_lock();
/* /*
* Add init_css_set to the hash table so that dfl_root can link to * Add init_css_set to the hash table so that dfl_root can link to
@ -6092,7 +6092,7 @@ int __init cgroup_init(void)
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
mutex_unlock(&cgroup_mutex); cgroup_unlock();
for_each_subsys(ss, ssid) { for_each_subsys(ss, ssid) {
if (ss->early_init) { if (ss->early_init) {
@ -6144,9 +6144,9 @@ int __init cgroup_init(void)
if (ss->bind) if (ss->bind)
ss->bind(init_css_set.subsys[ssid]); ss->bind(init_css_set.subsys[ssid]);
mutex_lock(&cgroup_mutex); cgroup_lock();
css_populate_dir(init_css_set.subsys[ssid]); css_populate_dir(init_css_set.subsys[ssid]);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
} }
/* init_css_set.subsys[] has been updated, re-hash */ /* init_css_set.subsys[] has been updated, re-hash */
@ -6251,7 +6251,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
if (!buf) if (!buf)
goto out; goto out;
mutex_lock(&cgroup_mutex); cgroup_lock();
spin_lock_irq(&css_set_lock); spin_lock_irq(&css_set_lock);
for_each_root(root) { for_each_root(root) {
@ -6306,7 +6306,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
retval = 0; retval = 0;
out_unlock: out_unlock:
spin_unlock_irq(&css_set_lock); spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
kfree(buf); kfree(buf);
out: out:
return retval; return retval;
@ -6390,7 +6390,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
struct file *f; struct file *f;
if (kargs->flags & CLONE_INTO_CGROUP) if (kargs->flags & CLONE_INTO_CGROUP)
mutex_lock(&cgroup_mutex); cgroup_lock();
cgroup_threadgroup_change_begin(current); cgroup_threadgroup_change_begin(current);
@ -6465,7 +6465,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
err: err:
cgroup_threadgroup_change_end(current); cgroup_threadgroup_change_end(current);
mutex_unlock(&cgroup_mutex); cgroup_unlock();
if (f) if (f)
fput(f); fput(f);
if (dst_cgrp) if (dst_cgrp)
@ -6492,7 +6492,7 @@ static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs)
struct cgroup *cgrp = kargs->cgrp; struct cgroup *cgrp = kargs->cgrp;
struct css_set *cset = kargs->cset; struct css_set *cset = kargs->cset;
mutex_unlock(&cgroup_mutex); cgroup_unlock();
if (cset) { if (cset) {
put_css_set(cset); put_css_set(cset);

View File

@ -1209,7 +1209,9 @@ void rebuild_sched_domains(void)
* *
* Iterate through each task of @cs updating its cpus_allowed to the * Iterate through each task of @cs updating its cpus_allowed to the
* effective cpuset's. As this function is called with cpuset_rwsem held, * effective cpuset's. As this function is called with cpuset_rwsem held,
* cpuset membership stays stable. * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask()
* is used instead of effective_cpus to make sure all offline CPUs are also
* included as hotplug code won't update cpumasks for tasks in top_cpuset.
*/ */
static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
{ {
@ -1219,15 +1221,18 @@ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
css_task_iter_start(&cs->css, 0, &it); css_task_iter_start(&cs->css, 0, &it);
while ((task = css_task_iter_next(&it))) { while ((task = css_task_iter_next(&it))) {
/* const struct cpumask *possible_mask = task_cpu_possible_mask(task);
* Percpu kthreads in top_cpuset are ignored
*/
if (top_cs && (task->flags & PF_KTHREAD) &&
kthread_is_per_cpu(task))
continue;
cpumask_and(new_cpus, cs->effective_cpus, if (top_cs) {
task_cpu_possible_mask(task)); /*
* Percpu kthreads in top_cpuset are ignored
*/
if ((task->flags & PF_KTHREAD) && kthread_is_per_cpu(task))
continue;
cpumask_andnot(new_cpus, possible_mask, cs->subparts_cpus);
} else {
cpumask_and(new_cpus, possible_mask, cs->effective_cpus);
}
set_cpus_allowed_ptr(task, new_cpus); set_cpus_allowed_ptr(task, new_cpus);
} }
css_task_iter_end(&it); css_task_iter_end(&it);
@ -3618,6 +3623,8 @@ retry:
update_tasks: update_tasks:
cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
mems_updated = !nodes_equal(new_mems, cs->effective_mems); mems_updated = !nodes_equal(new_mems, cs->effective_mems);
if (!cpus_updated && !mems_updated)
goto unlock; /* Hotplug doesn't affect this cpuset */
if (mems_updated) if (mems_updated)
check_insane_mems_config(&new_mems); check_insane_mems_config(&new_mems);
@ -3629,6 +3636,7 @@ update_tasks:
hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
cpus_updated, mems_updated); cpus_updated, mems_updated);
unlock:
percpu_up_write(&cpuset_rwsem); percpu_up_write(&cpuset_rwsem);
} }
@ -3941,7 +3949,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
} }
/* /*
* __cpuset_node_allowed - Can we allocate on a memory node? * cpuset_node_allowed - Can we allocate on a memory node?
* @node: is this an allowed node? * @node: is this an allowed node?
* @gfp_mask: memory allocation flags * @gfp_mask: memory allocation flags
* *
@ -3980,7 +3988,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok. * GFP_USER - only nodes in current tasks mems allowed ok.
*/ */
bool __cpuset_node_allowed(int node, gfp_t gfp_mask) bool cpuset_node_allowed(int node, gfp_t gfp_mask)
{ {
struct cpuset *cs; /* current cpuset ancestors */ struct cpuset *cs; /* current cpuset ancestors */
bool allowed; /* is allocation in zone z allowed? */ bool allowed; /* is allocation in zone z allowed? */

View File

@ -3385,7 +3385,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
retry: retry:
/* /*
* Scan zonelist, looking for a zone with enough free. * Scan zonelist, looking for a zone with enough free.
* See also __cpuset_node_allowed() comment in kernel/cgroup/cpuset.c. * See also cpuset_node_allowed() comment in kernel/cgroup/cpuset.c.
*/ */
no_fallback = alloc_flags & ALLOC_NOFRAGMENT; no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
z = ac->preferred_zoneref; z = ac->preferred_zoneref;
@ -4059,7 +4059,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
/* /*
* Ignore cpuset mems for non-blocking __GFP_HIGH (probably * Ignore cpuset mems for non-blocking __GFP_HIGH (probably
* GFP_ATOMIC) rather than fail, see the comment for * GFP_ATOMIC) rather than fail, see the comment for
* __cpuset_node_allowed(). * cpuset_node_allowed().
*/ */
if (alloc_flags & ALLOC_MIN_RESERVE) if (alloc_flags & ALLOC_MIN_RESERVE)
alloc_flags &= ~ALLOC_CPUSET; alloc_flags &= ~ALLOC_CPUSET;

View File

@ -15,13 +15,6 @@ skip_test() {
[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!" [[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
# Set sched verbose flag, if available
if [[ -d /sys/kernel/debug/sched ]]
then
# Used to restore the original setting during cleanup
SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose)
echo Y > /sys/kernel/debug/sched/verbose
fi
# Get wait_inotify location # Get wait_inotify location
WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
@ -37,10 +30,14 @@ CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
PROG=$1 PROG=$1
VERBOSE= VERBOSE=
DELAY_FACTOR=1 DELAY_FACTOR=1
SCHED_DEBUG=
while [[ "$1" = -* ]] while [[ "$1" = -* ]]
do do
case "$1" in case "$1" in
-v) VERBOSE=1 -v) VERBOSE=1
# Enabling sched/verbose can slow things down
[[ $DELAY_FACTOR -eq 1 ]] &&
DELAY_FACTOR=2
break break
;; ;;
-d) DELAY_FACTOR=$2 -d) DELAY_FACTOR=$2
@ -54,6 +51,14 @@ do
shift shift
done done
# Set sched verbose flag if available when "-v" option is specified
if [[ -n "$VERBOSE" && -d /sys/kernel/debug/sched ]]
then
# Used to restore the original setting during cleanup
SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose)
echo Y > /sys/kernel/debug/sched/verbose
fi
cd $CGROUP2 cd $CGROUP2
echo +cpuset > cgroup.subtree_control echo +cpuset > cgroup.subtree_control
[[ -d test ]] || mkdir test [[ -d test ]] || mkdir test
@ -65,7 +70,8 @@ cleanup()
rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
cd .. cd ..
rmdir test > /dev/null 2>&1 rmdir test > /dev/null 2>&1
echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose [[ -n "$SCHED_DEBUG" ]] &&
echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
} }
# Pause in ms # Pause in ms
@ -571,7 +577,6 @@ run_state_test()
echo "Test $TEST[$I] failed result check!" echo "Test $TEST[$I] failed result check!"
eval echo \"\${$TEST[$I]}\" eval echo \"\${$TEST[$I]}\"
dump_states dump_states
online_cpus
exit 1 exit 1
} }
@ -582,7 +587,6 @@ run_state_test()
eval echo \"\${$TEST[$I]}\" eval echo \"\${$TEST[$I]}\"
echo echo
dump_states dump_states
online_cpus
exit 1 exit 1
} }
} }
@ -594,7 +598,6 @@ run_state_test()
eval echo \"\${$TEST[$I]}\" eval echo \"\${$TEST[$I]}\"
echo echo
dump_states dump_states
online_cpus
exit 1 exit 1
} }
} }