blkcg: Restructure blkg_conf_prep() and friends

We want to support lazy init of rq-qos policies so that iolatency is enabled
lazily on configuration instead of gendisk initialization. The way blkg
config helpers are structured now is a bit awkward for that. Let's
restructure:

* blkcg_conf_open_bdev() is renamed to blkg_conf_open_bdev(). The blkcg_
  prefix was used because the bdev opening step is blkg-independent.
  However, the distinction is too subtle and confuses more than helps. Let's
  switch to blkg prefix so that it's consistent with the type and other
  helper names.

* struct blkg_conf_ctx now remembers the original input string and is always
  initialized by the new blkg_conf_init().

* blkg_conf_open_bdev() is updated to take a pointer to blkg_conf_ctx like
  blkg_conf_prep() and can be called multiple times safely. Instead of
  modifying the double pointer to input string directly,
  blkg_conf_open_bdev() now sets blkg_conf_ctx->body.

* blkg_conf_finish() is renamed to blkg_conf_exit() for symmetry and now
  must be called on all blkg_conf_ctx's which were initialized with
  blkg_conf_init().

Combined, this allows the users to either open the bdev first or do it
altogether with blkg_conf_prep() which will help implementing lazy init of
rq-qos policies.

blkg_conf_init/exit() will also be used implement synchronization against
device removal. This is necessary because iolat / iocost are configured
through cgroupfs instead of one of the files under /sys/block/DEVICE. As
cgroupfs operations aren't synchronized with block layer, the lazy init and
other configuration operations may race against device removal. This patch
makes blkg_conf_init/exit() used consistently for all cgroup-orginating
configurations making them a good place to implement explicit
synchronization.

Users are updated accordingly. No behavior change is intended by this patch.

v2: bfq wasn't updated in v1 causing a build error. Fixed.

v3: Update the description to include future use of blkg_conf_init/exit() as
    synchronization points.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Yu Kuai <yukuai1@huaweicloud.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20230413000649.115785-3-tj@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Tejun Heo 2023-04-12 14:06:47 -10:00 committed by Jens Axboe
parent 83462a6c97
commit faffaab289
6 changed files with 128 additions and 79 deletions

View File

@ -1105,9 +1105,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
struct bfq_group *bfqg;
u64 v;
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
blkg_conf_init(&ctx, buf);
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
if (ret)
return ret;
goto out;
if (sscanf(ctx.body, "%llu", &v) == 1) {
/* require "default" on dfl */
@ -1129,7 +1131,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
ret = 0;
}
out:
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return ret ?: nbytes;
}

View File

@ -653,69 +653,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
/**
* blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
* @inputp: input string pointer
* blkg_conf_init - initialize a blkg_conf_ctx
* @ctx: blkg_conf_ctx to initialize
* @input: input string
*
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update
* from @input and get and return the matching bdev. *@inputp is
* updated to point past the device node prefix. Returns an ERR_PTR()
* value on error.
*
* Use this function iff blkg_conf_prep() can't be used for some reason.
* Initialize @ctx which can be used to parse blkg config input string @input.
* Once initialized, @ctx can be used with blkg_conf_open_bdev() and
* blkg_conf_prep(), and must be cleaned up with blkg_conf_exit().
*/
struct block_device *blkcg_conf_open_bdev(char **inputp)
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input)
{
char *input = *inputp;
*ctx = (struct blkg_conf_ctx){ .input = input };
}
EXPORT_SYMBOL_GPL(blkg_conf_init);
/**
* blkg_conf_open_bdev - parse and open bdev for per-blkg config update
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
* @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is
* set to point past the device node prefix.
*
* This function may be called multiple times on @ctx and the extra calls become
* NOOPs. blkg_conf_prep() implicitly calls this function. Use this function
* explicitly if bdev access is needed without resolving the blkcg / policy part
* of @ctx->input. Returns -errno on error.
*/
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
{
char *input = ctx->input;
unsigned int major, minor;
struct block_device *bdev;
int key_len;
if (ctx->bdev)
return 0;
if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
return ERR_PTR(-EINVAL);
return -EINVAL;
input += key_len;
if (!isspace(*input))
return ERR_PTR(-EINVAL);
return -EINVAL;
input = skip_spaces(input);
bdev = blkdev_get_no_open(MKDEV(major, minor));
if (!bdev)
return ERR_PTR(-ENODEV);
return -ENODEV;
if (bdev_is_partition(bdev)) {
blkdev_put_no_open(bdev);
return ERR_PTR(-ENODEV);
return -ENODEV;
}
*inputp = input;
return bdev;
ctx->body = input;
ctx->bdev = bdev;
return 0;
}
/**
* blkg_conf_prep - parse and prepare for per-blkg config update
* @blkcg: target block cgroup
* @pol: target policy
* @input: input string
* @ctx: blkg_conf_ctx to be filled
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
* Parse per-blkg config update from @input and initialize @ctx with the
* result. @ctx->blkg points to the blkg to be updated and @ctx->body the
* part of @input following MAJ:MIN. This function returns with queue lock
* held and must be paired with blkg_conf_finish().
* Parse per-blkg config update from @ctx->input and initialize @ctx
* accordingly. On success, @ctx->body points to the part of @ctx->input
* following MAJ:MIN, @ctx->bdev points to the target block device and
* @ctx->blkg to the blkg being configured.
*
* blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this
* function returns with queue lock held and must be followed by
* blkg_conf_exit().
*/
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx)
struct blkg_conf_ctx *ctx)
__acquires(&bdev->bd_queue->queue_lock)
{
struct block_device *bdev;
struct gendisk *disk;
struct request_queue *q;
struct blkcg_gq *blkg;
int ret;
bdev = blkcg_conf_open_bdev(&input);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
disk = bdev->bd_disk;
ret = blkg_conf_open_bdev(ctx);
if (ret)
return ret;
disk = ctx->bdev->bd_disk;
q = disk->queue;
/*
@ -793,9 +817,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
}
success:
blk_queue_exit(q);
ctx->bdev = bdev;
ctx->blkg = blkg;
ctx->body = input;
return 0;
fail_preloaded:
@ -805,7 +827,6 @@ fail_unlock:
fail_exit_queue:
blk_queue_exit(q);
fail:
blkdev_put_no_open(bdev);
/*
* If queue was bypassing, we should retry. Do so after a
* short msleep(). It isn't strictly necessary but queue
@ -821,19 +842,27 @@ fail:
EXPORT_SYMBOL_GPL(blkg_conf_prep);
/**
* blkg_conf_finish - finish up per-blkg config update
* @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
* blkg_conf_exit - clean up per-blkg config update
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
* Finish up after per-blkg config update. This function must be paired
* with blkg_conf_prep().
* Clean up after per-blkg config update. This function must be called on all
* blkg_conf_ctx's initialized with blkg_conf_init().
*/
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
__releases(&ctx->bdev->bd_queue->queue_lock)
{
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
blkdev_put_no_open(ctx->bdev);
if (ctx->blkg) {
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
ctx->blkg = NULL;
}
if (ctx->bdev) {
blkdev_put_no_open(ctx->bdev);
ctx->body = NULL;
ctx->bdev = NULL;
}
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
EXPORT_SYMBOL_GPL(blkg_conf_exit);
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
{

View File

@ -206,15 +206,17 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
struct blkg_conf_ctx {
char *input;
char *body;
struct block_device *bdev;
struct blkcg_gq *blkg;
char *body;
};
struct block_device *blkcg_conf_open_bdev(char **inputp);
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
struct blkg_conf_ctx *ctx);
void blkg_conf_exit(struct blkg_conf_ctx *ctx);
/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg

View File

@ -3106,9 +3106,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
return nbytes;
}
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
blkg_conf_init(&ctx, buf);
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
if (ret)
return ret;
goto err;
iocg = blkg_to_iocg(ctx.blkg);
@ -3127,12 +3129,14 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
weight_updated(iocg, &now);
spin_unlock(&iocg->ioc->lock);
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return nbytes;
einval:
blkg_conf_finish(&ctx);
return -EINVAL;
ret = -EINVAL;
err:
blkg_conf_exit(&ctx);
return ret;
}
static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
@ -3189,19 +3193,22 @@ static const match_table_t qos_tokens = {
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
struct block_device *bdev;
struct blkg_conf_ctx ctx;
struct gendisk *disk;
struct ioc *ioc;
u32 qos[NR_QOS_PARAMS];
bool enable, user;
char *p;
char *body, *p;
int ret;
bdev = blkcg_conf_open_bdev(&input);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
blkg_conf_init(&ctx, input);
disk = bdev->bd_disk;
ret = blkg_conf_open_bdev(&ctx);
if (ret)
goto err;
body = ctx.body;
disk = ctx.bdev->bd_disk;
if (!queue_is_mq(disk->queue)) {
ret = -EOPNOTSUPP;
goto err;
@ -3223,7 +3230,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
enable = ioc->enabled;
user = ioc->user_qos_params;
while ((p = strsep(&input, " \t\n"))) {
while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
@ -3313,7 +3320,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue);
blkdev_put_no_open(bdev);
blkg_conf_exit(&ctx);
return nbytes;
einval:
spin_unlock_irq(&ioc->lock);
@ -3323,7 +3330,7 @@ einval:
ret = -EINVAL;
err:
blkdev_put_no_open(bdev);
blkg_conf_exit(&ctx);
return ret;
}
@ -3376,19 +3383,22 @@ static const match_table_t i_lcoef_tokens = {
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
struct block_device *bdev;
struct blkg_conf_ctx ctx;
struct request_queue *q;
struct ioc *ioc;
u64 u[NR_I_LCOEFS];
bool user;
char *p;
char *body, *p;
int ret;
bdev = blkcg_conf_open_bdev(&input);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
blkg_conf_init(&ctx, input);
q = bdev_get_queue(bdev);
ret = blkg_conf_open_bdev(&ctx);
if (ret)
goto err;
body = ctx.body;
q = bdev_get_queue(ctx.bdev);
if (!queue_is_mq(q)) {
ret = -EOPNOTSUPP;
goto err;
@ -3396,7 +3406,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
ioc = q_to_ioc(q);
if (!ioc) {
ret = blk_iocost_init(bdev->bd_disk);
ret = blk_iocost_init(ctx.bdev->bd_disk);
if (ret)
goto err;
ioc = q_to_ioc(q);
@ -3409,7 +3419,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
memcpy(u, ioc->params.i_lcoefs, sizeof(u));
user = ioc->user_cost_model;
while ((p = strsep(&input, " \t\n"))) {
while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
@ -3456,7 +3466,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
blkdev_put_no_open(bdev);
blkg_conf_exit(&ctx);
return nbytes;
einval:
@ -3467,7 +3477,7 @@ einval:
ret = -EINVAL;
err:
blkdev_put_no_open(bdev);
blkg_conf_exit(&ctx);
return ret;
}

View File

@ -836,9 +836,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
u64 oldval;
int ret;
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
blkg_conf_init(&ctx, buf);
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, &ctx);
if (ret)
return ret;
goto out;
iolat = blkg_to_lat(ctx.blkg);
p = ctx.body;
@ -874,7 +876,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
iolatency_clear_scaling(blkg);
ret = 0;
out:
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return ret ?: nbytes;
}

View File

@ -1368,9 +1368,11 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
int ret;
u64 v;
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
blkg_conf_init(&ctx, buf);
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
if (ret)
return ret;
goto out_finish;
ret = -EINVAL;
if (sscanf(ctx.body, "%llu", &v) != 1)
@ -1389,7 +1391,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
tg_conf_updated(tg, false);
ret = 0;
out_finish:
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return ret ?: nbytes;
}
@ -1561,9 +1563,11 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
int ret;
int index = of_cft(of)->private;
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
blkg_conf_init(&ctx, buf);
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
if (ret)
return ret;
goto out_finish;
tg = blkg_to_tg(ctx.blkg);
tg_update_carryover(tg);
@ -1662,7 +1666,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
tg->td->limit_valid[LIMIT_LOW]);
ret = 0;
out_finish:
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return ret ?: nbytes;
}