linux-stable/block/blk-mq-sysfs.c
Ye Bin 33e8a3f618 blk-mq: fix possible memleak when register 'hctx' failed
[ Upstream commit 4b7a21c57b ]

The following issue shows up during fault injection testing:
unreferenced object 0xffff888132a9f400 (size 512):
  comm "insmod", pid 308021, jiffies 4324277909 (age 509.733s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 08 f4 a9 32 81 88 ff ff  ...........2....
    08 f4 a9 32 81 88 ff ff 00 00 00 00 00 00 00 00  ...2............
  backtrace:
    [<00000000e8952bb4>] kmalloc_node_trace+0x22/0xa0
    [<00000000f9980e0f>] blk_mq_alloc_and_init_hctx+0x3f1/0x7e0
    [<000000002e719efa>] blk_mq_realloc_hw_ctxs+0x1e6/0x230
    [<000000004f1fda40>] blk_mq_init_allocated_queue+0x27e/0x910
    [<00000000287123ec>] __blk_mq_alloc_disk+0x67/0xf0
    [<00000000a2a34657>] 0xffffffffa2ad310f
    [<00000000b173f718>] 0xffffffffa2af824a
    [<0000000095a1dabb>] do_one_initcall+0x87/0x2a0
    [<00000000f32fdf93>] do_init_module+0xdf/0x320
    [<00000000cbe8541e>] load_module+0x3006/0x3390
    [<0000000069ed1bdb>] __do_sys_finit_module+0x113/0x1b0
    [<00000000a1a29ae8>] do_syscall_64+0x35/0x80
    [<000000009cd878b0>] entry_SYSCALL_64_after_hwframe+0x46/0xb0

The fault injection context is as follows:
 kobject_add
 blk_mq_register_hctx
 blk_mq_sysfs_register
 blk_register_queue
 device_add_disk
 null_add_dev.part.0 [null_blk]

'blk_mq_register_hctx' may already have added some kobjects when it fails
partway through, but it does not roll them back, and the caller has no way
to know which additions succeeded. To solve this, roll back the kobjects
that were already added when 'blk_mq_register_hctx' fails partway through.
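
For illustration only, a minimal sketch of the rollback pattern the fix
applies (register_parts/part_add/part_del are hypothetical names, not
kernel APIs; the real fix is in blk_mq_register_hctx() in the file below):

        static int register_parts(struct part *parts, int n)
        {
                int i, ret;

                for (i = 0; i < n; i++) {
                        ret = part_add(&parts[i]);
                        if (ret)
                                goto rollback;
                }
                return 0;

        rollback:
                /* Undo the additions that already succeeded. */
                while (--i >= 0)
                        part_del(&parts[i]);
                return ret;
        }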

Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20221117022940.873959-1-yebin@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
2022-12-31 13:33:03 +01:00

// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/smp.h>
#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"

static void blk_mq_sysfs_release(struct kobject *kobj)
{
        struct blk_mq_ctxs *ctxs = container_of(kobj, struct blk_mq_ctxs, kobj);

        free_percpu(ctxs->queue_ctx);
        kfree(ctxs);
}

static void blk_mq_ctx_sysfs_release(struct kobject *kobj)
{
        struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj);

        /* ctx->ctxs won't be released until all ctx are freed */
        kobject_put(&ctx->ctxs->kobj);
}

static void blk_mq_hw_sysfs_release(struct kobject *kobj)
{
        struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
                                                  kobj);

        blk_free_flush_queue(hctx->fq);
        sbitmap_free(&hctx->ctx_map);
        free_cpumask_var(hctx->cpumask);
        kfree(hctx->ctxs);
        kfree(hctx);
}

struct blk_mq_hw_ctx_sysfs_entry {
        struct attribute attr;
        ssize_t (*show)(struct blk_mq_hw_ctx *, char *);
        ssize_t (*store)(struct blk_mq_hw_ctx *, const char *, size_t);
};

static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
                                    struct attribute *attr, char *page)
{
        struct blk_mq_hw_ctx_sysfs_entry *entry;
        struct blk_mq_hw_ctx *hctx;
        struct request_queue *q;
        ssize_t res;

        entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
        hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
        q = hctx->queue;

        if (!entry->show)
                return -EIO;

        mutex_lock(&q->sysfs_lock);
        res = entry->show(hctx, page);
        mutex_unlock(&q->sysfs_lock);
        return res;
}

static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
                                     struct attribute *attr, const char *page,
                                     size_t length)
{
        struct blk_mq_hw_ctx_sysfs_entry *entry;
        struct blk_mq_hw_ctx *hctx;
        struct request_queue *q;
        ssize_t res;

        entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
        hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
        q = hctx->queue;

        if (!entry->store)
                return -EIO;

        mutex_lock(&q->sysfs_lock);
        res = entry->store(hctx, page, length);
        mutex_unlock(&q->sysfs_lock);
        return res;
}

static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx,
                                            char *page)
{
        return sprintf(page, "%u\n", hctx->tags->nr_tags);
}

static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
                                                      char *page)
{
        return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags);
}

static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
        const size_t size = PAGE_SIZE - 1;
        unsigned int i, first = 1;
        int ret = 0, pos = 0;

        for_each_cpu(i, hctx->cpumask) {
                if (first)
                        ret = snprintf(pos + page, size - pos, "%u", i);
                else
                        ret = snprintf(pos + page, size - pos, ", %u", i);

                if (ret >= size - pos)
                        break;

                first = 0;
                pos += ret;
        }

        ret = snprintf(pos + page, size + 1 - pos, "\n");
        return pos + ret;
}

static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
        .attr = {.name = "nr_tags", .mode = 0444 },
        .show = blk_mq_hw_sysfs_nr_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
        .attr = {.name = "nr_reserved_tags", .mode = 0444 },
        .show = blk_mq_hw_sysfs_nr_reserved_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
        .attr = {.name = "cpu_list", .mode = 0444 },
        .show = blk_mq_hw_sysfs_cpus_show,
};

static struct attribute *default_hw_ctx_attrs[] = {
        &blk_mq_hw_sysfs_nr_tags.attr,
        &blk_mq_hw_sysfs_nr_reserved_tags.attr,
        &blk_mq_hw_sysfs_cpus.attr,
        NULL,
};
ATTRIBUTE_GROUPS(default_hw_ctx);

static const struct sysfs_ops blk_mq_hw_sysfs_ops = {
        .show  = blk_mq_hw_sysfs_show,
        .store = blk_mq_hw_sysfs_store,
};

static struct kobj_type blk_mq_ktype = {
        .release = blk_mq_sysfs_release,
};

static struct kobj_type blk_mq_ctx_ktype = {
        .release = blk_mq_ctx_sysfs_release,
};

static struct kobj_type blk_mq_hw_ktype = {
        .sysfs_ops       = &blk_mq_hw_sysfs_ops,
        .default_groups  = default_hw_ctx_groups,
        .release         = blk_mq_hw_sysfs_release,
};

static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
{
        struct blk_mq_ctx *ctx;
        int i;

        if (!hctx->nr_ctx)
                return;

        hctx_for_each_ctx(hctx, ctx, i)
                kobject_del(&ctx->kobj);

        kobject_del(&hctx->kobj);
}
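
/*
 * Register the hctx kobject under the queue's "mq" directory, then one
 * "cpuN" kobject per software context mapped to this hctx.  If any
 * kobject_add() fails partway through, delete the kobjects that were
 * already added so the caller is not left with a partially registered
 * hierarchy (this is the rollback added by the commit above).
 */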
static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
{
        struct request_queue *q = hctx->queue;
        struct blk_mq_ctx *ctx;
        int i, j, ret;

        if (!hctx->nr_ctx)
                return 0;

        ret = kobject_add(&hctx->kobj, q->mq_kobj, "%u", hctx->queue_num);
        if (ret)
                return ret;

        hctx_for_each_ctx(hctx, ctx, i) {
                ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
                if (ret)
                        goto out;
        }

        return 0;
out:
        hctx_for_each_ctx(hctx, ctx, j) {
                if (j < i)
                        kobject_del(&ctx->kobj);
        }
        kobject_del(&hctx->kobj);
        return ret;
}

void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
{
        kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
}

void blk_mq_sysfs_deinit(struct request_queue *q)
{
        struct blk_mq_ctx *ctx;
        int cpu;

        for_each_possible_cpu(cpu) {
                ctx = per_cpu_ptr(q->queue_ctx, cpu);
                kobject_put(&ctx->kobj);
        }
        kobject_put(q->mq_kobj);
}

void blk_mq_sysfs_init(struct request_queue *q)
{
        struct blk_mq_ctx *ctx;
        int cpu;

        kobject_init(q->mq_kobj, &blk_mq_ktype);

        for_each_possible_cpu(cpu) {
                ctx = per_cpu_ptr(q->queue_ctx, cpu);

                kobject_get(q->mq_kobj);
                kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
        }
}
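
/*
 * Create the full "mq" sysfs hierarchy for a disk: the "mq" directory
 * itself, then every hardware context via blk_mq_register_hctx().  On
 * failure, the hctxs registered so far are unregistered and the "mq"
 * kobject is removed again, mirroring the per-hctx rollback above.
 */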
int blk_mq_sysfs_register(struct gendisk *disk)
{
        struct request_queue *q = disk->queue;
        struct blk_mq_hw_ctx *hctx;
        unsigned long i, j;
        int ret;

        lockdep_assert_held(&q->sysfs_dir_lock);

        ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq");
        if (ret < 0)
                goto out;

        kobject_uevent(q->mq_kobj, KOBJ_ADD);

        queue_for_each_hw_ctx(q, hctx, i) {
                ret = blk_mq_register_hctx(hctx);
                if (ret)
                        goto unreg;
        }

        q->mq_sysfs_init_done = true;

out:
        return ret;

unreg:
        queue_for_each_hw_ctx(q, hctx, j) {
                if (j < i)
                        blk_mq_unregister_hctx(hctx);
        }

        kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
        kobject_del(q->mq_kobj);
        return ret;
}

void blk_mq_sysfs_unregister(struct gendisk *disk)
{
        struct request_queue *q = disk->queue;
        struct blk_mq_hw_ctx *hctx;
        unsigned long i;

        lockdep_assert_held(&q->sysfs_dir_lock);

        queue_for_each_hw_ctx(q, hctx, i)
                blk_mq_unregister_hctx(hctx);

        kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
        kobject_del(q->mq_kobj);

        q->mq_sysfs_init_done = false;
}

void blk_mq_sysfs_unregister_hctxs(struct request_queue *q)
{
        struct blk_mq_hw_ctx *hctx;
        unsigned long i;

        mutex_lock(&q->sysfs_dir_lock);
        if (!q->mq_sysfs_init_done)
                goto unlock;

        queue_for_each_hw_ctx(q, hctx, i)
                blk_mq_unregister_hctx(hctx);

unlock:
        mutex_unlock(&q->sysfs_dir_lock);
}

int blk_mq_sysfs_register_hctxs(struct request_queue *q)
{
        struct blk_mq_hw_ctx *hctx;
        unsigned long i;
        int ret = 0;

        mutex_lock(&q->sysfs_dir_lock);
        if (!q->mq_sysfs_init_done)
                goto unlock;

        queue_for_each_hw_ctx(q, hctx, i) {
                ret = blk_mq_register_hctx(hctx);
                if (ret)
                        break;
        }

unlock:
        mutex_unlock(&q->sysfs_dir_lock);
        return ret;
}