diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7ba00e4c862d..843e13666e3f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2340,7 +2340,8 @@ int md_integrity_register(struct mddev *mddev)
 			       bdev_get_integrity(reference->bdev));
 
 	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
-	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
+	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
+	    bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE)) {
 		pr_err("md: failed to create integrity pool for %s\n",
 		       mdname(mddev));
 		return -EINVAL;
@@ -5569,6 +5570,7 @@ static void md_free(struct kobject *ko)
 
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->io_acct_set);
 	kfree(mddev);
 }
 
@@ -5862,7 +5864,13 @@ int md_run(struct mddev *mddev)
 	if (!bioset_initialized(&mddev->sync_set)) {
 		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 		if (err)
-			return err;
+			goto exit_bio_set;
+	}
+	if (!bioset_initialized(&mddev->io_acct_set)) {
+		err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
+				  offsetof(struct md_io_acct, bio_clone), 0);
+		if (err)
+			goto exit_sync_set;
 	}
 
 	spin_lock(&pers_lock);
@@ -5990,6 +5998,7 @@ int md_run(struct mddev *mddev)
 			blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
 		else
 			blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
+		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
 	}
 	if (pers->sync_request) {
 		if (mddev->kobj.sd &&
@@ -6039,8 +6048,11 @@ int md_run(struct mddev *mddev)
 	module_put(pers->owner);
 	md_bitmap_destroy(mddev);
 abort:
-	bioset_exit(&mddev->bio_set);
+	bioset_exit(&mddev->io_acct_set);
+exit_sync_set:
 	bioset_exit(&mddev->sync_set);
+exit_bio_set:
+	bioset_exit(&mddev->bio_set);
 	return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
@@ -6264,6 +6276,7 @@ void md_stop(struct mddev *mddev)
 	__md_stop(mddev);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->io_acct_set);
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
@@ -8568,6 +8581,38 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 }
 EXPORT_SYMBOL_GPL(md_submit_discard_bio);
 
+static void md_end_io_acct(struct bio *bio)
+{
+	struct md_io_acct *md_io_acct = bio->bi_private;
+	struct bio *orig_bio = md_io_acct->orig_bio;
+
+	orig_bio->bi_status = bio->bi_status;
+
+	bio_end_io_acct(orig_bio, md_io_acct->start_time);
+	bio_put(bio);
+	bio_endio(orig_bio);
+}
+
+/* used by personalities (raid0 and raid5) to account io stats */
+void md_account_bio(struct mddev *mddev, struct bio **bio)
+{
+	struct md_io_acct *md_io_acct;
+	struct bio *clone;
+
+	if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue))
+		return;
+
+	clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
+	md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
+	md_io_acct->orig_bio = *bio;
+	md_io_acct->start_time = bio_start_io_acct(*bio);
+
+	clone->bi_end_io = md_end_io_acct;
+	clone->bi_private = md_io_acct;
+	*bio = clone;
+}
+EXPORT_SYMBOL_GPL(md_account_bio);
+
 /* md_allow_write(mddev)
  * Calling this ensures that the array is marked 'active' so that writes
  * may proceed without blocking. It is important to call this before
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 4da240ffe2c5..4191f22acce4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -487,6 +487,7 @@ struct mddev {
 	struct bio_set			sync_set; /* for sync operations like
 						   * metadata and bitmap writes
 						   */
+	struct bio_set			io_acct_set; /* for raid0 and raid5 io accounting */
 
 	/* Generic flush handling.
 	 * The last to finish preflush schedules a worker to submit
@@ -683,6 +684,12 @@ struct md_thread {
 	void			*private;
 };
 
+struct md_io_acct {
+	struct bio *orig_bio;
+	unsigned long start_time;
+	struct bio bio_clone;
+};
+
 #define THREAD_WAKEUP 0
 
 static inline void safe_put_page(struct page *p)
@@ -714,6 +721,7 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
 extern void md_finish_reshape(struct mddev *mddev);
 void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 			struct bio *bio, sector_t start, sector_t size);
+void md_account_bio(struct mddev *mddev, struct bio **bio);
 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e5d7411cba9b..62c8b6adac70 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -546,6 +546,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 		bio = split;
 	}
 
+	if (bio->bi_pool != &mddev->bio_set)
+		md_account_bio(mddev, &bio);
+
 	orig_sector = sector;
 	zone = find_zone(mddev->private, &sector);
 	switch (conf->layout) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 841e1c1aa5e6..58e9dbc0f683 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5468,6 +5468,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
 	sector_t sector = raid_bio->bi_iter.bi_sector;
 	unsigned chunk_sects = mddev->chunk_sectors;
 	unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
+	struct r5conf *conf = mddev->private;
 
 	if (sectors < bio_sectors(raid_bio)) {
 		struct r5conf *conf = mddev->private;
@@ -5477,6 +5478,9 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
 		raid_bio = split;
 	}
 
+	if (raid_bio->bi_pool != &conf->bio_split)
+		md_account_bio(mddev, &raid_bio);
+
 	if (!raid5_read_one_chunk(mddev, raid_bio))
 		return raid_bio;
 
@@ -5756,6 +5760,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	DEFINE_WAIT(w);
 	bool do_prepare;
 	bool do_flush = false;
+	bool do_clone = false;
 
 	if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
 		int ret = log_handle_flush_request(conf, bi);
@@ -5784,6 +5789,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	if (rw == READ && mddev->degraded == 0 &&
 	    mddev->reshape_position == MaxSector) {
 		bi = chunk_aligned_read(mddev, bi);
+		do_clone = true;
 		if (!bi)
 			return true;
 	}
@@ -5798,6 +5804,9 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 
+	if (!do_clone)
+		md_account_bio(mddev, &bi);
+
 	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 	for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
 		int previous;