linux-stable/include/linux/elevator.h
Jan Kara b6e68ee825 blk-mq: Improve performance of non-mq IO schedulers with multiple HW queues
Currently when a non-mq aware IO scheduler (BFQ, mq-deadline) is used for
a queue with multiple HW queues, the performance is rather bad. The
problem is that these IO schedulers use queue-wide locking and their
dispatch function does not respect the hctx it is passed in and returns
any request it finds appropriate. Thus locality of request access is
broken and dispatch from multiple CPUs just contends on IO scheduler
locks. For these IO schedulers there's little point in dispatching from
multiple CPUs. Instead dispatch always only from a single CPU to limit
contention.

Below is a comparison of dbench runs on an XFS filesystem where the storage
is a raid card with 64 HW queues and a single rotating disk attached to
it. BFQ is used as the IO scheduler:

      clients           MQ                     SQ             MQ-Patched
Amean 1      39.12 (0.00%)       43.29 * -10.67%*       36.09 *   7.74%*
Amean 2     128.58 (0.00%)      101.30 *  21.22%*       96.14 *  25.23%*
Amean 4     577.42 (0.00%)      494.47 *  14.37%*      508.49 *  11.94%*
Amean 8     610.95 (0.00%)      363.86 *  40.44%*      362.12 *  40.73%*
Amean 16    391.78 (0.00%)      261.49 *  33.25%*      282.94 *  27.78%*
Amean 32    324.64 (0.00%)      267.71 *  17.54%*      233.00 *  28.23%*
Amean 64    295.04 (0.00%)      253.02 *  14.24%*      242.37 *  17.85%*
Amean 512 10281.61 (0.00%)    10211.16 *   0.69%*    10447.53 *  -1.61%*

Numbers are times so lower is better. MQ is stock 5.10-rc6 kernel. SQ is
the same kernel with megaraid_sas.host_tagset_enable=0 so that the card
advertises just a single HW queue. MQ-Patched is a kernel with this
patch applied.

You can see multiple hardware queues heavily hurt performance in
combination with BFQ. The patch restores the performance.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2021-01-24 18:19:46 -07:00

179 lines
5.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_ELEVATOR_H
#define _LINUX_ELEVATOR_H
#include <linux/percpu.h>
#include <linux/hashtable.h>
#ifdef CONFIG_BLOCK
struct io_cq;
struct elevator_type;
#ifdef CONFIG_BLK_DEBUG_FS
struct blk_mq_debugfs_attr;
#endif
/*
 * Outcome codes of the elevator merge logic.
 *
 * elv_merge() returns one of these, and the same type is passed on to
 * elv_merged_request() and to the ->request_merged() scheduler hook.
 * The numeric values are spelled out explicitly and must not change.
 */
enum elv_merge {
	ELEVATOR_NO_MERGE	= 0,
	ELEVATOR_FRONT_MERGE	= 1,
	ELEVATOR_BACK_MERGE	= 2,
	ELEVATOR_DISCARD_MERGE	= 3,
};
struct blk_mq_alloc_data;
struct blk_mq_hw_ctx;

/*
 * Table of hooks an I/O scheduler provides; struct elevator_type embeds
 * one of these by value as ->ops.
 *
 * The grouping below follows the hook names and argument types only.
 * When and under which locks each hook is invoked is decided by the
 * block layer core and cannot be read off this header.
 */
struct elevator_mq_ops {
	/* setup / teardown: scheduler-wide and per hardware context */
	int (*init_sched)(struct request_queue *, struct elevator_type *);
	void (*exit_sched)(struct elevator_queue *);
	int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
	void (*depth_updated)(struct blk_mq_hw_ctx *);

	/* merging of bios and requests */
	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
	bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int);
	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
	void (*requests_merged)(struct request_queue *, struct request *, struct request *);

	/* per-request hooks around allocation and release */
	void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
	void (*prepare_request)(struct request *);
	void (*finish_request)(struct request *);

	/*
	 * Insertion and dispatch.  ->dispatch_request() is handed a
	 * hardware context and hands back a request.
	 */
	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
	bool (*has_work)(struct blk_mq_hw_ctx *);
	void (*completed_request)(struct request *, u64);
	void (*requeue_request)(struct request *);

	/* lookup of a request's neighbour in either direction */
	struct request *(*former_request)(struct request_queue *, struct request *);
	struct request *(*next_request)(struct request_queue *, struct request *);

	/* struct io_cq setup / teardown */
	void (*init_icq)(struct io_cq *);
	void (*exit_icq)(struct io_cq *);
};
/*
 * Size bound used for elevator names (presumably the maximum name
 * length -- confirm against the users).  struct elevator_type sizes its
 * icq_cache_name buffer as ELV_NAME_MAX + 6, the extra 6 covering the
 * "_io_cq" suffix.
 */
#define ELV_NAME_MAX (16)
/*
 * One elevator attribute: a struct attribute bundled with the show and
 * store handlers, both of which receive the elevator_queue to act on.
 * struct elevator_type refers to its entries through ->elevator_attrs.
 */
struct elv_fs_entry {
	struct attribute attr;
	ssize_t (*show)(struct elevator_queue *, char *);
	ssize_t (*store)(struct elevator_queue *, const char *, size_t);
};
/*
 * Describes one I/O scheduler implementation, for example mq-deadline
 * or BFQ.
 */
struct elevator_type {
	/* managed by elevator core */
	struct kmem_cache *icq_cache;

	/* fields provided by elevator implementation */
	struct elevator_mq_ops ops;
	size_t icq_size;	/* see iocontext.h */
	size_t icq_align;	/* ditto */
	struct elv_fs_entry *elevator_attrs;
	const char *elevator_name;
	const char *elevator_alias;
	/* presumably an OR of the ELEVATOR_F_* bits -- confirm with users */
	const unsigned int elevator_features;
	struct module *elevator_owner;
#ifdef CONFIG_BLK_DEBUG_FS
	const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
	const struct blk_mq_debugfs_attr *hctx_debugfs_attrs;
#endif

	/* managed by elevator core */
	char icq_cache_name[ELV_NAME_MAX + 6];	/* elvname + "_io_cq" */
	struct list_head list;
};
/* Bit count passed to DECLARE_HASHTABLE() for elevator_queue.hash. */
#define ELV_HASH_BITS 6
/*
 * Request hash helpers, defined outside this header.  Going by the
 * names they delete, add, reposition and look up a request (the lookup
 * is keyed by a sector_t offset) -- confirm the exact semantics against
 * the implementation.
 */
void elv_rqhash_del(struct request_queue *q, struct request *rq);
void elv_rqhash_add(struct request_queue *q, struct request *rq);
void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
/*
 * Per-queue elevator instance: each queue has one elevator_queue
 * associated with it.
 */
struct elevator_queue {
	struct elevator_type *type;
	void *elevator_data;		/* opaque pointer; presumably scheduler-private */
	struct kobject kobj;
	struct mutex sysfs_lock;
	unsigned int registered:1;	/* one-bit flag */
	DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
};
/*
 * block elevator interface
 *
 * Declarations only; the functions are defined outside this header.
 * "extern" is implied for function declarations and is left out.
 */
enum elv_merge elv_merge(struct request_queue *, struct request **,
			 struct bio *);
void elv_merge_requests(struct request_queue *, struct request *,
			struct request *);
void elv_merged_request(struct request_queue *, struct request *,
			enum elv_merge);
bool elv_attempt_insert_merge(struct request_queue *, struct request *);
struct request *elv_former_request(struct request_queue *, struct request *);
struct request *elv_latter_request(struct request_queue *, struct request *);
/*
 * io scheduler registration
 *
 * Both calls take the elevator_type describing the scheduler.
 */
int elv_register(struct elevator_type *);
void elv_unregister(struct elevator_type *);
/*
 * io scheduler sysfs switching
 */
ssize_t elv_iosched_show(struct request_queue *, char *);
ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);

/* Declared in the same section; by name these are not sysfs-specific. */
bool elv_bio_merge_ok(struct request *, struct bio *);
struct elevator_queue *elevator_alloc(struct request_queue *,
				      struct elevator_type *);
/*
 * Helper functions.
 *
 * Their signatures match the ->former_request / ->next_request hooks of
 * struct elevator_mq_ops.
 */
struct request *elv_rb_former_request(struct request_queue *, struct request *);
struct request *elv_rb_latter_request(struct request_queue *, struct request *);
/*
 * rb support functions.
 *
 * Each operates on a caller-supplied struct rb_root; the lookup takes a
 * sector_t key.
 */
void elv_rb_add(struct rb_root *, struct request *);
void elv_rb_del(struct rb_root *, struct request *);
struct request *elv_rb_find(struct rb_root *, sector_t);
/*
 * Insertion selection
 *
 * Plain integer selectors; the numeric values are fixed.  The code
 * consuming them is not part of this header.
 */
#define ELEVATOR_INSERT_FRONT		1
#define ELEVATOR_INSERT_BACK		2
#define ELEVATOR_INSERT_SORT		3
#define ELEVATOR_INSERT_REQUEUE		4
#define ELEVATOR_INSERT_FLUSH		5
#define ELEVATOR_INSERT_SORT_MERGE	6
/*
 * Sum of blk_rq_pos(rq) and blk_rq_sectors(rq); presumably the first
 * sector past the end of the request.  Note that rq is evaluated twice.
 */
#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq))
/* Map a pointer to a request's rb_node member back to the struct request. */
#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
/* Map a pointer to a request's queuelist member back to the struct request. */
#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
/* Run list_del_init() on the request's queuelist member. */
#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist)
/*
 * Elevator features.
 *
 * Each feature is a distinct single bit, so features can be OR-ed
 * together into one unsigned word.
 */
/* Supports zoned block devices sequential write constraint */
#define ELEVATOR_F_ZBD_SEQ_WRITE	(1U << 0)
/* Supports scheduling on multiple hardware queues */
#define ELEVATOR_F_MQ_AWARE		(1U << 1)
#endif /* CONFIG_BLOCK */
#endif