linux-stable/include/linux/elevator.h
Omar Sandoval efed9a3337 kyber: fix out of bounds access when preempted
__blk_mq_sched_bio_merge() gets the ctx and hctx for the current CPU and
passes the hctx to ->bio_merge(). kyber_bio_merge() then gets the ctx
for the current CPU again and uses that to get the corresponding Kyber
context in the passed hctx. However, the thread may be preempted between
the two calls to blk_mq_get_ctx(), and the ctx returned the second time
may no longer correspond to the passed hctx. This "works" accidentally
most of the time, but it can cause us to read garbage if the second ctx
came from an hctx with more ctx's than the first one (i.e., if
ctx->index_hw[hctx->type] > hctx->nr_ctx).
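
To make the race concrete, here is a condensed sketch of the pre-fix call
sequence (simplified from blk-mq-sched.c and kyber-iosched.c; error
handling and unrelated details elided):

	/* __blk_mq_sched_bio_merge(), simplified: */
	ctx = blk_mq_get_ctx(q);                      /* ctx for CPU A */
	hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); /* hctx covering CPU A */
	/* ... the thread may be preempted and migrated here ... */
	e->type->ops.bio_merge(hctx, bio, nr_segs);

	/* kyber_bio_merge(), simplified: */
	struct kyber_hctx_data *khd = hctx->sched_data;
	struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue); /* ctx for CPU B */
	/*
	 * index_hw is relative to CPU B's hctx, not the passed hctx, so
	 * this can index past the end of khd->kcqs:
	 */
	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
	spin_lock(&kcq->lock);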

This manifested as this UBSAN array index out of bounds error reported
by Jakub:

UBSAN: array-index-out-of-bounds in ../kernel/locking/qspinlock.c:130:9
index 13106 is out of range for type 'long unsigned int [128]'
Call Trace:
 dump_stack+0xa4/0xe5
 ubsan_epilogue+0x5/0x40
 __ubsan_handle_out_of_bounds.cold.13+0x2a/0x34
 queued_spin_lock_slowpath+0x476/0x480
 do_raw_spin_lock+0x1c2/0x1d0
 kyber_bio_merge+0x112/0x180
 blk_mq_submit_bio+0x1f5/0x1100
 submit_bio_noacct+0x7b0/0x870
 submit_bio+0xc2/0x3a0
 btrfs_map_bio+0x4f0/0x9d0
 btrfs_submit_data_bio+0x24e/0x310
 submit_one_bio+0x7f/0xb0
 submit_extent_page+0xc4/0x440
 __extent_writepage_io+0x2b8/0x5e0
 __extent_writepage+0x28d/0x6e0
 extent_write_cache_pages+0x4d7/0x7a0
 extent_writepages+0xa2/0x110
 do_writepages+0x8f/0x180
 __writeback_single_inode+0x99/0x7f0
 writeback_sb_inodes+0x34e/0x790
 __writeback_inodes_wb+0x9e/0x120
 wb_writeback+0x4d2/0x660
 wb_workfn+0x64d/0xa10
 process_one_work+0x53a/0xa80
 worker_thread+0x69/0x5b0
 kthread+0x20b/0x240
 ret_from_fork+0x1f/0x30

Only Kyber uses the hctx, so fix the bug by passing the request_queue to
->bio_merge() instead. BFQ and mq-deadline only need the request_queue
anyway, and Kyber can map the queues itself to avoid the mismatch.
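
A condensed sketch of the fixed lookup (from the patch; unrelated details
elided):

	static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
				    unsigned int nr_segs)
	{
		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
		struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
		struct kyber_hctx_data *khd = hctx->sched_data;
		/*
		 * ctx and hctx now come from the same blk_mq_get_ctx() call,
		 * so index_hw[hctx->type] is always in bounds for kcqs, even
		 * if the thread was migrated since entering the block layer.
		 */
		struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
		...
	}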

Fixes: a6088845c2 ("block: kyber: make kyber more friendly with merging")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Link: https://lore.kernel.org/r/c7598605401a48d5cfeadebb678abd10af22b83f.1620691329.git.osandov@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2021-05-11 08:12:14 -06:00

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_ELEVATOR_H
#define _LINUX_ELEVATOR_H

#include <linux/percpu.h>
#include <linux/hashtable.h>

#ifdef CONFIG_BLOCK

struct io_cq;
struct elevator_type;
#ifdef CONFIG_BLK_DEBUG_FS
struct blk_mq_debugfs_attr;
#endif

/*
 * Return values from elevator merger
 */
enum elv_merge {
	ELEVATOR_NO_MERGE	= 0,
	ELEVATOR_FRONT_MERGE	= 1,
	ELEVATOR_BACK_MERGE	= 2,
	ELEVATOR_DISCARD_MERGE	= 3,
};
struct blk_mq_alloc_data;
struct blk_mq_hw_ctx;

struct elevator_mq_ops {
	int (*init_sched)(struct request_queue *, struct elevator_type *);
	void (*exit_sched)(struct elevator_queue *);
	int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
	void (*depth_updated)(struct blk_mq_hw_ctx *);

	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
	bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
	void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
	void (*prepare_request)(struct request *);
	void (*finish_request)(struct request *);
	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
	bool (*has_work)(struct blk_mq_hw_ctx *);
	void (*completed_request)(struct request *, u64);
	void (*requeue_request)(struct request *);
	struct request *(*former_request)(struct request_queue *, struct request *);
	struct request *(*next_request)(struct request_queue *, struct request *);
	void (*init_icq)(struct io_cq *);
	void (*exit_icq)(struct io_cq *);
};
#define ELV_NAME_MAX	(16)

struct elv_fs_entry {
	struct attribute attr;
	ssize_t (*show)(struct elevator_queue *, char *);
	ssize_t (*store)(struct elevator_queue *, const char *, size_t);
};
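
/*
 * Example (illustrative only, names hypothetical; pattern as used by
 * mq-deadline): a scheduler exposes tunables through a NULL-terminated
 * elv_fs_entry table referenced from elevator_type.elevator_attrs:
 *
 *	static ssize_t foo_depth_show(struct elevator_queue *e, char *page)
 *	{
 *		struct foo_data *fd = e->elevator_data;
 *
 *		return sprintf(page, "%u\n", fd->depth);
 *	}
 *
 *	static struct elv_fs_entry foo_attrs[] = {
 *		__ATTR(depth, 0444, foo_depth_show, NULL),
 *		__ATTR_NULL
 *	};
 */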
/*
 * identifies an elevator type, such as AS or deadline
 */
struct elevator_type
{
	/* managed by elevator core */
	struct kmem_cache *icq_cache;

	/* fields provided by elevator implementation */
	struct elevator_mq_ops ops;

	size_t icq_size;	/* see iocontext.h */
	size_t icq_align;	/* ditto */
	struct elv_fs_entry *elevator_attrs;
	const char *elevator_name;
	const char *elevator_alias;
	const unsigned int elevator_features;
	struct module *elevator_owner;
#ifdef CONFIG_BLK_DEBUG_FS
	const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
	const struct blk_mq_debugfs_attr *hctx_debugfs_attrs;
#endif

	/* managed by elevator core */
	char icq_cache_name[ELV_NAME_MAX + 6];	/* elvname + "_io_cq" */
	struct list_head list;
};
#define ELV_HASH_BITS 6

void elv_rqhash_del(struct request_queue *q, struct request *rq);
void elv_rqhash_add(struct request_queue *q, struct request *rq);
void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
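
/*
 * The rqhash is keyed by a request's end sector, which lets the core find
 * a back-merge candidate in O(1); condensed from elv_merge() in
 * block/elevator.c:
 *
 *	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
 *	if (__rq && elv_bio_merge_ok(__rq, bio)) {
 *		*req = __rq;
 *		return ELEVATOR_BACK_MERGE;
 *	}
 */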
/*
 * each queue has an elevator_queue associated with it
 */
struct elevator_queue
{
	struct elevator_type *type;
	void *elevator_data;
	struct kobject kobj;
	struct mutex sysfs_lock;
	unsigned int registered:1;
	DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
};
/*
 * block elevator interface
 */
extern enum elv_merge elv_merge(struct request_queue *, struct request **,
				struct bio *);
extern void elv_merge_requests(struct request_queue *, struct request *,
			       struct request *);
extern void elv_merged_request(struct request_queue *, struct request *,
			       enum elv_merge);
extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
extern struct request *elv_former_request(struct request_queue *, struct request *);
extern struct request *elv_latter_request(struct request_queue *, struct request *);
/*
 * io scheduler registration
 */
extern int elv_register(struct elevator_type *);
extern void elv_unregister(struct elevator_type *);
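
/*
 * Example (illustrative only, names hypothetical; same pattern as the
 * in-tree schedulers): a scheduler module fills in an elevator_type and
 * registers it at module load:
 *
 *	static struct elevator_type foo_sched = {
 *		.ops = {
 *			.init_sched	= foo_init_sched,
 *			.exit_sched	= foo_exit_sched,
 *			.bio_merge	= foo_bio_merge,
 *			.insert_requests = foo_insert_requests,
 *			.dispatch_request = foo_dispatch_request,
 *			.has_work	= foo_has_work,
 *		},
 *		.elevator_attrs	= foo_attrs,
 *		.elevator_name	= "foo",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return elv_register(&foo_sched);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		elv_unregister(&foo_sched);
 *	}
 *
 *	module_init(foo_init);
 *	module_exit(foo_exit);
 */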
/*
 * io scheduler sysfs switching
 */
extern ssize_t elv_iosched_show(struct request_queue *, char *);
extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);

extern bool elv_bio_merge_ok(struct request *, struct bio *);
extern struct elevator_queue *elevator_alloc(struct request_queue *,
					     struct elevator_type *);
/*
 * Helper functions.
 */
extern struct request *elv_rb_former_request(struct request_queue *, struct request *);
extern struct request *elv_rb_latter_request(struct request_queue *, struct request *);

/*
 * rb support functions.
 */
extern void elv_rb_add(struct rb_root *, struct request *);
extern void elv_rb_del(struct rb_root *, struct request *);
extern struct request *elv_rb_find(struct rb_root *, sector_t);
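
/*
 * Schedulers that dispatch in LBA order keep requests in an rb_root keyed
 * by blk_rq_pos(); condensed from how mq-deadline uses these helpers
 * (dd->sort_list is a per-data-direction rb_root):
 *
 *	elv_rb_add(&dd->sort_list[data_dir], rq);	<- on insertion
 *	elv_rb_del(&dd->sort_list[data_dir], rq);	<- on dispatch or merge
 *	rq = elv_rb_find(&dd->sort_list[dir], sector);	<- front-merge lookup
 */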
/*
 * Insertion selection
 */
#define ELEVATOR_INSERT_FRONT	1
#define ELEVATOR_INSERT_BACK	2
#define ELEVATOR_INSERT_SORT	3
#define ELEVATOR_INSERT_REQUEUE	4
#define ELEVATOR_INSERT_FLUSH	5
#define ELEVATOR_INSERT_SORT_MERGE	6

#define rq_end_sector(rq)	(blk_rq_pos(rq) + blk_rq_sectors(rq))
#define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)

#define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
#define rq_fifo_clear(rq)	list_del_init(&(rq)->queuelist)
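
/*
 * The FIFO helpers pair with a per-direction request list and a
 * per-request expiry time; mq-deadline's expiry check is essentially:
 *
 *	rq = rq_entry_fifo(dd->fifo_list[data_dir].next);	oldest request
 *	if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
 *		the request has expired: service it next
 */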
/*
 * Elevator features.
 */

/* Supports zoned block devices sequential write constraint */
#define ELEVATOR_F_ZBD_SEQ_WRITE	(1U << 0)
/* Supports scheduling on multiple hardware queues */
#define ELEVATOR_F_MQ_AWARE		(1U << 1)

#endif /* CONFIG_BLOCK */
#endif