diff --git a/block/blk-core.c b/block/blk-core.c index 90f22cc30799..3c8121072507 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -198,19 +198,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg) } EXPORT_SYMBOL(blk_dump_rq_flags); -/* - * Make sure that plugs that were pending when this function was entered, - * are now complete and requests pushed to the queue. -*/ -static inline void queue_sync_plugs(struct request_queue *q) -{ - /* - * If the current process is plugged and has barriers submitted, - * we will livelock if we don't unplug first. - */ - blk_flush_plug(current); -} - static void blk_delay_work(struct work_struct *work) { struct request_queue *q; @@ -298,7 +285,6 @@ void blk_sync_queue(struct request_queue *q) { del_timer_sync(&q->timeout); cancel_delayed_work_sync(&q->delay_work); - queue_sync_plugs(q); } EXPORT_SYMBOL(blk_sync_queue); @@ -1311,7 +1297,15 @@ static int __make_request(struct request_queue *q, struct bio *bio) plug = current->plug; if (plug) { - if (!plug->should_sort && !list_empty(&plug->list)) { + /* + * If this is the first request added after a plug, fire + * off a plug trace. If others have been added before, check + * if we have multiple devices in this plug. If so, make a + * note to sort the list before dispatch. 
+ */ + if (list_empty(&plug->list)) + trace_block_plug(q); + else if (!plug->should_sort) { struct request *__rq; __rq = list_entry_rq(plug->list.prev); @@ -2668,33 +2662,56 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) return !(rqa->q <= rqb->q); } -static void flush_plug_list(struct blk_plug *plug) +static void queue_unplugged(struct request_queue *q, unsigned int depth, + bool force_kblockd) +{ + trace_block_unplug_io(q, depth); + __blk_run_queue(q, force_kblockd); + + if (q->unplugged_fn) + q->unplugged_fn(q); +} + +void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) { struct request_queue *q; unsigned long flags; struct request *rq; + LIST_HEAD(list); + unsigned int depth; BUG_ON(plug->magic != PLUG_MAGIC); if (list_empty(&plug->list)) return; - if (plug->should_sort) - list_sort(NULL, &plug->list, plug_rq_cmp); + list_splice_init(&plug->list, &list); + + if (plug->should_sort) { + list_sort(NULL, &list, plug_rq_cmp); + plug->should_sort = 0; + } q = NULL; + depth = 0; + + /* + * Save and disable interrupts here, to avoid doing it for every + * queue lock we have to take. 
+ */ local_irq_save(flags); - while (!list_empty(&plug->list)) { - rq = list_entry_rq(plug->list.next); + while (!list_empty(&list)) { + rq = list_entry_rq(list.next); list_del_init(&rq->queuelist); BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); BUG_ON(!rq->q); if (rq->q != q) { if (q) { - __blk_run_queue(q, false); + queue_unplugged(q, depth, force_kblockd); spin_unlock(q->queue_lock); } q = rq->q; + depth = 0; spin_lock(q->queue_lock); } rq->cmd_flags &= ~REQ_ON_PLUG; @@ -2706,39 +2723,28 @@ static void flush_plug_list(struct blk_plug *plug) __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); else __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); + + depth++; } if (q) { - __blk_run_queue(q, false); + queue_unplugged(q, depth, force_kblockd); spin_unlock(q->queue_lock); } - BUG_ON(!list_empty(&plug->list)); local_irq_restore(flags); } - -static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug) -{ - flush_plug_list(plug); - - if (plug == tsk->plug) - tsk->plug = NULL; -} +EXPORT_SYMBOL(blk_flush_plug_list); void blk_finish_plug(struct blk_plug *plug) { - if (plug) - __blk_finish_plug(current, plug); + blk_flush_plug_list(plug, false); + + if (plug == current->plug) + current->plug = NULL; } EXPORT_SYMBOL(blk_finish_plug); -void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug) -{ - __blk_finish_plug(tsk, plug); - tsk->plug = plug; -} -EXPORT_SYMBOL(__blk_flush_plug); - int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/block/blk-settings.c b/block/blk-settings.c index 1fa769293597..eb949045bb12 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,6 +790,22 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); +/** + * blk_queue_unplugged - register a callback for an unplug event + * @q: the request queue for the device + * @fn: the function to call + * + * Some stacked drivers may need to know when IO is dispatched on an + * unplug event. 
By registering a callback here, they will be notified + * when someone flushes their on-stack queue plug. The function will be + * called with the queue lock held. + */ +void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn) +{ + q->unplugged_fn = fn; +} +EXPORT_SYMBOL(blk_queue_unplugged); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 261c75c665ae..6d735122bc59 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -498,7 +498,6 @@ int blk_register_queue(struct gendisk *disk) { int ret; struct device *dev = disk_to_dev(disk); - struct request_queue *q = disk->queue; if (WARN_ON(!q)) @@ -521,7 +520,7 @@ int blk_register_queue(struct gendisk *disk) if (ret) { kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - blk_trace_remove_sysfs(disk_to_dev(disk)); + blk_trace_remove_sysfs(dev); kobject_put(&dev->kobj); return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32176cc8e715..1c76506fcf11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,6 +196,7 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); +typedef void (unplugged_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -283,6 +284,7 @@ struct request_queue rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + unplugged_fn *unplugged_fn; /* * Dispatch queue sorting @@ -841,6 +843,7 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, 
rq_timed_out_fn *); +extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); @@ -862,14 +865,14 @@ struct blk_plug { extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); -extern void __blk_flush_plug(struct task_struct *, struct blk_plug *); +extern void blk_flush_plug_list(struct blk_plug *, bool); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; - if (unlikely(plug)) - __blk_flush_plug(tsk, plug); + if (plug) + blk_flush_plug_list(plug, true); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 78f18adb49c8..006e60b58306 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -401,9 +401,9 @@ TRACE_EVENT(block_plug, DECLARE_EVENT_CLASS(block_unplug, - TP_PROTO(struct request_queue *q), + TP_PROTO(struct request_queue *q, unsigned int depth), - TP_ARGS(q), + TP_ARGS(q, depth), TP_STRUCT__entry( __field( int, nr_rq ) @@ -411,39 +411,26 @@ DECLARE_EVENT_CLASS(block_unplug, ), TP_fast_assign( - __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + __entry->nr_rq = depth; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); -/** - * block_unplug_timer - timed release of operations requests in queue to device driver - * @q: request queue to unplug - * - * Unplug the request queue @q because a timer expired and allow block - * operation requests to be sent to the device driver. 
- */ -DEFINE_EVENT(block_unplug, block_unplug_timer, - - TP_PROTO(struct request_queue *q), - - TP_ARGS(q) -); - /** * block_unplug_io - release of operations requests in request queue * @q: request queue to unplug + * @depth: number of requests just added to the queue * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ DEFINE_EVENT(block_unplug, block_unplug_io, - TP_PROTO(struct request_queue *q), + TP_PROTO(struct request_queue *q, unsigned int depth), - TP_ARGS(q) + TP_ARGS(q, depth) ); /** diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7aa40f8e182d..3e3970d53d14 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -850,32 +850,19 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); } -static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) +static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q, + unsigned int depth) { struct blk_trace *bt = q->blk_trace; if (bt) { - unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; - __be64 rpdu = cpu_to_be64(pdu); + __be64 rpdu = cpu_to_be64(depth); __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, sizeof(rpdu), &rpdu); } } -static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q) -{ - struct blk_trace *bt = q->blk_trace; - - if (bt) { - unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; - __be64 rpdu = cpu_to_be64(pdu); - - __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, - sizeof(rpdu), &rpdu); - } -} - static void blk_add_trace_split(void *ignore, struct request_queue *q, struct bio *bio, unsigned int pdu) @@ -1015,8 +1002,6 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); - ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); - WARN_ON(ret); ret = 
register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); WARN_ON(ret); ret = register_trace_block_split(blk_add_trace_split, NULL); @@ -1033,7 +1018,6 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); unregister_trace_block_split(blk_add_trace_split, NULL); unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); - unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); unregister_trace_block_plug(blk_add_trace_plug, NULL); unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); unregister_trace_block_getrq(blk_add_trace_getrq, NULL); @@ -1348,7 +1332,6 @@ static const struct { [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, - [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },