Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A small collection of fixes for the current rc series.  This contains:

   - Two small blk-mq patches from Rob Elliott, cleaning up error case
     at init time.

   - A fix from Ming Lei, fixing SG merging for blk-mq where
     QUEUE_FLAG_SG_NO_MERGE is the default.

   - A dev_t minor lifetime fix from Keith, fixing an issue where a
     minor might be reused before all references to it were gone.

   - Fix from Alan Stern where an unbalanced queue bypass caused SCSI
     some headaches when it does a series of add/del on devices without
     fully registrering the queue.

   - A fix from me for improving the scaling of tag depth in blk-mq if
     we are short on memory"

* 'for-linus' of git://git.kernel.dk/linux-block:
  blk-mq: scale depth and rq map appropriate if low on memory
  Block: fix unbalanced bypass-disable in blk_register_queue
  block: Fix dev_t minor allocation lifetime
  blk-mq: cleanup after blk_mq_init_rq_map failures
  blk-mq: pass along blk_mq_alloc_tag_set return values
  blk-merge: fix blk_recount_segments
This commit is contained in:
Linus Torvalds 2014-09-13 09:39:55 -07:00
commit 645cc09381
7 changed files with 123 additions and 47 deletions

View file

@ -10,10 +10,11 @@
#include "blk.h" #include "blk.h"
static unsigned int __blk_recalc_rq_segments(struct request_queue *q, static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
struct bio *bio) struct bio *bio,
bool no_sg_merge)
{ {
struct bio_vec bv, bvprv = { NULL }; struct bio_vec bv, bvprv = { NULL };
int cluster, high, highprv = 1, no_sg_merge; int cluster, high, highprv = 1;
unsigned int seg_size, nr_phys_segs; unsigned int seg_size, nr_phys_segs;
struct bio *fbio, *bbio; struct bio *fbio, *bbio;
struct bvec_iter iter; struct bvec_iter iter;
@ -35,7 +36,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
cluster = blk_queue_cluster(q); cluster = blk_queue_cluster(q);
seg_size = 0; seg_size = 0;
nr_phys_segs = 0; nr_phys_segs = 0;
no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
high = 0; high = 0;
for_each_bio(bio) { for_each_bio(bio) {
bio_for_each_segment(bv, bio, iter) { bio_for_each_segment(bv, bio, iter) {
@ -88,18 +88,23 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
void blk_recalc_rq_segments(struct request *rq) void blk_recalc_rq_segments(struct request *rq)
{ {
rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio); bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
&rq->q->queue_flags);
rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
no_sg_merge);
} }
void blk_recount_segments(struct request_queue *q, struct bio *bio) void blk_recount_segments(struct request_queue *q, struct bio *bio)
{ {
if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags)) if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
bio->bi_vcnt < queue_max_segments(q))
bio->bi_phys_segments = bio->bi_vcnt; bio->bi_phys_segments = bio->bi_vcnt;
else { else {
struct bio *nxt = bio->bi_next; struct bio *nxt = bio->bi_next;
bio->bi_next = NULL; bio->bi_next = NULL;
bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio); bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
bio->bi_next = nxt; bio->bi_next = nxt;
} }

View file

@ -1321,6 +1321,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
continue; continue;
set->ops->exit_request(set->driver_data, tags->rqs[i], set->ops->exit_request(set->driver_data, tags->rqs[i],
hctx_idx, i); hctx_idx, i);
tags->rqs[i] = NULL;
} }
} }
@ -1354,8 +1355,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
INIT_LIST_HEAD(&tags->page_list); INIT_LIST_HEAD(&tags->page_list);
tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *), tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
GFP_KERNEL, set->numa_node); GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
set->numa_node);
if (!tags->rqs) { if (!tags->rqs) {
blk_mq_free_tags(tags); blk_mq_free_tags(tags);
return NULL; return NULL;
@ -1379,8 +1381,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
this_order--; this_order--;
do { do {
page = alloc_pages_node(set->numa_node, GFP_KERNEL, page = alloc_pages_node(set->numa_node,
this_order); GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
this_order);
if (page) if (page)
break; break;
if (!this_order--) if (!this_order--)
@ -1404,8 +1407,10 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
if (set->ops->init_request) { if (set->ops->init_request) {
if (set->ops->init_request(set->driver_data, if (set->ops->init_request(set->driver_data,
tags->rqs[i], hctx_idx, i, tags->rqs[i], hctx_idx, i,
set->numa_node)) set->numa_node)) {
tags->rqs[i] = NULL;
goto fail; goto fail;
}
} }
p += rq_size; p += rq_size;
@ -1416,7 +1421,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
return tags; return tags;
fail: fail:
pr_warn("%s: failed to allocate requests\n", __func__);
blk_mq_free_rq_map(set, tags, hctx_idx); blk_mq_free_rq_map(set, tags, hctx_idx);
return NULL; return NULL;
} }
@ -1936,6 +1940,61 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
return NOTIFY_OK; return NOTIFY_OK;
} }
static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{
int i;
for (i = 0; i < set->nr_hw_queues; i++) {
set->tags[i] = blk_mq_init_rq_map(set, i);
if (!set->tags[i])
goto out_unwind;
}
return 0;
out_unwind:
while (--i >= 0)
blk_mq_free_rq_map(set, set->tags[i], i);
set->tags = NULL;
return -ENOMEM;
}
/*
* Allocate the request maps associated with this tag_set. Note that this
* may reduce the depth asked for, if memory is tight. set->queue_depth
* will be updated to reflect the allocated depth.
*/
static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{
unsigned int depth;
int err;
depth = set->queue_depth;
do {
err = __blk_mq_alloc_rq_maps(set);
if (!err)
break;
set->queue_depth >>= 1;
if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
err = -ENOMEM;
break;
}
} while (set->queue_depth);
if (!set->queue_depth || err) {
pr_err("blk-mq: failed to allocate request map\n");
return -ENOMEM;
}
if (depth != set->queue_depth)
pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
depth, set->queue_depth);
return 0;
}
/* /*
* Alloc a tag set to be associated with one or more request queues. * Alloc a tag set to be associated with one or more request queues.
* May fail with EINVAL for various error conditions. May adjust the * May fail with EINVAL for various error conditions. May adjust the
@ -1944,8 +2003,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
*/ */
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
{ {
int i;
if (!set->nr_hw_queues) if (!set->nr_hw_queues)
return -EINVAL; return -EINVAL;
if (!set->queue_depth) if (!set->queue_depth)
@ -1966,23 +2023,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
sizeof(struct blk_mq_tags *), sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node); GFP_KERNEL, set->numa_node);
if (!set->tags) if (!set->tags)
goto out; return -ENOMEM;
for (i = 0; i < set->nr_hw_queues; i++) { if (blk_mq_alloc_rq_maps(set))
set->tags[i] = blk_mq_init_rq_map(set, i); goto enomem;
if (!set->tags[i])
goto out_unwind;
}
mutex_init(&set->tag_list_lock); mutex_init(&set->tag_list_lock);
INIT_LIST_HEAD(&set->tag_list); INIT_LIST_HEAD(&set->tag_list);
return 0; return 0;
enomem:
out_unwind: kfree(set->tags);
while (--i >= 0) set->tags = NULL;
blk_mq_free_rq_map(set, set->tags[i], i);
out:
return -ENOMEM; return -ENOMEM;
} }
EXPORT_SYMBOL(blk_mq_alloc_tag_set); EXPORT_SYMBOL(blk_mq_alloc_tag_set);
@ -1997,6 +2049,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
} }
kfree(set->tags); kfree(set->tags);
set->tags = NULL;
} }
EXPORT_SYMBOL(blk_mq_free_tag_set); EXPORT_SYMBOL(blk_mq_free_tag_set);

View file

@ -554,8 +554,10 @@ int blk_register_queue(struct gendisk *disk)
* Initialization must be complete by now. Finish the initial * Initialization must be complete by now. Finish the initial
* bypass from queue allocation. * bypass from queue allocation.
*/ */
queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); if (!blk_queue_init_done(q)) {
blk_queue_bypass_end(q); queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
blk_queue_bypass_end(q);
}
ret = blk_trace_init_sysfs(dev); ret = blk_trace_init_sysfs(dev);
if (ret) if (ret)

View file

@ -28,10 +28,10 @@ struct kobject *block_depr;
/* for extended dynamic devt allocation, currently only one major is used */ /* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS) #define NR_EXT_DEVT (1 << MINORBITS)
/* For extended devt allocation. ext_devt_mutex prevents look up /* For extended devt allocation. ext_devt_lock prevents look up
* results from going away underneath its user. * results from going away underneath its user.
*/ */
static DEFINE_MUTEX(ext_devt_mutex); static DEFINE_SPINLOCK(ext_devt_lock);
static DEFINE_IDR(ext_devt_idr); static DEFINE_IDR(ext_devt_idr);
static struct device_type disk_type; static struct device_type disk_type;
@ -420,9 +420,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
} }
/* allocate ext devt */ /* allocate ext devt */
mutex_lock(&ext_devt_mutex); idr_preload(GFP_KERNEL);
idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL);
mutex_unlock(&ext_devt_mutex); spin_lock(&ext_devt_lock);
idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
spin_unlock(&ext_devt_lock);
idr_preload_end();
if (idx < 0) if (idx < 0)
return idx == -ENOSPC ? -EBUSY : idx; return idx == -ENOSPC ? -EBUSY : idx;
@ -447,9 +451,9 @@ void blk_free_devt(dev_t devt)
return; return;
if (MAJOR(devt) == BLOCK_EXT_MAJOR) { if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
mutex_lock(&ext_devt_mutex); spin_lock(&ext_devt_lock);
idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
mutex_unlock(&ext_devt_mutex); spin_unlock(&ext_devt_lock);
} }
} }
@ -665,7 +669,6 @@ void del_gendisk(struct gendisk *disk)
sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
device_del(disk_to_dev(disk)); device_del(disk_to_dev(disk));
blk_free_devt(disk_to_dev(disk)->devt);
} }
EXPORT_SYMBOL(del_gendisk); EXPORT_SYMBOL(del_gendisk);
@ -690,13 +693,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
} else { } else {
struct hd_struct *part; struct hd_struct *part;
mutex_lock(&ext_devt_mutex); spin_lock(&ext_devt_lock);
part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
if (part && get_disk(part_to_disk(part))) { if (part && get_disk(part_to_disk(part))) {
*partno = part->partno; *partno = part->partno;
disk = part_to_disk(part); disk = part_to_disk(part);
} }
mutex_unlock(&ext_devt_mutex); spin_unlock(&ext_devt_lock);
} }
return disk; return disk;
@ -1098,6 +1101,7 @@ static void disk_release(struct device *dev)
{ {
struct gendisk *disk = dev_to_disk(dev); struct gendisk *disk = dev_to_disk(dev);
blk_free_devt(dev->devt);
disk_release_events(disk); disk_release_events(disk);
kfree(disk->random); kfree(disk->random);
disk_replace_part_tbl(disk, NULL); disk_replace_part_tbl(disk, NULL);

View file

@ -211,6 +211,7 @@ static const struct attribute_group *part_attr_groups[] = {
static void part_release(struct device *dev) static void part_release(struct device *dev)
{ {
struct hd_struct *p = dev_to_part(dev); struct hd_struct *p = dev_to_part(dev);
blk_free_devt(dev->devt);
free_part_stats(p); free_part_stats(p);
free_part_info(p); free_part_info(p);
kfree(p); kfree(p);
@ -253,7 +254,6 @@ void delete_partition(struct gendisk *disk, int partno)
rcu_assign_pointer(ptbl->last_lookup, NULL); rcu_assign_pointer(ptbl->last_lookup, NULL);
kobject_put(part->holder_dir); kobject_put(part->holder_dir);
device_del(part_to_dev(part)); device_del(part_to_dev(part));
blk_free_devt(part_devt(part));
hd_struct_put(part); hd_struct_put(part);
} }

View file

@ -3918,7 +3918,6 @@ static int mtip_block_initialize(struct driver_data *dd)
if (rv) { if (rv) {
dev_err(&dd->pdev->dev, dev_err(&dd->pdev->dev,
"Unable to allocate request queue\n"); "Unable to allocate request queue\n");
rv = -ENOMEM;
goto block_queue_alloc_init_error; goto block_queue_alloc_init_error;
} }

View file

@ -462,17 +462,21 @@ static int null_add_dev(void)
struct gendisk *disk; struct gendisk *disk;
struct nullb *nullb; struct nullb *nullb;
sector_t size; sector_t size;
int rv;
nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
if (!nullb) if (!nullb) {
rv = -ENOMEM;
goto out; goto out;
}
spin_lock_init(&nullb->lock); spin_lock_init(&nullb->lock);
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
submit_queues = nr_online_nodes; submit_queues = nr_online_nodes;
if (setup_queues(nullb)) rv = setup_queues(nullb);
if (rv)
goto out_free_nullb; goto out_free_nullb;
if (queue_mode == NULL_Q_MQ) { if (queue_mode == NULL_Q_MQ) {
@ -484,22 +488,29 @@ static int null_add_dev(void)
nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
nullb->tag_set.driver_data = nullb; nullb->tag_set.driver_data = nullb;
if (blk_mq_alloc_tag_set(&nullb->tag_set)) rv = blk_mq_alloc_tag_set(&nullb->tag_set);
if (rv)
goto out_cleanup_queues; goto out_cleanup_queues;
nullb->q = blk_mq_init_queue(&nullb->tag_set); nullb->q = blk_mq_init_queue(&nullb->tag_set);
if (!nullb->q) if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_tags; goto out_cleanup_tags;
}
} else if (queue_mode == NULL_Q_BIO) { } else if (queue_mode == NULL_Q_BIO) {
nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
if (!nullb->q) if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues; goto out_cleanup_queues;
}
blk_queue_make_request(nullb->q, null_queue_bio); blk_queue_make_request(nullb->q, null_queue_bio);
init_driver_queues(nullb); init_driver_queues(nullb);
} else { } else {
nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
if (!nullb->q) if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues; goto out_cleanup_queues;
}
blk_queue_prep_rq(nullb->q, null_rq_prep_fn); blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
blk_queue_softirq_done(nullb->q, null_softirq_done_fn); blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
init_driver_queues(nullb); init_driver_queues(nullb);
@ -509,8 +520,10 @@ static int null_add_dev(void)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
disk = nullb->disk = alloc_disk_node(1, home_node); disk = nullb->disk = alloc_disk_node(1, home_node);
if (!disk) if (!disk) {
rv = -ENOMEM;
goto out_cleanup_blk_queue; goto out_cleanup_blk_queue;
}
mutex_lock(&lock); mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list); list_add_tail(&nullb->list, &nullb_list);
@ -544,7 +557,7 @@ static int null_add_dev(void)
out_free_nullb: out_free_nullb:
kfree(nullb); kfree(nullb);
out: out:
return -ENOMEM; return rv;
} }
static int __init null_init(void) static int __init null_init(void)