X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fmd%2Fmultipath.c;h=ee7646f974a07165bd368deb3a18f6390e5553dc;hb=ab9122cd3377c9eee85380ea2fe35125c6962a87;hp=2d2ca7fa0265261a71e2493e88b68403cc902c57;hpb=990a8baf568ca1d0ae65e59783ff821794118d07;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 2d2ca7f..ee7646f 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -19,39 +19,17 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include -#include -#include -#include -#include -#include - -#define MAJOR_NR MD_MAJOR -#define MD_DRIVER -#define MD_PERSONALITY +#include +#include +#include +#include "md.h" +#include "multipath.h" #define MAX_WORK_PER_DISK 128 #define NR_RESERVED_BUFS 32 -static mdk_personality_t multipath_personality; - - -static void *mp_pool_alloc(unsigned int __nocast gfp_flags, void *data) -{ - struct multipath_bh *mpb; - mpb = kmalloc(sizeof(*mpb), gfp_flags); - if (mpb) - memset(mpb, 0, sizeof(*mpb)); - return mpb; -} - -static void mp_pool_free(void *mpb, void *data) -{ - kfree(mpb); -} - static int multipath_map (multipath_conf_t *conf) { int i, disks = conf->raid_disks; @@ -63,8 +41,8 @@ static int multipath_map (multipath_conf_t *conf) rcu_read_lock(); for (i = 0; i < disks; i++) { - mdk_rdev_t *rdev = conf->multipaths[i].rdev; - if (rdev && rdev->in_sync) { + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); + if (rdev && test_bit(In_sync, &rdev->flags)) { atomic_inc(&rdev->nr_pending); rcu_read_unlock(); return i; @@ -80,7 +58,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh) { unsigned long flags; mddev_t *mddev = mp_bh->mddev; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; spin_lock_irqsave(&conf->device_lock, flags); list_add(&mp_bh->retry_list, &conf->retry_list); @@ -97,26 +75,22 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh) static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) { struct bio *bio = mp_bh->master_bio; - multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev); + multipath_conf_t *conf = mp_bh->mddev->private; - bio_endio(bio, bio->bi_size, err); + bio_endio(bio, err); mempool_free(mp_bh, conf->pool); } -static int multipath_end_request(struct bio *bio, unsigned int bytes_done, - int error) +static void multipath_end_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private); - multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev); + multipath_conf_t *conf = mp_bh->mddev->private; mdk_rdev_t *rdev = conf->multipaths[mp_bh->path].rdev; - if (bio->bi_size) - return 1; - if (uptodate) multipath_end_bh_io(mp_bh, 0); - else if (!bio_rw_ahead(bio)) { + else if (!bio_rw_flagged(bio, BIO_RW_AHEAD)) { /* * oops, IO error: */ @@ -129,25 +103,24 @@ static int multipath_end_request(struct bio *bio, unsigned int bytes_done, } else multipath_end_bh_io(mp_bh, error); rdev_dec_pending(rdev, conf->mddev); - return 0; } static void unplug_slaves(mddev_t *mddev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; int i; rcu_read_lock(); for (i=0; iraid_disks; i++) { - mdk_rdev_t *rdev = conf->multipaths[i].rdev; - if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) { - request_queue_t *r_queue = bdev_get_queue(rdev->bdev); + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); + if (rdev && !test_bit(Faulty, &rdev->flags) + && atomic_read(&rdev->nr_pending)) { + struct request_queue *r_queue = bdev_get_queue(rdev->bdev); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - if (r_queue->unplug_fn) - r_queue->unplug_fn(r_queue); + blk_unplug(r_queue); rdev_dec_pending(rdev, mddev); rcu_read_lock(); @@ -156,35 +129,40 @@ static void unplug_slaves(mddev_t *mddev) rcu_read_unlock(); } -static void multipath_unplug(request_queue_t *q) +static void multipath_unplug(struct request_queue *q) { unplug_slaves(q->queuedata); } -static int multipath_make_request (request_queue_t *q, struct bio * bio) +static int multipath_make_request (struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; struct multipath_bh * mp_bh; struct multipath_info *multipath; + const int rw = bio_data_dir(bio); + int cpu; + + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { + bio_endio(bio, -EOPNOTSUPP); + return 0; + } mp_bh = mempool_alloc(conf->pool, GFP_NOIO); mp_bh->master_bio = bio; mp_bh->mddev = mddev; - if (bio_data_dir(bio)==WRITE) { - disk_stat_inc(mddev->gendisk, writes); - disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bio)); - } else { - disk_stat_inc(mddev->gendisk, reads); - disk_stat_add(mddev->gendisk, read_sectors, bio_sectors(bio)); - } + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { - bio_endio(bio, bio->bi_size, -EIO); + bio_endio(bio, -EIO); mempool_free(mp_bh, conf->pool); return 0; } @@ -193,7 +171,7 @@ static int multipath_make_request (request_queue_t *q, struct bio * bio) mp_bh->bio = *bio; mp_bh->bio.bi_sector += multipath->rdev->data_offset; mp_bh->bio.bi_bdev = multipath->rdev->bdev; - mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST); + mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); mp_bh->bio.bi_end_io = multipath_end_request; mp_bh->bio.bi_private = mp_bh; generic_make_request(&mp_bh->bio); @@ -202,7 +180,7 @@ static int multipath_make_request (request_queue_t *q, struct bio * bio) static void multipath_status (struct seq_file *seq, mddev_t *mddev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; int i; seq_printf (seq, " [%d/%d] [", conf->raid_disks, @@ -210,34 +188,30 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev) for (i = 0; i < conf->raid_disks; i++) seq_printf (seq, "%s", conf->multipaths[i].rdev && - conf->multipaths[i].rdev->in_sync ? "U" : "_"); + test_bit(In_sync, &conf->multipaths[i].rdev->flags) ? "U" : "_"); seq_printf (seq, "]"); } -static int multipath_issue_flush(request_queue_t *q, struct gendisk *disk, - sector_t *error_sector) +static int multipath_congested(void *data, int bits) { - mddev_t *mddev = q->queuedata; - multipath_conf_t *conf = mddev_to_conf(mddev); + mddev_t *mddev = data; + multipath_conf_t *conf = mddev->private; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + rcu_read_lock(); - for (i=0; iraid_disks && ret == 0; i++) { - mdk_rdev_t *rdev = conf->multipaths[i].rdev; - if (rdev && !rdev->faulty) { - struct block_device *bdev = rdev->bdev; - request_queue_t *r_queue = bdev_get_queue(bdev); - - if (!r_queue->issue_flush_fn) - ret = -EOPNOTSUPP; - else { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, - error_sector); - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); - } + for (i = 0; i < mddev->raid_disks ; i++) { + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); + if (rdev && !test_bit(Faulty, &rdev->flags)) { + struct request_queue *q = bdev_get_queue(rdev->bdev); + + ret |= bdi_congested(&q->backing_dev_info, bits); + /* Just like multipath_map, we just check the + * first available device + */ + break; } } rcu_read_unlock(); @@ -249,7 +223,7 @@ static int multipath_issue_flush(request_queue_t *q, struct gendisk *disk, */ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; if (conf->working_disks <= 1) { /* @@ -264,14 +238,16 @@ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) /* * Mark disk as unusable */ - if (!rdev->faulty) { + if (!test_bit(Faulty, &rdev->flags)) { char b[BDEVNAME_SIZE]; - rdev->in_sync = 0; - rdev->faulty = 1; - mddev->sb_dirty = 1; + clear_bit(In_sync, &rdev->flags); + set_bit(Faulty, &rdev->flags); + set_bit(MD_CHANGE_DEVS, &mddev->flags); conf->working_disks--; + mddev->degraded++; printk(KERN_ALERT "multipath: IO failure on %s," - " disabling IO path. \n Operation continuing" + " disabling IO path.\n" + "multipath: Operation continuing" " on %d IO paths.\n", bdevname (rdev->bdev,b), conf->working_disks); @@ -297,7 +273,7 @@ static void print_multipath_conf (multipath_conf_t *conf) tmp = conf->multipaths + i; if (tmp->rdev) printk(" disk%d, o:%d, dev:%s\n", - i,!tmp->rdev->faulty, + i,!test_bit(Faulty, &tmp->rdev->flags), bdevname(tmp->rdev->bdev,b)); } } @@ -306,16 +282,23 @@ static void print_multipath_conf (multipath_conf_t *conf) static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) { multipath_conf_t *conf = mddev->private; - int found = 0; + struct request_queue *q; + int err = -EEXIST; int path; struct multipath_info *p; + int first = 0; + int last = mddev->raid_disks - 1; + + if (rdev->raid_disk >= 0) + first = last = rdev->raid_disk; print_multipath_conf(conf); - for (path=0; pathraid_disks; path++) + for (path = first; path <= last; path++) if ((p=conf->multipaths+path)->rdev == NULL) { - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + q = rdev->bdev->bd_disk->queue; + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as @@ -323,19 +306,23 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * (Note: it is very unlikely that a device with * merge_bvec_fn will be involved in multipath.) */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + if (q->merge_bvec_fn && + queue_max_sectors(q) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); conf->working_disks++; + mddev->degraded--; rdev->raid_disk = path; - rdev->in_sync = 1; - p->rdev = rdev; - found = 1; + set_bit(In_sync, &rdev->flags); + rcu_assign_pointer(p->rdev, rdev); + err = 0; + md_integrity_add_rdev(rdev, mddev); + break; } print_multipath_conf(conf); - return found; + + return err; } static int multipath_remove_disk(mddev_t *mddev, int number) @@ -349,9 +336,10 @@ static int multipath_remove_disk(mddev_t *mddev, int number) rdev = p->rdev; if (rdev) { - if (rdev->in_sync || + if (test_bit(In_sync, &rdev->flags) || atomic_read(&rdev->nr_pending)) { - printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number); + printk(KERN_ERR "hot-remove-disk, slot %d is identified" + " but is still operational!\n", number); err = -EBUSY; goto abort; } @@ -361,7 +349,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number) /* lost the race, try later */ err = -EBUSY; p->rdev = rdev; + goto abort; } + md_integrity_register(mddev); } abort: @@ -384,7 +374,7 @@ static void multipathd (mddev_t *mddev) struct multipath_bh *mp_bh; struct bio *bio; unsigned long flags; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; md_check_recovery(mddev); @@ -414,7 +404,7 @@ static void multipathd (mddev_t *mddev) *bio = *(mp_bh->master_bio); bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; - bio->bi_rw |= (1 << BIO_RW_FAILFAST); + bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); bio->bi_end_io = multipath_end_request; bio->bi_private = mp_bh; generic_make_request(bio); @@ -423,13 +413,23 @@ static void multipathd (mddev_t *mddev) spin_unlock_irqrestore(&conf->device_lock, flags); } +static sector_t multipath_size(mddev_t *mddev, sector_t sectors, int raid_disks) +{ + WARN_ONCE(sectors || raid_disks, + "%s does not support generic reshape\n", __func__); + + return mddev->dev_sectors; +} + static int multipath_run (mddev_t *mddev) { multipath_conf_t *conf; int disk_idx; struct multipath_info *disk; mdk_rdev_t *rdev; - struct list_head *tmp; + + if (md_check_no_bitmap(mddev)) + return -EINVAL; if (mddev->level != LEVEL_MULTIPATH) { printk("multipath: %s: raid level not set to multipath IO (%d)\n", @@ -441,8 +441,9 @@ static int multipath_run (mddev_t *mddev) * bookkeeping area. [whatever we allocate in multipath_run(), * should be freed in multipath_stop()] */ + mddev->queue->queue_lock = &mddev->queue->__queue_lock; - conf = kmalloc(sizeof(multipath_conf_t), GFP_KERNEL); + conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL); mddev->private = conf; if (!conf) { printk(KERN_ERR @@ -450,9 +451,8 @@ static int multipath_run (mddev_t *mddev) mdname(mddev)); goto out; } - memset(conf, 0, sizeof(*conf)); - conf->multipaths = kmalloc(sizeof(struct multipath_info)*mddev->raid_disks, + conf->multipaths = kzalloc(sizeof(struct multipath_info)*mddev->raid_disks, GFP_KERNEL); if (!conf->multipaths) { printk(KERN_ERR @@ -460,10 +460,9 @@ static int multipath_run (mddev_t *mddev) mdname(mddev)); goto out_free_conf; } - memset(conf->multipaths, 0, sizeof(struct multipath_info)*mddev->raid_disks); conf->working_disks = 0; - ITERATE_RDEV(mddev,rdev,tmp) { + list_for_each_entry(rdev, &mddev->disks, same_set) { disk_idx = rdev->raid_disk; if (disk_idx < 0 || disk_idx >= mddev->raid_disks) @@ -471,22 +470,21 @@ static int multipath_run (mddev_t *mddev) disk = conf->multipaths + disk_idx; disk->rdev = rdev; + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); /* as we don't honour merge_bvec_fn, we must never risk * violating it, not that we ever expect a device with * a merge_bvec_fn to be involved in multipath */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); - if (!rdev->faulty) + if (!test_bit(Faulty, &rdev->flags)) conf->working_disks++; } conf->raid_disks = mddev->raid_disks; - mddev->sb_dirty = 1; conf->mddev = mddev; spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); @@ -496,11 +494,10 @@ static int multipath_run (mddev_t *mddev) mdname(mddev)); goto out_free_conf; } - mddev->degraded = conf->raid_disks = conf->working_disks; + mddev->degraded = conf->raid_disks - conf->working_disks; - conf->pool = mempool_create(NR_RESERVED_BUFS, - mp_pool_alloc, mp_pool_free, - NULL); + conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS, + sizeof(struct multipath_bh)); if (conf->pool == NULL) { printk(KERN_ERR "multipath: couldn't allocate memory for %s\n", @@ -509,7 +506,7 @@ static int multipath_run (mddev_t *mddev) } { - mddev->thread = md_register_thread(multipathd, mddev, "%s_multipath"); + mddev->thread = md_register_thread(multipathd, mddev, NULL); if (!mddev->thread) { printk(KERN_ERR "multipath: couldn't allocate thread" " for %s\n", mdname(mddev)); @@ -523,11 +520,12 @@ static int multipath_run (mddev_t *mddev) /* * Ok, everything is just fine now */ - mddev->array_size = mddev->size; + md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); mddev->queue->unplug_fn = multipath_unplug; - mddev->queue->issue_flush_fn = multipath_issue_flush; - + mddev->queue->backing_dev_info.congested_fn = multipath_congested; + mddev->queue->backing_dev_info.congested_data = mddev; + md_integrity_register(mddev); return 0; out_free_conf: @@ -543,7 +541,7 @@ out: static int multipath_stop (mddev_t *mddev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; md_unregister_thread(mddev->thread); mddev->thread = NULL; @@ -555,9 +553,10 @@ static int multipath_stop (mddev_t *mddev) return 0; } -static mdk_personality_t multipath_personality= +static struct mdk_personality multipath_personality = { .name = "multipath", + .level = LEVEL_MULTIPATH, .owner = THIS_MODULE, .make_request = multipath_make_request, .run = multipath_run, @@ -566,19 +565,22 @@ static mdk_personality_t multipath_personality= .error_handler = multipath_error, .hot_add_disk = multipath_add_disk, .hot_remove_disk= multipath_remove_disk, + .size = multipath_size, }; static int __init multipath_init (void) { - return register_md_personality (MULTIPATH, &multipath_personality); + return register_md_personality (&multipath_personality); } static void __exit multipath_exit (void) { - unregister_md_personality (MULTIPATH); + unregister_md_personality (&multipath_personality); } module_init(multipath_init); module_exit(multipath_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-7"); /* MULTIPATH */ +MODULE_ALIAS("md-multipath"); +MODULE_ALIAS("md-level--4");