X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fmd%2Fraid1.c;h=23a7516abbfd42b8f530aa0e361955429242b11b;hb=54071b3808ee3dc8624d9d6f1b06c4fd5308fa3b;hp=ff7ed33359959e39747509e4a16a24a0c584b292;hpb=1f98a13f623e0ef666690a18c1250335fc6d7ef1;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ff7ed33..23a7516 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -64,7 +64,7 @@ static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) /* allocate a r1bio with room for raid_disks entries in the bios array */ r1_bio = kzalloc(size, gfp_flags); - if (!r1_bio) + if (!r1_bio && pi->mddev) unplug_slaves(pi->mddev); return r1_bio; @@ -262,7 +262,7 @@ static inline void update_head_pos(int disk, r1bio_t *r1_bio) static void raid1_end_read_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); + r1bio_t *r1_bio = bio->bi_private; int mirror; conf_t *conf = r1_bio->mddev->private; @@ -307,7 +307,7 @@ static void raid1_end_read_request(struct bio *bio, int error) static void raid1_end_write_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); + r1bio_t *r1_bio = bio->bi_private; int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); conf_t *conf = r1_bio->mddev->private; struct bio *to_put = NULL; @@ -576,6 +576,9 @@ static int raid1_congested(void *data, int bits) conf_t *conf = mddev->private; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + rcu_read_lock(); for (i = 0; i < mddev->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); @@ -674,6 +677,7 @@ static void raise_barrier(conf_t *conf) static void lower_barrier(conf_t *conf) { unsigned long flags; + BUG_ON(conf->barrier <= 0); spin_lock_irqsave(&conf->resync_lock, flags); conf->barrier--; spin_unlock_irqrestore(&conf->resync_lock, flags); @@ -798,6 +802,25 @@ static int make_request(struct request_queue *q, struct bio * bio) md_write_start(mddev, bio); /* wait on superblock update early */ + if (bio_data_dir(bio) == WRITE && + bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo && + bio->bi_sector < mddev->suspend_hi) { + /* As the suspend_* range is controlled by + * userspace, we want an interruptible + * wait. + */ + DEFINE_WAIT(w); + for (;;) { + flush_signals(current); + prepare_to_wait(&conf->wait_barrier, + &w, TASK_INTERRUPTIBLE); + if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo || + bio->bi_sector >= mddev->suspend_hi) + break; + schedule(); + } + finish_wait(&conf->wait_barrier, &w); + } if (unlikely(!mddev->barriers_work && bio_rw_flagged(bio, BIO_RW_BARRIER))) { if (rw == WRITE) @@ -851,7 +874,7 @@ static int make_request(struct request_queue *q, struct bio * bio) read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid1_end_read_request; - read_bio->bi_rw = READ | do_sync; + read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); read_bio->bi_private = r1_bio; generic_make_request(read_bio); @@ -888,9 +911,10 @@ static int make_request(struct request_queue *q, struct bio * bio) if (test_bit(Faulty, &rdev->flags)) { rdev_dec_pending(rdev, mddev); r1_bio->bios[i] = NULL; - } else + } else { r1_bio->bios[i] = bio; - targets++; + targets++; + } } else r1_bio->bios[i] = NULL; } @@ -920,7 +944,8 @@ static int make_request(struct request_queue *q, struct bio * bio) /* do behind I/O ? */ if (bitmap && - atomic_read(&bitmap->behind_writes) < bitmap->max_write_behind && + (atomic_read(&bitmap->behind_writes) + < mddev->bitmap_info.max_write_behind) && (behind_pages = alloc_behind_pages(bio)) != NULL) set_bit(R1BIO_BehindIO, &r1_bio->state); @@ -943,7 +968,8 @@ static int make_request(struct request_queue *q, struct bio * bio) mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; mbio->bi_bdev = conf->mirrors[i].rdev->bdev; mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | do_barriers | do_sync; + mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) | + (do_sync << BIO_RW_SYNCIO); mbio->bi_private = r1_bio; if (behind_pages) { @@ -1127,13 +1153,17 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + /* as we don't honour merge_bvec_fn, we must + * never risk violating it, so limit + * ->max_segments to one lying with a single + * page, as a one page request is never in + * violation. */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } p->head_position = 0; rdev->raid_disk = mirror; @@ -1193,7 +1223,7 @@ abort: static void end_sync_read(struct bio *bio, int error) { - r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); + r1bio_t *r1_bio = bio->bi_private; int i; for (i=r1_bio->mddev->raid_disks; i--; ) @@ -1216,7 +1246,7 @@ static void end_sync_read(struct bio *bio, int error) static void end_sync_write(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); + r1bio_t *r1_bio = bio->bi_private; mddev_t *mddev = r1_bio->mddev; conf_t *conf = mddev->private; int i; @@ -1623,7 +1653,8 @@ static void raid1d(mddev_t *mddev) conf->mirrors[i].rdev->data_offset; bio->bi_bdev = conf->mirrors[i].rdev->bdev; bio->bi_end_io = raid1_end_write_request; - bio->bi_rw = WRITE | do_sync; + bio->bi_rw = WRITE | + (do_sync << BIO_RW_SYNCIO); bio->bi_private = r1_bio; r1_bio->bios[i] = bio; generic_make_request(bio); @@ -1645,11 +1676,12 @@ static void raid1d(mddev_t *mddev) r1_bio->sector, r1_bio->sectors); unfreeze_array(conf); - } + } else + md_error(mddev, + conf->mirrors[r1_bio->read_disk].rdev); bio = r1_bio->bios[r1_bio->read_disk]; - if ((disk=read_balance(conf, r1_bio)) == -1 || - disk == r1_bio->read_disk) { + if ((disk=read_balance(conf, r1_bio)) == -1) { printk(KERN_ALERT "raid1: %s: unrecoverable I/O" " read error for block %llu\n", bdevname(bio->bi_bdev,b), @@ -1672,12 +1704,13 @@ static void raid1d(mddev_t *mddev) bio->bi_sector = r1_bio->sector + rdev->data_offset; bio->bi_bdev = rdev->bdev; bio->bi_end_io = raid1_end_read_request; - bio->bi_rw = READ | do_sync; + bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); bio->bi_private = r1_bio; unplug = 1; generic_make_request(bio); } } + cond_resched(); } if (unplug) unplug_slaves(mddev); @@ -1934,73 +1967,48 @@ static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks) return mddev->dev_sectors; } -static int run(mddev_t *mddev) +static conf_t *setup_conf(mddev_t *mddev) { conf_t *conf; - int i, j, disk_idx; + int i; mirror_info_t *disk; mdk_rdev_t *rdev; + int err = -ENOMEM; - if (mddev->level != 1) { - printk("raid1: %s: raid level not set to mirroring (%d)\n", - mdname(mddev), mddev->level); - goto out; - } - if (mddev->reshape_position != MaxSector) { - printk("raid1: %s: reshape_position set but not supported\n", - mdname(mddev)); - goto out; - } - /* - * copy the already verified devices into our private RAID1 - * bookkeeping area. [whatever we allocate in run(), - * should be freed in stop()] - */ conf = kzalloc(sizeof(conf_t), GFP_KERNEL); - mddev->private = conf; if (!conf) - goto out_no_mem; + goto abort; conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, GFP_KERNEL); if (!conf->mirrors) - goto out_no_mem; + goto abort; conf->tmppage = alloc_page(GFP_KERNEL); if (!conf->tmppage) - goto out_no_mem; + goto abort; - conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); + conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL); if (!conf->poolinfo) - goto out_no_mem; - conf->poolinfo->mddev = mddev; + goto abort; conf->poolinfo->raid_disks = mddev->raid_disks; conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, r1bio_pool_free, conf->poolinfo); if (!conf->r1bio_pool) - goto out_no_mem; + goto abort; - spin_lock_init(&conf->device_lock); - mddev->queue->queue_lock = &conf->device_lock; + conf->poolinfo->mddev = mddev; + spin_lock_init(&conf->device_lock); list_for_each_entry(rdev, &mddev->disks, same_set) { - disk_idx = rdev->raid_disk; + int disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks || disk_idx < 0) continue; disk = conf->mirrors + disk_idx; disk->rdev = rdev; - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. - */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->head_position = 0; } @@ -2014,8 +2022,7 @@ static int run(mddev_t *mddev) bio_list_init(&conf->pending_bio_list); bio_list_init(&conf->flushing_bio_list); - - mddev->degraded = 0; + conf->last_used = -1; for (i = 0; i < conf->raid_disks; i++) { disk = conf->mirrors + i; @@ -2023,38 +2030,99 @@ static int run(mddev_t *mddev) if (!disk->rdev || !test_bit(In_sync, &disk->rdev->flags)) { disk->head_position = 0; - mddev->degraded++; if (disk->rdev) conf->fullsync = 1; - } + } else if (conf->last_used < 0) + /* + * The first working device is used as a + * starting point to read balancing. + */ + conf->last_used = i; } - if (mddev->degraded == conf->raid_disks) { + + err = -EIO; + if (conf->last_used < 0) { printk(KERN_ERR "raid1: no operational mirrors for %s\n", - mdname(mddev)); - goto out_free_conf; + mdname(mddev)); + goto abort; } - if (conf->raid_disks - mddev->degraded == 1) - mddev->recovery_cp = MaxSector; + err = -ENOMEM; + conf->thread = md_register_thread(raid1d, mddev, NULL); + if (!conf->thread) { + printk(KERN_ERR + "raid1: couldn't allocate thread for %s\n", + mdname(mddev)); + goto abort; + } + + return conf; + + abort: + if (conf) { + if (conf->r1bio_pool) + mempool_destroy(conf->r1bio_pool); + kfree(conf->mirrors); + safe_put_page(conf->tmppage); + kfree(conf->poolinfo); + kfree(conf); + } + return ERR_PTR(err); +} +static int run(mddev_t *mddev) +{ + conf_t *conf; + int i; + mdk_rdev_t *rdev; + + if (mddev->level != 1) { + printk("raid1: %s: raid level not set to mirroring (%d)\n", + mdname(mddev), mddev->level); + return -EIO; + } + if (mddev->reshape_position != MaxSector) { + printk("raid1: %s: reshape_position set but not supported\n", + mdname(mddev)); + return -EIO; + } /* - * find the first working one and use it as a starting point - * to read balancing. + * copy the already verified devices into our private RAID1 + * bookkeeping area. [whatever we allocate in run(), + * should be freed in stop()] */ - for (j = 0; j < conf->raid_disks && - (!conf->mirrors[j].rdev || - !test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++) - /* nothing */; - conf->last_used = j; + if (mddev->private == NULL) + conf = setup_conf(mddev); + else + conf = mddev->private; + if (IS_ERR(conf)) + return PTR_ERR(conf); - mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); - if (!mddev->thread) { - printk(KERN_ERR - "raid1: couldn't allocate thread for %s\n", - mdname(mddev)); - goto out_free_conf; + mddev->queue->queue_lock = &conf->device_lock; + list_for_each_entry(rdev, &mddev->disks, same_set) { + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + /* as we don't honour merge_bvec_fn, we must never risk + * violating it, so limit ->max_segments to 1 lying within + * a single page, as a one page request is never in violation. + */ + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } } + mddev->degraded = 0; + for (i=0; i < conf->raid_disks; i++) + if (conf->mirrors[i].rdev == NULL || + !test_bit(In_sync, &conf->mirrors[i].rdev->flags) || + test_bit(Faulty, &conf->mirrors[i].rdev->flags)) + mddev->degraded++; + + if (conf->raid_disks - mddev->degraded == 1) + mddev->recovery_cp = MaxSector; + if (mddev->recovery_cp != MaxSector) printk(KERN_NOTICE "raid1: %s is not clean" " -- starting background reconstruction\n", @@ -2063,9 +2131,14 @@ static int run(mddev_t *mddev) "raid1: raid set %s active with %d out of %d mirrors\n", mdname(mddev), mddev->raid_disks - mddev->degraded, mddev->raid_disks); + /* * Ok, everything is just fine now */ + mddev->thread = conf->thread; + conf->thread = NULL; + mddev->private = conf; + md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); mddev->queue->unplug_fn = raid1_unplug; @@ -2073,23 +2146,6 @@ static int run(mddev_t *mddev) mddev->queue->backing_dev_info.congested_data = mddev; md_integrity_register(mddev); return 0; - -out_no_mem: - printk(KERN_ERR "raid1: couldn't allocate memory for %s\n", - mdname(mddev)); - -out_free_conf: - if (conf) { - if (conf->r1bio_pool) - mempool_destroy(conf->r1bio_pool); - kfree(conf->mirrors); - safe_put_page(conf->tmppage); - kfree(conf->poolinfo); - kfree(conf); - mddev->private = NULL; - } -out: - return -EIO; } static int stop(mddev_t *mddev) @@ -2263,6 +2319,9 @@ static void raid1_quiesce(mddev_t *mddev, int state) conf_t *conf = mddev->private; switch(state) { + case 2: /* wake for suspend */ + wake_up(&conf->wait_barrier); + break; case 1: raise_barrier(conf); break; @@ -2272,6 +2331,23 @@ static void raid1_quiesce(mddev_t *mddev, int state) } } +static void *raid1_takeover(mddev_t *mddev) +{ + /* raid1 can take over: + * raid5 with 2 devices, any layout or chunk size + */ + if (mddev->level == 5 && mddev->raid_disks == 2) { + conf_t *conf; + mddev->new_level = 1; + mddev->new_layout = 0; + mddev->new_chunk_sectors = 0; + conf = setup_conf(mddev); + if (!IS_ERR(conf)) + conf->barrier = 1; + return conf; + } + return ERR_PTR(-EINVAL); +} static struct mdk_personality raid1_personality = { @@ -2291,6 +2367,7 @@ static struct mdk_personality raid1_personality = .size = raid1_size, .check_reshape = raid1_reshape, .quiesce = raid1_quiesce, + .takeover = raid1_takeover, }; static int __init raid_init(void) @@ -2306,6 +2383,7 @@ static void raid_exit(void) module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD"); MODULE_ALIAS("md-personality-3"); /* RAID1 */ MODULE_ALIAS("md-raid1"); MODULE_ALIAS("md-level-1");