X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fmd%2Fraid10.c;h=e2766d8251a184a6c28a67430ca34c7e68d0df67;hb=acc4e4168f87c05be31c3259ee0cb6262473c8ea;hp=e1feb87afc6c827027d193e8e50003e0b1ca316b;hpb=78200d45cde2a79c0d0ae0407883bb264caa3c18;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e1feb87..e2766d8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -18,10 +18,13 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include "dm-bio-list.h" +#include #include -#include -#include +#include +#include +#include "md.h" +#include "raid10.h" +#include "bitmap.h" /* * RAID10 provides a combination of RAID0 and RAID1 functionality. @@ -66,7 +69,7 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) /* allocate a r10bio with room for raid_disks entries in the bios array */ r10_bio = kzalloc(size, gfp_flags); - if (!r10_bio) + if (!r10_bio && conf->mddev) unplug_slaves(conf->mddev); return r10_bio; @@ -186,7 +189,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) static void free_r10bio(r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; /* * Wake up any possible resync thread that waits for the device @@ -200,7 +203,7 @@ static void free_r10bio(r10bio_t *r10_bio) static void put_buf(r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; mempool_free(r10_bio, conf->r10buf_pool); @@ -211,7 +214,7 @@ static void reschedule_retry(r10bio_t *r10_bio) { unsigned long flags; mddev_t *mddev = r10_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; spin_lock_irqsave(&conf->device_lock, flags); list_add(&r10_bio->retry_list, &conf->retry_list); @@ -243,7 +246,7 @@ static void raid_end_bio_io(r10bio_t *r10_bio) */ static inline void update_head_pos(int slot, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; conf->mirrors[r10_bio->devs[slot].devnum].head_position = r10_bio->devs[slot].addr + (r10_bio->sectors); @@ -254,7 +257,7 @@ static void raid10_end_read_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); int slot, dev; - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; slot = r10_bio->read_slot; @@ -295,7 +298,7 @@ static void raid10_end_write_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); int slot, dev; - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; for (slot = 0; slot < conf->copies; slot++) if (r10_bio->devs[slot].bio == bio) @@ -459,7 +462,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; @@ -594,7 +597,7 @@ rb_out: static void unplug_slaves(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -626,9 +629,11 @@ static void raid10_unplug(struct request_queue *q) static int raid10_congested(void *data, int bits) { mddev_t *mddev = data; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; rcu_read_lock(); for (i = 0; i < mddev->raid_disks && ret == 0; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); @@ -786,7 +791,7 @@ static void unfreeze_array(conf_t *conf) static int make_request(struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; mirror_info_t *mirror; r10bio_t *r10_bio; struct bio *read_bio; @@ -794,13 +799,13 @@ static int make_request(struct request_queue *q, struct bio * bio) int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); - const int do_sync = bio_sync(bio); + const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); struct bio_list bl; unsigned long flags; mdk_rdev_t *blocked_rdev; - if (unlikely(bio_barrier(bio))) { - bio_endio(bio, -EOPNOTSUPP); + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { + md_barrier_request(mddev, bio); return 0; } @@ -880,7 +885,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid10_end_read_request; - read_bio->bi_rw = READ | do_sync; + read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); read_bio->bi_private = r10_bio; generic_make_request(read_bio); @@ -948,7 +953,7 @@ static int make_request(struct request_queue *q, struct bio * bio) conf->mirrors[d].rdev->data_offset; mbio->bi_bdev = conf->mirrors[d].rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | do_sync; + mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO); mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -979,11 +984,11 @@ static int make_request(struct request_queue *q, struct bio * bio) static void status(struct seq_file *seq, mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; if (conf->near_copies < conf->raid_disks) - seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); if (conf->near_copies > 1) seq_printf(seq, " %d near-copies", conf->near_copies); if (conf->far_copies > 1) { @@ -1004,7 +1009,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) static void error(mddev_t *mddev, mdk_rdev_t *rdev) { char b[BDEVNAME_SIZE]; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; /* * If it is not operational, then we have already marked it as dead @@ -1149,15 +1154,19 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) for ( ; mirror <= last ; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + /* as we don't honour merge_bvec_fn, we must + * never risk violating it, so limit + * ->max_segments to one lying with a single + * page, as a one page request is never in + * violation. */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - mddev->queue->max_sectors = (PAGE_SIZE>>9); + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } p->head_position = 0; rdev->raid_disk = mirror; @@ -1168,6 +1177,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) break; } + md_integrity_add_rdev(rdev, mddev); print_conf(conf); return err; } @@ -1201,7 +1211,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number) /* lost the race, try later */ err = -EBUSY; p->rdev = rdev; + goto abort; } + md_integrity_register(mddev); } abort: @@ -1213,7 +1225,7 @@ abort: static void end_sync_read(struct bio *bio, int error) { r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; int i,d; for (i=0; icopies; i++) @@ -1236,6 +1248,7 @@ static void end_sync_read(struct bio *bio, int error) /* for reconstruct, we always reschedule after a read. * for resync, only after all reads */ + rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev); if (test_bit(R10BIO_IsRecover, &r10_bio->state) || atomic_dec_and_test(&r10_bio->remaining)) { /* we have read all the blocks, @@ -1243,7 +1256,6 @@ static void end_sync_read(struct bio *bio, int error) */ reschedule_retry(r10_bio); } - rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev); } static void end_sync_write(struct bio *bio, int error) @@ -1251,7 +1263,7 @@ static void end_sync_write(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); mddev_t *mddev = r10_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i,d; for (i = 0; i < conf->copies; i++) @@ -1264,11 +1276,13 @@ static void end_sync_write(struct bio *bio, int error) update_head_pos(i, r10_bio); + rdev_dec_pending(conf->mirrors[d].rdev, mddev); while (atomic_dec_and_test(&r10_bio->remaining)) { if (r10_bio->master_bio == NULL) { /* the primary of several recovery bios */ - md_done_sync(mddev, r10_bio->sectors, 1); + sector_t s = r10_bio->sectors; put_buf(r10_bio); + md_done_sync(mddev, s, 1); break; } else { r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio; @@ -1276,7 +1290,6 @@ static void end_sync_write(struct bio *bio, int error) r10_bio = r10_bio2; } } - rdev_dec_pending(conf->mirrors[d].rdev, mddev); } /* @@ -1297,7 +1310,7 @@ static void end_sync_write(struct bio *bio, int error) */ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, first; struct bio *tbio, *fbio; @@ -1397,7 +1410,7 @@ done: static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, d; struct bio *bio, *wbio; @@ -1424,6 +1437,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) /* + * Used by fix_read_error() to decay the per rdev read_errors. + * We halve the read error count for every hour that has elapsed + * since the last recorded read error. + * + */ +static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev) +{ + struct timespec cur_time_mon; + unsigned long hours_since_last; + unsigned int read_errors = atomic_read(&rdev->read_errors); + + ktime_get_ts(&cur_time_mon); + + if (rdev->last_read_error.tv_sec == 0 && + rdev->last_read_error.tv_nsec == 0) { + /* first time we've seen a read error */ + rdev->last_read_error = cur_time_mon; + return; + } + + hours_since_last = (cur_time_mon.tv_sec - + rdev->last_read_error.tv_sec) / 3600; + + rdev->last_read_error = cur_time_mon; + + /* + * if hours_since_last is > the number of bits in read_errors + * just set read errors to 0. We do this to avoid + * overflowing the shift of read_errors by hours_since_last. + */ + if (hours_since_last >= 8 * sizeof(read_errors)) + atomic_set(&rdev->read_errors, 0); + else + atomic_set(&rdev->read_errors, read_errors >> hours_since_last); +} + +/* * This is a kernel thread which: * * 1. Retries failed read operations on working mirrors. @@ -1436,6 +1486,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) int sect = 0; /* Offset from r10_bio->sector */ int sectors = r10_bio->sectors; mdk_rdev_t*rdev; + int max_read_errors = atomic_read(&mddev->max_corr_read_errors); + + rcu_read_lock(); + { + int d = r10_bio->devs[r10_bio->read_slot].devnum; + char b[BDEVNAME_SIZE]; + int cur_read_error_count = 0; + + rdev = rcu_dereference(conf->mirrors[d].rdev); + bdevname(rdev->bdev, b); + + if (test_bit(Faulty, &rdev->flags)) { + rcu_read_unlock(); + /* drive has already been failed, just ignore any + more fix_read_error() attempts */ + return; + } + + check_decay_read_errors(mddev, rdev); + atomic_inc(&rdev->read_errors); + cur_read_error_count = atomic_read(&rdev->read_errors); + if (cur_read_error_count > max_read_errors) { + rcu_read_unlock(); + printk(KERN_NOTICE + "raid10: %s: Raid device exceeded " + "read_error threshold " + "[cur %d:max %d]\n", + b, cur_read_error_count, max_read_errors); + printk(KERN_NOTICE + "raid10: %s: Failing raid " + "device\n", b); + md_error(mddev, conf->mirrors[d].rdev); + return; + } + } + rcu_read_unlock(); + while(sectors) { int s = sectors; int sl = r10_bio->read_slot; @@ -1480,6 +1567,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) /* write it back and re-read */ rcu_read_lock(); while (sl != r10_bio->read_slot) { + char b[BDEVNAME_SIZE]; int d; if (sl==0) sl = conf->copies; @@ -1495,9 +1583,21 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) r10_bio->devs[sl].addr + sect + rdev->data_offset, s<<9, conf->tmppage, WRITE) - == 0) + == 0) { /* Well, this device is dead */ + printk(KERN_NOTICE + "raid10:%s: read correction " + "write failed" + " (%d sectors at %llu on %s)\n", + mdname(mddev), s, + (unsigned long long)(sect+ + rdev->data_offset), + bdevname(rdev->bdev, b)); + printk(KERN_NOTICE "raid10:%s: failing " + "drive\n", + bdevname(rdev->bdev, b)); md_error(mddev, rdev); + } rdev_dec_pending(rdev, mddev); rcu_read_lock(); } @@ -1518,10 +1618,22 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) if (sync_page_io(rdev->bdev, r10_bio->devs[sl].addr + sect + rdev->data_offset, - s<<9, conf->tmppage, READ) == 0) + s<<9, conf->tmppage, + READ) == 0) { /* Well, this device is dead */ + printk(KERN_NOTICE + "raid10:%s: unable to read back " + "corrected sectors" + " (%d sectors at %llu on %s)\n", + mdname(mddev), s, + (unsigned long long)(sect+ + rdev->data_offset), + bdevname(rdev->bdev, b)); + printk(KERN_NOTICE "raid10:%s: failing drive\n", + bdevname(rdev->bdev, b)); + md_error(mddev, rdev); - else + } else { printk(KERN_INFO "raid10:%s: read error corrected" " (%d sectors at %llu on %s)\n", @@ -1529,6 +1641,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) (unsigned long long)(sect+ rdev->data_offset), bdevname(rdev->bdev, b)); + } rdev_dec_pending(rdev, mddev); rcu_read_lock(); @@ -1546,7 +1659,7 @@ static void raid10d(mddev_t *mddev) r10bio_t *r10_bio; struct bio *bio; unsigned long flags; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; int unplug=0; mdk_rdev_t *rdev; @@ -1569,7 +1682,7 @@ static void raid10d(mddev_t *mddev) spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r10_bio->mddev; - conf = mddev_to_conf(mddev); + conf = mddev->private; if (test_bit(R10BIO_IsSync, &r10_bio->state)) { sync_request_write(mddev, r10_bio); unplug = 1; @@ -1604,7 +1717,7 @@ static void raid10d(mddev_t *mddev) raid_end_bio_io(r10_bio); bio_put(bio); } else { - const int do_sync = bio_sync(r10_bio->master_bio); + const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO); bio_put(bio); rdev = conf->mirrors[mirror].rdev; if (printk_ratelimit()) @@ -1617,13 +1730,14 @@ static void raid10d(mddev_t *mddev) bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr + rdev->data_offset; bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | do_sync; + bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); bio->bi_private = r10_bio; bio->bi_end_io = raid10_end_read_request; unplug = 1; generic_make_request(bio); } } + cond_resched(); } if (unplug) unplug_slaves(mddev); @@ -1677,7 +1791,7 @@ static int init_resync(conf_t *conf) static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; r10bio_t *r10_bio; struct bio *biolist = NULL, *bio; sector_t max_sector, nr_sectors; @@ -1694,7 +1808,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i return 0; skipped: - max_sector = mddev->size << 1; + max_sector = mddev->dev_sectors; if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) max_sector = mddev->resync_max_sectors; if (sector_nr >= max_sector) { @@ -1767,7 +1881,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { /* recovery... the complicated one */ - int i, j, k; + int j, k; r10_bio = NULL; for (i=0 ; iraid_disks; i++) @@ -1806,17 +1920,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i r10_bio->sector = sect; raid10_find_phys(conf, r10_bio); - /* Need to check if this section will still be + + /* Need to check if the array will still be * degraded */ - for (j=0; jcopies;j++) { - int d = r10_bio->devs[j].devnum; - if (conf->mirrors[d].rdev == NULL || - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) { + for (j=0; jraid_disks; j++) + if (conf->mirrors[j].rdev == NULL || + test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { still_degraded = 1; break; } - } + must_sync = bitmap_start_sync(mddev->bitmap, sect, &sync_blocks, still_degraded); @@ -2019,18 +2133,38 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i goto skipped; } +static sector_t +raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) +{ + sector_t size; + conf_t *conf = mddev->private; + + if (!raid_disks) + raid_disks = mddev->raid_disks; + if (!sectors) + sectors = mddev->dev_sectors; + + size = sectors >> conf->chunk_shift; + sector_div(size, conf->far_copies); + size = size * raid_disks; + sector_div(size, conf->near_copies); + + return size << conf->chunk_shift; +} + static int run(mddev_t *mddev) { conf_t *conf; - int i, disk_idx; + int i, disk_idx, chunk_size; mirror_info_t *disk; mdk_rdev_t *rdev; int nc, fc, fo; sector_t stride, size; - if (mddev->chunk_size < PAGE_SIZE) { + if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || + !is_power_of_2(mddev->chunk_sectors)) { printk(KERN_ERR "md/raid10: chunk size must be " - "at least PAGE_SIZE(%ld).\n", PAGE_SIZE); + "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); return -EINVAL; } @@ -2067,15 +2201,14 @@ static int run(mddev_t *mddev) if (!conf->tmppage) goto out_free_conf; - conf->mddev = mddev; conf->raid_disks = mddev->raid_disks; conf->near_copies = nc; conf->far_copies = fc; conf->copies = nc*fc; conf->far_offset = fo; - conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; - conf->chunk_shift = ffz(~mddev->chunk_size) - 9; - size = mddev->size >> (conf->chunk_shift-1); + conf->chunk_mask = mddev->chunk_sectors - 1; + conf->chunk_shift = ffz(~mddev->chunk_sectors); + size = mddev->dev_sectors >> conf->chunk_shift; sector_div(size, fc); size = size * conf->raid_disks; sector_div(size, nc); @@ -2088,7 +2221,7 @@ static int run(mddev_t *mddev) */ stride += conf->raid_disks - 1; sector_div(stride, conf->raid_disks); - mddev->size = stride << (conf->chunk_shift-1); + mddev->dev_sectors = stride << conf->chunk_shift; if (fo) stride = 1; @@ -2104,9 +2237,18 @@ static int run(mddev_t *mddev) goto out_free_conf; } + conf->mddev = mddev; spin_lock_init(&conf->device_lock); mddev->queue->queue_lock = &conf->device_lock; + chunk_size = mddev->chunk_sectors << 9; + blk_queue_io_min(mddev->queue, chunk_size); + if (conf->raid_disks % conf->near_copies) + blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks); + else + blk_queue_io_opt(mddev->queue, chunk_size * + (conf->raid_disks / conf->near_copies)); + list_for_each_entry(rdev, &mddev->disks, same_set) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks @@ -2115,16 +2257,17 @@ static int run(mddev_t *mddev) disk = conf->mirrors + disk_idx; disk->rdev = rdev; - - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + * violating it, so limit max_segments to 1 lying + * within a single page. */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - mddev->queue->max_sectors = (PAGE_SIZE>>9); + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } disk->head_position = 0; } @@ -2155,7 +2298,7 @@ static int run(mddev_t *mddev) } - mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10"); + mddev->thread = md_register_thread(raid10d, mddev, NULL); if (!mddev->thread) { printk(KERN_ERR "raid10: couldn't allocate thread for %s\n", @@ -2163,6 +2306,10 @@ static int run(mddev_t *mddev) goto out_free_conf; } + if (mddev->recovery_cp != MaxSector) + printk(KERN_NOTICE "raid10: %s is not clean" + " -- starting background reconstruction\n", + mdname(mddev)); printk(KERN_INFO "raid10: raid set %s active with %d out of %d devices\n", mdname(mddev), mddev->raid_disks - mddev->degraded, @@ -2170,8 +2317,8 @@ static int run(mddev_t *mddev) /* * Ok, everything is just fine now */ - mddev->array_sectors = size << conf->chunk_shift; - mddev->resync_max_sectors = size << conf->chunk_shift; + md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); + mddev->resync_max_sectors = raid10_size(mddev, 0, 0); mddev->queue->unplug_fn = raid10_unplug; mddev->queue->backing_dev_info.congested_fn = raid10_congested; @@ -2182,7 +2329,8 @@ static int run(mddev_t *mddev) * maybe... */ { - int stripe = conf->raid_disks * (mddev->chunk_size / PAGE_SIZE); + int stripe = conf->raid_disks * + ((mddev->chunk_sectors << 9) / PAGE_SIZE); stripe /= conf->near_copies; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; @@ -2190,6 +2338,7 @@ static int run(mddev_t *mddev) if (conf->near_copies < mddev->raid_disks) blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); + md_integrity_register(mddev); return 0; out_free_conf: @@ -2205,7 +2354,10 @@ out: static int stop(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; + + raise_barrier(conf, 0); + lower_barrier(conf); md_unregister_thread(mddev->thread); mddev->thread = NULL; @@ -2220,7 +2372,7 @@ static int stop(mddev_t *mddev) static void raid10_quiesce(mddev_t *mddev, int state) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; switch(state) { case 1: @@ -2230,13 +2382,6 @@ static void raid10_quiesce(mddev_t *mddev, int state) lower_barrier(conf); break; } - if (mddev->thread) { - if (mddev->bitmap) - mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; - else - mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - md_wakeup_thread(mddev->thread); - } } static struct mdk_personality raid10_personality = @@ -2254,6 +2399,7 @@ static struct mdk_personality raid10_personality = .spare_active = raid10_spare_active, .sync_request = sync_request, .quiesce = raid10_quiesce, + .size = raid10_size, }; static int __init raid_init(void) @@ -2269,6 +2415,7 @@ static void raid_exit(void) module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD"); MODULE_ALIAS("md-personality-9"); /* RAID10 */ MODULE_ALIAS("md-raid10"); MODULE_ALIAS("md-level-10");