X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fmd%2Fraid10.c;h=5938fa9629221145e6b4249d4996ca6c67e08516;hb=6bfe0b499082fd3950429017cd8ebf2a6c458aa5;hp=d6f12882424d96c8b6512f8825eed6f4ba9358a6;hpb=c620727779f7cc8ea96efb71f0651a26349e59c1;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d6f1288..5938fa9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -537,7 +537,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) current_distance = abs(r10_bio->devs[slot].addr - conf->mirrors[disk].head_position); - /* Find the disk whose head is closest */ + /* Find the disk whose head is closest, + * or - for far > 1 - find the closest to partition beginning */ for (nslot = slot; nslot < conf->copies; nslot++) { int ndisk = r10_bio->devs[nslot].devnum; @@ -557,8 +558,13 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) slot = nslot; break; } - new_distance = abs(r10_bio->devs[nslot].addr - - conf->mirrors[ndisk].head_position); + + /* for far > 1 always use the lowest address */ + if (conf->far_copies > 1) + new_distance = r10_bio->devs[nslot].addr; + else + new_distance = abs(r10_bio->devs[nslot].addr - + conf->mirrors[ndisk].head_position); if (new_distance < current_distance) { current_distance = new_distance; disk = ndisk; @@ -629,7 +635,36 @@ static int raid10_congested(void *data, int bits) return ret; } - +static int flush_pending_writes(conf_t *conf) +{ + /* Any writes that have been queued but are awaiting + * bitmap updates get flushed here. + * We return 1 if any requests were actually submitted. + */ + int rv = 0; + + spin_lock_irq(&conf->device_lock); + + if (conf->pending_bio_list.head) { + struct bio *bio; + bio = bio_list_get(&conf->pending_bio_list); + blk_remove_plug(conf->mddev->queue); + spin_unlock_irq(&conf->device_lock); + /* flush any pending bitmap writes to disk + * before proceeding w/ I/O */ + bitmap_unplug(conf->mddev->bitmap); + + while (bio) { /* submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + rv = 1; + } else + spin_unlock_irq(&conf->device_lock); + return rv; +} /* Barriers.... * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. @@ -712,15 +747,23 @@ static void freeze_array(conf_t *conf) /* stop syncio and normal IO and wait for everything to * go quiet. * We increment barrier and nr_waiting, and then - * wait until barrier+nr_pending match nr_queued+2 + * wait until nr_pending match nr_queued+1 + * This is called in the context of one normal IO request + * that has failed. Thus any sync request that might be pending + * will be blocked by nr_pending, and we need to wait for + * pending IO requests to complete or be queued for re-try. + * Thus the number queued (nr_queued) plus this request (1) + * must match the number of pending IOs (nr_pending) before + * we continue. */ spin_lock_irq(&conf->resync_lock); conf->barrier++; conf->nr_waiting++; wait_event_lock_irq(conf->wait_barrier, - conf->barrier+conf->nr_pending == conf->nr_queued+2, + conf->nr_pending == conf->nr_queued+1, conf->resync_lock, - raid10_unplug(conf->mddev->queue)); + ({ flush_pending_writes(conf); + raid10_unplug(conf->mddev->queue); })); spin_unlock_irq(&conf->resync_lock); } @@ -747,6 +790,7 @@ static int make_request(struct request_queue *q, struct bio * bio) const int do_sync = bio_sync(bio); struct bio_list bl; unsigned long flags; + mdk_rdev_t *blocked_rdev; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); @@ -836,17 +880,23 @@ static int make_request(struct request_queue *q, struct bio * bio) /* * WRITE: */ - /* first select target devices under spinlock and + /* first select target devices under rcu_lock and * inc refcount on their rdev. Record them by setting * bios[x] to bio */ raid10_find_phys(conf, r10_bio); + retry_write: + blocked_rdev = 0; rcu_read_lock(); for (i = 0; i < conf->copies; i++) { int d = r10_bio->devs[i].devnum; mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev); - if (rdev && - !test_bit(Faulty, &rdev->flags)) { + if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + atomic_inc(&rdev->nr_pending); + blocked_rdev = rdev; + break; + } + if (rdev && !test_bit(Faulty, &rdev->flags)) { atomic_inc(&rdev->nr_pending); r10_bio->devs[i].bio = bio; } else { @@ -856,6 +906,22 @@ static int make_request(struct request_queue *q, struct bio * bio) } rcu_read_unlock(); + if (unlikely(blocked_rdev)) { + /* Have to wait for this device to get unblocked, then retry */ + int j; + int d; + + for (j = 0; j < i; j++) + if (r10_bio->devs[j].bio) { + d = r10_bio->devs[j].devnum; + rdev_dec_pending(conf->mirrors[d].rdev, mddev); + } + allow_barrier(conf); + md_wait_for_blocked_rdev(blocked_rdev, mddev); + wait_barrier(conf); + goto retry_write; + } + atomic_set(&r10_bio->remaining, 0); bio_list_init(&bl); @@ -892,6 +958,9 @@ static int make_request(struct request_queue *q, struct bio * bio) blk_plug_device(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); + /* In case raid10d snuck in to freeze_array */ + wake_up(&conf->wait_barrier); + if (do_sync) md_wakeup_thread(mddev->thread); @@ -955,8 +1024,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) } set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); - printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" - " Operation continuing on %d devices\n", + printk(KERN_ALERT "raid10: Disk failure on %s, disabling device.\n" + "raid10: Operation continuing on %d devices.\n", bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); } @@ -1464,28 +1533,14 @@ static void raid10d(mddev_t *mddev) for (;;) { char b[BDEVNAME_SIZE]; - spin_lock_irqsave(&conf->device_lock, flags); - - if (conf->pending_bio_list.head) { - bio = bio_list_get(&conf->pending_bio_list); - blk_remove_plug(mddev->queue); - spin_unlock_irqrestore(&conf->device_lock, flags); - /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - bitmap_unplug(mddev->bitmap); - - while (bio) { /* submit pending writes */ - struct bio *next = bio->bi_next; - bio->bi_next = NULL; - generic_make_request(bio); - bio = next; - } - unplug = 1; - continue; - } + unplug += flush_pending_writes(conf); - if (list_empty(head)) + spin_lock_irqsave(&conf->device_lock, flags); + if (list_empty(head)) { + spin_unlock_irqrestore(&conf->device_lock, flags); break; + } r10_bio = list_entry(head->prev, r10bio_t, retry_list); list_del(head->prev); conf->nr_queued--; @@ -1548,7 +1603,6 @@ static void raid10d(mddev_t *mddev) } } } - spin_unlock_irqrestore(&conf->device_lock, flags); if (unplug) unplug_slaves(mddev); } @@ -1787,6 +1841,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (j == conf->copies) { /* Cannot recover, so abort the recovery */ put_buf(r10_bio); + if (rb2) + atomic_dec(&rb2->remaining); r10_bio = rb2; if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", @@ -2026,7 +2082,7 @@ static int run(mddev_t *mddev) goto out_free_conf; } - ITERATE_RDEV(mddev, rdev, tmp) { + rdev_for_each(rdev, tmp, mddev) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks || disk_idx < 0)