}
+static int flush_pending_writes(conf_t *conf)
+{
+ /* Any writes that have been queued but are awaiting
+ * bitmap updates get flushed here.
+ * We return 1 if any requests were actually submitted.
+ */
+ int rv = 0;
+
+ spin_lock_irq(&conf->device_lock);
+
+ if (conf->pending_bio_list.head) {
+ struct bio *bio;
+ bio = bio_list_get(&conf->pending_bio_list);
+ blk_remove_plug(conf->mddev->queue);
+ spin_unlock_irq(&conf->device_lock);
+ /* flush any pending bitmap writes to
+ * disk before proceeding w/ I/O */
+ bitmap_unplug(conf->mddev->bitmap);
+
+ while (bio) { /* submit pending writes */
+ struct bio *next = bio->bi_next;
+ bio->bi_next = NULL;
+ generic_make_request(bio);
+ bio = next;
+ }
+ rv = 1;
+ } else
+ spin_unlock_irq(&conf->device_lock);
+ return rv;
+}
+
/* Barriers....
* Sometimes we need to suspend IO while we do something else,
* either some resync/recovery, or reconfigure the array.
/* stop syncio and normal IO and wait for everything to
* go quite.
* We increment barrier and nr_waiting, and then
- * wait until barrier+nr_pending match nr_queued+2
+ * wait until nr_pending match nr_queued+1
+ * This is called in the context of one normal IO request
+ * that has failed. Thus any sync request that might be pending
+ * will be blocked by nr_pending, and we need to wait for
+ * pending IO requests to complete or be queued for re-try.
+ * Thus the number queued (nr_queued) plus this request (1)
+ * must match the number of pending IOs (nr_pending) before
+ * we continue.
*/
spin_lock_irq(&conf->resync_lock);
conf->barrier++;
conf->nr_waiting++;
wait_event_lock_irq(conf->wait_barrier,
- conf->barrier+conf->nr_pending == conf->nr_queued+2,
+ conf->nr_pending == conf->nr_queued+1,
conf->resync_lock,
- raid1_unplug(conf->mddev->queue));
+ ({ flush_pending_writes(conf);
+ raid1_unplug(conf->mddev->queue); }));
spin_unlock_irq(&conf->resync_lock);
}
static void unfreeze_array(conf_t *conf)
r1bio_t *r1_bio;
struct bio *read_bio;
int i, targets = 0, disks;
- mdk_rdev_t *rdev;
- struct bitmap *bitmap = mddev->bitmap;
+ struct bitmap *bitmap;
unsigned long flags;
struct bio_list bl;
struct page **behind_pages = NULL;
const int rw = bio_data_dir(bio);
const int do_sync = bio_sync(bio);
int do_barriers;
+ mdk_rdev_t *blocked_rdev;
/*
* Register the new request and wait if the reconstruction
wait_barrier(conf);
+ bitmap = mddev->bitmap;
+
disk_stat_inc(mddev->gendisk, ios[rw]);
disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
first = 0;
}
#endif
+ retry_write:
+ blocked_rdev = NULL;
rcu_read_lock();
for (i = 0; i < disks; i++) {
- if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
- !test_bit(Faulty, &rdev->flags)) {
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ atomic_inc(&rdev->nr_pending);
+ blocked_rdev = rdev;
+ break;
+ }
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
if (test_bit(Faulty, &rdev->flags)) {
rdev_dec_pending(rdev, mddev);
}
rcu_read_unlock();
+ if (unlikely(blocked_rdev)) {
+ /* Wait for this device to become unblocked */
+ int j;
+
+ for (j = 0; j < i; j++)
+ if (r1_bio->bios[j])
+ rdev_dec_pending(conf->mirrors[j].rdev, mddev);
+
+ allow_barrier(conf);
+ md_wait_for_blocked_rdev(blocked_rdev, mddev);
+ wait_barrier(conf);
+ goto retry_write;
+ }
+
BUG_ON(targets == 0); /* we never fail the last device */
if (targets < conf->raid_disks) {
blk_plug_device(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
+ /* In case raid1d snuck into freeze_array */
+ wake_up(&conf->wait_barrier);
+
if (do_sync)
md_wakeup_thread(mddev->thread);
#if 0
/*
* if recovery is running, make sure it aborts.
*/
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
} else
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
- printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
- " Operation continuing on %d devices\n",
+ printk(KERN_ALERT "raid1: Disk failure on %s, disabling device.\n"
+ "raid1: Operation continuing on %d devices.\n",
bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
}
err = -EBUSY;
goto abort;
}
+ /* Only remove non-faulty devices is recovery
+ * is not possible.
+ */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->degraded < conf->raid_disks) {
+ err = -EBUSY;
+ goto abort;
+ }
p->rdev = NULL;
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
} else {
/* fixup the bio for reuse */
+ int size;
sbio->bi_vcnt = vcnt;
sbio->bi_size = r1_bio->sectors << 9;
sbio->bi_idx = 0;
sbio->bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
- for (j = 0; j < vcnt ; j++)
- memcpy(page_address(sbio->bi_io_vec[j].bv_page),
+ size = sbio->bi_size;
+ for (j = 0; j < vcnt ; j++) {
+ struct bio_vec *bi;
+ bi = &sbio->bi_io_vec[j];
+ bi->bv_offset = 0;
+ if (size > PAGE_SIZE)
+ bi->bv_len = PAGE_SIZE;
+ else
+ bi->bv_len = size;
+ size -= PAGE_SIZE;
+ memcpy(page_address(bi->bv_page),
page_address(pbio->bi_io_vec[j].bv_page),
PAGE_SIZE);
+ }
}
}
for (;;) {
char b[BDEVNAME_SIZE];
- spin_lock_irqsave(&conf->device_lock, flags);
- if (conf->pending_bio_list.head) {
- bio = bio_list_get(&conf->pending_bio_list);
- blk_remove_plug(mddev->queue);
- spin_unlock_irqrestore(&conf->device_lock, flags);
- /* flush any pending bitmap writes to disk before proceeding w/ I/O */
- bitmap_unplug(mddev->bitmap);
-
- while (bio) { /* submit pending writes */
- struct bio *next = bio->bi_next;
- bio->bi_next = NULL;
- generic_make_request(bio);
- bio = next;
- }
- unplug = 1;
+ unplug += flush_pending_writes(conf);
- continue;
- }
-
- if (list_empty(head))
+ spin_lock_irqsave(&conf->device_lock, flags);
+ if (list_empty(head)) {
+ spin_unlock_irqrestore(&conf->device_lock, flags);
break;
+ }
r1_bio = list_entry(head->prev, r1bio_t, retry_list);
list_del(head->prev);
conf->nr_queued--;
}
}
}
- spin_unlock_irqrestore(&conf->device_lock, flags);
if (unplug)
unplug_slaves(mddev);
}
if (!conf->r1bio_pool)
goto out_no_mem;
+ spin_lock_init(&conf->device_lock);
+ mddev->queue->queue_lock = &conf->device_lock;
+
rdev_for_each(rdev, tmp, mddev) {
disk_idx = rdev->raid_disk;
if (disk_idx >= mddev->raid_disks
}
conf->raid_disks = mddev->raid_disks;
conf->mddev = mddev;
- spin_lock_init(&conf->device_lock);
INIT_LIST_HEAD(&conf->retry_list);
spin_lock_init(&conf->resync_lock);