dm raid1: use list_split_init

[safe/jmp/linux-2.6] / drivers / md / raid5.c
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index e8c8157..b162b83 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1143,7 +1143,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
                         rdev = conf->disks[i].rdev;
                         printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
                                mdname(conf->mddev), STRIPE_SECTORS,
-                              (unsigned long long)sh->sector + rdev->data_offset,
+                              (unsigned long long)(sh->sector + rdev->data_offset),
                                bdevname(rdev->bdev, b));
                         clear_bit(R5_ReadError, &sh->dev[i].flags);
                         clear_bit(R5_ReWrite, &sh->dev[i].flags);
@@ -1160,13 +1160,13 @@ static void raid5_end_read_request(struct bio * bi, int error)
                 if (conf->mddev->degraded)
                         printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n",
                                mdname(conf->mddev),
-                              (unsigned long long)sh->sector + rdev->data_offset,
+                              (unsigned long long)(sh->sector + rdev->data_offset),
                                bdn);
                 else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
                         /* Oh, no!!! */
                         printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n",
                                mdname(conf->mddev),
-                              (unsigned long long)sh->sector + rdev->data_offset,
+                              (unsigned long long)(sh->sector + rdev->data_offset),
                                bdn);
                 else if (atomic_read(&rdev->read_errors)
                          > conf->max_nr_stripes)
@@ -2348,25 +2348,15 @@ static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
  static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                                 struct stripe_head_state *s, int disks)
  {
+       int canceled_check = 0;
+
         set_bit(STRIPE_HANDLE, &sh->state);
-       /* Take one of the following actions:
-        * 1/ start a check parity operation if (uptodate == disks)
-        * 2/ finish a check parity operation and act on the result
-        * 3/ skip to the writeback section if we previously
-        *    initiated a recovery operation
-        */
-       if (s->failed == 0 &&
-           !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
-               if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
-                       BUG_ON(s->uptodate != disks);
-                       clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
-                       sh->ops.count++;
-                       s->uptodate--;
-               } else if (
-                      test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
-                       clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
-                       clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
  
+       /* complete a check operation */
+       if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
+           clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
+           clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
+               if (s->failed == 0) {
                         if (sh->ops.zero_sum_result == 0)
                                 /* parity is correct (on disc,
                                  * not in buffer any more)
@@ -2391,7 +2381,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                                         s->uptodate++;
                                 }
                         }
-               }
+               } else
+                       canceled_check = 1; /* STRIPE_INSYNC is not set */
         }
  
         /* check if we can clear a parity disk reconstruct */
@@ -2404,12 +2395,28 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
         }
  
+       /* start a new check operation if there are no failures, the stripe is
+        * not insync, and a repair is not in flight
+        */
+       if (s->failed == 0 &&
+           !test_bit(STRIPE_INSYNC, &sh->state) &&
+           !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+               if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
+                       BUG_ON(s->uptodate != disks);
+                       clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
+                       sh->ops.count++;
+                       s->uptodate--;
+               }
+       }
+
         /* Wait for check parity and compute block operations to complete
-        * before write-back
+        * before write-back.  If a failure occurred while the check operation
+        * was in flight we need to cycle this stripe through handle_stripe
+        * since the parity block may not be uptodate
          */
-       if (!test_bit(STRIPE_INSYNC, &sh->state) &&
-               !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
-               !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
+       if (!canceled_check && !test_bit(STRIPE_INSYNC, &sh->state) &&
+           !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
+           !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
                 struct r5dev *dev;
                 /* either failed parity check, or recovery is happening */
                 if (s->failed == 0)
@@ -3159,7 +3166,8 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
                                 atomic_inc(&conf->preread_active_stripes);
                         list_add_tail(&sh->lru, &conf->handle_list);
                 }
-       }
+       } else
+               blk_plug_device(conf->mddev->queue);
  }
  
  static void activate_bit_delay(raid5_conf_t *conf)
@@ -3549,7 +3557,8 @@ static int make_request(struct request_queue *q, struct bio * bi)
                                 goto retry;
                         }
                         finish_wait(&conf->wait_for_overlap, &w);
-                       handle_stripe(sh, NULL);
+                       set_bit(STRIPE_HANDLE, &sh->state);
+                       clear_bit(STRIPE_DELAYED, &sh->state);
                         release_stripe(sh);
                 } else {
                         /* cannot get stripe for read-ahead, just give-up */
@@ -3698,6 +3707,25 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                 release_stripe(sh);
                 first_sector += STRIPE_SECTORS;
         }
+       /* If this takes us to the resync_max point where we have to pause,
+        * then we need to write out the superblock.
+        */
+       sector_nr += conf->chunk_size>>9;
+       if (sector_nr >= mddev->resync_max) {
+               /* Cannot proceed until we've updated the superblock... */
+               wait_event(conf->wait_for_overlap,
+                          atomic_read(&conf->reshape_stripes) == 0);
+               mddev->reshape_position = conf->expand_progress;
+               set_bit(MD_CHANGE_DEVS, &mddev->flags);
+               md_wakeup_thread(mddev->thread);
+               wait_event(mddev->sb_wait,
+                          !test_bit(MD_CHANGE_DEVS, &mddev->flags)
+                          || kthread_should_stop());
+               spin_lock_irq(&conf->device_lock);
+               conf->expand_lo = mddev->reshape_position;
+               spin_unlock_irq(&conf->device_lock);
+               wake_up(&conf->wait_for_overlap);
+       }
         return conf->chunk_size>>9;
  }
  
@@ -3734,6 +3762,12 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
                 return reshape_request(mddev, sector_nr, skipped);
  
+       /* No need to check resync_max as we never do more than one
+        * stripe, and as resync_max will always be on a chunk boundary,
+        * if the check in md_do_sync didn't fire, there is no chance
+        * of overstepping resync_max here
+        */
+
         /* if there is too many failed drives and we are trying
          * to resync, then assert that we are finished, because there is
          * nothing we can do.
@@ -3753,6 +3787,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
                 return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
         }
  
+
+       bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+
         pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks);
         sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
         if (sh == NULL) {
@@ -3864,7 +3901,7 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
   * During the scan, completed stripes are saved for us by the interrupt
   * handler, so that they will not have to wait for our next wakeup.
   */
-static void raid5d (mddev_t *mddev)
+static void raid5d(mddev_t *mddev)
  {
         struct stripe_head *sh;
         raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -3889,12 +3926,6 @@ static void raid5d (mddev_t *mddev)
                         activate_bit_delay(conf);
                 }
  
-               if (list_empty(&conf->handle_list) &&
-                   atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD &&
-                   !blk_queue_plugged(mddev->queue) &&
-                   !list_empty(&conf->delayed_list))
-                       raid5_activate_delayed(conf);
-
                 while ((bio = remove_bio_from_retry(conf))) {
                         int ok;
                         spin_unlock_irq(&conf->device_lock);
@@ -4108,7 +4139,7 @@ static int run(mddev_t *mddev)
  
         pr_debug("raid5: run(%s) called.\n", mdname(mddev));
  
-       ITERATE_RDEV(mddev,rdev,tmp) {
+       rdev_for_each(rdev, tmp, mddev) {
                 raid_disk = rdev->raid_disk;
                 if (raid_disk >= conf->raid_disks
                     || raid_disk < 0)
@@ -4521,7 +4552,7 @@ static int raid5_start_reshape(mddev_t *mddev)
         if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                 return -EBUSY;
  
-       ITERATE_RDEV(mddev, rdev, rtmp)
+       rdev_for_each(rdev, rtmp, mddev)
                 if (rdev->raid_disk < 0 &&
                     !test_bit(Faulty, &rdev->flags))
                         spares++;
@@ -4543,7 +4574,7 @@ static int raid5_start_reshape(mddev_t *mddev)
         /* Add some new drives, as many as will fit.
          * We know there are enough to make the newly sized array work.
          */
-       ITERATE_RDEV(mddev, rdev, rtmp)
+       rdev_for_each(rdev, rtmp, mddev)
                 if (rdev->raid_disk < 0 &&
                     !test_bit(Faulty, &rdev->flags)) {
                         if (raid5_add_disk(mddev, rdev)) {