md/raid5: refactor raid5 "run"
author NeilBrown <neilb@suse.de>
Tue, 31 Mar 2009 03:39:39 +0000 (14:39 +1100)
committer NeilBrown <neilb@suse.de>
Tue, 31 Mar 2009 03:39:39 +0000 (14:39 +1100)
.. so that the code to create the private data structures is separate.
This will help with future code to change the level of an active
array.

Signed-off-by: NeilBrown <neilb@suse.de>
drivers/md/raid5.c
drivers/md/raid5.h

index edbc80c..d019a85 100644 (file)
@@ -4164,95 +4164,49 @@ static struct attribute_group raid5_attrs_group = {
        .attrs = raid5_attrs,
 };
 
-static int run(mddev_t *mddev)
+static raid5_conf_t *setup_conf(mddev_t *mddev)
 {
        raid5_conf_t *conf;
        int raid_disk, memory;
        mdk_rdev_t *rdev;
        struct disk_info *disk;
-       int working_disks = 0;
 
-       if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
+       if (mddev->new_level != 5
+           && mddev->new_level != 4
+           && mddev->new_level != 6) {
                printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n",
-                      mdname(mddev), mddev->level);
-               return -EIO;
+                      mdname(mddev), mddev->new_level);
+               return ERR_PTR(-EIO);
        }
-       if ((mddev->level == 5 && !algorithm_valid_raid5(mddev->layout)) ||
-           (mddev->level == 6 && !algorithm_valid_raid6(mddev->layout))) {
+       if ((mddev->new_level == 5
+            && !algorithm_valid_raid5(mddev->new_layout)) ||
+           (mddev->new_level == 6
+            && !algorithm_valid_raid6(mddev->new_layout))) {
                printk(KERN_ERR "raid5: %s: layout %d not supported\n",
-                      mdname(mddev), mddev->layout);
-               return -EIO;
+                      mdname(mddev), mddev->new_layout);
+               return ERR_PTR(-EIO);
        }
-
-       if (mddev->chunk_size < PAGE_SIZE) {
-               printk(KERN_ERR "md/raid5: chunk_size must be at least "
-                      "PAGE_SIZE but %d < %ld\n",
-                      mddev->chunk_size, PAGE_SIZE);
-               return -EINVAL;
+       if (mddev->new_level == 6 && mddev->raid_disks < 4) {
+               printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
+                      mdname(mddev), mddev->raid_disks);
+               return ERR_PTR(-EINVAL);
        }
 
-       if (mddev->reshape_position != MaxSector) {
-               /* Check that we can continue the reshape.
-                * Currently only disks can change, it must
-                * increase, and we must be past the point where
-                * a stripe over-writes itself
-                */
-               sector_t here_new, here_old;
-               int old_disks;
-               int max_degraded = (mddev->level == 5 ? 1 : 2);
-
-               if (mddev->new_level != mddev->level ||
-                   mddev->new_layout != mddev->layout ||
-                   mddev->new_chunk != mddev->chunk_size) {
-                       printk(KERN_ERR "raid5: %s: unsupported reshape "
-                              "required - aborting.\n",
-                              mdname(mddev));
-                       return -EINVAL;
-               }
-               if (mddev->delta_disks <= 0) {
-                       printk(KERN_ERR "raid5: %s: unsupported reshape "
-                              "(reduce disks) required - aborting.\n",
-                              mdname(mddev));
-                       return -EINVAL;
-               }
-               old_disks = mddev->raid_disks - mddev->delta_disks;
-               /* reshape_position must be on a new-stripe boundary, and one
-                * further up in new geometry must map after here in old
-                * geometry.
-                */
-               here_new = mddev->reshape_position;
-               if (sector_div(here_new, (mddev->chunk_size>>9)*
-                              (mddev->raid_disks - max_degraded))) {
-                       printk(KERN_ERR "raid5: reshape_position not "
-                              "on a stripe boundary\n");
-                       return -EINVAL;
-               }
-               /* here_new is the stripe we will write to */
-               here_old = mddev->reshape_position;
-               sector_div(here_old, (mddev->chunk_size>>9)*
-                          (old_disks-max_degraded));
-               /* here_old is the first stripe that we might need to read
-                * from */
-               if (here_new >= here_old) {
-                       /* Reading from the same stripe as writing to - bad */
-                       printk(KERN_ERR "raid5: reshape_position too early for "
-                              "auto-recovery - aborting.\n");
-                       return -EINVAL;
-               }
-               printk(KERN_INFO "raid5: reshape will continue\n");
-               /* OK, we should be able to continue; */
+       if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
+               printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
+                       mddev->new_chunk, mdname(mddev));
+               return ERR_PTR(-EINVAL);
        }
 
-
-       mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL);
-       if ((conf = mddev->private) == NULL)
+       conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL);
+       if (conf == NULL)
                goto abort;
-       if (mddev->reshape_position == MaxSector) {
-               conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks;
-       } else {
-               conf->raid_disks = mddev->raid_disks;
+
+       conf->raid_disks = mddev->raid_disks;
+       if (mddev->reshape_position == MaxSector)
+               conf->previous_raid_disks = mddev->raid_disks;
+       else
                conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
-       }
 
        conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
                              GFP_KERNEL);
@@ -4264,13 +4218,12 @@ static int run(mddev_t *mddev)
        if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
                goto abort;
 
-       if (mddev->level == 6) {
+       if (mddev->new_level == 6) {
                conf->spare_page = alloc_page(GFP_KERNEL);
                if (!conf->spare_page)
                        goto abort;
        }
        spin_lock_init(&conf->device_lock);
-       mddev->queue->queue_lock = &conf->device_lock;
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
        INIT_LIST_HEAD(&conf->handle_list);
@@ -4299,41 +4252,136 @@ static int run(mddev_t *mddev)
                        printk(KERN_INFO "raid5: device %s operational as raid"
                                " disk %d\n", bdevname(rdev->bdev,b),
                                raid_disk);
-                       working_disks++;
                } else
                        /* Cannot rely on bitmap to complete recovery */
                        conf->fullsync = 1;
        }
 
-       /*
-        * 0 for a fully functional array, 1 or 2 for a degraded array.
-        */
-       mddev->degraded = conf->raid_disks - working_disks;
-       conf->mddev = mddev;
-       conf->chunk_size = mddev->chunk_size;
-       conf->level = mddev->level;
+       conf->chunk_size = mddev->new_chunk;
+       conf->level = mddev->new_level;
        if (conf->level == 6)
                conf->max_degraded = 2;
        else
                conf->max_degraded = 1;
-       conf->algorithm = mddev->layout;
+       conf->algorithm = mddev->new_layout;
        conf->max_nr_stripes = NR_STRIPES;
        conf->expand_progress = mddev->reshape_position;
 
-       /* device size must be a multiple of chunk size */
-       mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
-       mddev->resync_max_sectors = mddev->dev_sectors;
+       memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
+                conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
+       if (grow_stripes(conf, conf->max_nr_stripes)) {
+               printk(KERN_ERR
+                       "raid5: couldn't allocate %dkB for buffers\n", memory);
+               goto abort;
+       } else
+               printk(KERN_INFO "raid5: allocated %dkB for %s\n",
+                       memory, mdname(mddev));
 
-       if (conf->level == 6 && conf->raid_disks < 4) {
-               printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
-                      mdname(mddev), conf->raid_disks);
+       conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+       if (!conf->thread) {
+               printk(KERN_ERR
+                      "raid5: couldn't allocate thread for %s\n",
+                      mdname(mddev));
                goto abort;
        }
-       if (!conf->chunk_size || conf->chunk_size % 4) {
-               printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
-                       conf->chunk_size, mdname(mddev));
-               goto abort;
+
+       return conf;
+
+ abort:
+       if (conf) {
+               shrink_stripes(conf);
+               safe_put_page(conf->spare_page);
+               kfree(conf->disks);
+               kfree(conf->stripe_hashtbl);
+               kfree(conf);
+               return ERR_PTR(-EIO);
+       } else
+               return ERR_PTR(-ENOMEM);
+}
+
+static int run(mddev_t *mddev)
+{
+       raid5_conf_t *conf;
+       int working_disks = 0;
+       mdk_rdev_t *rdev;
+
+       if (mddev->reshape_position != MaxSector) {
+               /* Check that we can continue the reshape.
+                * Currently only disks can change, it must
+                * increase, and we must be past the point where
+                * a stripe over-writes itself
+                */
+               sector_t here_new, here_old;
+               int old_disks;
+               int max_degraded = (mddev->level == 5 ? 1 : 2);
+
+               if (mddev->new_level != mddev->level ||
+                   mddev->new_layout != mddev->layout ||
+                   mddev->new_chunk != mddev->chunk_size) {
+                       printk(KERN_ERR "raid5: %s: unsupported reshape "
+                              "required - aborting.\n",
+                              mdname(mddev));
+                       return -EINVAL;
+               }
+               if (mddev->delta_disks <= 0) {
+                       printk(KERN_ERR "raid5: %s: unsupported reshape "
+                              "(reduce disks) required - aborting.\n",
+                              mdname(mddev));
+                       return -EINVAL;
+               }
+               old_disks = mddev->raid_disks - mddev->delta_disks;
+               /* reshape_position must be on a new-stripe boundary, and one
+                * further up in new geometry must map after here in old
+                * geometry.
+                */
+               here_new = mddev->reshape_position;
+               if (sector_div(here_new, (mddev->chunk_size>>9)*
+                              (mddev->raid_disks - max_degraded))) {
+                       printk(KERN_ERR "raid5: reshape_position not "
+                              "on a stripe boundary\n");
+                       return -EINVAL;
+               }
+               /* here_new is the stripe we will write to */
+               here_old = mddev->reshape_position;
+               sector_div(here_old, (mddev->chunk_size>>9)*
+                          (old_disks-max_degraded));
+               /* here_old is the first stripe that we might need to read
+                * from */
+               if (here_new >= here_old) {
+                       /* Reading from the same stripe as writing to - bad */
+                       printk(KERN_ERR "raid5: reshape_position too early for "
+                              "auto-recovery - aborting.\n");
+                       return -EINVAL;
+               }
+               printk(KERN_INFO "raid5: reshape will continue\n");
+               /* OK, we should be able to continue; */
+       } else {
+               BUG_ON(mddev->level != mddev->new_level);
+               BUG_ON(mddev->layout != mddev->new_layout);
+               BUG_ON(mddev->chunk_size != mddev->new_chunk);
+               BUG_ON(mddev->delta_disks != 0);
        }
+       conf = setup_conf(mddev);
+
+       if (conf == NULL)
+               return -EIO;
+       if (IS_ERR(conf))
+               return PTR_ERR(conf);
+
+       mddev->thread = conf->thread;
+       conf->thread = NULL;
+       mddev->private = conf;
+
+       /*
+        * 0 for a fully functional array, 1 or 2 for a degraded array.
+        */
+       list_for_each_entry(rdev, &mddev->disks, same_set)
+               if (rdev->raid_disk >= 0 &&
+                   test_bit(In_sync, &rdev->flags))
+                       working_disks++;
+
+       mddev->degraded = conf->raid_disks - working_disks;
+
        if (mddev->degraded > conf->max_degraded) {
                printk(KERN_ERR "raid5: not enough operational devices for %s"
                        " (%d/%d failed)\n",
@@ -4341,6 +4389,10 @@ static int run(mddev_t *mddev)
                goto abort;
        }
 
+       /* device size must be a multiple of chunk size */
+       mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
+       mddev->resync_max_sectors = mddev->dev_sectors;
+
        if (mddev->degraded > 0 &&
            mddev->recovery_cp != MaxSector) {
                if (mddev->ok_start_degraded)
@@ -4356,27 +4408,6 @@ static int run(mddev_t *mddev)
                }
        }
 
-       {
-               mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
-               if (!mddev->thread) {
-                       printk(KERN_ERR 
-                               "raid5: couldn't allocate thread for %s\n",
-                               mdname(mddev));
-                       goto abort;
-               }
-       }
-       memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
-                conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
-       if (grow_stripes(conf, conf->max_nr_stripes)) {
-               printk(KERN_ERR 
-                       "raid5: couldn't allocate %dkB for buffers\n", memory);
-               shrink_stripes(conf);
-               md_unregister_thread(mddev->thread);
-               goto abort;
-       } else
-               printk(KERN_INFO "raid5: allocated %dkB for %s\n",
-                       memory, mdname(mddev));
-
        if (mddev->degraded == 0)
                printk("raid5: raid level %d set %s active with %d out of %d"
                        " devices, algorithm %d\n", conf->level, mdname(mddev), 
@@ -4419,6 +4450,8 @@ static int run(mddev_t *mddev)
                       "raid5: failed to create sysfs attributes for %s\n",
                       mdname(mddev));
 
+       mddev->queue->queue_lock = &conf->device_lock;
+
        mddev->queue->unplug_fn = raid5_unplug_device;
        mddev->queue->backing_dev_info.congested_data = mddev;
        mddev->queue->backing_dev_info.congested_fn = raid5_congested;
@@ -4430,7 +4463,11 @@ static int run(mddev_t *mddev)
 
        return 0;
 abort:
+       if (mddev->thread)
+               md_unregister_thread(mddev->thread);
+       mddev->thread = NULL;
        if (conf) {
+               shrink_stripes(conf);
                print_raid5_conf(conf);
                safe_put_page(conf->spare_page);
                kfree(conf->disks);
index 84456b1..c172371 100644 (file)
@@ -386,6 +386,11 @@ struct raid5_private_data {
        int                     pool_size; /* number of disks in stripeheads in pool */
        spinlock_t              device_lock;
        struct disk_info        *disks;
+
+       /* When taking over an array from a different personality, we store
+        * the new thread here until we fully activate the array.
+        */
+       struct mdk_thread_s     *thread;
 };
 
 typedef struct raid5_private_data raid5_conf_t;