diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8a5e14e..5d400ae 100644
 
 #include <linux/blkdev.h>
 #include <linux/kthread.h>
+#include <linux/raid/pq.h>
 #include <linux/async_tx.h>
 #include <linux/seq_file.h>
 #include "md.h"
 #include "raid5.h"
-#include "raid6.h"
 #include "bitmap.h"
 
 /*
 
 #define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
 
-#if !RAID6_USE_EMPTY_ZERO_PAGE
-/* In .bss so it's zeroed */
-const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
-#endif
-
 /*
  * We maintain a biased count of active stripes in the bottom 16 bits of
  * bi_phys_segments, and a count of processed stripes in the upper 16 bits
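
A quick aside on the packing above: two 16-bit counters share one 32-bit
field. A minimal sketch with made-up helper names (the real accessors in
raid5.c differ) of how such a split is read back:

    /* Sketch only: helper names are illustrative, not the kernel's. */
    static inline unsigned short stripe_active_count(unsigned int seg)
    {
            return seg & 0xffff;            /* biased active count, low 16 bits */
    }

    static inline unsigned short stripe_processed_count(unsigned int seg)
    {
            return (seg >> 16) & 0xffff;    /* processed count, high 16 bits */
    }
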
@@ -304,7 +299,7 @@ static int grow_buffers(struct stripe_head *sh, int num)
        return 0;
 }
 
-static void raid5_build_block(struct stripe_head *sh, int i);
+static void raid5_build_block(struct stripe_head *sh, int i, int previous);
 static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
                            struct stripe_head *sh);
 
@@ -323,6 +318,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
 
        remove_hash(sh);
 
+       sh->generation = conf->generation - previous;
        sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
        sh->sector = sector;
        stripe_set_idx(sector, conf, previous, sh);
@@ -341,12 +337,13 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
                        BUG();
                }
                dev->flags = 0;
-               raid5_build_block(sh, i);
+               raid5_build_block(sh, i, previous);
        }
        insert_hash(conf, sh);
 }
 
-static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, int disks)
+static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector,
+                                        short generation)
 {
        struct stripe_head *sh;
        struct hlist_node *hn;
@@ -354,7 +351,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in
        CHECK_DEVLOCK();
        pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
        hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
-               if (sh->sector == sector && sh->disks == disks)
+               if (sh->sector == sector && sh->generation == generation)
                        return sh;
        pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
        return NULL;
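
In other words, lookup now matches a (sector, generation) pair instead of
(sector, disks). The assumption, confirmed by raid5_start_reshape() further
down, is that conf->generation is bumped when a reshape begins, so a stripe
laid out in the old geometry carries generation - 1. A hedged sketch of the
caller's side:

    /* Sketch: previous is 0 for the current geometry, 1 for the
     * pre-reshape geometry; both kinds can coexist in the hash table.
     */
    short wanted = conf->generation - previous;
    /* __find_stripe(conf, sector, wanted) returns only a stripe whose
     * sh->generation matches, i.e. one built for that geometry.
     */
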
@@ -368,7 +365,6 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
                  int previous, int noblock)
 {
        struct stripe_head *sh;
-       int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
 
        pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
@@ -378,7 +374,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
                wait_event_lock_irq(conf->wait_for_stripe,
                                    conf->quiesce == 0,
                                    conf->device_lock, /* nothing */);
-               sh = __find_stripe(conf, sector, disks);
+               sh = __find_stripe(conf, sector, conf->generation - previous);
                if (!sh) {
                        if (!conf->inactive_blocked)
                                sh = get_free_stripe(conf);
@@ -399,7 +395,8 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
                                init_stripe(sh, sector, previous);
                } else {
                        if (atomic_read(&sh->count)) {
-                         BUG_ON(!list_empty(&sh->lru));
+                               BUG_ON(!list_empty(&sh->lru)
+                                   && !test_bit(STRIPE_EXPANDING, &sh->state));
                        } else {
                                if (!test_bit(STRIPE_HANDLE, &sh->state))
                                        atomic_inc(&conf->active_stripes);
@@ -951,7 +948,6 @@ static int grow_stripes(raid5_conf_t *conf, int num)
        return 0;
 }
 
-#ifdef CONFIG_MD_RAID5_RESHAPE
 static int resize_stripes(raid5_conf_t *conf, int newsize)
 {
        /* Make all the stripes able to hold 'newsize' devices.
@@ -1076,7 +1072,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
        conf->pool_size = newsize;
        return err;
 }
-#endif
 
 static int drop_one_stripe(raid5_conf_t *conf)
 {
@@ -1216,9 +1211,9 @@ static void raid5_end_write_request(struct bio *bi, int error)
 }
 
 
-static sector_t compute_blocknr(struct stripe_head *sh, int i);
+static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
        
-static void raid5_build_block(struct stripe_head *sh, int i)
+static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
        struct r5dev *dev = &sh->dev[i];
 
@@ -1234,7 +1229,7 @@ static void raid5_build_block(struct stripe_head *sh, int i)
        dev->req.bi_private = sh;
 
        dev->flags = 0;
-       dev->sector = compute_blocknr(sh, i);
+       dev->sector = compute_blocknr(sh, i, previous);
 }
 
 static void error(mddev_t *mddev, mdk_rdev_t *rdev)
@@ -1277,7 +1272,10 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
        int pd_idx, qd_idx;
        int ddf_layout = 0;
        sector_t new_sector;
-       int sectors_per_chunk = conf->chunk_size >> 9;
+       int algorithm = previous ? conf->prev_algo
+                                : conf->algorithm;
+       int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
+                                        : (conf->chunk_size >> 9);
        int raid_disks = previous ? conf->previous_raid_disks
                                  : conf->raid_disks;
        int data_disks = raid_disks - conf->max_degraded;
@@ -1310,7 +1308,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
                pd_idx = data_disks;
                break;
        case 5:
-               switch (conf->algorithm) {
+               switch (algorithm) {
                case ALGORITHM_LEFT_ASYMMETRIC:
                        pd_idx = data_disks - stripe % raid_disks;
                        if (*dd_idx >= pd_idx)
@@ -1338,13 +1336,13 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
                        break;
                default:
                        printk(KERN_ERR "raid5: unsupported algorithm %d\n",
-                               conf->algorithm);
+                               algorithm);
                        BUG();
                }
                break;
        case 6:
 
-               switch (conf->algorithm) {
+               switch (algorithm) {
                case ALGORITHM_LEFT_ASYMMETRIC:
                        pd_idx = raid_disks - 1 - (stripe % raid_disks);
                        qd_idx = pd_idx + 1;
@@ -1457,7 +1455,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 
                default:
                        printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
-                              conf->algorithm);
+                              algorithm);
                        BUG();
                }
                break;
@@ -1476,13 +1474,16 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 }
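
A worked example may make the rotation concrete. This standalone sketch
(plain userspace C, not kernel code) prints the parity disk chosen by
ALGORITHM_LEFT_ASYMMETRIC for a hypothetical 5-disk RAID-5, using the same
formula as the case above:

    #include <stdio.h>

    int main(void)
    {
            int raid_disks = 5, data_disks = 4;     /* RAID-5: one parity disk */
            unsigned long stripe;

            /* pd_idx = data_disks - stripe % raid_disks */
            for (stripe = 0; stripe < 5; stripe++)
                    printf("stripe %lu -> parity on disk %lu\n",
                           stripe, data_disks - stripe % raid_disks);
            return 0;       /* prints 4, 3, 2, 1, 0: parity rotates leftwards */
    }
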
 
 
-static sector_t compute_blocknr(struct stripe_head *sh, int i)
+static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
 {
        raid5_conf_t *conf = sh->raid_conf;
        int raid_disks = sh->disks;
        int data_disks = raid_disks - conf->max_degraded;
        sector_t new_sector = sh->sector, check;
-       int sectors_per_chunk = conf->chunk_size >> 9;
+       int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
+                                        : (conf->chunk_size >> 9);
+       int algorithm = previous ? conf->prev_algo
+                                : conf->algorithm;
        sector_t stripe;
        int chunk_offset;
        int chunk_number, dummy1, dd_idx = i;
@@ -1499,7 +1500,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
        switch(conf->level) {
        case 4: break;
        case 5:
-               switch (conf->algorithm) {
+               switch (algorithm) {
                case ALGORITHM_LEFT_ASYMMETRIC:
                case ALGORITHM_RIGHT_ASYMMETRIC:
                        if (i > sh->pd_idx)
@@ -1518,14 +1519,14 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
                        break;
                default:
                        printk(KERN_ERR "raid5: unsupported algorithm %d\n",
-                              conf->algorithm);
+                              algorithm);
                        BUG();
                }
                break;
        case 6:
                if (i == sh->qd_idx)
                        return 0; /* It is the Q disk */
-               switch (conf->algorithm) {
+               switch (algorithm) {
                case ALGORITHM_LEFT_ASYMMETRIC:
                case ALGORITHM_RIGHT_ASYMMETRIC:
                case ALGORITHM_ROTATING_ZERO_RESTART:
@@ -1573,7 +1574,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
                        break;
                default:
                        printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
-                              conf->algorithm);
+                              algorithm);
                        BUG();
                }
                break;
@@ -1583,8 +1584,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
        r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset;
 
        check = raid5_compute_sector(conf, r_sector,
-                                    (raid_disks != conf->raid_disks),
-                                    &dummy1, &sh2);
+                                    previous, &dummy1, &sh2);
        if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx
                || sh2.qd_idx != sh->qd_idx) {
                printk(KERN_ERR "compute_blocknr: map not correct\n");
@@ -1996,7 +1996,9 @@ static int page_is_zero(struct page *p)
 static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
                            struct stripe_head *sh)
 {
-       int sectors_per_chunk = conf->chunk_size >> 9;
+       int sectors_per_chunk =
+               previous ? (conf->prev_chunk >> 9)
+                        : (conf->chunk_size >> 9);
        int dd_idx;
        int chunk_offset = sector_div(stripe, sectors_per_chunk);
        int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
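
Note the idiom here: sector_div() divides a sector_t in place and returns
the remainder, which is how one call yields chunk_offset while leaving the
stripe number behind. A small sketch of that contract (values invented):

    sector_t stripe = 1000;
    u32 chunk_offset = sector_div(stripe, 8);
    /* now stripe == 125 (the quotient), chunk_offset == 0 (the remainder) */
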
@@ -2376,7 +2378,7 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf,
                struct r6_state *r6s, int disks)
 {
        int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
-       int qd_idx = r6s->qd_idx;
+       int qd_idx = sh->qd_idx;
        for (i = disks; i--; ) {
                struct r5dev *dev = &sh->dev[i];
                /* Would I have to read this buffer for reconstruct_write */
@@ -2566,7 +2568,7 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
        int update_p = 0, update_q = 0;
        struct r5dev *dev;
        int pd_idx = sh->pd_idx;
-       int qd_idx = r6s->qd_idx;
+       int qd_idx = sh->qd_idx;
 
        set_bit(STRIPE_HANDLE, &sh->state);
 
@@ -2662,11 +2664,11 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
        struct dma_async_tx_descriptor *tx = NULL;
        clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
        for (i = 0; i < sh->disks; i++)
-               if (i != sh->pd_idx && (!r6s || i != r6s->qd_idx)) {
+               if (i != sh->pd_idx && i != sh->qd_idx) {
                        int dd_idx, j;
                        struct stripe_head *sh2;
 
-                       sector_t bn = compute_blocknr(sh, i);
+                       sector_t bn = compute_blocknr(sh, i, 1);
                        sector_t s = raid5_compute_sector(conf, bn, 0,
                                                          &dd_idx, NULL);
                        sh2 = get_active_stripe(conf, s, 0, 1);
@@ -2941,6 +2943,23 @@ static bool handle_stripe5(struct stripe_head *sh)
 
        /* Finish reconstruct operations initiated by the expansion process */
        if (sh->reconstruct_state == reconstruct_state_result) {
+               struct stripe_head *sh2
+                       = get_active_stripe(conf, sh->sector, 1, 1);
+               if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
+                       /* sh cannot be written until sh2 has been read.
+                        * so arrange for sh to be delayed a little
+                        */
+                       set_bit(STRIPE_DELAYED, &sh->state);
+                       set_bit(STRIPE_HANDLE, &sh->state);
+                       if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
+                                             &sh2->state))
+                               atomic_inc(&conf->preread_active_stripes);
+                       release_stripe(sh2);
+                       goto unlock;
+               }
+               if (sh2)
+                       release_stripe(sh2);
+
                sh->reconstruct_state = reconstruct_state_idle;
                clear_bit(STRIPE_EXPANDING, &sh->state);
                for (i = conf->raid_disks; i--; ) {
@@ -2989,17 +3008,16 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        raid5_conf_t *conf = sh->raid_conf;
        int disks = sh->disks;
        struct bio *return_bi = NULL;
-       int i, pd_idx = sh->pd_idx;
+       int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx;
        struct stripe_head_state s;
        struct r6_state r6s;
        struct r5dev *dev, *pdev, *qdev;
        mdk_rdev_t *blocked_rdev = NULL;
 
-       r6s.qd_idx = sh->qd_idx;
        pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
                "pd_idx=%d, qd_idx=%d\n",
               (unsigned long long)sh->sector, sh->state,
-              atomic_read(&sh->count), pd_idx, r6s.qd_idx);
+              atomic_read(&sh->count), pd_idx, qd_idx);
        memset(&s, 0, sizeof(s));
 
        spin_lock(&sh->lock);
@@ -3110,9 +3128,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        pdev = &sh->dev[pd_idx];
        r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
                || (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
-       qdev = &sh->dev[r6s.qd_idx];
-       r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx)
-               || (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx);
+       qdev = &sh->dev[qd_idx];
+       r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx)
+               || (s.failed >= 2 && r6s.failed_num[1] == qd_idx);
 
        if ( s.written &&
             ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
@@ -3170,6 +3188,23 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                }
 
        if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+               struct stripe_head *sh2
+                       = get_active_stripe(conf, sh->sector, 1, 1);
+               if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
+                       /* sh cannot be written until sh2 has been read.
+                        * so arrange for sh to be delayed a little
+                        */
+                       set_bit(STRIPE_DELAYED, &sh->state);
+                       set_bit(STRIPE_HANDLE, &sh->state);
+                       if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
+                                             &sh2->state))
+                               atomic_inc(&conf->preread_active_stripes);
+                       release_stripe(sh2);
+                       goto unlock;
+               }
+               if (sh2)
+                       release_stripe(sh2);
+
                /* Need to write out all blocks after computing P&Q */
                sh->disks = conf->raid_disks;
                stripe_set_idx(sh->sector, conf, 0, sh);
@@ -3323,6 +3358,8 @@ static int raid5_mergeable_bvec(struct request_queue *q,
        if ((bvm->bi_rw & 1) == WRITE)
                return biovec->bv_len; /* always allow writes to be mergeable */
 
+       if (mddev->new_chunk < mddev->chunk_size)
+               chunk_sectors = mddev->new_chunk >> 9;
        max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
        if (max < 0) max = 0;
        if (max <= biovec->bv_len && bio_sectors == 0)
@@ -3338,6 +3375,8 @@ static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
        unsigned int chunk_sectors = mddev->chunk_size >> 9;
        unsigned int bio_sectors = bio->bi_size >> 9;
 
+       if (mddev->new_chunk < mddev->chunk_size)
+               chunk_sectors = mddev->new_chunk >> 9;
        return  chunk_sectors >=
                ((sector & (chunk_sectors - 1)) + bio_sectors);
 }
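
Both helpers rely on chunk_sectors being a power of two, so that
sector & (chunk_sectors - 1) is the offset inside the chunk. A
self-contained sketch of the same test:

    /* Does a request of bio_sectors starting at sector stay inside one
     * chunk?  Requires chunk_sectors to be a power of two.
     */
    static int fits_in_one_chunk(unsigned long long sector,
                                 unsigned int chunk_sectors,
                                 unsigned int bio_sectors)
    {
            unsigned int offset = sector & (chunk_sectors - 1);
            return chunk_sectors >= offset + bio_sectors;
    }
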
@@ -3598,25 +3637,27 @@ static int make_request(struct request_queue *q, struct bio * bi)
 
        retry:
                previous = 0;
+               disks = conf->raid_disks;
                prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
-               if (likely(conf->expand_progress == MaxSector))
-                       disks = conf->raid_disks;
-               else {
-                       /* spinlock is needed as expand_progress may be
+               if (unlikely(conf->reshape_progress != MaxSector)) {
+                       /* spinlock is needed as reshape_progress may be
                         * 64bit on a 32bit platform, and so it might be
                         * possible to see a half-updated value
-                        * Ofcourse expand_progress could change after
+                        * Of course reshape_progress could change after
                         * the lock is dropped, so once we get a reference
                         * to the stripe that we think it is, we will have
                         * to check again.
                         */
                        spin_lock_irq(&conf->device_lock);
-                       disks = conf->raid_disks;
-                       if (logical_sector >= conf->expand_progress) {
+                       if (mddev->delta_disks < 0
+                           ? logical_sector < conf->reshape_progress
+                           : logical_sector >= conf->reshape_progress) {
                                disks = conf->previous_raid_disks;
                                previous = 1;
                        } else {
-                               if (logical_sector >= conf->expand_lo) {
+                               if (mddev->delta_disks < 0
+                                   ? logical_sector < conf->reshape_safe
+                                   : logical_sector >= conf->reshape_safe) {
                                        spin_unlock_irq(&conf->device_lock);
                                        schedule();
                                        goto retry;
@@ -3636,7 +3677,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
                sh = get_active_stripe(conf, new_sector, previous,
                                       (bi->bi_rw&RWA_MASK));
                if (sh) {
-                       if (unlikely(conf->expand_progress != MaxSector)) {
+                       if (unlikely(previous)) {
                                /* expansion might have moved on while waiting for a
                                 * stripe, so we must do the range check again.
                                 * Expansion could still move past after this
@@ -3647,8 +3688,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
                                 */
                                int must_retry = 0;
                                spin_lock_irq(&conf->device_lock);
-                               if (logical_sector <  conf->expand_progress &&
-                                   disks == conf->previous_raid_disks)
+                               if (mddev->delta_disks < 0
+                                   ? logical_sector >= conf->reshape_progress
+                                   : logical_sector < conf->reshape_progress)
                                        /* mismatch, need to try again */
                                        must_retry = 1;
                                spin_unlock_irq(&conf->device_lock);
@@ -3703,6 +3745,8 @@ static int make_request(struct request_queue *q, struct bio * bi)
        return 0;
 }
 
+static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks);
+
 static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped)
 {
        /* reshaping is quite different to recovery/resync so it is
@@ -3722,52 +3766,114 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
        int new_data_disks = conf->raid_disks - conf->max_degraded;
        int i;
        int dd_idx;
-       sector_t writepos, safepos, gap;
-
-       if (sector_nr == 0 &&
-           conf->expand_progress != 0) {
-               /* restarting in the middle, skip the initial sectors */
-               sector_nr = conf->expand_progress;
+       sector_t writepos, readpos, safepos;
+       sector_t stripe_addr;
+       int reshape_sectors;
+       struct list_head stripes;
+
+       if (sector_nr == 0) {
+               /* If restarting in the middle, skip the initial sectors */
+               if (mddev->delta_disks < 0 &&
+                   conf->reshape_progress < raid5_size(mddev, 0, 0)) {
+                       sector_nr = raid5_size(mddev, 0, 0)
+                               - conf->reshape_progress;
+               } else if (mddev->delta_disks > 0 &&
+                          conf->reshape_progress > 0)
+                       sector_nr = conf->reshape_progress;
                sector_div(sector_nr, new_data_disks);
-               *skipped = 1;
-               return sector_nr;
+               if (sector_nr) {
+                       *skipped = 1;
+                       return sector_nr;
+               }
        }
 
+       /* We need to process a full chunk at a time.
+        * If old and new chunk sizes differ, we need to process the
+        * largest of these
+        */
+       if (mddev->new_chunk > mddev->chunk_size)
+               reshape_sectors = mddev->new_chunk / 512;
+       else
+               reshape_sectors = mddev->chunk_size / 512;
+
        /* we update the metadata when there is more than 3Meg
         * in the block range (that is rather arbitrary, should
         * probably be time based) or when the data about to be
         * copied would over-write the source of the data at
         * the front of the range.
-        * i.e. one new_stripe forward from expand_progress new_maps
-        * to after where expand_lo old_maps to
+        * i.e. one new_stripe along from reshape_progress new_maps
+        * to after where reshape_safe old_maps to
         */
-       writepos = conf->expand_progress +
-               conf->chunk_size/512*(new_data_disks);
+       writepos = conf->reshape_progress;
        sector_div(writepos, new_data_disks);
-       safepos = conf->expand_lo;
+       readpos = conf->reshape_progress;
+       sector_div(readpos, data_disks);
+       safepos = conf->reshape_safe;
        sector_div(safepos, data_disks);
-       gap = conf->expand_progress - conf->expand_lo;
+       if (mddev->delta_disks < 0) {
+               writepos -= min_t(sector_t, reshape_sectors, writepos);
+               readpos += reshape_sectors;
+               safepos += reshape_sectors;
+       } else {
+               writepos += reshape_sectors;
+               readpos -= min_t(sector_t, reshape_sectors, readpos);
+               safepos -= min_t(sector_t, reshape_sectors, safepos);
+       }
 
-       if (writepos >= safepos ||
-           gap > (new_data_disks)*3000*2 /*3Meg*/) {
+       /* 'writepos' is the most advanced device address we might write.
+        * 'readpos' is the least advanced device address we might read.
+        * 'safepos' is the least address recorded in the metadata as having
+        *     been reshaped.
+        * If 'readpos' is behind 'writepos', then there is no way that we can
+        * ensure safety in the face of a crash - that must be done by userspace
+        * making a backup of the data.  So in that case there is no particular
+        * rush to update metadata.
+        * Otherwise if 'safepos' is behind 'writepos', then we really need to
+        * update the metadata to advance 'safepos' to match 'readpos' so that
+        * we can be safe in the event of a crash.
+        * So we insist on updating metadata if safepos is behind writepos and
+        * readpos is beyond writepos.
+        * In any case, update the metadata every 10 seconds.
+        * Maybe that number should be configurable, but I'm not sure it is
+        * worth it.... maybe it could be a multiple of safemode_delay???
+        */
+       if ((mddev->delta_disks < 0
+            ? (safepos > writepos && readpos < writepos)
+            : (safepos < writepos && readpos > writepos)) ||
+           time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
                /* Cannot proceed until we've updated the superblock... */
                wait_event(conf->wait_for_overlap,
                           atomic_read(&conf->reshape_stripes)==0);
-               mddev->reshape_position = conf->expand_progress;
+               mddev->reshape_position = conf->reshape_progress;
+               mddev->curr_resync_completed = mddev->curr_resync;
+               conf->reshape_checkpoint = jiffies;
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
                wait_event(mddev->sb_wait, mddev->flags == 0 ||
                           kthread_should_stop());
                spin_lock_irq(&conf->device_lock);
-               conf->expand_lo = mddev->reshape_position;
+               conf->reshape_safe = mddev->reshape_position;
                spin_unlock_irq(&conf->device_lock);
                wake_up(&conf->wait_for_overlap);
+               sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
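
To make the update rule concrete, a hedged numeric sketch for a growing
reshape (all values invented): old data disks = 4, new data disks = 5,
reshape_sectors = 128, reshape_progress = 10240, reshape_safe = 8192:

    unsigned long long writepos = 10240 / 5 + 128;  /* 2176 */
    unsigned long long readpos  = 10240 / 4 - 128;  /* 2432 */
    unsigned long long safepos  =  8192 / 4 - 128;  /* 1920 */
    /* safepos (1920) < writepos (2176) and readpos (2432) > writepos,
     * so the branch above updates the superblock before proceeding.
     */
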
 
-       for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
+       if (mddev->delta_disks < 0) {
+               BUG_ON(conf->reshape_progress == 0);
+               stripe_addr = writepos;
+               BUG_ON((mddev->dev_sectors &
+                       ~((sector_t)reshape_sectors - 1))
+                      - reshape_sectors - stripe_addr
+                      != sector_nr);
+       } else {
+               BUG_ON(writepos != sector_nr + reshape_sectors);
+               stripe_addr = sector_nr;
+       }
+       INIT_LIST_HEAD(&stripes);
+       for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
                int j;
                int skipped = 0;
-               sh = get_active_stripe(conf, sector_nr+i, 0, 0);
+               sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
                set_bit(STRIPE_EXPANDING, &sh->state);
                atomic_inc(&conf->reshape_stripes);
                /* If any of this stripe is beyond the end of the old
@@ -3780,8 +3886,8 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                        if (conf->level == 6 &&
                            j == sh->qd_idx)
                                continue;
-                       s = compute_blocknr(sh, j);
-                       if (s < mddev->array_sectors) {
+                       s = compute_blocknr(sh, j, 0);
+                       if (s < raid5_size(mddev, 0, 0)) {
                                skipped = 1;
                                continue;
                        }
@@ -3793,10 +3899,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                        set_bit(STRIPE_EXPAND_READY, &sh->state);
                        set_bit(STRIPE_HANDLE, &sh->state);
                }
-               release_stripe(sh);
+               list_add(&sh->lru, &stripes);
        }
        spin_lock_irq(&conf->device_lock);
-       conf->expand_progress = (sector_nr + i) * new_data_disks;
+       if (mddev->delta_disks < 0)
+               conf->reshape_progress -= reshape_sectors * new_data_disks;
+       else
+               conf->reshape_progress += reshape_sectors * new_data_disks;
        spin_unlock_irq(&conf->device_lock);
        /* Ok, those stripes are ready. We can start scheduling
         * reads on the source stripes.
@@ -3804,10 +3913,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
         * block on the destination stripes.
         */
        first_sector =
-               raid5_compute_sector(conf, sector_nr*(new_data_disks),
+               raid5_compute_sector(conf, stripe_addr*(new_data_disks),
                                     1, &dd_idx, NULL);
        last_sector =
-               raid5_compute_sector(conf, ((sector_nr+conf->chunk_size/512)
+               raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
                                            *(new_data_disks) - 1),
                                     1, &dd_idx, NULL);
        if (last_sector >= mddev->dev_sectors)
@@ -3819,26 +3928,38 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                release_stripe(sh);
                first_sector += STRIPE_SECTORS;
        }
+       /* Now that the sources are clearly marked, we can release
+        * the destination stripes
+        */
+       while (!list_empty(&stripes)) {
+               sh = list_entry(stripes.next, struct stripe_head, lru);
+               list_del_init(&sh->lru);
+               release_stripe(sh);
+       }
        /* If this takes us to the resync_max point where we have to pause,
         * then we need to write out the superblock.
         */
-       sector_nr += conf->chunk_size>>9;
-       if (sector_nr >= mddev->resync_max) {
+       sector_nr += reshape_sectors;
+       if ((sector_nr - mddev->curr_resync_completed) * 2
+           >= mddev->resync_max - mddev->curr_resync_completed) {
                /* Cannot proceed until we've updated the superblock... */
                wait_event(conf->wait_for_overlap,
                           atomic_read(&conf->reshape_stripes) == 0);
-               mddev->reshape_position = conf->expand_progress;
+               mddev->reshape_position = conf->reshape_progress;
+               mddev->curr_resync_completed = mddev->curr_resync;
+               conf->reshape_checkpoint = jiffies;
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
                wait_event(mddev->sb_wait,
                           !test_bit(MD_CHANGE_DEVS, &mddev->flags)
                           || kthread_should_stop());
                spin_lock_irq(&conf->device_lock);
-               conf->expand_lo = mddev->reshape_position;
+               conf->reshape_safe = mddev->reshape_position;
                spin_unlock_irq(&conf->device_lock);
                wake_up(&conf->wait_for_overlap);
+               sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
-       return conf->chunk_size>>9;
+       return reshape_sectors;
 }
 
 /* FIXME go_faster isn't used */
@@ -3854,6 +3975,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
        if (sector_nr >= max_sector) {
                /* just being told to finish up .. nothing much to do */
                unplug_slaves(mddev);
+
                if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
                        end_reshape(conf);
                        return 0;
@@ -4166,6 +4288,26 @@ static struct attribute_group raid5_attrs_group = {
        .attrs = raid5_attrs,
 };
 
+static sector_t
+raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+       raid5_conf_t *conf = mddev_to_conf(mddev);
+
+       if (!sectors)
+               sectors = mddev->dev_sectors;
+       if (!raid_disks) {
+               /* size is defined by the smallest of previous and new size */
+               if (conf->raid_disks < conf->previous_raid_disks)
+                       raid_disks = conf->raid_disks;
+               else
+                       raid_disks = conf->previous_raid_disks;
+       }
+
+       sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
+       sectors &= ~((sector_t)mddev->new_chunk/512 - 1);
+       return sectors * (raid_disks - conf->max_degraded);
+}
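
As a quick sanity check of the arithmetic (numbers invented, with both the
old and new chunk size assumed to be 64KiB, i.e. 128 sectors): for
dev_sectors = 1000000, 5 disks and max_degraded = 1, raid5_size() works out
as follows:

    unsigned long long sectors = 1000000ULL;
    sectors &= ~(128ULL - 1);                      /* 999936: whole chunks only */
    unsigned long long size = sectors * (5 - 1);   /* 3999744 data sectors */
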
+
 static raid5_conf_t *setup_conf(mddev_t *mddev)
 {
        raid5_conf_t *conf;
@@ -4267,7 +4409,11 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
                conf->max_degraded = 1;
        conf->algorithm = mddev->new_layout;
        conf->max_nr_stripes = NR_STRIPES;
-       conf->expand_progress = mddev->reshape_position;
+       conf->reshape_progress = mddev->reshape_position;
+       if (conf->reshape_progress != MaxSector) {
+               conf->prev_chunk = mddev->chunk_size;
+               conf->prev_algo = mddev->layout;
+       }
 
        memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
                 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
@@ -4315,29 +4461,21 @@ static int run(mddev_t *mddev)
                 */
                sector_t here_new, here_old;
                int old_disks;
-               int max_degraded = (mddev->level == 5 ? 1 : 2);
+               int max_degraded = (mddev->level == 6 ? 2 : 1);
 
-               if (mddev->new_level != mddev->level ||
-                   mddev->new_layout != mddev->layout ||
-                   mddev->new_chunk != mddev->chunk_size) {
+               if (mddev->new_level != mddev->level) {
                        printk(KERN_ERR "raid5: %s: unsupported reshape "
                               "required - aborting.\n",
                               mdname(mddev));
                        return -EINVAL;
                }
-               if (mddev->delta_disks <= 0) {
-                       printk(KERN_ERR "raid5: %s: unsupported reshape "
-                              "(reduce disks) required - aborting.\n",
-                              mdname(mddev));
-                       return -EINVAL;
-               }
                old_disks = mddev->raid_disks - mddev->delta_disks;
                /* reshape_position must be on a new-stripe boundary, and one
                 * further up in new geometry must map after here in old
                 * geometry.
                 */
                here_new = mddev->reshape_position;
-               if (sector_div(here_new, (mddev->chunk_size>>9)*
+               if (sector_div(here_new, (mddev->new_chunk>>9)*
                               (mddev->raid_disks - max_degraded))) {
                        printk(KERN_ERR "raid5: reshape_position not "
                               "on a stripe boundary\n");
@@ -4414,20 +4552,20 @@ static int run(mddev_t *mddev)
 
        if (mddev->degraded == 0)
                printk("raid5: raid level %d set %s active with %d out of %d"
-                       " devices, algorithm %d\n", conf->level, mdname(mddev), 
-                       mddev->raid_disks-mddev->degraded, mddev->raid_disks,
-                       conf->algorithm);
+                      " devices, algorithm %d\n", conf->level, mdname(mddev),
+                      mddev->raid_disks-mddev->degraded, mddev->raid_disks,
+                      mddev->new_layout);
        else
                printk(KERN_ALERT "raid5: raid level %d set %s active with %d"
                        " out of %d devices, algorithm %d\n", conf->level,
                        mdname(mddev), mddev->raid_disks - mddev->degraded,
-                       mddev->raid_disks, conf->algorithm);
+                       mddev->raid_disks, mddev->new_layout);
 
        print_raid5_conf(conf);
 
-       if (conf->expand_progress != MaxSector) {
+       if (conf->reshape_progress != MaxSector) {
                printk("...ok start reshape thread\n");
-               conf->expand_lo = conf->expand_progress;
+               conf->reshape_safe = conf->reshape_progress;
                atomic_set(&conf->reshape_stripes, 0);
                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
@@ -4460,8 +4598,7 @@ static int run(mddev_t *mddev)
        mddev->queue->backing_dev_info.congested_data = mddev;
        mddev->queue->backing_dev_info.congested_fn = raid5_congested;
 
-       mddev->array_sectors = mddev->dev_sectors *
-               (conf->previous_raid_disks - conf->max_degraded);
+       md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 
        blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
 
@@ -4608,6 +4745,10 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
        print_raid5_conf(conf);
        rdev = p->rdev;
        if (rdev) {
+               if (number >= conf->raid_disks &&
+                   conf->reshape_progress == MaxSector)
+                       clear_bit(In_sync, &rdev->flags);
+
                if (test_bit(In_sync, &rdev->flags) ||
                    atomic_read(&rdev->nr_pending)) {
                        err = -EBUSY;
@@ -4617,7 +4758,8 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
                 * isn't possible.
                 */
                if (!test_bit(Faulty, &rdev->flags) &&
-                   mddev->degraded <= conf->max_degraded) {
+                   mddev->degraded <= conf->max_degraded &&
+                   number < conf->raid_disks) {
                        err = -EBUSY;
                        goto abort;
                }
@@ -4684,11 +4826,12 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
         * any io in the removed space completes, but it hardly seems
         * worth it.
         */
-       raid5_conf_t *conf = mddev_to_conf(mddev);
-
        sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
-       mddev->array_sectors = sectors * (mddev->raid_disks
-                                         - conf->max_degraded);
+       md_set_array_sectors(mddev, raid5_size(mddev, sectors,
+                                              mddev->raid_disks));
+       if (mddev->array_sectors >
+           raid5_size(mddev, sectors, mddev->raid_disks))
+               return -EINVAL;
        set_capacity(mddev->gendisk, mddev->array_sectors);
        mddev->changed = 1;
        if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
@@ -4700,20 +4843,31 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
        return 0;
 }
 
-#ifdef CONFIG_MD_RAID5_RESHAPE
 static int raid5_check_reshape(mddev_t *mddev)
 {
        raid5_conf_t *conf = mddev_to_conf(mddev);
-       int err;
 
-       if (mddev->delta_disks < 0 ||
-           mddev->new_level != mddev->level)
-               return -EINVAL; /* Cannot shrink array or change level yet */
-       if (mddev->delta_disks == 0)
-               return 0; /* nothing to do */
+       if (mddev->delta_disks == 0 &&
+           mddev->new_layout == mddev->layout &&
+           mddev->new_chunk == mddev->chunk_size)
+               return -EINVAL; /* nothing to do */
        if (mddev->bitmap)
                /* Cannot grow a bitmap yet */
                return -EBUSY;
+       if (mddev->degraded > conf->max_degraded)
+               return -EINVAL;
+       if (mddev->delta_disks < 0) {
+               /* We might be able to shrink, but the devices must
+                * be made bigger first.
+                * For raid6, 4 is the minimum size.
+                * Otherwise 2 is the minimum.
+                */
+               int min = 2;
+               if (mddev->level == 6)
+                       min = 4;
+               if (mddev->raid_disks + mddev->delta_disks < min)
+                       return -EINVAL;
+       }
 
        /* Can only proceed if there are plenty of stripe_heads.
         * We need a minimum of one full stripe, and for sensible progress
@@ -4726,18 +4880,12 @@ static int raid5_check_reshape(mddev_t *mddev)
        if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
            (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
                printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
-                      (mddev->chunk_size / STRIPE_SIZE)*4);
+                      (max(mddev->chunk_size, mddev->new_chunk)
+                       / STRIPE_SIZE)*4);
                return -ENOSPC;
        }
 
-       err = resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
-       if (err)
-               return err;
-
-       if (mddev->degraded > conf->max_degraded)
-               return -EINVAL;
-       /* looks like we might be able to manage this */
-       return 0;
+       return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
 }
 
 static int raid5_start_reshape(mddev_t *mddev)
@@ -4762,12 +4910,31 @@ static int raid5_start_reshape(mddev_t *mddev)
                 */
                return -EINVAL;
 
+       /* Refuse to reduce size of the array.  Any reductions in
+        * array size must be through explicit setting of array_size
+        * attribute.
+        */
+       if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks)
+           < mddev->array_sectors) {
+               printk(KERN_ERR "md: %s: array size must be reduced "
+                      "before number of disks\n", mdname(mddev));
+               return -EINVAL;
+       }
+
        atomic_set(&conf->reshape_stripes, 0);
        spin_lock_irq(&conf->device_lock);
        conf->previous_raid_disks = conf->raid_disks;
        conf->raid_disks += mddev->delta_disks;
-       conf->expand_progress = 0;
-       conf->expand_lo = 0;
+       conf->prev_chunk = conf->chunk_size;
+       conf->chunk_size = mddev->new_chunk;
+       conf->prev_algo = conf->algorithm;
+       conf->algorithm = mddev->new_layout;
+       if (mddev->delta_disks < 0)
+               conf->reshape_progress = raid5_size(mddev, 0, 0);
+       else
+               conf->reshape_progress = 0;
+       conf->reshape_safe = conf->reshape_progress;
+       conf->generation++;
        spin_unlock_irq(&conf->device_lock);
 
        /* Add some new drives, as many as will fit.
@@ -4792,9 +4959,12 @@ static int raid5_start_reshape(mddev_t *mddev)
                                break;
                }
 
-       spin_lock_irqsave(&conf->device_lock, flags);
-       mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
-       spin_unlock_irqrestore(&conf->device_lock, flags);
+       if (mddev->delta_disks > 0) {
+               spin_lock_irqsave(&conf->device_lock, flags);
+               mddev->degraded = (conf->raid_disks - conf->previous_raid_disks)
+                       - added_devices;
+               spin_unlock_irqrestore(&conf->device_lock, flags);
+       }
        mddev->raid_disks = conf->raid_disks;
        mddev->reshape_position = 0;
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -4809,52 +4979,86 @@ static int raid5_start_reshape(mddev_t *mddev)
                mddev->recovery = 0;
                spin_lock_irq(&conf->device_lock);
                mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
-               conf->expand_progress = MaxSector;
+               conf->reshape_progress = MaxSector;
                spin_unlock_irq(&conf->device_lock);
                return -EAGAIN;
        }
+       conf->reshape_checkpoint = jiffies;
        md_wakeup_thread(mddev->sync_thread);
        md_new_event(mddev);
        return 0;
 }
-#endif
 
+/* This is called from the reshape thread and should make any
+ * changes needed in 'conf'
+ */
 static void end_reshape(raid5_conf_t *conf)
 {
-       struct block_device *bdev;
 
        if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
-               conf->mddev->array_sectors = conf->mddev->dev_sectors *
-                       (conf->raid_disks - conf->max_degraded);
-               set_capacity(conf->mddev->gendisk, conf->mddev->array_sectors);
-               conf->mddev->changed = 1;
-
-               bdev = bdget_disk(conf->mddev->gendisk, 0);
-               if (bdev) {
-                       mutex_lock(&bdev->bd_inode->i_mutex);
-                       i_size_write(bdev->bd_inode,
-                                    (loff_t)conf->mddev->array_sectors << 9);
-                       mutex_unlock(&bdev->bd_inode->i_mutex);
-                       bdput(bdev);
-               }
+
                spin_lock_irq(&conf->device_lock);
-               conf->expand_progress = MaxSector;
+               conf->previous_raid_disks = conf->raid_disks;
+               conf->reshape_progress = MaxSector;
                spin_unlock_irq(&conf->device_lock);
-               conf->mddev->reshape_position = MaxSector;
+               wake_up(&conf->wait_for_overlap);
 
                /* read-ahead size must cover two whole stripes, which is
                 * 2 * (number of data disks) * chunksize
                 */
                {
-                       int data_disks = conf->previous_raid_disks - conf->max_degraded;
-                       int stripe = data_disks *
-                               (conf->mddev->chunk_size / PAGE_SIZE);
+                       int data_disks = conf->raid_disks - conf->max_degraded;
+                       int stripe = data_disks * (conf->chunk_size
+                                                  / PAGE_SIZE);
                        if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
                                conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
                }
        }
 }
 
+/* This is called from the raid5d thread with mddev_lock held.
+ * It makes config changes to the device.
+ */
+static void raid5_finish_reshape(mddev_t *mddev)
+{
+       struct block_device *bdev;
+       raid5_conf_t *conf = mddev_to_conf(mddev);
+
+       if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+
+               if (mddev->delta_disks > 0) {
+                       md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
+                       set_capacity(mddev->gendisk, mddev->array_sectors);
+                       mddev->changed = 1;
+
+                       bdev = bdget_disk(mddev->gendisk, 0);
+                       if (bdev) {
+                               mutex_lock(&bdev->bd_inode->i_mutex);
+                               i_size_write(bdev->bd_inode,
+                                            (loff_t)mddev->array_sectors << 9);
+                               mutex_unlock(&bdev->bd_inode->i_mutex);
+                               bdput(bdev);
+                       }
+               } else {
+                       int d;
+                       mddev->degraded = conf->raid_disks;
+                       for (d = 0; d < conf->raid_disks ; d++)
+                               if (conf->disks[d].rdev &&
+                                   test_bit(In_sync,
+                                            &conf->disks[d].rdev->flags))
+                                       mddev->degraded--;
+                       for (d = conf->raid_disks ;
+                            d < conf->raid_disks - mddev->delta_disks;
+                            d++)
+                               raid5_remove_disk(mddev, d);
+               }
+               mddev->layout = conf->algorithm;
+               mddev->chunk_size = conf->chunk_size;
+               mddev->reshape_position = MaxSector;
+               mddev->delta_disks = 0;
+       }
+}
+
 static void raid5_quiesce(mddev_t *mddev, int state)
 {
        raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -4912,14 +5116,46 @@ static void *raid5_takeover_raid1(mddev_t *mddev)
        return setup_conf(mddev);
 }
 
+static void *raid5_takeover_raid6(mddev_t *mddev)
+{
+       int new_layout;
+
+       switch (mddev->layout) {
+       case ALGORITHM_LEFT_ASYMMETRIC_6:
+               new_layout = ALGORITHM_LEFT_ASYMMETRIC;
+               break;
+       case ALGORITHM_RIGHT_ASYMMETRIC_6:
+               new_layout = ALGORITHM_RIGHT_ASYMMETRIC;
+               break;
+       case ALGORITHM_LEFT_SYMMETRIC_6:
+               new_layout = ALGORITHM_LEFT_SYMMETRIC;
+               break;
+       case ALGORITHM_RIGHT_SYMMETRIC_6:
+               new_layout = ALGORITHM_RIGHT_SYMMETRIC;
+               break;
+       case ALGORITHM_PARITY_0_6:
+               new_layout = ALGORITHM_PARITY_0;
+               break;
+       case ALGORITHM_PARITY_N:
+               new_layout = ALGORITHM_PARITY_N;
+               break;
+       default:
+               return ERR_PTR(-EINVAL);
+       }
+       mddev->new_level = 5;
+       mddev->new_layout = new_layout;
+       mddev->delta_disks = -1;
+       mddev->raid_disks -= 1;
+       return setup_conf(mddev);
+}
+
 
 static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
 {
-       /* Currently the layout and chunk size can only be changed
-        * for a 2-drive raid array, as in that case no data shuffling
-        * is required.
-        * Later we might validate these and set new_* so a reshape
-        * can complete the change.
+       /* For a 2-drive array, the layout and chunk size can be changed
+        * immediately, as no restriping is needed.
+        * For larger arrays we record the new value - after validation
+        * to be used by a reshape pass.
         */
        raid5_conf_t *conf = mddev_to_conf(mddev);
 
@@ -4938,19 +5174,49 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
 
        /* They look valid */
 
-       if (mddev->raid_disks != 2)
-               return -EINVAL;
+       if (mddev->raid_disks == 2) {
 
-       if (new_layout >= 0) {
-               conf->algorithm = new_layout;
-               mddev->layout = mddev->new_layout = new_layout;
+               if (new_layout >= 0) {
+                       conf->algorithm = new_layout;
+                       mddev->layout = mddev->new_layout = new_layout;
+               }
+               if (new_chunk > 0) {
+                       conf->chunk_size = new_chunk;
+                       mddev->chunk_size = mddev->new_chunk = new_chunk;
+               }
+               set_bit(MD_CHANGE_DEVS, &mddev->flags);
+               md_wakeup_thread(mddev->thread);
+       } else {
+               if (new_layout >= 0)
+                       mddev->new_layout = new_layout;
+               if (new_chunk > 0)
+                       mddev->new_chunk = new_chunk;
        }
+       return 0;
+}
+
+static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+{
+       if (new_layout >= 0 && !algorithm_valid_raid6(new_layout))
+               return -EINVAL;
        if (new_chunk > 0) {
-               conf->chunk_size = new_chunk;
-               mddev->chunk_size = mddev->new_chunk = new_chunk;
+               if (new_chunk & (new_chunk-1))
+                       /* not a power of 2 */
+                       return -EINVAL;
+               if (new_chunk < PAGE_SIZE)
+                       return -EINVAL;
+               if (mddev->array_sectors & ((new_chunk>>9)-1))
+                       /* not factor of array size */
+                       return -EINVAL;
        }
-       set_bit(MD_CHANGE_DEVS, &mddev->flags);
-       md_wakeup_thread(mddev->thread);
+
+       /* They look valid */
+
+       if (new_layout >= 0)
+               mddev->new_layout = new_layout;
+       if (new_chunk > 0)
+               mddev->new_chunk = new_chunk;
+
        return 0;
 }
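
The new_chunk & (new_chunk-1) test above is the standard power-of-two
check (the kernel also provides is_power_of_2() in <linux/log2.h>); a
minimal sketch:

    /* n & (n - 1) clears the lowest set bit, so the result is zero
     * exactly when n has a single bit set, i.e. n is a power of two.
     */
    static inline int chunk_is_power_of_two(unsigned int n)
    {
            return n != 0 && (n & (n - 1)) == 0;
    }
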
 
@@ -4967,6 +5233,13 @@ static void *raid5_takeover(mddev_t *mddev)
 
        if (mddev->level == 1)
                return raid5_takeover_raid1(mddev);
+       if (mddev->level == 4) {
+               mddev->new_layout = ALGORITHM_PARITY_N;
+               mddev->new_level = 5;
+               return setup_conf(mddev);
+       }
+       if (mddev->level == 6)
+               return raid5_takeover_raid6(mddev);
 
        return ERR_PTR(-EINVAL);
 }
@@ -5036,12 +5309,13 @@ static struct mdk_personality raid6_personality =
        .spare_active   = raid5_spare_active,
        .sync_request   = sync_request,
        .resize         = raid5_resize,
-#ifdef CONFIG_MD_RAID5_RESHAPE
+       .size           = raid5_size,
        .check_reshape  = raid5_check_reshape,
        .start_reshape  = raid5_start_reshape,
-#endif
+       .finish_reshape = raid5_finish_reshape,
        .quiesce        = raid5_quiesce,
        .takeover       = raid6_takeover,
+       .reconfig       = raid6_reconfig,
 };
 static struct mdk_personality raid5_personality =
 {
@@ -5058,10 +5332,10 @@ static struct mdk_personality raid5_personality =
        .spare_active   = raid5_spare_active,
        .sync_request   = sync_request,
        .resize         = raid5_resize,
-#ifdef CONFIG_MD_RAID5_RESHAPE
+       .size           = raid5_size,
        .check_reshape  = raid5_check_reshape,
        .start_reshape  = raid5_start_reshape,
-#endif
+       .finish_reshape = raid5_finish_reshape,
        .quiesce        = raid5_quiesce,
        .takeover       = raid5_takeover,
        .reconfig       = raid5_reconfig,
@@ -5082,20 +5356,15 @@ static struct mdk_personality raid4_personality =
        .spare_active   = raid5_spare_active,
        .sync_request   = sync_request,
        .resize         = raid5_resize,
-#ifdef CONFIG_MD_RAID5_RESHAPE
+       .size           = raid5_size,
        .check_reshape  = raid5_check_reshape,
        .start_reshape  = raid5_start_reshape,
-#endif
+       .finish_reshape = raid5_finish_reshape,
        .quiesce        = raid5_quiesce,
 };
 
 static int __init raid5_init(void)
 {
-       int e;
-
-       e = raid6_select_algo();
-       if ( e )
-               return e;
        register_md_personality(&raid6_personality);
        register_md_personality(&raid5_personality);
        register_md_personality(&raid4_personality);