md: replace STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} with 'reconstruct_states'
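Track the state of a reconstruct operation (recalculating the parity
block, either to absorb writes or as part of an array expansion) in a
single sh->reconstruct_state field.  The STRIPE_OP_{BIODRAIN,PREXOR,
POSTXOR} bits now only request work via stripe_head_state.ops_request;
the ops.{pending,ack,complete} bookkeeping and get_stripe_work() are
removed, and raid5_run_ops() takes the request mask alone.

The states used in the raid5.c hunks below come from a
'reconstruct_states' enum declared on the raid5.h side of this change
(not shown here); a minimal sketch consistent with how the diff uses
them, though the authoritative declaration lives in the header:

	enum reconstruct_states {
		reconstruct_state_idle = 0,
		reconstruct_state_drain_run,	/* write: biodrain + postxor */
		reconstruct_state_run,		/* expand: postxor only */
		reconstruct_state_drain_result,
		reconstruct_state_result,
	};

handle_write_operations5() moves a stripe from idle to drain_run (or to
run for an expand), the completion callbacks advance it to
drain_result/result, and handle_stripe5() consumes the result states
and returns the stripe to idle.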
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b9c0a32..b915936 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -122,6 +122,13 @@ static void return_io(struct bio *return_bi)
 
 static void print_raid5_conf (raid5_conf_t *conf);
 
+static int stripe_operations_active(struct stripe_head *sh)
+{
+       return sh->check_state || sh->reconstruct_state ||
+              test_bit(STRIPE_BIOFILL_RUN, &sh->state) ||
+              test_bit(STRIPE_COMPUTE_RUN, &sh->state);
+}
+
 static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
 {
        if (atomic_dec_and_test(&sh->count)) {
@@ -141,7 +148,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
                        }
                        md_wakeup_thread(conf->mddev->thread);
                } else {
-                       BUG_ON(sh->ops.pending);
+                       BUG_ON(stripe_operations_active(sh));
                        if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
                                atomic_dec(&conf->preread_active_stripes);
                                if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -243,7 +250,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
 
        BUG_ON(atomic_read(&sh->count) != 0);
        BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
-       BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete);
+       BUG_ON(stripe_operations_active(sh));
 
        CHECK_DEVLOCK();
        pr_debug("init_stripe called, stripe %llu\n",
@@ -344,47 +351,6 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
        return sh;
 }
 
-/* test_and_ack_op() ensures that we only dequeue an operation once */
-#define test_and_ack_op(op, pend) \
-do {                                                   \
-       if (test_bit(op, &sh->ops.pending) &&           \
-               !test_bit(op, &sh->ops.complete)) {     \
-               if (test_and_set_bit(op, &sh->ops.ack)) \
-                       clear_bit(op, &pend);           \
-               else                                    \
-                       ack++;                          \
-       } else                                          \
-               clear_bit(op, &pend);                   \
-} while (0)
-
-/* find new work to run, do not resubmit work that is already
- * in flight
- */
-static unsigned long get_stripe_work(struct stripe_head *sh)
-{
-       unsigned long pending;
-       int ack = 0;
-
-       pending = sh->ops.pending;
-
-       test_and_ack_op(STRIPE_OP_BIOFILL, pending);
-       test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending);
-       test_and_ack_op(STRIPE_OP_PREXOR, pending);
-       test_and_ack_op(STRIPE_OP_BIODRAIN, pending);
-       test_and_ack_op(STRIPE_OP_POSTXOR, pending);
-       test_and_ack_op(STRIPE_OP_CHECK, pending);
-
-       sh->ops.count -= ack;
-       if (unlikely(sh->ops.count < 0)) {
-               printk(KERN_ERR "pending: %#lx ops.pending: %#lx ops.ack: %#lx "
-                       "ops.complete: %#lx\n", pending, sh->ops.pending,
-                       sh->ops.ack, sh->ops.complete);
-               BUG();
-       }
-
-       return pending;
-}
-
 static void
 raid5_end_read_request(struct bio *bi, int error);
 static void
@@ -604,14 +570,12 @@ static void ops_complete_compute5(void *stripe_head_ref)
        clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
        if (sh->check_state == check_state_compute_run)
                sh->check_state = check_state_compute_result;
-       else
-               set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
        set_bit(STRIPE_HANDLE, &sh->state);
        release_stripe(sh);
 }
 
 static struct dma_async_tx_descriptor *
-ops_run_compute5(struct stripe_head *sh, unsigned long pending)
+ops_run_compute5(struct stripe_head *sh, unsigned long ops_request)
 {
        /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
@@ -642,7 +606,7 @@ ops_run_compute5(struct stripe_head *sh, unsigned long pending)
                        ops_complete_compute5, sh);
 
        /* ack now if postxor is not set to be run */
-       if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending))
+       if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
                async_tx_ack(tx);
 
        return tx;
@@ -654,8 +618,6 @@ static void ops_complete_prexor(void *stripe_head_ref)
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
-
-       set_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
 }
 
 static struct dma_async_tx_descriptor *
@@ -688,7 +650,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 
 static struct dma_async_tx_descriptor *
 ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
-                unsigned long pending)
+                unsigned long ops_request)
 {
        int disks = sh->disks;
        int pd_idx = sh->pd_idx, i;
@@ -696,7 +658,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
        /* check if prexor is active which means only process blocks
         * that are part of a read-modify-write (Wantprexor)
         */
-       int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
+       int prexor = test_bit(STRIPE_OP_PREXOR, &ops_request);
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
@@ -746,7 +708,7 @@ static void ops_complete_postxor(void *stripe_head_ref)
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
-       set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
+       sh->reconstruct_state = reconstruct_state_result;
        set_bit(STRIPE_HANDLE, &sh->state);
        release_stripe(sh);
 }
@@ -765,16 +727,14 @@ static void ops_complete_write(void *stripe_head_ref)
                        set_bit(R5_UPTODATE, &dev->flags);
        }
 
-       set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
-       set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
-
+       sh->reconstruct_state = reconstruct_state_drain_result;
        set_bit(STRIPE_HANDLE, &sh->state);
        release_stripe(sh);
 }
 
 static void
 ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
-               unsigned long pending)
+               unsigned long ops_request)
 {
        /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
@@ -782,7 +742,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
 
        int count = 0, pd_idx = sh->pd_idx, i;
        struct page *xor_dest;
-       int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
+       int prexor = test_bit(STRIPE_OP_PREXOR, &ops_request);
        unsigned long flags;
        dma_async_tx_callback callback;
 
@@ -809,7 +769,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
        }
 
        /* check whether this postxor is part of a write */
-       callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ?
+       callback = test_bit(STRIPE_OP_BIODRAIN, &ops_request) ?
                ops_complete_write : ops_complete_postxor;
 
        /* 1/ if we prexor'd then the dest is reused as a source
@@ -870,8 +830,7 @@ static void ops_run_check(struct stripe_head *sh)
                ops_complete_check, sh);
 }
 
-static void raid5_run_ops(struct stripe_head *sh, unsigned long pending,
-                         unsigned long ops_request)
+static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
 {
        int overlap_clear = 0, i, disks = sh->disks;
        struct dma_async_tx_descriptor *tx = NULL;
@@ -881,20 +840,19 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending,
                overlap_clear++;
        }
 
-       if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending) ||
-           test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request))
-               tx = ops_run_compute5(sh, pending);
+       if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request))
+               tx = ops_run_compute5(sh, ops_request);
 
-       if (test_bit(STRIPE_OP_PREXOR, &pending))
+       if (test_bit(STRIPE_OP_PREXOR, &ops_request))
                tx = ops_run_prexor(sh, tx);
 
-       if (test_bit(STRIPE_OP_BIODRAIN, &pending)) {
-               tx = ops_run_biodrain(sh, tx, pending);
+       if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
+               tx = ops_run_biodrain(sh, tx, ops_request);
                overlap_clear++;
        }
 
-       if (test_bit(STRIPE_OP_POSTXOR, &pending))
-               ops_run_postxor(sh, tx, pending);
+       if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
+               ops_run_postxor(sh, tx, ops_request);
 
        if (test_bit(STRIPE_OP_CHECK, &ops_request))
                ops_run_check(sh);
@@ -1687,11 +1645,11 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
        }
 }
 
-static int
-handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
+static void
+handle_write_operations5(struct stripe_head *sh, struct stripe_head_state *s,
+                        int rcw, int expand)
 {
        int i, pd_idx = sh->pd_idx, disks = sh->disks;
-       int locked = 0;
 
        if (rcw) {
                /* if we are not expanding this is a proper write request, and
@@ -1699,12 +1657,12 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
                 * stripe cache
                 */
                if (!expand) {
-                       set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
-                       sh->ops.count++;
-               }
+                       sh->reconstruct_state = reconstruct_state_drain_run;
+                       set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+               } else
+                       sh->reconstruct_state = reconstruct_state_run;
 
-               set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
-               sh->ops.count++;
+               set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
 
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
@@ -1713,21 +1671,20 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
                                set_bit(R5_LOCKED, &dev->flags);
                                if (!expand)
                                        clear_bit(R5_UPTODATE, &dev->flags);
-                               locked++;
+                               s->locked++;
                        }
                }
-               if (locked + 1 == disks)
+               if (s->locked + 1 == disks)
                        if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
                                atomic_inc(&sh->raid_conf->pending_full_writes);
        } else {
                BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                        test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
 
-               set_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
-               set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
-               set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
-
-               sh->ops.count += 3;
+               sh->reconstruct_state = reconstruct_state_drain_run;
+               set_bit(STRIPE_OP_PREXOR, &s->ops_request);
+               set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+               set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
 
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
@@ -1745,7 +1702,7 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
                                set_bit(R5_Wantprexor, &dev->flags);
                                set_bit(R5_LOCKED, &dev->flags);
                                clear_bit(R5_UPTODATE, &dev->flags);
-                               locked++;
+                               s->locked++;
                        }
                }
        }
@@ -1755,13 +1712,11 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
         */
        set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
        clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
-       locked++;
+       s->locked++;
 
-       pr_debug("%s: stripe %llu locked: %d pending: %lx\n",
+       pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
                __func__, (unsigned long long)sh->sector,
-               locked, sh->ops.pending);
-
-       return locked;
+               s->locked, s->ops_request);
 }
 
 /*
@@ -1960,12 +1915,6 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
        struct r5dev *dev = &sh->dev[disk_idx];
        struct r5dev *failed_dev = &sh->dev[s->failed_num];
 
-       /* don't schedule compute operations or reads on the parity block while
-        * a check is in flight
-        */
-       if (disk_idx == sh->pd_idx && sh->check_state)
-               return ~0;
-
        /* is the data in this block needed, and can we get it? */
        if (!test_bit(R5_LOCKED, &dev->flags) &&
            !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread ||
@@ -1974,23 +1923,16 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
             (failed_dev->toread || (failed_dev->towrite &&
             !test_bit(R5_OVERWRITE, &failed_dev->flags)
             ))))) {
-               /* 1/ We would like to get this block, possibly by computing it,
-                * but we might not be able to.
-                *
-                * 2/ Since parity check operations potentially make the parity
-                * block !uptodate it will need to be refreshed before any
-                * compute operations on data disks are scheduled.
-                *
-                * 3/ We hold off parity block re-reads until check operations
-                * have quiesced.
+               /* We would like to get this block, possibly by computing it,
+                * otherwise read it if the backing disk is insync
                 */
-               if ((s->uptodate == disks - 1) && !sh->check_state &&
+               if ((s->uptodate == disks - 1) &&
                    (s->failed && disk_idx == s->failed_num)) {
-                       set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+                       set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+                       set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
                        set_bit(R5_Wantcompute, &dev->flags);
                        sh->ops.target = disk_idx;
                        s->req_compute = 1;
-                       sh->ops.count++;
                        /* Careful: from this point on 'uptodate' is in the eye
                         * of raid5_run_ops which services 'compute' operations
                         * before writes. R5_Wantcompute flags a block that will
@@ -1999,12 +1941,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
                         */
                        s->uptodate++;
                        return 0; /* uptodate + compute == disks */
-               } else if ((s->uptodate < disks - 1) &&
-                       test_bit(R5_Insync, &dev->flags)) {
-                       /* Note: we hold off compute operations while checks are
-                        * in flight, but we still prefer 'compute' over 'read'
-                        * hence we only read if (uptodate < * disks-1)
-                        */
+               } else if (test_bit(R5_Insync, &dev->flags)) {
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantread, &dev->flags);
                        s->locked++;
@@ -2021,20 +1958,12 @@ static void handle_issuing_new_read_requests5(struct stripe_head *sh,
 {
        int i;
 
-       /* Clear completed compute operations */
-       if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete)) {
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
-       }
-
        /* look for blocks to read/compute, skip this if a compute
         * is already in flight, or if the stripe contents are in the
         * midst of changing due to a write
         */
-       if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
-               !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) &&
-               !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
+       if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
+           !sh->reconstruct_state) {
                for (i = disks; i--; )
                        if (__handle_issuing_new_read_requests5(
                                sh, s, i, disks) == 0)
@@ -2236,11 +2165,10 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
         * simultaneously.  If this is not the case then new writes need to be
         * held off until the compute completes.
         */
-       if ((s->req_compute ||
-           !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
-               (s->locked == 0 && (rcw == 0 || rmw == 0) &&
-               !test_bit(STRIPE_BIT_DELAY, &sh->state)))
-               s->locked += handle_write_operations5(sh, rcw == 0, 0);
+       if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
+           (s->locked == 0 && (rcw == 0 || rmw == 0) &&
+           !test_bit(STRIPE_BIT_DELAY, &sh->state)))
+               handle_write_operations5(sh, s, rcw == 0, 0);
 }
 
 static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
@@ -2410,6 +2338,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                                set_bit(STRIPE_INSYNC, &sh->state);
                        else {
                                sh->check_state = check_state_compute_run;
+                               set_bit(STRIPE_COMPUTE_RUN, &sh->state);
                                set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
                                set_bit(R5_Wantcompute,
                                        &sh->dev[sh->pd_idx].flags);
@@ -2609,15 +2538,14 @@ static void handle_stripe5(struct stripe_head *sh)
        struct bio *return_bi = NULL;
        struct stripe_head_state s;
        struct r5dev *dev;
-       unsigned long pending = 0;
        mdk_rdev_t *blocked_rdev = NULL;
        int prexor;
 
        memset(&s, 0, sizeof(s));
-       pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
-               "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state,
-               atomic_read(&sh->count), sh->pd_idx,
-               sh->ops.pending, sh->ops.ack, sh->ops.complete);
+       pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
+                "reconstruct:%d\n", (unsigned long long)sh->sector, sh->state,
+                atomic_read(&sh->count), sh->pd_idx, sh->check_state,
+                sh->reconstruct_state);
 
        spin_lock(&sh->lock);
        clear_bit(STRIPE_HANDLE, &sh->state);
@@ -2725,41 +2653,18 @@ static void handle_stripe5(struct stripe_head *sh)
         * or to load a block that is being partially written.
         */
        if (s.to_read || s.non_overwrite ||
-           (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding ||
-           test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
+           (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
                handle_issuing_new_read_requests5(sh, &s, disks);
 
        /* Now we check to see if any write operations have recently
         * completed
         */
-
-       /* leave prexor set until postxor is done, allows us to distinguish
-        * a rmw from a rcw during biodrain
-        */
        prexor = 0;
-       if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
-               test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
-
-               prexor = 1;
-               clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
-               clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
-               clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
-
+       if (sh->reconstruct_state == reconstruct_state_drain_result) {
+               sh->reconstruct_state = reconstruct_state_idle;
                for (i = disks; i--; )
-                       clear_bit(R5_Wantprexor, &sh->dev[i].flags);
-       }
-
-       /* if only POSTXOR is set then this is an 'expand' postxor */
-       if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
-               test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
-
-               clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
-               clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
-               clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
-
-               clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
-               clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
-               clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+                       prexor += test_and_clear_bit(R5_Wantprexor,
+                                                    &sh->dev[i].flags);
 
                /* All the 'written' buffers and the parity block are ready to
                 * be written back to disk
@@ -2792,8 +2697,7 @@ static void handle_stripe5(struct stripe_head *sh)
         * 2/ A 'check' operation is in flight, as it may clobber the parity
         *    block.
         */
-       if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
-           !sh->check_state)
+       if (s.to_write && !sh->reconstruct_state && !sh->check_state)
                handle_issuing_new_write_requests5(conf, sh, &s, disks);
 
        /* maybe we need to check and possibly fix the parity for this stripe
@@ -2803,7 +2707,7 @@ static void handle_stripe5(struct stripe_head *sh)
         */
        if (sh->check_state ||
            (s.syncing && s.locked == 0 &&
-            !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
+            !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
             !test_bit(STRIPE_INSYNC, &sh->state)))
                handle_parity_checks5(conf, sh, &s, disks);
 
@@ -2834,18 +2738,10 @@ static void handle_stripe5(struct stripe_head *sh)
                }
        }
 
-       /* Finish postxor operations initiated by the expansion
-        * process
-        */
-       if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) &&
-               !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) {
-
+       /* Finish reconstruct operations initiated by the expansion process */
+       if (sh->reconstruct_state == reconstruct_state_result) {
+               sh->reconstruct_state = reconstruct_state_idle;
                clear_bit(STRIPE_EXPANDING, &sh->state);
-
-               clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
-               clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
-               clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
-
                for (i = conf->raid_disks; i--; )
                        set_bit(R5_Wantwrite, &sh->dev[i].flags);
                        set_bit(R5_LOCKED, &dev->flags);
@@ -2853,15 +2749,13 @@ static void handle_stripe5(struct stripe_head *sh)
        }
 
        if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
-               !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
+           !sh->reconstruct_state) {
                /* Need to write out all blocks after computing parity */
                sh->disks = conf->raid_disks;
                sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
                        conf->raid_disks);
-               s.locked += handle_write_operations5(sh, 1, 1);
-       } else if (s.expanded &&
-                  s.locked == 0 &&
-               !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
+               handle_write_operations5(sh, &s, 1, 1);
+       } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
                clear_bit(STRIPE_EXPAND_READY, &sh->state);
                atomic_dec(&conf->reshape_stripes);
                wake_up(&conf->wait_for_overlap);
@@ -2869,12 +2763,9 @@ static void handle_stripe5(struct stripe_head *sh)
        }
 
        if (s.expanding && s.locked == 0 &&
-           !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
+           !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
                handle_stripe_expansion(conf, sh, NULL);
 
-       if (sh->ops.count)
-               pending = get_stripe_work(sh);
-
  unlock:
        spin_unlock(&sh->lock);
 
@@ -2882,8 +2773,8 @@ static void handle_stripe5(struct stripe_head *sh)
        if (unlikely(blocked_rdev))
                md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
-       if (pending || s.ops_request)
-               raid5_run_ops(sh, pending, s.ops_request);
+       if (s.ops_request)
+               raid5_run_ops(sh, s.ops_request);
 
        ops_run_io(sh, &s);
 
@@ -3089,7 +2980,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        }
 
        if (s.expanding && s.locked == 0 &&
-           !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
+           !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
                handle_stripe_expansion(conf, sh, &r6s);
 
  unlock: