dm snapshot: rework writing to origin
[safe/jmp/linux-2.6] / drivers / md / dm-snap.c
index 446827f..c01e0da 100644 (file)
@@ -142,28 +142,6 @@ struct dm_snap_pending_exception {
        struct bio_list origin_bios;
        struct bio_list snapshot_bios;
 
-       /*
-        * Short-term queue of pending exceptions prior to submission.
-        */
-       struct list_head list;
-
-       /*
-        * The primary pending_exception is the one that holds
-        * the ref_count and the list of origin_bios for a
-        * group of pending_exceptions.  It is always last to get freed.
-        * These fields get set up when writing to the origin.
-        */
-       struct dm_snap_pending_exception *primary_pe;
-
-       /*
-        * Number of pending_exceptions processing this chunk.
-        * When this drops to zero we must complete the origin bios.
-        * If incrementing or decrementing this, hold pe->snap->lock for
-        * the sibling concerned and not pe->primary_pe->snap->lock unless
-        * they are the same.
-        */
-       atomic_t ref_count;
-
        /* Pointer back to snapshot context */
        struct dm_snapshot *snap;
 
@@ -1019,6 +997,26 @@ static void flush_queued_bios(struct work_struct *work)
        flush_bios(queued_bios);
 }
 
+static int do_origin(struct dm_dev *origin, struct bio *bio);
+
+/*
+ * Re-run each bio on the chain through do_origin(); submit those it remaps.
+ */
+static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
+{
+       struct bio *n;  /* successor, saved before the current bio is unlinked */
+       int r;
+
+       while (bio) {
+               n = bio->bi_next;
+               bio->bi_next = NULL;    /* detach from the chain */
+               r = do_origin(s->origin, bio);
+               if (r == DM_MAPIO_REMAPPED)     /* other results: not submitted here */
+                       generic_make_request(bio);
+               bio = n;
+       }
+}
+
 /*
  * Error a list of buffers.
  */
@@ -1052,39 +1050,6 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
        dm_table_event(s->ti->table);
 }
 
-static void get_pending_exception(struct dm_snap_pending_exception *pe)
-{
-       atomic_inc(&pe->ref_count);
-}
-
-static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
-{
-       struct dm_snap_pending_exception *primary_pe;
-       struct bio *origin_bios = NULL;
-
-       primary_pe = pe->primary_pe;
-
-       /*
-        * If this pe is involved in a write to the origin and
-        * it is the last sibling to complete then release
-        * the bios for the original write to the origin.
-        */
-       if (primary_pe &&
-           atomic_dec_and_test(&primary_pe->ref_count)) {
-               origin_bios = bio_list_get(&primary_pe->origin_bios);
-               free_pending_exception(primary_pe);
-       }
-
-       /*
-        * Free the pe if it's not linked to an origin write or if
-        * it's not itself a primary pe.
-        */
-       if (!primary_pe || primary_pe != pe)
-               free_pending_exception(pe);
-
-       return origin_bios;
-}
-
 static void pending_complete(struct dm_snap_pending_exception *pe, int success)
 {
        struct dm_exception *e;
@@ -1129,7 +1094,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
  out:
        dm_remove_exception(&pe->e);
        snapshot_bios = bio_list_get(&pe->snapshot_bios);
-       origin_bios = put_pending_exception(pe);
+       origin_bios = bio_list_get(&pe->origin_bios);
+       free_pending_exception(pe);
 
        up_write(&s->lock);
 
@@ -1139,7 +1105,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
        else
                flush_bios(snapshot_bios);
 
-       flush_bios(origin_bios);
+       retry_origin_bios(s, origin_bios);
 }
 
 static void commit_callback(void *context, int success)
@@ -1226,8 +1192,6 @@ __find_pending_exception(struct dm_snapshot *s,
        pe->e.old_chunk = chunk;
        bio_list_init(&pe->origin_bios);
        bio_list_init(&pe->snapshot_bios);
-       pe->primary_pe = NULL;
-       atomic_set(&pe->ref_count, 0);
        pe->started = 0;
 
        if (s->store->type->prepare_exception(s->store, &pe->e)) {
@@ -1235,7 +1199,6 @@ __find_pending_exception(struct dm_snapshot *s,
                return NULL;
        }
 
-       get_pending_exception(pe);
        dm_insert_exception(&s->pending, &pe->e);
 
        return pe;
@@ -1492,16 +1455,16 @@ static int snapshot_iterate_devices(struct dm_target *ti,
 static int __origin_write(struct list_head *snapshots, sector_t sector,
                          struct bio *bio)
 {
-       int r = DM_MAPIO_REMAPPED, first = 0;
+       int r = DM_MAPIO_REMAPPED;
        struct dm_snapshot *snap;
        struct dm_exception *e;
-       struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
+       struct dm_snap_pending_exception *pe;
+       struct dm_snap_pending_exception *pe_to_start_now = NULL;
+       struct dm_snap_pending_exception *pe_to_start_last = NULL;
        chunk_t chunk;
-       LIST_HEAD(pe_queue);
 
        /* Do all the snapshots on this origin */
        list_for_each_entry (snap, snapshots, list) {
-
                down_write(&snap->lock);
 
                /* Only deal with valid and active snapshots */
@@ -1522,9 +1485,6 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
                 * Check exception table to see if block
                 * is already remapped in this snapshot
                 * and trigger an exception if not.
-                *
-                * ref_count is initialised to 1 so pending_complete()
-                * won't destroy the primary_pe while we're inside this loop.
                 */
                e = dm_lookup_exception(&snap->complete, chunk);
                if (e)
@@ -1554,60 +1514,43 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
                        }
                }
 
-               if (!primary_pe) {
-                       /*
-                        * Either every pe here has same
-                        * primary_pe or none has one yet.
-                        */
-                       if (pe->primary_pe)
-                               primary_pe = pe->primary_pe;
-                       else {
-                               primary_pe = pe;
-                               first = 1;
-                       }
-
-                       if (bio)
-                               bio_list_add(&primary_pe->origin_bios, bio);
+               r = DM_MAPIO_SUBMITTED;
 
-                       r = DM_MAPIO_SUBMITTED;
-               }
+               /*
+                * If an origin bio was supplied, queue it to wait for the
+                * completion of this exception, and start this one last,
+                * at the end of the function.
+                */
+               if (bio) {
+                       bio_list_add(&pe->origin_bios, bio);
+                       bio = NULL;
 
-               if (!pe->primary_pe) {
-                       pe->primary_pe = primary_pe;
-                       get_pending_exception(primary_pe);
+                       if (!pe->started) {
+                               pe->started = 1;
+                               pe_to_start_last = pe;
+                       }
                }
 
                if (!pe->started) {
                        pe->started = 1;
-                       list_add_tail(&pe->list, &pe_queue);
+                       pe_to_start_now = pe;
                }
 
  next_snapshot:
                up_write(&snap->lock);
-       }
 
-       if (!primary_pe)
-               return r;
-
-       /*
-        * If this is the first time we're processing this chunk and
-        * ref_count is now 1 it means all the pending exceptions
-        * got completed while we were in the loop above, so it falls to
-        * us here to remove the primary_pe and submit any origin_bios.
-        */
-
-       if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
-               flush_bios(bio_list_get(&primary_pe->origin_bios));
-               free_pending_exception(primary_pe);
-               /* If we got here, pe_queue is necessarily empty. */
-               return r;
+               if (pe_to_start_now) {
+                       start_copy(pe_to_start_now);
+                       pe_to_start_now = NULL;
+               }
        }
 
        /*
-        * Now that we have a complete pe list we can start the copying.
+        * Submit the exception against which the bio is queued last,
+        * to give the other exceptions a head start.
         */
-       list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
-               start_copy(pe);
+       if (pe_to_start_last)
+               start_copy(pe_to_start_last);
 
        return r;
 }