dm raid1: support flush
author    Mikulas Patocka <mpatocka@redhat.com>
          Thu, 10 Dec 2009 23:51:59 +0000 (23:51 +0000)
committer Alasdair G Kergon <agk@redhat.com>
          Thu, 10 Dec 2009 23:51:59 +0000 (23:51 +0000)
Flush support for dm-raid1.

When the mirror target receives an empty barrier, it submits the barrier to all mirror devices via dm-io.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
drivers/md/dm-raid1.c
drivers/md/dm-region-hash.c
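
For orientation, a minimal, self-contained C model of the write path this patch touches: an empty barrier is a bio with the barrier flag set and zero bi_size, and do_write() now fans a single dm-io request out to every mirror leg with the barrier flag propagated. All names below (model_bio, mirror_leg, submit_io, model_do_write) are illustrative stand-ins, not kernel symbols, and the sector arithmetic is only a rough analogue of map_sector().

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct model_bio {
	size_t size;      /* bi_size: 0 for an empty barrier */
	bool barrier;     /* barrier flag set on the request */
};

struct mirror_leg {
	const char *name;
	long long offset; /* start of this leg on its device */
};

/* Stand-in for dm_io(): one request, fanned out to every destination. */
static void submit_io(bool barrier, const struct mirror_leg *legs,
		      int nr_legs, long long sector, size_t len)
{
	int i;

	for (i = 0; i < nr_legs; i++)
		printf("%s: %s sector=%lld len=%zu\n", legs[i].name,
		       barrier ? "WRITE|BARRIER" : "WRITE",
		       legs[i].offset + sector, len);
}

/* Shape of do_write() after this patch: the map_sector() analogue yields 0
 * for an empty barrier, and the barrier flag reaches every leg. */
static void model_do_write(const struct model_bio *bio,
			   const struct mirror_leg *legs, int nr_legs)
{
	long long sector = bio->size ? 1234 : 0; /* map_sector() analogue */

	submit_io(bio->barrier, legs, nr_legs, sector, bio->size);
}

int main(void)
{
	struct mirror_leg legs[] = { { "leg0", 0 }, { "leg1", 0 } };
	struct model_bio flush = { .size = 0, .barrier = true };

	model_do_write(&flush, legs, 2); /* the flush reaches both legs */
	return 0;
}

Running the model prints one WRITE|BARRIER line per leg, which is the behaviour the do_write() hunk below introduces for empty barriers.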

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index cc9dc79..752a29e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -396,6 +396,8 @@ static int mirror_available(struct mirror_set *ms, struct bio *bio)
  */
 static sector_t map_sector(struct mirror *m, struct bio *bio)
 {
+       if (unlikely(!bio->bi_size))
+               return 0;
        return m->offset + (bio->bi_sector - m->ms->ti->begin);
 }
 
@@ -562,7 +564,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
        struct dm_io_region io[ms->nr_mirrors], *dest = io;
        struct mirror *m;
        struct dm_io_request io_req = {
-               .bi_rw = WRITE,
+               .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
                .mem.type = DM_IO_BVEC,
                .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
                .notify.fn = write_callback,
@@ -603,6 +605,11 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
        bio_list_init(&requeue);
 
        while ((bio = bio_list_pop(writes))) {
+               if (unlikely(bio_empty_barrier(bio))) {
+                       bio_list_add(&sync, bio);
+                       continue;
+               }
+
                region = dm_rh_bio_to_region(ms->rh, bio);
 
                if (log->type->is_remote_recovering &&
@@ -995,6 +1002,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
        ti->private = ms;
        ti->split_io = dm_rh_get_region_size(ms->rh);
+       ti->num_flush_requests = 1;
 
        ms->kmirrord_wq = create_singlethread_workqueue("kmirrord");
        if (!ms->kmirrord_wq) {
@@ -1122,7 +1130,8 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
         * We need to dec pending if this was a write.
         */
        if (rw == WRITE) {
-               dm_rh_dec(ms->rh, map_context->ll);
+               if (likely(!bio_empty_barrier(bio)))
+                       dm_rh_dec(ms->rh, map_context->ll);
                return error;
        }
 
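The mirror_end_io() hunk above pairs with the dm_rh_inc_pending() change in dm-region-hash.c below: an empty barrier never takes a region-hash reference on submission, so its completion must not drop one either. A rough stand-alone model of that pairing (all names hypothetical):

#include <assert.h>
#include <stdbool.h>

struct region_model {
	int pending;                    /* in-flight writes counted against the region */
};

static void model_inc_pending(struct region_model *r, bool empty_barrier)
{
	if (!empty_barrier)             /* barriers carry no data: skipped */
		r->pending++;
}

static void model_end_io(struct region_model *r, bool empty_barrier)
{
	if (!empty_barrier)             /* must mirror the skip on the inc side */
		r->pending--;
}

int main(void)
{
	struct region_model reg = { 0 };

	model_inc_pending(&reg, true);  /* an empty barrier passes through */
	model_end_io(&reg, true);
	assert(reg.pending == 0);       /* balanced: no underflow */
	return 0;
}
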
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 36dbe29..00806b7 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -79,6 +79,11 @@ struct dm_region_hash {
        struct list_head recovered_regions;
        struct list_head failed_recovered_regions;
 
+       /*
+        * If there was a barrier failure no regions can be marked clean.
+        */
+       int barrier_failure;
+
        void *context;
        sector_t target_begin;
 
@@ -211,6 +216,7 @@ struct dm_region_hash *dm_region_hash_create(
        INIT_LIST_HEAD(&rh->quiesced_regions);
        INIT_LIST_HEAD(&rh->recovered_regions);
        INIT_LIST_HEAD(&rh->failed_recovered_regions);
+       rh->barrier_failure = 0;
 
        rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
                                                      sizeof(struct dm_region));
@@ -395,6 +401,11 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh,
        region_t region = dm_rh_bio_to_region(rh, bio);
        int recovering = 0;
 
+       if (bio_empty_barrier(bio)) {
+               rh->barrier_failure = 1;
+               return;
+       }
+
        /* We must inform the log that the sync count has changed. */
        log->type->set_region_sync(log, region, 0);
 
@@ -515,8 +526,11 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
 {
        struct bio *bio;
 
-       for (bio = bios->head; bio; bio = bio->bi_next)
+       for (bio = bios->head; bio; bio = bio->bi_next) {
+               if (bio_empty_barrier(bio))
+                       continue;
                rh_inc(rh, dm_rh_bio_to_region(rh, bio));
+       }
 }
 EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
 
@@ -544,7 +558,14 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region)
                 */
 
                /* do nothing for DM_RH_NOSYNC */
-               if (reg->state == DM_RH_RECOVERING) {
+               if (unlikely(rh->barrier_failure)) {
+                       /*
+                        * If a write barrier failed some time ago, we
+                        * don't know whether or not this write made it
+                        * to the disk, so we must resync the device.
+                        */
+                       reg->state = DM_RH_NOSYNC;
+               } else if (reg->state == DM_RH_RECOVERING) {
                        list_add_tail(&reg->list, &rh->quiesced_regions);
                } else if (reg->state == DM_RH_DIRTY) {
                        reg->state = DM_RH_CLEAN;
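
To summarise the barrier_failure logic added above, here is a simplified stand-alone model of dm_rh_dec(): once any flush has failed, a region whose last pending write completes is marked no-sync rather than clean, so the mirror is resynchronised instead of trusting data that may never have reached the platter. The DM_RH_RECOVERING branch and all locking are omitted, and the names are stand-ins, not kernel symbols.

#include <stdbool.h>
#include <stdio.h>

enum model_state { MODEL_CLEAN, MODEL_DIRTY, MODEL_NOSYNC };

struct region_model {
	enum model_state state;
	int pending;
};

/* Simplified dm_rh_dec(): only the CLEAN/NOSYNC decision is modelled. */
static void model_rh_dec(struct region_model *reg, bool barrier_failure)
{
	if (--reg->pending)
		return;                       /* writes still in flight */

	if (barrier_failure)
		reg->state = MODEL_NOSYNC;    /* data may not be on disk */
	else if (reg->state == MODEL_DIRTY)
		reg->state = MODEL_CLEAN;     /* safe to mark clean in the log */
}

int main(void)
{
	struct region_model reg = { .state = MODEL_DIRTY, .pending = 1 };

	model_rh_dec(&reg, true);             /* a flush failed earlier */
	printf("state=%d (%d == NOSYNC)\n", reg.state, MODEL_NOSYNC);
	return 0;
}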