dm raid1: hold write bios when errors are handled
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 09a5711..1a6cb3c 100644
@@ -41,6 +41,7 @@
 struct dm_table {
        struct mapped_device *md;
        atomic_t holders;
+       unsigned type;
 
        /* btree table */
        unsigned int depth;
@@ -65,6 +66,8 @@ struct dm_table {
        /* events get handed up using this callback */
        void (*event_fn)(void *);
        void *event_context;
+
+       struct dm_md_mempools *mempools;
 };
 
 /*
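
The two fields added above give each table a bio/request "personality" plus the
mempools reserved for that personality. The type constants come from the same
patch series (defined in drivers/md/dm.h); a minimal sketch:

        /* Table type: decided once per table load, then read-only. */
        #define DM_TYPE_NONE            0       /* not determined yet */
        #define DM_TYPE_BIO_BASED       1       /* every target maps bios */
        #define DM_TYPE_REQUEST_BASED   2       /* every target maps requests */
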
@@ -258,6 +261,8 @@ void dm_table_destroy(struct dm_table *t)
        if (t->devices.next != &t->devices)
                free_devices(&t->devices);
 
+       dm_free_md_mempools(t->mempools);
+
        kfree(t);
 }
 
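dm_table_destroy() can call dm_free_md_mempools() unconditionally because the
helper is expected to tolerate a table that never allocated pools. A sketch of
the guard, assuming the dm.c implementation:

        void dm_free_md_mempools(struct dm_md_mempools *pools)
        {
                if (!pools)
                        return;         /* table was torn down before pools were allocated */

                /* ... destroy the io/tio mempools and the bioset ... */
                kfree(pools);
        }
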
@@ -338,10 +343,10 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
 }
 
 /*
- * If possible, this checks an area of a destination device is valid.
+ * If possible, this checks an area of a destination device is invalid.
  */
-static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
-                               sector_t start, void *data)
+static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
+                                 sector_t start, sector_t len, void *data)
 {
        struct queue_limits *limits = data;
        struct block_device *bdev = dev->bdev;
@@ -352,36 +357,40 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
        char b[BDEVNAME_SIZE];
 
        if (!dev_size)
-               return 1;
-
-       if ((start >= dev_size) || (start + ti->len > dev_size)) {
-               DMWARN("%s: %s too small for target",
-                      dm_device_name(ti->table->md), bdevname(bdev, b));
                return 0;
+
+       if ((start >= dev_size) || (start + len > dev_size)) {
+               DMWARN("%s: %s too small for target: "
+                      "start=%llu, len=%llu, dev_size=%llu",
+                      dm_device_name(ti->table->md), bdevname(bdev, b),
+                      (unsigned long long)start,
+                      (unsigned long long)len,
+                      (unsigned long long)dev_size);
+               return 1;
        }
 
        if (logical_block_size_sectors <= 1)
-               return 1;
+               return 0;
 
        if (start & (logical_block_size_sectors - 1)) {
                DMWARN("%s: start=%llu not aligned to h/w "
-                      "logical block size %hu of %s",
+                      "logical block size %u of %s",
                       dm_device_name(ti->table->md),
                       (unsigned long long)start,
                       limits->logical_block_size, bdevname(bdev, b));
-               return 0;
+               return 1;
        }
 
-       if (ti->len & (logical_block_size_sectors - 1)) {
+       if (len & (logical_block_size_sectors - 1)) {
                DMWARN("%s: len=%llu not aligned to h/w "
-                      "logical block size %hu of %s",
+                      "logical block size %u of %s",
                       dm_device_name(ti->table->md),
-                      (unsigned long long)ti->len,
+                      (unsigned long long)len,
                       limits->logical_block_size, bdevname(bdev, b));
-               return 0;
+               return 1;
        }
 
-       return 1;
+       return 0;
 }
 
 /*
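
The polarity flip matters because of how iterate_devices aggregates results:
each target's iterate_devices method returns as soon as the callout returns
non-zero, roughly like the linear target does (a sketch; the private-structure
fields are abridged):

        static int linear_iterate_devices(struct dm_target *ti,
                                          iterate_devices_callout_fn fn,
                                          void *data)
        {
                struct linear_c *lc = ti->private;      /* target's mapping state */

                return fn(ti, lc->dev, lc->start, ti->len, data);
        }

With the old device_area_is_valid() callout, a non-zero ("valid") return from
the first device would stop the iteration and look like overall success even if
a later device was out of range. Phrasing the callout as "is invalid" makes
non-zero mean failure, so the caller can treat any non-zero result as -EINVAL.
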
@@ -477,7 +486,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
 
 int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
-                        sector_t start, void *data)
+                        sector_t start, sector_t len, void *data)
 {
        struct queue_limits *limits = data;
        struct block_device *bdev = dev->bdev;
@@ -490,9 +499,16 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
                return 0;
        }
 
-       if (blk_stack_limits(limits, &q->limits, start) < 0)
-               DMWARN("%s: target device %s is misaligned",
-                      dm_device_name(ti->table->md), bdevname(bdev, b));
+       if (blk_stack_limits(limits, &q->limits, start << 9) < 0)
+               DMWARN("%s: target device %s is misaligned: "
+                      "physical_block_size=%u, logical_block_size=%u, "
+                      "alignment_offset=%u, start=%llu",
+                      dm_device_name(ti->table->md), bdevname(bdev, b),
+                      q->limits.physical_block_size,
+                      q->limits.logical_block_size,
+                      q->limits.alignment_offset,
+                      (unsigned long long) start << 9);
+
 
        /*
         * Check if merge fn is supported.
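
The shift fixes a units mismatch: device-mapper tracks start in 512-byte
sectors, while blk_stack_limits() at this point expects the offset in bytes, so
the value is converted with start << 9 (2^9 = 512). A quick check of the
arithmetic:

        sector_t start = 2048;                          /* in 512-byte sectors */
        unsigned long long bytes = (unsigned long long)start << 9;
        /* 2048 * 512 = 1048576 bytes = 1 MiB */
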
@@ -693,7 +709,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
 
        if (remaining) {
                DMWARN("%s: table line %u (start sect %llu len %llu) "
-                      "not aligned to h/w logical block size %hu",
+                      "not aligned to h/w logical block size %u",
                       dm_device_name(table->md), i,
                       (unsigned long long) ti->begin,
                       (unsigned long long) ti->len,
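
Widening %hu to %u is the safe choice for printing logical_block_size: in a
varargs call the argument is promoted to int anyway, and %u prints the full
promoted value, while %hu masks it to 16 bits (and trips gcc's printf format
checking if the field is ever widened to unsigned int). For example:

        unsigned int lbs = 4096;
        printk("%hu", lbs);     /* masks to the low 16 bits for values >= 65536 */
        printk("%u", lbs);      /* always correct for an unsigned int */
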
@@ -764,6 +780,99 @@ int dm_table_add_target(struct dm_table *t, const char *type,
        return r;
 }
 
+int dm_table_set_type(struct dm_table *t)
+{
+       unsigned i;
+       unsigned bio_based = 0, request_based = 0;
+       struct dm_target *tgt;
+       struct dm_dev_internal *dd;
+       struct list_head *devices;
+
+       for (i = 0; i < t->num_targets; i++) {
+               tgt = t->targets + i;
+               if (dm_target_request_based(tgt))
+                       request_based = 1;
+               else
+                       bio_based = 1;
+
+               if (bio_based && request_based) {
+                       DMWARN("Inconsistent table: different target types"
+                              " can't be mixed up");
+                       return -EINVAL;
+               }
+       }
+
+       if (bio_based) {
+               /* We must use this table as bio-based */
+               t->type = DM_TYPE_BIO_BASED;
+               return 0;
+       }
+
+       BUG_ON(!request_based); /* No targets in this table */
+
+       /* Non-request-stackable devices can't be used for request-based dm */
+       devices = dm_table_get_devices(t);
+       list_for_each_entry(dd, devices, list) {
+               if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) {
+                       DMWARN("table load rejected: including"
+                              " non-request-stackable devices");
+                       return -EINVAL;
+               }
+       }
+
+       /*
+        * Request-based dm supports only tables that have a single target now.
+        * To support multiple targets, request splitting support is needed,
+        * and that needs lots of changes in the block-layer.
+        * (e.g. request completion process for partial completion.)
+        */
+       if (t->num_targets > 1) {
+               DMWARN("Request-based dm doesn't support multiple targets yet");
+               return -EINVAL;
+       }
+
+       t->type = DM_TYPE_REQUEST_BASED;
+
+       return 0;
+}
+
+unsigned dm_table_get_type(struct dm_table *t)
+{
+       return t->type;
+}
+
+bool dm_table_request_based(struct dm_table *t)
+{
+       return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
+}
+
+int dm_table_alloc_md_mempools(struct dm_table *t)
+{
+       unsigned type = dm_table_get_type(t);
+
+       if (unlikely(type == DM_TYPE_NONE)) {
+               DMWARN("no table type is set, can't allocate mempools");
+               return -EINVAL;
+       }
+
+       t->mempools = dm_alloc_md_mempools(type);
+       if (!t->mempools)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void dm_table_free_md_mempools(struct dm_table *t)
+{
+       dm_free_md_mempools(t->mempools);
+       t->mempools = NULL;
+}
+
+struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
+{
+       return t->mempools;
+}
+
 static int setup_indexes(struct dm_table *t)
 {
        int i;
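
The intended calling sequence for the helpers above is: decide the type once
all targets have been added, then reserve the matching mempools before the
table is bound. Roughly how the ioctl table-load path is expected to use them
(a sketch; the function name is illustrative):

        static int example_table_load(struct dm_table *t)
        {
                int r;

                r = dm_table_set_type(t);       /* bio- vs request-based? */
                if (r)
                        return r;

                r = dm_table_alloc_md_mempools(t);      /* pools match t->type */
                if (r)
                        return r;

                return 0;
        }
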
@@ -898,12 +1007,16 @@ int dm_calculate_queue_limits(struct dm_table *table,
                ti->type->iterate_devices(ti, dm_set_device_limits,
                                          &ti_limits);
 
+               /* Set I/O hints portion of queue limits */
+               if (ti->type->io_hints)
+                       ti->type->io_hints(ti, &ti_limits);
+
                /*
                 * Check each device area is consistent with the target's
                 * overall queue limits.
                 */
-               if (!ti->type->iterate_devices(ti, device_area_is_valid,
-                                              &ti_limits))
+               if (ti->type->iterate_devices(ti, device_area_is_invalid,
+                                             &ti_limits))
                        return -EINVAL;
 
 combine_limits:
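
The new io_hints hook runs after the per-device limits have been gathered, so a
target can layer its own hints (io_min/io_opt) on top. A sketch of roughly what
the striped target does with it (blk_limits_io_min()/blk_limits_io_opt() are
the block-layer setters; the stripe_c fields are illustrative):

        static void stripe_io_hints(struct dm_target *ti,
                                    struct queue_limits *limits)
        {
                struct stripe_c *sc = ti->private;
                unsigned chunk_size = (sc->chunk_mask + 1) << 9;        /* bytes */

                blk_limits_io_min(limits, chunk_size);                  /* one chunk */
                blk_limits_io_opt(limits, chunk_size * sc->stripes);    /* full stripe */
        }
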
@@ -985,6 +1098,19 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
                queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 
        dm_table_set_integrity(t);
+
+       /*
+        * QUEUE_FLAG_STACKABLE must be set after all queue settings are
+        * visible to other CPUs because, once the flag is set, incoming bios
+        * are processed by request-based dm, which refers to the queue
+        * settings.
+        * Until the flag is set, bios are passed to bio-based dm and queued to
+        * md->deferred, where queue settings are not needed yet.
+        * Those bios are passed to request-based dm at resume time.
+        */
+       smp_mb();
+       if (dm_table_request_based(t))
+               queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
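
The barrier pairs with the read side in the bio submission path, where the flag
decides which way incoming bios are routed. Roughly (a sketch of the dm.c side;
helper names follow the same patch series):

        static int dm_request(struct request_queue *q, struct bio *bio)
        {
                struct mapped_device *md = q->queuedata;

                /* dm_request_based() tests QUEUE_FLAG_STACKABLE on md->queue */
                if (dm_request_based(md))
                        return dm_make_request(q, bio); /* request-based path */

                return _dm_request(q, bio);             /* bio-based path */
        }

The smp_mb() ensures the queue settings written above are visible before the
flag itself, so a bio routed to the request-based path sees a fully configured
queue.
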
@@ -1080,6 +1206,20 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
        return r;
 }
 
+int dm_table_any_busy_target(struct dm_table *t)
+{
+       unsigned i;
+       struct dm_target *ti;
+
+       for (i = 0; i < t->num_targets; i++) {
+               ti = t->targets + i;
+               if (ti->type->busy && ti->type->busy(ti))
+                       return 1;
+       }
+
+       return 0;
+}
+
 void dm_table_unplug_all(struct dm_table *t)
 {
        struct dm_dev_internal *dd;
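
dm_table_any_busy_target() is for the request-based dispatch loop: if any
target reports busy, dispatch is deferred so requests stay queued (and can
still be merged) instead of being pushed to a congested device. A simplified
sketch of the consuming loop (the real request_fn also handles suspend and
barriers):

        while ((rq = blk_peek_request(q))) {
                if (dm_table_any_busy_target(map)) {
                        blk_plug_device(q);     /* back off, retry later */
                        break;
                }
                blk_start_request(rq);
                /* ... clone rq and map it to the underlying device ... */
        }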