xfs: reclaim inodes under a write lock

[safe/jmp/linux-2.6] / block / blk-settings.c
diff --git a/block/blk-settings.c b/block/blk-settings.c

index 81bc0d1..d52d4ad 100644 (file)
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -7,6 +7,8 @@
  #include <linux/bio.h>
  #include <linux/blkdev.h>
  #include <linux/bootmem.h>     /* for max_pfn/max_low_pfn */
+#include <linux/gcd.h>
+#include <linux/jiffies.h>
  
  #include "blk.h"
  
@@ -33,23 +35,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
  EXPORT_SYMBOL(blk_queue_prep_rq);
  
  /**
- * blk_queue_set_discard - set a discard_sectors function for queue
- * @q:         queue
- * @dfn:       prepare_discard function
- *
- * It's possible for a queue to register a discard callback which is used
- * to transform a discard request into the appropriate type for the
- * hardware. If none is registered, then discard requests are failed
- * with %EOPNOTSUPP.
- *
- */
-void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
-{
-       q->prepare_discard_fn = dfn;
-}
-EXPORT_SYMBOL(blk_queue_set_discard);
-
-/**
   * blk_queue_merge_bvec - set a merge_bvec function for queue
   * @q:         queue
   * @mbfn:      merge_bvec_fn
@@ -97,7 +82,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
  
  /**
   * blk_set_default_limits - reset limits to default values
- * @limits:  the queue_limits structure to reset
+ * @lim:  the queue_limits structure to reset
   *
   * Description:
   *   Returns a queue_limit struct to its default state.  Can be used by
@@ -110,7 +95,13 @@ void blk_set_default_limits(struct queue_limits *lim)
         lim->max_hw_segments = MAX_HW_SEGMENTS;
         lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
         lim->max_segment_size = MAX_SEGMENT_SIZE;
-       lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
+       lim->max_sectors = BLK_DEF_MAX_SECTORS;
+       lim->max_hw_sectors = INT_MAX;
+       lim->max_discard_sectors = 0;
+       lim->discard_granularity = 0;
+       lim->discard_alignment = 0;
+       lim->discard_misaligned = 0;
+       lim->discard_zeroes_data = -1;
         lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
         lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
         lim->alignment_offset = 0;
@@ -155,7 +146,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
         q->nr_batching = BLK_BATCH_REQ;
  
         q->unplug_thresh = 4;           /* hmm */
-       q->unplug_delay = (3 * HZ) / 1000;      /* 3 milliseconds */
+       q->unplug_delay = msecs_to_jiffies(3);  /* 3 milliseconds */
         if (q->unplug_delay == 0)
                 q->unplug_delay = 1;
  
@@ -163,6 +154,14 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
         q->unplug_timer.data = (unsigned long)q;
  
         blk_set_default_limits(&q->limits);
+       blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
+
+       /*
+        * If the caller didn't supply a lock, fall back to the embedded
+        * per-queue lock.
+        */
+       if (!q->queue_lock)
+               q->queue_lock = &q->__queue_lock;
  
         /*
          * by default assume old behaviour and bounce for any highmem page
@@ -246,6 +245,18 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
  EXPORT_SYMBOL(blk_queue_max_hw_sectors);
  
  /**
+ * blk_queue_max_discard_sectors - set max sectors for a single discard
+ * @q:  the request queue for the device
+ * @max_discard_sectors: maximum number of sectors to discard
+ **/
+void blk_queue_max_discard_sectors(struct request_queue *q,
+               unsigned int max_discard_sectors)
+{
+       q->limits.max_discard_sectors = max_discard_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_discard_sectors);
+
+/**
   * blk_queue_max_phys_segments - set max phys segments for a request for this queue
   * @q:  the request queue for the device
   * @max_segments:  max number of segments
@@ -377,8 +388,8 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
  EXPORT_SYMBOL(blk_queue_alignment_offset);
  
  /**
- * blk_queue_io_min - set minimum request size for the queue
- * @q: the request queue for the device
+ * blk_limits_io_min - set minimum request size for a device
+ * @limits: the queue limits
   * @min:  smallest I/O size in bytes
   *
   * Description:
@@ -387,30 +398,73 @@ EXPORT_SYMBOL(blk_queue_alignment_offset);
   *   smallest I/O the device can perform without incurring a performance
   *   penalty.
   */
-void blk_queue_io_min(struct request_queue *q, unsigned int min)
+void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
  {
-       q->limits.io_min = min;
+       limits->io_min = min;
  
-       if (q->limits.io_min < q->limits.logical_block_size)
-               q->limits.io_min = q->limits.logical_block_size;
+       if (limits->io_min < limits->logical_block_size)
+               limits->io_min = limits->logical_block_size;
  
-       if (q->limits.io_min < q->limits.physical_block_size)
-               q->limits.io_min = q->limits.physical_block_size;
+       if (limits->io_min < limits->physical_block_size)
+               limits->io_min = limits->physical_block_size;
+}
+EXPORT_SYMBOL(blk_limits_io_min);
+
+/**
+ * blk_queue_io_min - set minimum request size for the queue
+ * @q: the request queue for the device
+ * @min:  smallest I/O size in bytes
+ *
+ * Description:
+ *   Storage devices may report a granularity or preferred minimum I/O
+ *   size which is the smallest request the device can perform without
+ *   incurring a performance penalty.  For disk drives this is often the
+ *   physical block size.  For RAID arrays it is often the stripe chunk
+ *   size.  A properly aligned multiple of minimum_io_size is the
+ *   preferred request size for workloads where a high number of I/O
+ *   operations is desired.
+ */
+void blk_queue_io_min(struct request_queue *q, unsigned int min)
+{
+       blk_limits_io_min(&q->limits, min);
  }
  EXPORT_SYMBOL(blk_queue_io_min);
  
  /**
+ * blk_limits_io_opt - set optimal request size for a device
+ * @limits: the queue limits
+ * @opt:  optimal request size in bytes
+ *
+ * Description:
+ *   Storage devices may report an optimal I/O size, which is the
+ *   device's preferred unit for sustained I/O.  This is rarely reported
+ *   for disk drives.  For RAID arrays it is usually the stripe width or
+ *   the internal track size.  A properly aligned multiple of
+ *   optimal_io_size is the preferred request size for workloads where
+ *   sustained throughput is desired.
+ */
+void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt)
+{
+       limits->io_opt = opt;
+}
+EXPORT_SYMBOL(blk_limits_io_opt);
+
+/**
   * blk_queue_io_opt - set optimal request size for the queue
   * @q: the request queue for the device
   * @opt:  optimal request size in bytes
   *
   * Description:
- *   Drivers can call this function to set the preferred I/O request
- *   size for devices that report such a value.
+ *   Storage devices may report an optimal I/O size, which is the
+ *   device's preferred unit for sustained I/O.  This is rarely reported
+ *   for disk drives.  For RAID arrays it is usually the stripe width or
+ *   the internal track size.  A properly aligned multiple of
+ *   optimal_io_size is the preferred request size for workloads where
+ *   sustained throughput is desired.
   */
  void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
  {
-       q->limits.io_opt = opt;
+       blk_limits_io_opt(&q->limits, opt);
  }
  EXPORT_SYMBOL(blk_queue_io_opt);
  
@@ -426,27 +480,7 @@ EXPORT_SYMBOL(blk_queue_io_opt);
   **/
  void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
  {
-       /* zero is "infinity" */
-       t->limits.max_sectors = min_not_zero(queue_max_sectors(t),
-                                            queue_max_sectors(b));
-
-       t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t),
-                                               queue_max_hw_sectors(b));
-
-       t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t),
-                                                  queue_segment_boundary(b));
-
-       t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t),
-                                                  queue_max_phys_segments(b));
-
-       t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t),
-                                                queue_max_hw_segments(b));
-
-       t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t),
-                                                 queue_max_segment_size(b));
-
-       t->limits.logical_block_size = max(queue_logical_block_size(t),
-                                          queue_logical_block_size(b));
+       blk_stack_limits(&t->limits, &b->limits, 0);
  
         if (!t->queue_lock)
                 WARN_ON_ONCE(1);
@@ -459,20 +493,43 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
  }
  EXPORT_SYMBOL(blk_queue_stack_limits);
  
+/* lcm of @a and @b; if either is zero the other is returned unchanged */
+static unsigned int lcm(unsigned int a, unsigned int b)
+{
+       if (!a || !b)
+               return a ? a : b;
+
+       /* divide before multiplying: a * b can wrap even if the lcm fits */
+       return (a / gcd(a, b)) * b;
+}
+
  /**
   * blk_stack_limits - adjust queue_limits for stacked devices
- * @t: the stacking driver limits (top)
- * @b:  the underlying queue limits (bottom)
+ * @t: the stacking driver limits (top device)
+ * @b:  the underlying queue limits (bottom, component device)
   * @offset:  offset to beginning of data within component device
   *
   * Description:
- *    Merges two queue_limit structs.  Returns 0 if alignment didn't
- *    change.  Returns -1 if adding the bottom device caused
- *    misalignment.
+ *    This function is used by stacking drivers like MD and DM to ensure
+ *    that all component devices have compatible block sizes and
+ *    alignments.  The stacking driver must provide a queue_limits
+ *    struct (top) and then iteratively call the stacking function for
+ *    all component (bottom) devices.  The stacking function will
+ *    attempt to combine the values and ensure proper alignment.
+ *
+ *    Returns 0 if the top and bottom queue_limits are compatible.  The
+ *    top device's block sizes and alignment offsets may be adjusted to
+ *    ensure alignment with the bottom device. If no compatible sizes
+ *    and alignments exist, -1 is returned and the resulting top
+ *    queue_limits will have the misaligned flag set to indicate that
+ *    the alignment_offset is undefined.
   */
  int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                      sector_t offset)
  {
+       sector_t alignment;
+       unsigned int top, bottom;
+
         t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
         t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
         t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
@@ -489,6 +546,22 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
         t->max_segment_size = min_not_zero(t->max_segment_size,
                                            b->max_segment_size);
  
+       alignment = queue_limit_alignment_offset(b, offset);
+
+       /* Bottom device has different alignment.  Check that it is
+        * compatible with the current top alignment.
+        */
+       if (t->alignment_offset != alignment) {
+
+               top = max(t->physical_block_size, t->io_min)
+                       + t->alignment_offset;
+               bottom = max(b->physical_block_size, b->io_min) + alignment;
+
+               /* Verify that top and bottom intervals line up */
+               if (max(top, bottom) & (min(top, bottom) - 1))
+                       t->misaligned = 1;
+       }
+
         t->logical_block_size = max(t->logical_block_size,
                                     b->logical_block_size);
  
@@ -496,27 +569,64 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                                      b->physical_block_size);
  
         t->io_min = max(t->io_min, b->io_min);
+       t->io_opt = lcm(t->io_opt, b->io_opt);
+
         t->no_cluster |= b->no_cluster;
+       t->discard_zeroes_data &= b->discard_zeroes_data;
  
-       /* Bottom device offset aligned? */
-       if (offset &&
-           (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+       /* Physical block size a multiple of the logical block size? */
+       if (t->physical_block_size & (t->logical_block_size - 1)) {
+               t->physical_block_size = t->logical_block_size;
                 t->misaligned = 1;
-               return -1;
         }
  
-       /* If top has no alignment offset, inherit from bottom */
-       if (!t->alignment_offset)
-               t->alignment_offset =
-                       b->alignment_offset & (b->physical_block_size - 1);
+       /* Minimum I/O a multiple of the physical block size? */
+       if (t->io_min & (t->physical_block_size - 1)) {
+               t->io_min = t->physical_block_size;
+               t->misaligned = 1;
+       }
  
-       /* Top device aligned on logical block boundary? */
-       if (t->alignment_offset & (t->logical_block_size - 1)) {
+       /* Optimal I/O a multiple of the physical block size? */
+       if (t->io_opt & (t->physical_block_size - 1)) {
+               t->io_opt = 0;
                 t->misaligned = 1;
-               return -1;
         }
  
-       return 0;
+       /* Find lowest common alignment_offset */
+       t->alignment_offset = lcm(t->alignment_offset, alignment)
+               & (max(t->physical_block_size, t->io_min) - 1);
+
+       /* Verify that new alignment_offset is on a logical block boundary */
+       if (t->alignment_offset & (t->logical_block_size - 1))
+               t->misaligned = 1;
+
+       /* Discard alignment and granularity */
+       if (b->discard_granularity) {
+               unsigned int granularity = b->discard_granularity;
+               offset &= granularity - 1;
+
+               alignment = (granularity + b->discard_alignment - offset)
+                       & (granularity - 1);
+
+               if (t->discard_granularity != 0 &&
+                   t->discard_alignment != alignment) {
+                       top = t->discard_granularity + t->discard_alignment;
+                       bottom = b->discard_granularity + alignment;
+
+                       /* Verify that top and bottom intervals line up */
+                       if (max(top, bottom) & (min(top, bottom) - 1))
+                               t->discard_misaligned = 1;
+               }
+
+               t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
+                                                     b->max_discard_sectors);
+               t->discard_granularity = max(t->discard_granularity,
+                                            b->discard_granularity);
+               t->discard_alignment = lcm(t->discard_alignment, alignment) &
+                       (t->discard_granularity - 1);
+       }
+
+       return t->misaligned ? -1 : 0;
  }
  EXPORT_SYMBOL(blk_stack_limits);