#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
+#include <linux/gcd.h>
+#include <linux/jiffies.h>
#include "blk.h"
EXPORT_SYMBOL(blk_queue_prep_rq);
/**
- * blk_queue_set_discard - set a discard_sectors function for queue
- * @q: queue
- * @dfn: prepare_discard function
- *
- * It's possible for a queue to register a discard callback which is used
- * to transform a discard request into the appropriate type for the
- * hardware. If none is registered, then discard requests are failed
- * with %EOPNOTSUPP.
- *
- */
-void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
-{
- q->prepare_discard_fn = dfn;
-}
-EXPORT_SYMBOL(blk_queue_set_discard);
-
-/**
* blk_queue_merge_bvec - set a merge_bvec function for queue
* @q: queue
* @mbfn: merge_bvec_fn
/**
* blk_set_default_limits - reset limits to default values
- * @limits: the queue_limits structure to reset
+ * @lim: the queue_limits structure to reset
*
* Description:
* Returns a queue_limit struct to its default state. Can be used by
lim->max_hw_segments = MAX_HW_SEGMENTS;
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = MAX_SEGMENT_SIZE;
- lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
+ lim->max_sectors = BLK_DEF_MAX_SECTORS;
+ lim->max_hw_sectors = INT_MAX;
+ lim->max_discard_sectors = 0;
+ lim->discard_granularity = 0;
+ lim->discard_alignment = 0;
+ lim->discard_misaligned = 0;
+ lim->discard_zeroes_data = -1;
lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
lim->alignment_offset = 0;
q->nr_batching = BLK_BATCH_REQ;
q->unplug_thresh = 4; /* hmm */
- q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
+ q->unplug_delay = msecs_to_jiffies(3); /* 3 milliseconds */
if (q->unplug_delay == 0)
q->unplug_delay = 1;
q->unplug_timer.data = (unsigned long)q;
blk_set_default_limits(&q->limits);
+ blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
+
+ /*
+ * If the caller didn't supply a lock, fall back to our embedded
+ * per-queue locks
+ */
+ if (!q->queue_lock)
+ q->queue_lock = &q->__queue_lock;
/*
* by default assume old behaviour and bounce for any highmem page
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
/**
+ * blk_queue_max_discard_sectors - set max sectors for a single discard
+ * @q: the request queue for the device
+ * @max_discard_sectors: maximum number of sectors to discard
+ **/
+void blk_queue_max_discard_sectors(struct request_queue *q,
+ unsigned int max_discard_sectors)
+{
+ q->limits.max_discard_sectors = max_discard_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_discard_sectors);
+
+/**
* blk_queue_max_phys_segments - set max phys segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
EXPORT_SYMBOL(blk_queue_alignment_offset);
/**
- * blk_queue_io_min - set minimum request size for the queue
- * @q: the request queue for the device
+ * blk_limits_io_min - set minimum request size for a device
+ * @limits: the queue limits
* @min: smallest I/O size in bytes
*
* Description:
* smallest I/O the device can perform without incurring a performance
* penalty.
*/
-void blk_queue_io_min(struct request_queue *q, unsigned int min)
+void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
{
- q->limits.io_min = min;
+ limits->io_min = min;
- if (q->limits.io_min < q->limits.logical_block_size)
- q->limits.io_min = q->limits.logical_block_size;
+ if (limits->io_min < limits->logical_block_size)
+ limits->io_min = limits->logical_block_size;
- if (q->limits.io_min < q->limits.physical_block_size)
- q->limits.io_min = q->limits.physical_block_size;
+ if (limits->io_min < limits->physical_block_size)
+ limits->io_min = limits->physical_block_size;
+}
+EXPORT_SYMBOL(blk_limits_io_min);
+
+/**
+ * blk_queue_io_min - set minimum request size for the queue
+ * @q: the request queue for the device
+ * @min: smallest I/O size in bytes
+ *
+ * Description:
+ * Storage devices may report a granularity or preferred minimum I/O
+ * size which is the smallest request the device can perform without
+ * incurring a performance penalty. For disk drives this is often the
+ * physical block size. For RAID arrays it is often the stripe chunk
+ * size. A properly aligned multiple of minimum_io_size is the
+ * preferred request size for workloads where a high number of I/O
+ * operations is desired.
+ */
+void blk_queue_io_min(struct request_queue *q, unsigned int min)
+{
+ blk_limits_io_min(&q->limits, min);
}
EXPORT_SYMBOL(blk_queue_io_min);
/**
+ * blk_limits_io_opt - set optimal request size for a device
+ * @limits: the queue limits
+ * @opt: smallest I/O size in bytes
+ *
+ * Description:
+ * Storage devices may report an optimal I/O size, which is the
+ * device's preferred unit for sustained I/O. This is rarely reported
+ * for disk drives. For RAID arrays it is usually the stripe width or
+ * the internal track size. A properly aligned multiple of
+ * optimal_io_size is the preferred request size for workloads where
+ * sustained throughput is desired.
+ */
+void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt)
+{
+ limits->io_opt = opt;
+}
+EXPORT_SYMBOL(blk_limits_io_opt);
+
+/**
* blk_queue_io_opt - set optimal request size for the queue
* @q: the request queue for the device
* @opt: optimal request size in bytes
*
* Description:
- * Drivers can call this function to set the preferred I/O request
- * size for devices that report such a value.
+ * Storage devices may report an optimal I/O size, which is the
+ * device's preferred unit for sustained I/O. This is rarely reported
+ * for disk drives. For RAID arrays it is usually the stripe width or
+ * the internal track size. A properly aligned multiple of
+ * optimal_io_size is the preferred request size for workloads where
+ * sustained throughput is desired.
*/
void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
{
- q->limits.io_opt = opt;
+ blk_limits_io_opt(&q->limits, opt);
}
EXPORT_SYMBOL(blk_queue_io_opt);
**/
void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
{
- /* zero is "infinity" */
- t->limits.max_sectors = min_not_zero(queue_max_sectors(t),
- queue_max_sectors(b));
-
- t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t),
- queue_max_hw_sectors(b));
-
- t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t),
- queue_segment_boundary(b));
-
- t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t),
- queue_max_phys_segments(b));
-
- t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t),
- queue_max_hw_segments(b));
-
- t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t),
- queue_max_segment_size(b));
-
- t->limits.logical_block_size = max(queue_logical_block_size(t),
- queue_logical_block_size(b));
+ blk_stack_limits(&t->limits, &b->limits, 0);
if (!t->queue_lock)
WARN_ON_ONCE(1);
}
EXPORT_SYMBOL(blk_queue_stack_limits);
+static unsigned int lcm(unsigned int a, unsigned int b)
+{
+ if (a && b)
+ return (a * b) / gcd(a, b);
+ else if (b)
+ return b;
+
+ return a;
+}
+
/**
* blk_stack_limits - adjust queue_limits for stacked devices
- * @t: the stacking driver limits (top)
- * @b: the underlying queue limits (bottom)
+ * @t: the stacking driver limits (top device)
+ * @b: the underlying queue limits (bottom, component device)
* @offset: offset to beginning of data within component device
*
* Description:
- * Merges two queue_limit structs. Returns 0 if alignment didn't
- * change. Returns -1 if adding the bottom device caused
- * misalignment.
+ * This function is used by stacking drivers like MD and DM to ensure
+ * that all component devices have compatible block sizes and
+ * alignments. The stacking driver must provide a queue_limits
+ * struct (top) and then iteratively call the stacking function for
+ * all component (bottom) devices. The stacking function will
+ * attempt to combine the values and ensure proper alignment.
+ *
+ * Returns 0 if the top and bottom queue_limits are compatible. The
+ * top device's block sizes and alignment offsets may be adjusted to
+ * ensure alignment with the bottom device. If no compatible sizes
+ * and alignments exist, -1 is returned and the resulting top
+ * queue_limits will have the misaligned flag set to indicate that
+ * the alignment_offset is undefined.
*/
int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset)
{
+ sector_t alignment;
+ unsigned int top, bottom;
+
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->max_segment_size = min_not_zero(t->max_segment_size,
b->max_segment_size);
+ alignment = queue_limit_alignment_offset(b, offset);
+
+ /* Bottom device has different alignment. Check that it is
+ * compatible with the current top alignment.
+ */
+ if (t->alignment_offset != alignment) {
+
+ top = max(t->physical_block_size, t->io_min)
+ + t->alignment_offset;
+ bottom = max(b->physical_block_size, b->io_min) + alignment;
+
+ /* Verify that top and bottom intervals line up */
+ if (max(top, bottom) & (min(top, bottom) - 1))
+ t->misaligned = 1;
+ }
+
t->logical_block_size = max(t->logical_block_size,
b->logical_block_size);
b->physical_block_size);
t->io_min = max(t->io_min, b->io_min);
+ t->io_opt = lcm(t->io_opt, b->io_opt);
+
t->no_cluster |= b->no_cluster;
+ t->discard_zeroes_data &= b->discard_zeroes_data;
- /* Bottom device offset aligned? */
- if (offset &&
- (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+ /* Physical block size a multiple of the logical block size? */
+ if (t->physical_block_size & (t->logical_block_size - 1)) {
+ t->physical_block_size = t->logical_block_size;
t->misaligned = 1;
- return -1;
}
- /* If top has no alignment offset, inherit from bottom */
- if (!t->alignment_offset)
- t->alignment_offset =
- b->alignment_offset & (b->physical_block_size - 1);
+ /* Minimum I/O a multiple of the physical block size? */
+ if (t->io_min & (t->physical_block_size - 1)) {
+ t->io_min = t->physical_block_size;
+ t->misaligned = 1;
+ }
- /* Top device aligned on logical block boundary? */
- if (t->alignment_offset & (t->logical_block_size - 1)) {
+ /* Optimal I/O a multiple of the physical block size? */
+ if (t->io_opt & (t->physical_block_size - 1)) {
+ t->io_opt = 0;
t->misaligned = 1;
- return -1;
}
- return 0;
+ /* Find lowest common alignment_offset */
+ t->alignment_offset = lcm(t->alignment_offset, alignment)
+ & (max(t->physical_block_size, t->io_min) - 1);
+
+ /* Verify that new alignment_offset is on a logical block boundary */
+ if (t->alignment_offset & (t->logical_block_size - 1))
+ t->misaligned = 1;
+
+ /* Discard alignment and granularity */
+ if (b->discard_granularity) {
+ unsigned int granularity = b->discard_granularity;
+ offset &= granularity - 1;
+
+ alignment = (granularity + b->discard_alignment - offset)
+ & (granularity - 1);
+
+ if (t->discard_granularity != 0 &&
+ t->discard_alignment != alignment) {
+ top = t->discard_granularity + t->discard_alignment;
+ bottom = b->discard_granularity + alignment;
+
+ /* Verify that top and bottom intervals line up */
+ if (max(top, bottom) & (min(top, bottom) - 1))
+ t->discard_misaligned = 1;
+ }
+
+ t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
+ b->max_discard_sectors);
+ t->discard_granularity = max(t->discard_granularity,
+ b->discard_granularity);
+ t->discard_alignment = lcm(t->discard_alignment, alignment) &
+ (t->discard_granularity - 1);
+ }
+
+ return t->misaligned ? -1 : 0;
}
EXPORT_SYMBOL(blk_stack_limits);