/*
* This handles all read/write requests to block devices
*/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
/* Assume anything <= 4GB can be handled by IOMMU.
Actually some IOMMUs can handle everything, but I don't
know of a way to test this here. */
- if (bounce_pfn < (0xffffffff>>PAGE_SHIFT))
+ if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
dma = 1;
q->bounce_pfn = max_low_pfn;
#else
t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
+ if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
+ clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
}
EXPORT_SYMBOL(blk_queue_stack_limits);
EXPORT_SYMBOL(blk_queue_find_tag);
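A usage sketch for the stacking helper above, assuming a hypothetical stacked queue and component device (names are illustrative, not part of this file):

/* Sketch: fold one component device's limits into a stacked queue.  The
 * helper keeps the more restrictive value of each limit and, with the
 * change above, also clears QUEUE_FLAG_CLUSTER on the top queue if the
 * bottom queue cannot cluster segments. */
static void example_stack_one(request_queue_t *t, struct block_device *bdev)
{
	request_queue_t *b = bdev_get_queue(bdev);

	blk_queue_stack_limits(t, b);
}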
/**
- * __blk_queue_free_tags - release tag maintenance info
- * @q: the request queue for the device
+ * __blk_free_tags - release a given set of tag maintenance info
+ * @bqt: the tag map to free
*
- * Notes:
- * blk_cleanup_queue() will take care of calling this function, if tagging
- * has been used. So there's no need to call this directly.
- **/
-static void __blk_queue_free_tags(request_queue_t *q)
+ * Tries to free the specified @bqt. Returns true if it was
+ * actually freed and false if there are still references using it.
+ */
+static int __blk_free_tags(struct blk_queue_tag *bqt)
{
- struct blk_queue_tag *bqt = q->queue_tags;
-
- if (!bqt)
- return;
+ int retval;
- if (atomic_dec_and_test(&bqt->refcnt)) {
+ retval = atomic_dec_and_test(&bqt->refcnt);
+ if (retval) {
BUG_ON(bqt->busy);
BUG_ON(!list_empty(&bqt->busy_list));
bqt->tag_map = NULL;
kfree(bqt);
}
+ return retval;
+}
+
+/**
+ * __blk_queue_free_tags - release tag maintenance info
+ * @q: the request queue for the device
+ *
+ * Notes:
+ * blk_cleanup_queue() will take care of calling this function, if tagging
+ * has been used. So there's no need to call this directly.
+ **/
+static void __blk_queue_free_tags(request_queue_t *q)
+{
+ struct blk_queue_tag *bqt = q->queue_tags;
+
+ if (!bqt)
+ return;
+
+ __blk_free_tags(bqt);
+
q->queue_tags = NULL;
q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
}
+
+/**
+ * blk_free_tags - release a given set of tag maintenance info
+ * @bqt: the tag map to free
+ *
+ * For an externally managed @bqt, this frees the map. Callers of this
+ * function must guarantee to have released all the queues that
+ * might have been using this tag map.
+ */
+void blk_free_tags(struct blk_queue_tag *bqt)
+{
+ if (unlikely(!__blk_free_tags(bqt)))
+ BUG();
+}
+EXPORT_SYMBOL(blk_free_tags);
+
/**
* blk_queue_free_tags - release tag maintenance info
* @q: the request queue for the device
unsigned long *tag_map;
int nr_ulongs;
- if (depth > q->nr_requests * 2) {
+ if (q && depth > q->nr_requests * 2) {
depth = q->nr_requests * 2;
printk(KERN_ERR "%s: adjusted depth to %d\n",
__FUNCTION__, depth);
return -ENOMEM;
}
+static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
+ int depth)
+{
+ struct blk_queue_tag *tags;
+
+ tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
+ if (!tags)
+ goto fail;
+
+ if (init_tag_map(q, tags, depth))
+ goto fail;
+
+ INIT_LIST_HEAD(&tags->busy_list);
+ tags->busy = 0;
+ atomic_set(&tags->refcnt, 1);
+ return tags;
+fail:
+ kfree(tags);
+ return NULL;
+}
+
+/**
+ * blk_init_tags - initialize the tag info for an external tag map
+ * @depth: the maximum queue depth supported
+ **/
+struct blk_queue_tag *blk_init_tags(int depth)
+{
+ return __blk_queue_init_tags(NULL, depth);
+}
+EXPORT_SYMBOL(blk_init_tags);
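A sketch of how the new pair is meant to be used (hypothetical driver and queue names): one externally managed tag map is created once, attached to several queues, and freed only after every queue that used it has been cleaned up:

/* Sketch: one tag map shared by two request queues. */
static struct blk_queue_tag *example_shared_tags;

static int example_setup_shared_tags(request_queue_t *q1, request_queue_t *q2,
				     int depth)
{
	example_shared_tags = blk_init_tags(depth);
	if (!example_shared_tags)
		return -ENOMEM;

	/* passing an existing map makes the queues share it */
	if (blk_queue_init_tags(q1, depth, example_shared_tags) ||
	    blk_queue_init_tags(q2, depth, example_shared_tags))
		return -ENOMEM;

	return 0;
}

static void example_release_shared_tags(void)
{
	/* legal only once every queue using the map has been released */
	blk_free_tags(example_shared_tags);
}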
+
/**
* blk_queue_init_tags - initialize the queue tag info
* @q: the request queue for the device
BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
if (!tags && !q->queue_tags) {
- tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
- if (!tags)
- goto fail;
+ tags = __blk_queue_init_tags(q, depth);
- if (init_tag_map(q, tags, depth))
+ if (!tags)
goto fail;
-
- INIT_LIST_HEAD(&tags->busy_list);
- tags->busy = 0;
- atomic_set(&tags->refcnt, 1);
} else if (q->queue_tags) {
if ((rc = blk_queue_resize_tags(q, depth)))
return rc;
}
/*
+ * Currently cannot replace a shared tag map with a new
+ * one, so error out if this is the case
+ */
+ if (atomic_read(&bqt->refcnt) != 1)
+ return -EBUSY;
+
+ /*
* save the old state info, so we can copy it back
*/
tag_index = bqt->tag_index;
* don't plug a stopped queue, it must be paired with blk_start_queue()
* which will restart the queueing
*/
- if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
+ if (blk_queue_stopped(q))
return;
if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
*/
void __generic_unplug_device(request_queue_t *q)
{
- if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)))
+ if (unlikely(blk_queue_stopped(q)))
return;
if (!blk_remove_plug(q))
**/
void blk_start_queue(request_queue_t *q)
{
+ WARN_ON(!irqs_disabled());
+
clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
/*
spin_lock_irqsave(q->queue_lock, flags);
blk_remove_plug(q);
- if (!elv_queue_empty(q))
- q->request_fn(q);
+
+ /*
+ * Only recurse once to avoid overrunning the stack, let the unplug
+ * handling reinvoke the handler shortly if we already got there.
+ */
+ if (!elv_queue_empty(q)) {
+ if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+ q->request_fn(q);
+ clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
+ } else {
+ blk_plug_device(q);
+ kblockd_schedule_work(&q->unplug_work);
+ }
+ }
+
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);
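A usage sketch for the queue stop/start helpers touched above (hypothetical driver functions): blk_start_queue() must be called under the queue lock with interrupts disabled, which is what the new WARN_ON() checks, while blk_run_queue() takes the lock itself:

/* Sketch: stop the queue when the driver runs out of internal resources,
 * restart it from the completion path. */
static void example_out_of_resources(request_queue_t *q)
{
	/* request_fn is entered with q->queue_lock held and irqs off */
	blk_stop_queue(q);
}

static void example_command_done(request_queue_t *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_start_queue(q);	/* clears STOPPED and restarts dispatch */
	spin_unlock_irqrestore(q->queue_lock, flags);
}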
/**
* blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
- * @q: the request queue to be released
+ * @kobj: the kobj belonging to the request queue to be released
*
* Description:
* blk_cleanup_queue is the pair to blk_init_queue() or
* get dealt with eventually.
*
* The queue spin lock must be held while manipulating the requests on the
- * request queue.
+ * request queue; this lock is also taken from interrupt context, so irq
+ * disabling is needed for it.
*
* Function returns a pointer to the initialized request queue, or NULL if
* it didn't succeed.
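A minimal setup sketch for that locking rule (all names hypothetical): the spinlock handed to blk_init_queue() becomes q->queue_lock, so any use outside the request function must disable interrupts, since completion handling takes the same lock from irq context:

static void example_request_fn(request_queue_t *q);

static DEFINE_SPINLOCK(example_lock);
static request_queue_t *example_queue;

static int example_init(void)
{
	example_queue = blk_init_queue(example_request_fn, &example_lock);
	if (!example_queue)
		return -ENOMEM;
	return 0;
}

static void example_requeue_from_irq(struct request *rq)
{
	unsigned long flags;

	spin_lock_irqsave(example_queue->queue_lock, flags);
	blk_requeue_request(example_queue, rq);
	spin_unlock_irqrestore(example_queue->queue_lock, flags);
}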
rq->rq_disk = bd_disk;
rq->flags |= REQ_NOMERGE;
rq->end_io = done;
- elv_add_request(q, rq, where, 1);
- generic_unplug_device(q);
+ WARN_ON(irqs_disabled());
+ spin_lock_irq(q->queue_lock);
+ __elv_add_request(q, rq, where, 1);
+ __generic_unplug_device(q);
+ spin_unlock_irq(q->queue_lock);
}
-
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
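Since blk_execute_rq_nowait() now takes q->queue_lock itself, callers must invoke it without the lock and with interrupts enabled; most users go through the synchronous blk_execute_rq() wrapper below, roughly like this sketch (hypothetical caller, request setup elided):

/* Sketch: synchronous passthrough submission from process context. */
static int example_submit_sync(request_queue_t *q, struct gendisk *disk)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, READ, __GFP_WAIT);
	if (!rq)
		return -ENOMEM;

	rq->flags |= REQ_BLOCK_PC;
	/* ... fill in rq->cmd[], rq->cmd_len, rq->timeout here ... */

	err = blk_execute_rq(q, disk, rq, 0);
	blk_put_request(rq);
	return err;
}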
/**
int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
struct request *rq, int at_head)
{
- DECLARE_COMPLETION(wait);
+ DECLARE_COMPLETION_ONSTACK(wait);
char sense[SCSI_SENSE_BUFFERSIZE];
int err = 0;
EXPORT_SYMBOL(blk_congestion_wait);
+/**
+ * blk_congestion_end - wake up sleepers on a congestion queue
+ * @rw: READ or WRITE
+ */
+void blk_congestion_end(int rw)
+{
+ wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+ if (waitqueue_active(wqh))
+ wake_up(wqh);
+}
+
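blk_congestion_end() is the wake-up side of the already exported blk_congestion_wait(); a sketch of how the pair might be used by a throttling path (driver state is hypothetical):

/* Sketch: writers back off while the device is congested, the completion
 * side wakes them once congestion clears. */
static void example_throttle_write(int congested)
{
	if (congested)
		/* sleep up to ~20ms or until blk_congestion_end(WRITE) */
		blk_congestion_wait(WRITE, HZ / 50);
}

static void example_congestion_cleared(void)
{
	blk_congestion_end(WRITE);
}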
/*
* Has to be called with the request spinlock acquired
*/
return 0;
/*
- * not contigious
+ * not contiguous
*/
if (req->sector + req->nr_sectors != next->sector)
return 0;
if (unlikely(bio_barrier(bio)))
req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+ if (bio_sync(bio))
+ req->flags |= REQ_RW_SYNC;
+
req->errors = 0;
req->hard_sector = req->sector = bio->bi_sector;
req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
BIO_BUG_ON(!bio->bi_io_vec);
bio->bi_rw |= rw;
if (rw & WRITE)
- mod_page_state(pgpgout, count);
+ count_vm_events(PGPGOUT, count);
else
- mod_page_state(pgpgin, count);
+ count_vm_events(PGPGIN, count);
if (unlikely(block_dump)) {
char b[BDEVNAME_SIZE];
*/
static void blk_done_softirq(struct softirq_action *h)
{
- struct list_head *cpu_list;
- LIST_HEAD(local_list);
+ struct list_head *cpu_list, local_list;
local_irq_disable();
cpu_list = &__get_cpu_var(blk_cpu_done);
- list_splice_init(cpu_list, &local_list);
+ list_replace_init(cpu_list, &local_list);
local_irq_enable();
while (!list_empty(&local_list)) {
*
* Description:
* Ends all I/O on a request. It does not handle partial completions,
- * unless the driver actually implements this in its completionc callback
+ * unless the driver actually implements this in its completion callback
 * through requeueing. The actual completion happens out-of-order,
* through a softirq handler. The user must have registered a completion
* callback through blk_queue_softirq_done().
if (unlikely(laptop_mode) && blk_fs_request(req))
laptop_io_completion();
- if (disk && blk_fs_request(req)) {
+ /*
+ * Account IO completion. bar_rq isn't accounted as a normal
+ * IO on queueing nor completion. Accounting the containing
+ * request is enough.
+ */
+ if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
unsigned long duration = jiffies - req->start_time;
const int rw = rq_data_dir(req);
void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
{
- /* first three bits are identical in rq->flags and bio->bi_rw */
- rq->flags |= (bio->bi_rw & 7);
+ /* first two bits are identical in rq->flags and bio->bi_rw */
+ rq->flags |= (bio->bi_rw & 3);
rq->nr_phys_segments = bio_phys_segments(q, bio);
rq->nr_hw_segments = bio_hw_segments(q, bio);
iocontext_cachep = kmem_cache_create("blkdev_ioc",
sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
- for_each_cpu(i)
+ for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
-#ifdef CONFIG_HOTPLUG_CPU
- register_cpu_notifier(&blk_cpu_notifier);
-#endif
+ register_hotcpu_notifier(&blk_cpu_notifier);
blk_max_low_pfn = max_low_pfn;
blk_max_pfn = max_pfn;
BUG_ON(atomic_read(&ioc->refcount) == 0);
if (atomic_dec_and_test(&ioc->refcount)) {
+ struct cfq_io_context *cic;
+
rcu_read_lock();
if (ioc->aic && ioc->aic->dtor)
ioc->aic->dtor(ioc->aic);
- if (ioc->cic && ioc->cic->dtor)
- ioc->cic->dtor(ioc->cic);
+ if (ioc->cic_root.rb_node != NULL) {
+ struct rb_node *n = rb_first(&ioc->cic_root);
+
+ cic = rb_entry(n, struct cfq_io_context, rb_node);
+ cic->dtor(ioc);
+ }
rcu_read_unlock();
kmem_cache_free(iocontext_cachep, ioc);
{
unsigned long flags;
struct io_context *ioc;
+ struct cfq_io_context *cic;
local_irq_save(flags);
task_lock(current);
if (ioc->aic && ioc->aic->exit)
ioc->aic->exit(ioc->aic);
- if (ioc->cic && ioc->cic->exit)
- ioc->cic->exit(ioc->cic);
-
+ if (ioc->cic_root.rb_node != NULL) {
+ cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
+ cic->exit(ioc);
+ }
+
put_io_context(ioc);
}
ret->last_waited = jiffies; /* doesn't matter... */
ret->nr_batch_requests = 0; /* because this is 0 */
ret->aic = NULL;
- ret->cic = NULL;
+ ret->cic_root.rb_node = NULL;
+ /* make sure set_task_ioprio() sees the settings above */
+ smp_wmb();
tsk->io_context = ret;
}
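The smp_wmb() above follows the usual initialise-then-publish pattern: all fields of the new io_context are written before the pointer is made visible in tsk->io_context, and the reader pairs it with a data-dependency barrier. A generic sketch of the pattern (struct and names are illustrative only):

struct example_obj {
	int field;
};
static struct example_obj *example_published;

static void example_publish(struct example_obj *obj)
{
	obj->field = 1;		/* initialise first ...            */
	smp_wmb();		/* ... order it before the publish */
	example_published = obj;
}

static int example_read(void)
{
	struct example_obj *obj = example_published;

	/* pairs with the smp_wmb() on the publishing side */
	smp_read_barrier_depends();
	return obj ? obj->field : 0;
}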