X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=block%2Fll_rw_blk.c;h=9c3a06bcb7ba97b02d12de6638944ac79e33baa2;hb=182777700d912a69824245e9ee99148ac0aa57d7;hp=b836b43113daabf023457af9913837cf7ef43845;hpb=f68110fc28859f5d7231d5c4fb6dbe68b1394c9b;p=safe%2Fjmp%2Flinux-2.6 diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index b836b43..9c3a06b 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -10,7 +10,6 @@ /* * This handles all read/write requests to block devices */ -#include #include #include #include @@ -638,7 +637,7 @@ void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) /* Assume anything <= 4GB can be handled by IOMMU. Actually some IOMMUs can handle everything, but I don't know of a way to test this here. */ - if (bounce_pfn < (0xffffffff>>PAGE_SHIFT)) + if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) dma = 1; q->bounce_pfn = max_low_pfn; #else @@ -785,6 +784,8 @@ void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); t->max_segment_size = min(t->max_segment_size,b->max_segment_size); t->hardsect_size = max(t->hardsect_size,b->hardsect_size); + if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) + clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); } EXPORT_SYMBOL(blk_queue_stack_limits); @@ -847,21 +848,18 @@ struct request *blk_queue_find_tag(request_queue_t *q, int tag) EXPORT_SYMBOL(blk_queue_find_tag); /** - * __blk_queue_free_tags - release tag maintenance info - * @q: the request queue for the device + * __blk_free_tags - release a given set of tag maintenance info + * @bqt: the tag map to free * - * Notes: - * blk_cleanup_queue() will take care of calling this function, if tagging - * has been used. So there's no need to call this directly. - **/ -static void __blk_queue_free_tags(request_queue_t *q) + * Tries to free the specified @bqt@. Returns true if it was + * actually freed and false if there are still references using it + */ +static int __blk_free_tags(struct blk_queue_tag *bqt) { - struct blk_queue_tag *bqt = q->queue_tags; - - if (!bqt) - return; + int retval; - if (atomic_dec_and_test(&bqt->refcnt)) { + retval = atomic_dec_and_test(&bqt->refcnt); + if (retval) { BUG_ON(bqt->busy); BUG_ON(!list_empty(&bqt->busy_list)); @@ -872,12 +870,49 @@ static void __blk_queue_free_tags(request_queue_t *q) bqt->tag_map = NULL; kfree(bqt); + } + return retval; +} + +/** + * __blk_queue_free_tags - release tag maintenance info + * @q: the request queue for the device + * + * Notes: + * blk_cleanup_queue() will take care of calling this function, if tagging + * has been used. So there's no need to call this directly. + **/ +static void __blk_queue_free_tags(request_queue_t *q) +{ + struct blk_queue_tag *bqt = q->queue_tags; + + if (!bqt) + return; + + __blk_free_tags(bqt); + q->queue_tags = NULL; q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); } + +/** + * blk_free_tags - release a given set of tag maintenance info + * @bqt: the tag map to free + * + * For externally managed @bqt@ frees the map. Callers of this + * function must guarantee to have released all the queues that + * might have been using this tag map. 
+ */ +void blk_free_tags(struct blk_queue_tag *bqt) +{ + if (unlikely(!__blk_free_tags(bqt))) + BUG(); +} +EXPORT_SYMBOL(blk_free_tags); + /** * blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device @@ -900,7 +935,7 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) unsigned long *tag_map; int nr_ulongs; - if (depth > q->nr_requests * 2) { + if (q && depth > q->nr_requests * 2) { depth = q->nr_requests * 2; printk(KERN_ERR "%s: adjusted depth to %d\n", __FUNCTION__, depth); @@ -926,6 +961,38 @@ fail: return -ENOMEM; } +static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, + int depth) +{ + struct blk_queue_tag *tags; + + tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); + if (!tags) + goto fail; + + if (init_tag_map(q, tags, depth)) + goto fail; + + INIT_LIST_HEAD(&tags->busy_list); + tags->busy = 0; + atomic_set(&tags->refcnt, 1); + return tags; +fail: + kfree(tags); + return NULL; +} + +/** + * blk_init_tags - initialize the tag info for an external tag map + * @depth: the maximum queue depth supported + * @tags: the tag to use + **/ +struct blk_queue_tag *blk_init_tags(int depth) +{ + return __blk_queue_init_tags(NULL, depth); +} +EXPORT_SYMBOL(blk_init_tags); + /** * blk_queue_init_tags - initialize the queue tag info * @q: the request queue for the device @@ -940,16 +1007,10 @@ int blk_queue_init_tags(request_queue_t *q, int depth, BUG_ON(tags && q->queue_tags && tags != q->queue_tags); if (!tags && !q->queue_tags) { - tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); - if (!tags) - goto fail; + tags = __blk_queue_init_tags(q, depth); - if (init_tag_map(q, tags, depth)) + if (!tags) goto fail; - - INIT_LIST_HEAD(&tags->busy_list); - tags->busy = 0; - atomic_set(&tags->refcnt, 1); } else if (q->queue_tags) { if ((rc = blk_queue_resize_tags(q, depth))) return rc; @@ -1001,6 +1062,13 @@ int blk_queue_resize_tags(request_queue_t *q, int new_depth) } /* + * Currently cannot replace a shared tag map with a new + * one, so error out if this is the case + */ + if (atomic_read(&bqt->refcnt) != 1) + return -EBUSY; + + /* * save the old state info, so we can copy it back */ tag_index = bqt->tag_index; @@ -1552,7 +1620,7 @@ void blk_plug_device(request_queue_t *q) * don't plug a stopped queue, it must be paired with blk_start_queue() * which will restart the queueing */ - if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) + if (blk_queue_stopped(q)) return; if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { @@ -1585,7 +1653,7 @@ EXPORT_SYMBOL(blk_remove_plug); */ void __generic_unplug_device(request_queue_t *q) { - if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))) + if (unlikely(blk_queue_stopped(q))) return; if (!blk_remove_plug(q)) @@ -1661,6 +1729,8 @@ static void blk_unplug_timeout(unsigned long data) **/ void blk_start_queue(request_queue_t *q) { + WARN_ON(!irqs_disabled()); + clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); /* @@ -1730,15 +1800,28 @@ void blk_run_queue(struct request_queue *q) spin_lock_irqsave(q->queue_lock, flags); blk_remove_plug(q); - if (!elv_queue_empty(q)) - q->request_fn(q); + + /* + * Only recurse once to avoid overrunning the stack, let the unplug + * handling reinvoke the handler shortly if we already got there. 
+ */ + if (!elv_queue_empty(q)) { + if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + q->request_fn(q); + clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); + } else { + blk_plug_device(q); + kblockd_schedule_work(&q->unplug_work); + } + } + spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); /** * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed - * @q: the request queue to be released + * @kobj: the kobj belonging of the request queue to be released * * Description: * blk_cleanup_queue is the pair to blk_init_queue() or @@ -1863,7 +1946,8 @@ EXPORT_SYMBOL(blk_alloc_queue_node); * get dealt with eventually. * * The queue spin lock must be held while manipulating the requests on the - * request queue. + * request queue; this lock will be taken also from interrupt context, so irq + * disabling is needed for it. * * Function returns a pointer to the initialized request queue, or NULL if * it didn't succeed. @@ -2477,10 +2561,12 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, rq->rq_disk = bd_disk; rq->flags |= REQ_NOMERGE; rq->end_io = done; - elv_add_request(q, rq, where, 1); - generic_unplug_device(q); + WARN_ON(irqs_disabled()); + spin_lock_irq(q->queue_lock); + __elv_add_request(q, rq, where, 1); + __generic_unplug_device(q); + spin_unlock_irq(q->queue_lock); } - EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); /** @@ -2497,7 +2583,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, struct request *rq, int at_head) { - DECLARE_COMPLETION(wait); + DECLARE_COMPLETION_ONSTACK(wait); char sense[SCSI_SENSE_BUFFERSIZE]; int err = 0; @@ -2715,6 +2801,18 @@ long blk_congestion_wait(int rw, long timeout) EXPORT_SYMBOL(blk_congestion_wait); +/** + * blk_congestion_end - wake up sleepers on a congestion queue + * @rw: READ or WRITE + */ +void blk_congestion_end(int rw) +{ + wait_queue_head_t *wqh = &congestion_wqh[rw]; + + if (waitqueue_active(wqh)) + wake_up(wqh); +} + /* * Has to be called with the request spinlock acquired */ @@ -2725,7 +2823,7 @@ static int attempt_merge(request_queue_t *q, struct request *req, return 0; /* - * not contigious + * not contiguous */ if (req->sector + req->nr_sectors != next->sector) return 0; @@ -2807,6 +2905,9 @@ static void init_request_from_bio(struct request *req, struct bio *bio) if (unlikely(bio_barrier(bio))) req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); + if (bio_sync(bio)) + req->flags |= REQ_RW_SYNC; + req->errors = 0; req->hard_sector = req->sector = bio->bi_sector; req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); @@ -3094,9 +3195,9 @@ void submit_bio(int rw, struct bio *bio) BIO_BUG_ON(!bio->bi_io_vec); bio->bi_rw |= rw; if (rw & WRITE) - mod_page_state(pgpgout, count); + count_vm_events(PGPGOUT, count); else - mod_page_state(pgpgin, count); + count_vm_events(PGPGIN, count); if (unlikely(block_dump)) { char b[BDEVNAME_SIZE]; @@ -3342,12 +3443,11 @@ EXPORT_SYMBOL(end_that_request_chunk); */ static void blk_done_softirq(struct softirq_action *h) { - struct list_head *cpu_list; - LIST_HEAD(local_list); + struct list_head *cpu_list, local_list; local_irq_disable(); cpu_list = &__get_cpu_var(blk_cpu_done); - list_splice_init(cpu_list, &local_list); + list_replace_init(cpu_list, &local_list); local_irq_enable(); while (!list_empty(&local_list)) { @@ -3393,7 +3493,7 @@ static struct notifier_block __devinitdata blk_cpu_notifier = { * * Description: * Ends all I/O on a request. 
It does not handle partial completions, - * unless the driver actually implements this in its completionc callback + * unless the driver actually implements this in its completion callback * through requeueing. Theh actual completion happens out-of-order, * through a softirq handler. The user must have registered a completion * callback through blk_queue_softirq_done(). @@ -3435,7 +3535,12 @@ void end_that_request_last(struct request *req, int uptodate) if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); - if (disk && blk_fs_request(req)) { + /* + * Account IO completion. bar_rq isn't accounted as a normal + * IO on queueing nor completion. Accounting the containing + * request is enough. + */ + if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); @@ -3465,8 +3570,8 @@ EXPORT_SYMBOL(end_request); void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) { - /* first three bits are identical in rq->flags and bio->bi_rw */ - rq->flags |= (bio->bi_rw & 7); + /* first two bits are identical in rq->flags and bio->bi_rw */ + rq->flags |= (bio->bi_rw & 3); rq->nr_phys_segments = bio_phys_segments(q, bio); rq->nr_hw_segments = bio_hw_segments(q, bio); @@ -3510,13 +3615,11 @@ int __init blk_dev_init(void) iocontext_cachep = kmem_cache_create("blkdev_ioc", sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); - for_each_cpu(i) + for_each_possible_cpu(i) INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); -#ifdef CONFIG_HOTPLUG_CPU - register_cpu_notifier(&blk_cpu_notifier); -#endif + register_hotcpu_notifier(&blk_cpu_notifier); blk_max_low_pfn = max_low_pfn; blk_max_pfn = max_pfn; @@ -3535,11 +3638,17 @@ void put_io_context(struct io_context *ioc) BUG_ON(atomic_read(&ioc->refcount) == 0); if (atomic_dec_and_test(&ioc->refcount)) { + struct cfq_io_context *cic; + rcu_read_lock(); if (ioc->aic && ioc->aic->dtor) ioc->aic->dtor(ioc->aic); - if (ioc->cic && ioc->cic->dtor) - ioc->cic->dtor(ioc->cic); + if (ioc->cic_root.rb_node != NULL) { + struct rb_node *n = rb_first(&ioc->cic_root); + + cic = rb_entry(n, struct cfq_io_context, rb_node); + cic->dtor(ioc); + } rcu_read_unlock(); kmem_cache_free(iocontext_cachep, ioc); @@ -3552,6 +3661,7 @@ void exit_io_context(void) { unsigned long flags; struct io_context *ioc; + struct cfq_io_context *cic; local_irq_save(flags); task_lock(current); @@ -3563,9 +3673,11 @@ void exit_io_context(void) if (ioc->aic && ioc->aic->exit) ioc->aic->exit(ioc->aic); - if (ioc->cic && ioc->cic->exit) - ioc->cic->exit(ioc->cic); - + if (ioc->cic_root.rb_node != NULL) { + cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); + cic->exit(ioc); + } + put_io_context(ioc); } @@ -3594,7 +3706,9 @@ struct io_context *current_io_context(gfp_t gfp_flags) ret->last_waited = jiffies; /* doesn't matter... */ ret->nr_batch_requests = 0; /* because this is 0 */ ret->aic = NULL; - ret->cic = NULL; + ret->cic_root.rb_node = NULL; + /* make sure set_task_ioprio() sees the settings above */ + smp_wmb(); tsk->io_context = ret; }
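
The largest functional addition in this diff is the externally managed tag map: blk_init_tags() builds a struct blk_queue_tag that is not bound to any queue, blk_queue_init_tags() can attach such a preallocated map to a queue, and blk_free_tags() releases it once no queue still references it. Below is a minimal driver-side sketch of that usage; the my_host_* functions, MY_QUEUE_DEPTH and MY_NR_QUEUES are hypothetical names, and the third argument of blk_queue_init_tags() is inferred from the hunk context above rather than from a full declaration.

/*
 * Sketch only: share one tag map between all queues of a host.
 * Error unwinding is omitted for brevity.
 */
#include <linux/blkdev.h>

#define MY_QUEUE_DEPTH	64
#define MY_NR_QUEUES	4

static struct blk_queue_tag *my_shared_tags;

static int my_host_init_tags(struct request_queue **queues)
{
	int i, ret;

	/* Allocate the map once, independent of any request queue. */
	my_shared_tags = blk_init_tags(MY_QUEUE_DEPTH);
	if (!my_shared_tags)
		return -ENOMEM;

	/* Attaching the same map to each queue makes the tag space shared. */
	for (i = 0; i < MY_NR_QUEUES; i++) {
		ret = blk_queue_init_tags(queues[i], MY_QUEUE_DEPTH,
					  my_shared_tags);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * Only legal after blk_cleanup_queue() has run on every queue that used
 * the map: blk_free_tags() BUGs if references are still outstanding.
 */
static void my_host_free_tags(void)
{
	blk_free_tags(my_shared_tags);
	my_shared_tags = NULL;
}

Note that blk_queue_resize_tags() above now returns -EBUSY for a map whose reference count is greater than one, so a shared map has to be sized for the whole host when it is created.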