diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 47127ba..6d88544 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/gfp.h>
 
 #include "blk.h"
 
 int blk_queue_ordered(struct request_queue *q, unsigned ordered,
                      prepare_flush_fn *prepare_flush_fn)
 {
-       if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
-           prepare_flush_fn == NULL) {
-               printk(KERN_ERR "%s: prepare_flush_fn required\n",
-                                                               __FUNCTION__);
+       if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
+                                            QUEUE_ORDERED_DO_POSTFLUSH))) {
+               printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
                return -EINVAL;
        }
 
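For context on the check above, here is a rough sketch of how a driver registers an ordering mode and a prepare_flush_fn; the helper names and the flush setup are illustrative assumptions, not part of this patch:

#include <linux/blkdev.h>
#include <linux/genhd.h>

/*
 * Sketch: a driver with a volatile write-back cache picks a
 * drain+flush ordering mode, so it must also supply a
 * prepare_flush_fn that turns the block layer's pre/post flush
 * requests into a device-specific cache-flush command.
 */
static void my_prepare_flush(struct request_queue *q, struct request *rq)
{
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	/* fill in the device's cache-flush command and timeout here */
}

static void my_setup_ordering(struct gendisk *disk)
{
	/*
	 * QUEUE_ORDERED_DRAIN_FLUSH implies DO_PREFLUSH|DO_POSTFLUSH,
	 * hence the prepare_flush_fn requirement checked above.
	 */
	blk_queue_ordered(disk->queue, QUEUE_ORDERED_DRAIN_FLUSH,
			  my_prepare_flush);
}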
@@ -89,7 +89,7 @@ unsigned blk_ordered_req_seq(struct request *rq)
                return QUEUE_ORDSEQ_DONE;
 }
 
-void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 {
        struct request *rq;
 
@@ -100,16 +100,15 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
        q->ordseq |= seq;
 
        if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
-               return;
+               return false;
 
        /*
         * Okay, sequence complete.
         */
        q->ordseq = 0;
        rq = q->orig_bar_rq;
-
-       if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
-               BUG();
+       __blk_end_request_all(rq, q->orderr);
+       return true;
 }
 
 static void pre_flush_end_io(struct request *rq, int error)
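The bookkeeping in blk_ordered_complete_seq() works because each stage of the sequence is a single bit and the "current" stage is the lowest bit still clear, so stages may complete (or be skipped) in any order. A reference sketch of the definitions this relies on, as assumed from the same era's include/linux/blkdev.h and the unchanged blk_ordered_cur_seq() in this file:

enum {
	QUEUE_ORDSEQ_STARTED	= 0x01,	/* flushing in progress */
	QUEUE_ORDSEQ_DRAIN	= 0x02,	/* waiting for the queue to drain */
	QUEUE_ORDSEQ_PREFLUSH	= 0x04,	/* pre-flushing in progress */
	QUEUE_ORDSEQ_BAR	= 0x08,	/* original barrier req in progress */
	QUEUE_ORDSEQ_POSTFLUSH	= 0x10,	/* post-flushing in progress */
	QUEUE_ORDSEQ_DONE	= 0x20,
};

unsigned blk_ordered_cur_seq(struct request_queue *q)
{
	if (!q->ordseq)
		return 0;
	/* lowest stage bit not yet set; equals DONE once all five are set */
	return 1 << ffz(q->ordseq);
}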
@@ -135,7 +134,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
        struct request *rq;
        rq_end_io_fn *end_io;
 
-       if (which == QUEUE_ORDERED_PREFLUSH) {
+       if (which == QUEUE_ORDERED_DO_PREFLUSH) {
                rq = &q->pre_flush_rq;
                end_io = pre_flush_end_io;
        } else {
@@ -143,7 +142,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
                end_io = post_flush_end_io;
        }
 
-       rq_init(q, rq);
+       blk_rq_init(q, rq);
        rq->cmd_flags = REQ_HARDBARRIER;
        rq->rq_disk = q->bar_rq.rq_disk;
        rq->end_io = end_io;
@@ -152,80 +151,108 @@ static void queue_flush(struct request_queue *q, unsigned which)
        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static inline struct request *start_ordered(struct request_queue *q,
-                                           struct request *rq)
+static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 {
+       struct request *rq = *rqp;
+       unsigned skip = 0;
+
        q->orderr = 0;
        q->ordered = q->next_ordered;
        q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
        /*
-        * Prep proxy barrier request.
+        * For an empty barrier, there's no actual BAR request, which
+        * in turn makes POSTFLUSH unnecessary.  Mask them off.
         */
-       blkdev_dequeue_request(rq);
+       if (!blk_rq_sectors(rq)) {
+               q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+                               QUEUE_ORDERED_DO_POSTFLUSH);
+               /*
+                * Empty barrier on a write-through device w/ ordered
+                * tag has no command to issue and without any command
+                * to issue, ordering by tag can't be used.  Drain
+                * instead.
+                */
+               if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
+                   !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
+                       q->ordered &= ~QUEUE_ORDERED_BY_TAG;
+                       q->ordered |= QUEUE_ORDERED_BY_DRAIN;
+               }
+       }
+
+       /* stash away the original request */
+       blk_dequeue_request(rq);
        q->orig_bar_rq = rq;
-       rq = &q->bar_rq;
-       rq_init(q, rq);
-       if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
-               rq->cmd_flags |= REQ_RW;
-       if (q->ordered & QUEUE_ORDERED_FUA)
-               rq->cmd_flags |= REQ_FUA;
-       init_request_from_bio(rq, q->orig_bar_rq->bio);
-       rq->end_io = bar_end_io;
+       rq = NULL;
 
        /*
         * Queue ordered sequence.  As we stack them at the head, we
         * need to queue in reverse order.  Note that we rely on that
         * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-        * request gets inbetween ordered sequence. If this request is
-        * an empty barrier, we don't need to do a postflush ever since
-        * there will be no data written between the pre and post flush.
-        * Hence a single flush will suffice.
+        * request gets in between the ordered sequence.
         */
-       if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
-               queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
-       else
-               q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+       if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
+               queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
+               rq = &q->post_flush_rq;
+       } else
+               skip |= QUEUE_ORDSEQ_POSTFLUSH;
 
-       elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+       if (q->ordered & QUEUE_ORDERED_DO_BAR) {
+               rq = &q->bar_rq;
+
+               /* initialize proxy request and queue it */
+               blk_rq_init(q, rq);
+               if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+                       rq->cmd_flags |= REQ_RW;
+               if (q->ordered & QUEUE_ORDERED_DO_FUA)
+                       rq->cmd_flags |= REQ_FUA;
+               init_request_from_bio(rq, q->orig_bar_rq->bio);
+               rq->end_io = bar_end_io;
+
+               elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+       } else
+               skip |= QUEUE_ORDSEQ_BAR;
 
-       if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
-               queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+       if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
+               queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
                rq = &q->pre_flush_rq;
        } else
-               q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+               skip |= QUEUE_ORDSEQ_PREFLUSH;
 
-       if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
-               q->ordseq |= QUEUE_ORDSEQ_DRAIN;
-       else
+       if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
                rq = NULL;
+       else
+               skip |= QUEUE_ORDSEQ_DRAIN;
 
-       return rq;
+       *rqp = rq;
+
+       /*
+        * Complete skipped sequences.  If whole sequence is complete,
+        * return false to tell elevator that this request is gone.
+        */
+       return !blk_ordered_complete_seq(q, skip, 0);
 }
 
-int blk_do_ordered(struct request_queue *q, struct request **rqp)
+bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
        struct request *rq = *rqp;
        const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
 
        if (!q->ordseq) {
                if (!is_barrier)
-                       return 1;
+                       return true;
 
-               if (q->next_ordered != QUEUE_ORDERED_NONE) {
-                       *rqp = start_ordered(q, rq);
-                       return 1;
-               } else {
+               if (q->next_ordered != QUEUE_ORDERED_NONE)
+                       return start_ordered(q, rqp);
+               else {
                        /*
-                        * This can happen when the queue switches to
-                        * ORDERED_NONE while this request is on it.
+                        * Queue ordering not supported.  Terminate
+                        * with prejudice.
                         */
-                       blkdev_dequeue_request(rq);
-                       if (__blk_end_request(rq, -EOPNOTSUPP,
-                                             blk_rq_bytes(rq)))
-                               BUG();
+                       blk_dequeue_request(rq);
+                       __blk_end_request_all(rq, -EOPNOTSUPP);
                        *rqp = NULL;
-                       return 0;
+                       return false;
                }
        }
 
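To make the new skip bookkeeping concrete, a worked example (a sketch only, assuming a QUEUE_ORDERED_DRAIN_FLUSH queue):

/*
 * Worked example (sketch): an empty barrier on a drain+flush queue.
 *
 *   blk_rq_sectors(rq) == 0, so DO_BAR and DO_POSTFLUSH are masked off
 *   and only the pre-flush request is queued; *rqp points at it.
 *
 *   skip == QUEUE_ORDSEQ_POSTFLUSH | QUEUE_ORDSEQ_BAR
 *           (| QUEUE_ORDSEQ_DRAIN if nothing was in flight)
 *
 *   blk_ordered_complete_seq(q, skip, 0) records the skipped stages
 *   immediately; the sequence finishes once the queue has drained and
 *   pre_flush_end_io() marks QUEUE_ORDSEQ_PREFLUSH done, at which
 *   point the original (empty) barrier is ended with
 *   __blk_end_request_all(rq, q->orderr).
 */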
@@ -236,9 +263,9 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
        /* Special requests are not subject to ordering rules. */
        if (!blk_fs_request(rq) &&
            rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-               return 1;
+               return true;
 
-       if (q->ordered & QUEUE_ORDERED_TAG) {
+       if (q->ordered & QUEUE_ORDERED_BY_TAG) {
                /* Ordered by tag.  Blocking the next barrier is enough. */
                if (is_barrier && rq != &q->bar_rq)
                        *rqp = NULL;
@@ -249,7 +276,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
                        *rqp = NULL;
        }
 
-       return 1;
+       return true;
 }
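The boolean return value is consumed by the dispatch path. Roughly how that era's __elv_next_request() (in block/blk.h) uses it; shown as a sketch for reference, not part of this diff:

static inline struct request *__elv_next_request(struct request_queue *q)
{
	struct request *rq;

	while (1) {
		while (!list_empty(&q->queue_head)) {
			rq = list_entry_rq(q->queue_head.next);
			/*
			 * blk_do_ordered() may swap in a proxy flush/barrier
			 * request, clear rq to stall dispatch while draining,
			 * or return false when the request has been
			 * terminated and must not reach the driver.
			 */
			if (blk_do_ordered(q, &rq))
				return rq;
		}

		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
			return NULL;
	}
}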
 
 static void bio_end_empty_barrier(struct bio *bio, int err)
@@ -271,7 +298,7 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
  * Description:
  *    Issue a flush for the block device in question. Caller can supply
  *    room for storing the error offset in case of a flush error, if they
- *    wish to.  Caller must run wait_for_completion() on its own.
+ *    wish to.
  */
 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 {
@@ -288,20 +315,17 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
                return -ENXIO;
 
        bio = bio_alloc(GFP_KERNEL, 0);
-       if (!bio)
-               return -ENOMEM;
-
        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_private = &wait;
        bio->bi_bdev = bdev;
-       submit_bio(1 << BIO_RW_BARRIER, bio);
+       submit_bio(WRITE_BARRIER, bio);
 
        wait_for_completion(&wait);
 
        /*
         * The driver must store the error location in ->bi_sector, if
         * it supports it. For non-stacked drivers, this should be copied
-        * from rq->sector.
+        * from blk_rq_pos(rq).
         */
        if (error_sector)
                *error_sector = bio->bi_sector;
@@ -316,3 +340,107 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
        return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
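Since the completion wait now happens inside blkdev_issue_flush() itself, a caller only checks the return value. A hypothetical fsync-style helper (not part of this patch):

static int my_fsync_flush(struct block_device *bdev)
{
	/* NULL: we do not care about the failing sector */
	int err = blkdev_issue_flush(bdev, NULL);

	/* no volatile cache or no flush support: nothing to do */
	if (err == -EOPNOTSUPP)
		err = 0;
	return err;
}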
+
+static void blkdev_discard_end_io(struct bio *bio, int err)
+{
+       if (err) {
+               if (err == -EOPNOTSUPP)
+                       set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+       }
+
+       if (bio->bi_private)
+               complete(bio->bi_private);
+       __free_page(bio_page(bio));
+
+       bio_put(bio);
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev:      blockdev to issue discard for
+ * @sector:    start sector
+ * @nr_sects:  number of sectors to discard
+ * @gfp_mask:  memory allocation flags (for bio_alloc)
+ * @flags:     DISCARD_FL_* flags to control behaviour
+ *
+ * Description:
+ *    Issue a discard request for the sectors in question.
+ */
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp_mask, int flags)
+{
+       DECLARE_COMPLETION_ONSTACK(wait);
+       struct request_queue *q = bdev_get_queue(bdev);
+       int type = flags & DISCARD_FL_BARRIER ?
+               DISCARD_BARRIER : DISCARD_NOBARRIER;
+       struct bio *bio;
+       struct page *page;
+       int ret = 0;
+
+       if (!q)
+               return -ENXIO;
+
+       if (!blk_queue_discard(q))
+               return -EOPNOTSUPP;
+
+       while (nr_sects && !ret) {
+               unsigned int sector_size = q->limits.logical_block_size;
+               unsigned int max_discard_sectors =
+                       min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+
+               bio = bio_alloc(gfp_mask, 1);
+               if (!bio)
+                       goto out;
+               bio->bi_sector = sector;
+               bio->bi_end_io = blkdev_discard_end_io;
+               bio->bi_bdev = bdev;
+               if (flags & DISCARD_FL_WAIT)
+                       bio->bi_private = &wait;
+
+               /*
+                * Add a zeroed one-sector payload as that's what
+                * our current implementations need.  If we'll ever need
+                * more the interface will need revisiting.
+                */
+               page = alloc_page(gfp_mask | __GFP_ZERO);
+               if (!page)
+                       goto out_free_bio;
+               if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+                       goto out_free_page;
+
+               /*
+                * And override the bio size - the way discard works we
+                * touch many more blocks on disk than the actual payload
+                * length.
+                */
+               if (nr_sects > max_discard_sectors) {
+                       bio->bi_size = max_discard_sectors << 9;
+                       nr_sects -= max_discard_sectors;
+                       sector += max_discard_sectors;
+               } else {
+                       bio->bi_size = nr_sects << 9;
+                       nr_sects = 0;
+               }
+
+               bio_get(bio);
+               submit_bio(type, bio);
+
+               if (flags & DISCARD_FL_WAIT)
+                       wait_for_completion(&wait);
+
+               if (bio_flagged(bio, BIO_EOPNOTSUPP))
+                       ret = -EOPNOTSUPP;
+               else if (!bio_flagged(bio, BIO_UPTODATE))
+                       ret = -EIO;
+               bio_put(bio);
+       }
+       return ret;
+out_free_page:
+       __free_page(page);
+out_free_bio:
+       bio_put(bio);
+out:
+       return -ENOMEM;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
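A usage sketch for the new export; the caller, its byte-to-sector conversion, and its error policy below are hypothetical:

static int my_discard_extent(struct block_device *bdev, u64 start, u64 len)
{
	/*
	 * Byte range -> 512-byte sectors.  DISCARD_FL_WAIT makes the
	 * call synchronous; DISCARD_FL_BARRIER orders it against
	 * surrounding writes.
	 */
	int err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
				       GFP_KERNEL,
				       DISCARD_FL_WAIT | DISCARD_FL_BARRIER);

	/* the device simply has no discard support: not an error here */
	if (err == -EOPNOTSUPP)
		err = 0;
	return err;
}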