KVM: SVM: Selective cr0 intercept

[safe/jmp/linux-2.6] / block / blk-barrier.c
diff --git a/block/blk-barrier.c b/block/blk-barrier.c

index 722140a..8618d89 100644 (file)
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -24,10 +24,9 @@
  int blk_queue_ordered(struct request_queue *q, unsigned ordered,
                       prepare_flush_fn *prepare_flush_fn)
  {
-       if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
-           prepare_flush_fn == NULL) {
-               printk(KERN_ERR "%s: prepare_flush_fn required\n",
-                                                               __FUNCTION__);
+       if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
+                                            QUEUE_ORDERED_DO_POSTFLUSH))) {
+               printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
                 return -EINVAL;
         }
  
@@ -53,7 +52,7 @@ EXPORT_SYMBOL(blk_queue_ordered);
  /*
   * Cache flushing for ordered writes handling
   */
-inline unsigned blk_ordered_cur_seq(struct request_queue *q)
+unsigned blk_ordered_cur_seq(struct request_queue *q)
  {
         if (!q->ordseq)
                 return 0;
@@ -89,7 +88,7 @@ unsigned blk_ordered_req_seq(struct request *rq)
                 return QUEUE_ORDSEQ_DONE;
  }
  
-void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
  {
         struct request *rq;
  
@@ -100,16 +99,15 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
         q->ordseq |= seq;
  
         if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
-               return;
+               return false;
  
         /*
          * Okay, sequence complete.
          */
         q->ordseq = 0;
         rq = q->orig_bar_rq;
-
-       if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
-               BUG();
+       __blk_end_request_all(rq, q->orderr);
+       return true;
  }
  
  static void pre_flush_end_io(struct request *rq, int error)
@@ -135,7 +133,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
         struct request *rq;
         rq_end_io_fn *end_io;
  
-       if (which == QUEUE_ORDERED_PREFLUSH) {
+       if (which == QUEUE_ORDERED_DO_PREFLUSH) {
                 rq = &q->pre_flush_rq;
                 end_io = pre_flush_end_io;
         } else {
@@ -143,7 +141,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
                 end_io = post_flush_end_io;
         }
  
-       rq_init(q, rq);
+       blk_rq_init(q, rq);
         rq->cmd_flags = REQ_HARDBARRIER;
         rq->rq_disk = q->bar_rq.rq_disk;
         rq->end_io = end_io;
@@ -152,80 +150,108 @@ static void queue_flush(struct request_queue *q, unsigned which)
         elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
  }
  
-static inline struct request *start_ordered(struct request_queue *q,
-                                           struct request *rq)
+static inline bool start_ordered(struct request_queue *q, struct request **rqp)
  {
+       struct request *rq = *rqp;
+       unsigned skip = 0;
+
         q->orderr = 0;
         q->ordered = q->next_ordered;
         q->ordseq |= QUEUE_ORDSEQ_STARTED;
  
         /*
-        * Prep proxy barrier request.
+        * For an empty barrier, there's no actual BAR request, which
+        * in turn makes POSTFLUSH unnecessary.  Mask them off.
          */
-       blkdev_dequeue_request(rq);
+       if (!blk_rq_sectors(rq)) {
+               q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+                               QUEUE_ORDERED_DO_POSTFLUSH);
+               /*
+                * Empty barrier on a write-through device w/ ordered
+                * tag has no command to issue and without any command
+                * to issue, ordering by tag can't be used.  Drain
+                * instead.
+                */
+               if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
+                   !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
+                       q->ordered &= ~QUEUE_ORDERED_BY_TAG;
+                       q->ordered |= QUEUE_ORDERED_BY_DRAIN;
+               }
+       }
+
+       /* stash away the original request */
+       blk_dequeue_request(rq);
         q->orig_bar_rq = rq;
-       rq = &q->bar_rq;
-       rq_init(q, rq);
-       if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
-               rq->cmd_flags |= REQ_RW;
-       if (q->ordered & QUEUE_ORDERED_FUA)
-               rq->cmd_flags |= REQ_FUA;
-       init_request_from_bio(rq, q->orig_bar_rq->bio);
-       rq->end_io = bar_end_io;
+       rq = NULL;
  
         /*
          * Queue ordered sequence.  As we stack them at the head, we
          * need to queue in reverse order.  Note that we rely on that
          * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-        * request gets inbetween ordered sequence. If this request is
-        * an empty barrier, we don't need to do a postflush ever since
-        * there will be no data written between the pre and post flush.
-        * Hence a single flush will suffice.
+        * request gets inbetween ordered sequence.
          */
-       if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
-               queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
-       else
-               q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+       if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
+               queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
+               rq = &q->post_flush_rq;
+       } else
+               skip |= QUEUE_ORDSEQ_POSTFLUSH;
  
-       elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+       if (q->ordered & QUEUE_ORDERED_DO_BAR) {
+               rq = &q->bar_rq;
+
+               /* initialize proxy request and queue it */
+               blk_rq_init(q, rq);
+               if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+                       rq->cmd_flags |= REQ_RW;
+               if (q->ordered & QUEUE_ORDERED_DO_FUA)
+                       rq->cmd_flags |= REQ_FUA;
+               init_request_from_bio(rq, q->orig_bar_rq->bio);
+               rq->end_io = bar_end_io;
+
+               elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+       } else
+               skip |= QUEUE_ORDSEQ_BAR;
  
-       if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
-               queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+       if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
+               queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
                 rq = &q->pre_flush_rq;
         } else
-               q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+               skip |= QUEUE_ORDSEQ_PREFLUSH;
  
-       if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
-               q->ordseq |= QUEUE_ORDSEQ_DRAIN;
-       else
+       if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
                 rq = NULL;
+       else
+               skip |= QUEUE_ORDSEQ_DRAIN;
  
-       return rq;
+       *rqp = rq;
+
+       /*
+        * Complete skipped sequences.  If whole sequence is complete,
+        * return false to tell elevator that this request is gone.
+        */
+       return !blk_ordered_complete_seq(q, skip, 0);
  }
  
-int blk_do_ordered(struct request_queue *q, struct request **rqp)
+bool blk_do_ordered(struct request_queue *q, struct request **rqp)
  {
         struct request *rq = *rqp;
         const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
  
         if (!q->ordseq) {
                 if (!is_barrier)
-                       return 1;
+                       return true;
  
-               if (q->next_ordered != QUEUE_ORDERED_NONE) {
-                       *rqp = start_ordered(q, rq);
-                       return 1;
-               } else {
+               if (q->next_ordered != QUEUE_ORDERED_NONE)
+                       return start_ordered(q, rqp);
+               else {
                         /*
-                        * This can happen when the queue switches to
-                        * ORDERED_NONE while this request is on it.
+                        * Queue ordering not supported.  Terminate
+                        * with prejudice.
                          */
-                       blkdev_dequeue_request(rq);
-                       if (__blk_end_request(rq, -EOPNOTSUPP,
-                                             blk_rq_bytes(rq)))
-                               BUG();
+                       blk_dequeue_request(rq);
+                       __blk_end_request_all(rq, -EOPNOTSUPP);
                         *rqp = NULL;
-                       return 0;
+                       return false;
                 }
         }
  
@@ -236,9 +262,9 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
         /* Special requests are not subject to ordering rules. */
         if (!blk_fs_request(rq) &&
             rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-               return 1;
+               return true;
  
-       if (q->ordered & QUEUE_ORDERED_TAG) {
+       if (q->ordered & QUEUE_ORDERED_BY_TAG) {
                 /* Ordered by tag.  Blocking the next barrier is enough. */
                 if (is_barrier && rq != &q->bar_rq)
                         *rqp = NULL;
@@ -249,7 +275,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
                         *rqp = NULL;
         }
  
-       return 1;
+       return true;
  }
  
  static void bio_end_empty_barrier(struct bio *bio, int err)
@@ -271,7 +297,7 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
   * Description:
   *    Issue a flush for the block device in question. Caller can supply
   *    room for storing the error offset in case of a flush error, if they
- *    wish to.  Caller must run wait_for_completion() on its own.
+ *    wish to.
   */
  int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
  {
@@ -288,20 +314,17 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
                 return -ENXIO;
  
         bio = bio_alloc(GFP_KERNEL, 0);
-       if (!bio)
-               return -ENOMEM;
-
         bio->bi_end_io = bio_end_empty_barrier;
         bio->bi_private = &wait;
         bio->bi_bdev = bdev;
-       submit_bio(1 << BIO_RW_BARRIER, bio);
+       submit_bio(WRITE_BARRIER, bio);
  
         wait_for_completion(&wait);
  
         /*
          * The driver must store the error location in ->bi_sector, if
          * it supports it. For non-stacked drivers, this should be copied
-        * from rq->sector.
+        * from blk_rq_pos(rq).
          */
         if (error_sector)
                 *error_sector = bio->bi_sector;
@@ -316,3 +339,107 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
         return ret;
  }
  EXPORT_SYMBOL(blkdev_issue_flush);
+
+static void blkdev_discard_end_io(struct bio *bio, int err) /* bi_end_io for discard bios */
+{
+       if (err) {
+               if (err == -EOPNOTSUPP) /* recorded separately so the submitter can tell it from other errors */
+                       set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+               clear_bit(BIO_UPTODATE, &bio->bi_flags); /* any error clears UPTODATE */
+       }
+
+       if (bio->bi_private) /* blkdev_issue_discard() points this at its completion for DISCARD_FL_WAIT */
+               complete(bio->bi_private);
+       __free_page(bio_page(bio)); /* presumably the payload page added by the submitter - confirm */
+
+       bio_put(bio); /* drop one reference to the bio */
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev:      blockdev to issue discard for
+ * @sector:    start sector
+ * @nr_sects:  number of sectors to discard
+ * @gfp_mask:  memory allocation flags (for bio_alloc and alloc_page)
+ * @flags:     DISCARD_FL_* flags (DISCARD_FL_BARRIER, DISCARD_FL_WAIT)
+ *
+ * Description:
+ *    Issue discard bios for the sectors in question, at most max_discard_sectors each.
+ */
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp_mask, int flags)
+{
+       DECLARE_COMPLETION_ONSTACK(wait);
+       struct request_queue *q = bdev_get_queue(bdev);
+       int type = flags & DISCARD_FL_BARRIER ?
+               DISCARD_BARRIER : DISCARD_NOBARRIER; /* passed to submit_bio() below */
+       struct bio *bio;
+       struct page *page;
+       int ret = 0;
+
+       if (!q)
+               return -ENXIO; /* bdev_get_queue() returned no queue */
+
+       if (!blk_queue_discard(q))
+               return -EOPNOTSUPP; /* blk_queue_discard() check failed for this queue */
+
+       while (nr_sects && !ret) { /* one bio per iteration; stop at the first error */
+               unsigned int sector_size = q->limits.logical_block_size;
+               unsigned int max_discard_sectors =
+                       min(q->limits.max_discard_sectors, UINT_MAX >> 9); /* keeps "<< 9" below within unsigned int */
+
+               bio = bio_alloc(gfp_mask, 1);
+               if (!bio)
+                       goto out;
+               bio->bi_sector = sector;
+               bio->bi_end_io = blkdev_discard_end_io;
+               bio->bi_bdev = bdev;
+               if (flags & DISCARD_FL_WAIT)
+                       bio->bi_private = &wait; /* end_io calls complete() on this when set */
+
+               /*
+                * Add a zeroed payload of one logical block, as that's what
+                * our current implementations need.  If we ever need more,
+                * the interface will need revisiting.
+                */
+               page = alloc_page(gfp_mask | __GFP_ZERO);
+               if (!page)
+                       goto out_free_bio;
+               if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+                       goto out_free_page;
+
+               /*
+                * And override the bio size - the way discard works, we
+                * touch many more blocks on disk than the actual payload
+                * length.
+                */
+               if (nr_sects > max_discard_sectors) { /* NOTE(review): no progress is made if max_discard_sectors is 0 */
+                       bio->bi_size = max_discard_sectors << 9;
+                       nr_sects -= max_discard_sectors;
+                       sector += max_discard_sectors;
+               } else {
+                       bio->bi_size = nr_sects << 9;
+                       nr_sects = 0;
+               }
+
+               bio_get(bio); /* extra reference: end_io does bio_put(), and the flags are read below */
+               submit_bio(type, bio);
+
+               if (flags & DISCARD_FL_WAIT)
+                       wait_for_completion(&wait);
+
+               if (bio_flagged(bio, BIO_EOPNOTSUPP)) /* NOTE(review): without DISCARD_FL_WAIT, completion is not awaited before this read */
+                       ret = -EOPNOTSUPP;
+               else if (!bio_flagged(bio, BIO_UPTODATE))
+                       ret = -EIO;
+               bio_put(bio); /* drop the reference taken by bio_get() above */
+       }
+       return ret;
+out_free_page:
+       __free_page(page);
+out_free_bio:
+       bio_put(bio);
+out:
+       return -ENOMEM; /* also returned when bio_add_pc_page() adds less than sector_size */
+}
+EXPORT_SYMBOL(blkdev_issue_discard);