Merge branch 'master' into for-linus
author Jens Axboe <jens.axboe@oracle.com>
Fri, 19 Mar 2010 07:05:10 +0000 (08:05 +0100)
committer Jens Axboe <jens.axboe@oracle.com>
Fri, 19 Mar 2010 07:05:10 +0000 (08:05 +0100)
Conflicts:
block/Kconfig

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
30 files changed:
Documentation/DocBook/tracepoint.tmpl
Documentation/block/biodoc.txt
block/Kconfig
block/blk-settings.c
block/blk-sysfs.c
block/cfq-iosched.c
drivers/block/DAC960.c
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_worker.c
drivers/block/paride/pcd.c
drivers/block/paride/pf.c
drivers/block/paride/pt.c
drivers/block/virtio_blk.c
drivers/scsi/sd.c
fs/bio.c
fs/fs-writeback.c
include/linux/blkdev.h
include/linux/drbd.h
include/linux/drbd_nl.h
include/linux/genhd.h
include/linux/lcm.h [new file with mode: 0644]
include/linux/writeback.h
include/trace/events/block.h
lib/Makefile
lib/lcm.c [new file with mode: 0644]

index 8bca1d5..e8473ea 100644 (file)
      </address>
     </affiliation>
    </author>
+   <author>
+    <firstname>William</firstname>
+    <surname>Cohen</surname>
+    <affiliation>
+     <address>
+      <email>wcohen@redhat.com</email>
+     </address>
+    </affiliation>
+   </author>
   </authorgroup>
 
   <legalnotice>
 !Iinclude/trace/events/signal.h
   </chapter>
 
+  <chapter id="block">
+   <title>Block IO</title>
+!Iinclude/trace/events/block.h
+  </chapter>
 </book>
index 6fab97e..508b5b2 100644 (file)
@@ -1162,8 +1162,8 @@ where a driver received a request ala this before:
 
 As mentioned, there is no virtual mapping of a bio. For DMA, this is
 not a problem as the driver probably never will need a virtual mapping.
-Instead it needs a bus mapping (pci_map_page for a single segment or
-use blk_rq_map_sg for scatter gather) to be able to ship it to the driver. For
+Instead it needs a bus mapping (dma_map_page for a single segment or
+use dma_map_sg for scatter gather) to be able to ship it to the driver. For
 PIO drivers (or drivers that need to revert to PIO transfer once in a
 while (IDE for example)), where the CPU is doing the actual data
 transfer a virtual mapping is needed. If the driver supports highmem I/O,
index 62a5921..f9e89f4 100644 (file)
@@ -78,8 +78,9 @@ config BLK_DEV_INTEGRITY
        Protection.  If in doubt, say N.
 
 config BLK_CGROUP
-       tristate
+       tristate "Block cgroup support"
        depends on CGROUPS
+       depends on CFQ_GROUP_IOSCHED
        default n
        ---help---
        Generic block IO controller cgroup interface. This is the common
index 31e7a93..4c4700d 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/blkdev.h>
 #include <linux/bootmem.h>     /* for max_pfn/max_low_pfn */
 #include <linux/gcd.h>
+#include <linux/lcm.h>
 #include <linux/jiffies.h>
 
 #include "blk.h"
@@ -461,16 +462,6 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
-static unsigned int lcm(unsigned int a, unsigned int b)
-{
-       if (a && b)
-               return (a * b) / gcd(a, b);
-       else if (b)
-               return b;
-
-       return a;
-}
-
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
  * @t: the stacking driver limits (top device)
index 2ae2cb3..4426739 100644 (file)
@@ -106,6 +106,19 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
        return queue_var_show(max_sectors_kb, (page));
 }
 
+static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
+{
+       return queue_var_show(queue_max_segments(q), (page));
+}
+
+static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
+{
+       if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+               return queue_var_show(queue_max_segment_size(q), (page));
+
+       return queue_var_show(PAGE_CACHE_SIZE, (page));
+}
+
 static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
 {
        return queue_var_show(queue_logical_block_size(q), page);
@@ -280,6 +293,16 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
        .show = queue_max_hw_sectors_show,
 };
 
+static struct queue_sysfs_entry queue_max_segments_entry = {
+       .attr = {.name = "max_segments", .mode = S_IRUGO },
+       .show = queue_max_segments_show,
+};
+
+static struct queue_sysfs_entry queue_max_segment_size_entry = {
+       .attr = {.name = "max_segment_size", .mode = S_IRUGO },
+       .show = queue_max_segment_size_show,
+};
+
 static struct queue_sysfs_entry queue_iosched_entry = {
        .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
        .show = elv_iosched_show,
@@ -355,6 +378,8 @@ static struct attribute *default_attrs[] = {
        &queue_ra_entry.attr,
        &queue_max_hw_sectors_entry.attr,
        &queue_max_sectors_entry.attr,
+       &queue_max_segments_entry.attr,
+       &queue_max_segment_size_entry.attr,
        &queue_iosched_entry.attr,
        &queue_hw_sector_size_entry.attr,
        &queue_logical_block_size_entry.attr,
index dee9d93..8d5a2f2 100644 (file)
@@ -47,6 +47,7 @@ static const int cfq_hist_divisor = 4;
 #define CFQ_SERVICE_SHIFT       12
 
 #define CFQQ_SEEK_THR          (sector_t)(8 * 100)
+#define CFQQ_CLOSE_THR         (sector_t)(8 * 1024)
 #define CFQQ_SECT_THR_NONROT   (sector_t)(2 * 32)
 #define CFQQ_SEEKY(cfqq)       (hweight32(cfqq->seek_history) > 32/8)
 
@@ -1660,9 +1661,9 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 }
 
 static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-                              struct request *rq, bool for_preempt)
+                              struct request *rq)
 {
-       return cfq_dist_from_last(cfqd, rq) <= CFQQ_SEEK_THR;
+       return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
 }
 
 static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
@@ -1689,7 +1690,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
         * will contain the closest sector.
         */
        __cfqq = rb_entry(parent, struct cfq_queue, p_node);
-       if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
+       if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
                return __cfqq;
 
        if (blk_rq_pos(__cfqq->next_rq) < sector)
@@ -1700,7 +1701,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
                return NULL;
 
        __cfqq = rb_entry(node, struct cfq_queue, p_node);
-       if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
+       if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
                return __cfqq;
 
        return NULL;
@@ -3103,7 +3104,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
         * if this request is as-good as one we would expect from the
         * current cfqq, let it preempt
         */
-       if (cfq_rq_close(cfqd, cfqq, rq, true))
+       if (cfq_rq_close(cfqd, cfqq, rq))
                return true;
 
        return false;
index 459f1bc..c5f22bb 100644 (file)
@@ -2533,7 +2533,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
        Controller->RequestQueue[n] = RequestQueue;
        blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
        RequestQueue->queuedata = Controller;
-       blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit);
        blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
        blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
        disk->queue = RequestQueue;
index 17956ff..43e57f3 100644 (file)
@@ -536,7 +536,9 @@ static void atodb_endio(struct bio *bio, int error)
        put_ldev(mdev);
 }
 
+/* sector to word */
 #define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+
 /* activity log to on disk bitmap -- prepare bio unless that sector
  * is already covered by previously prepared bios */
 static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
@@ -546,13 +548,20 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
 {
        struct bio *bio;
        struct page *page;
-       sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
-                                     + mdev->ldev->md.bm_offset;
+       sector_t on_disk_sector;
        unsigned int page_offset = PAGE_SIZE;
        int offset;
        int i = 0;
        int err = -ENOMEM;
 
+       /* We always write aligned, full 4k blocks,
+        * so we can ignore the logical_block_size (for now) */
+       enr &= ~7U;
+       on_disk_sector = enr + mdev->ldev->md.md_offset
+                            + mdev->ldev->md.bm_offset;
+
+       D_ASSERT(!(on_disk_sector & 7U));
+
        /* Check if that enr is already covered by an already created bio.
         * Caution, bios[] is not NULL terminated,
         * but only initialized to all NULL.
@@ -588,7 +597,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
 
        offset = S2W(enr);
        drbd_bm_get_lel(mdev, offset,
-                       min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset),
+                       min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset),
                        kmap(page) + page_offset);
        kunmap(page);
 
@@ -597,7 +606,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
        bio->bi_bdev = mdev->ldev->md_bdev;
        bio->bi_sector = on_disk_sector;
 
-       if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE)
+       if (bio_add_page(bio, page, 4096, page_offset) != 4096)
                goto out_put_page;
 
        atomic_inc(&wc->count);
index b61057e..f58e765 100644 (file)
@@ -66,7 +66,7 @@ struct drbd_bitmap {
        size_t   bm_words;
        size_t   bm_number_of_pages;
        sector_t bm_dev_capacity;
-       struct semaphore bm_change; /* serializes resize operations */
+       struct mutex bm_change; /* serializes resize operations */
 
        atomic_t bm_async_io;
        wait_queue_head_t bm_io_wait;
@@ -114,7 +114,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
                return;
        }
 
-       trylock_failed = down_trylock(&b->bm_change);
+       trylock_failed = !mutex_trylock(&b->bm_change);
 
        if (trylock_failed) {
                dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
@@ -125,7 +125,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
                    b->bm_task == mdev->receiver.task ? "receiver" :
                    b->bm_task == mdev->asender.task  ? "asender"  :
                    b->bm_task == mdev->worker.task   ? "worker"   : "?");
-               down(&b->bm_change);
+               mutex_lock(&b->bm_change);
        }
        if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
                dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
@@ -147,7 +147,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
 
        b->bm_why  = NULL;
        b->bm_task = NULL;
-       up(&b->bm_change);
+       mutex_unlock(&b->bm_change);
 }
 
 /* word offset to long pointer */
@@ -295,7 +295,7 @@ int drbd_bm_init(struct drbd_conf *mdev)
        if (!b)
                return -ENOMEM;
        spin_lock_init(&b->bm_lock);
-       init_MUTEX(&b->bm_change);
+       mutex_init(&b->bm_change);
        init_waitqueue_head(&b->bm_io_wait);
 
        mdev->bitmap = b;
index d9301e8..e5e86a7 100644 (file)
@@ -261,6 +261,9 @@ static inline const char *cmdname(enum drbd_packets cmd)
                [P_OV_REQUEST]          = "OVRequest",
                [P_OV_REPLY]            = "OVReply",
                [P_OV_RESULT]           = "OVResult",
+               [P_CSUM_RS_REQUEST]     = "CsumRSRequest",
+               [P_RS_IS_IN_SYNC]       = "CsumRSIsInSync",
+               [P_COMPRESSED_BITMAP]   = "CBitmap",
                [P_MAX_CMD]             = NULL,
        };
 
@@ -443,13 +446,18 @@ struct p_rs_param_89 {
        char csums_alg[SHARED_SECRET_MAX];
 } __packed;
 
+enum drbd_conn_flags {
+       CF_WANT_LOSE = 1,
+       CF_DRY_RUN = 2,
+};
+
 struct p_protocol {
        struct p_header head;
        u32 protocol;
        u32 after_sb_0p;
        u32 after_sb_1p;
        u32 after_sb_2p;
-       u32 want_lose;
+       u32 conn_flags;
        u32 two_primaries;
 
               /* Since protocol version 87 and higher. */
@@ -791,6 +799,8 @@ enum {
                                 * while this is set. */
        RESIZE_PENDING,         /* Size change detected locally, waiting for the response from
                                 * the peer, if it changed there as well. */
+       CONN_DRY_RUN,           /* Expect disconnect after resync handshake. */
+       GOT_PING_ACK,           /* set when we receive a ping_ack packet, misc wait gets woken */
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
index ab871e0..67e0fc5 100644 (file)
@@ -1668,7 +1668,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
 int drbd_send_protocol(struct drbd_conf *mdev)
 {
        struct p_protocol *p;
-       int size, rv;
+       int size, cf, rv;
 
        size = sizeof(struct p_protocol);
 
@@ -1685,9 +1685,21 @@ int drbd_send_protocol(struct drbd_conf *mdev)
        p->after_sb_0p   = cpu_to_be32(mdev->net_conf->after_sb_0p);
        p->after_sb_1p   = cpu_to_be32(mdev->net_conf->after_sb_1p);
        p->after_sb_2p   = cpu_to_be32(mdev->net_conf->after_sb_2p);
-       p->want_lose     = cpu_to_be32(mdev->net_conf->want_lose);
        p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
 
+       cf = 0;
+       if (mdev->net_conf->want_lose)
+               cf |= CF_WANT_LOSE;
+       if (mdev->net_conf->dry_run) {
+               if (mdev->agreed_pro_version >= 92)
+                       cf |= CF_DRY_RUN;
+               else {
+                       dev_err(DEV, "--dry-run is not supported by peer");
+                       return 0;
+               }
+       }
+       p->conn_flags    = cpu_to_be32(cf);
+
        if (mdev->agreed_pro_version >= 87)
                strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
 
@@ -3161,14 +3173,18 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
 void drbd_free_sock(struct drbd_conf *mdev)
 {
        if (mdev->data.socket) {
+               mutex_lock(&mdev->data.mutex);
                kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
                sock_release(mdev->data.socket);
                mdev->data.socket = NULL;
+               mutex_unlock(&mdev->data.mutex);
        }
        if (mdev->meta.socket) {
+               mutex_lock(&mdev->meta.mutex);
                kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
                sock_release(mdev->meta.socket);
                mdev->meta.socket = NULL;
+               mutex_unlock(&mdev->meta.mutex);
        }
 }
 
index 4df3b40..6429d2b 100644 (file)
@@ -285,8 +285,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
                }
 
                if (r == SS_NO_UP_TO_DATE_DISK && force &&
-                   (mdev->state.disk == D_INCONSISTENT ||
-                    mdev->state.disk == D_OUTDATED)) {
+                   (mdev->state.disk < D_UP_TO_DATE &&
+                    mdev->state.disk >= D_INCONSISTENT)) {
                        mask.disk = D_MASK;
                        val.disk  = D_UP_TO_DATE;
                        forced = 1;
@@ -407,7 +407,7 @@ static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
        }
 
        reply->ret_code =
-               drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer);
+               drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
 
        return 0;
 }
@@ -941,6 +941,25 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
        drbd_md_set_sector_offsets(mdev, nbc);
 
+       /* allocate a second IO page if logical_block_size != 512 */
+       logical_block_size = bdev_logical_block_size(nbc->md_bdev);
+       if (logical_block_size == 0)
+               logical_block_size = MD_SECTOR_SIZE;
+
+       if (logical_block_size != MD_SECTOR_SIZE) {
+               if (!mdev->md_io_tmpp) {
+                       struct page *page = alloc_page(GFP_NOIO);
+                       if (!page)
+                               goto force_diskless_dec;
+
+                       dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
+                            logical_block_size, MD_SECTOR_SIZE);
+                       dev_warn(DEV, "Workaround engaged (has performance impact).\n");
+
+                       mdev->md_io_tmpp = page;
+               }
+       }
+
        if (!mdev->bitmap) {
                if (drbd_bm_init(mdev)) {
                        retcode = ERR_NOMEM;
@@ -980,25 +999,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
                goto force_diskless_dec;
        }
 
-       /* allocate a second IO page if logical_block_size != 512 */
-       logical_block_size = bdev_logical_block_size(nbc->md_bdev);
-       if (logical_block_size == 0)
-               logical_block_size = MD_SECTOR_SIZE;
-
-       if (logical_block_size != MD_SECTOR_SIZE) {
-               if (!mdev->md_io_tmpp) {
-                       struct page *page = alloc_page(GFP_NOIO);
-                       if (!page)
-                               goto force_diskless_dec;
-
-                       dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
-                            logical_block_size, MD_SECTOR_SIZE);
-                       dev_warn(DEV, "Workaround engaged (has performance impact).\n");
-
-                       mdev->md_io_tmpp = page;
-               }
-       }
-
        /* Reset the "barriers don't work" bits here, then force meta data to
         * be written, to ensure we determine if barriers are supported. */
        if (nbc->dc.no_md_flush)
index d065c64..ed9f1de 100644 (file)
@@ -2513,6 +2513,10 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
        }
 
        if (hg == -100) {
+               /* FIXME this log message is not correct if we end up here
+                * after an attempted attach on a diskless node.
+                * We just refuse to attach -- well, we drop the "connection"
+                * to that disk, in a way... */
                dev_alert(DEV, "Split-Brain detected, dropping connection!\n");
                drbd_khelper(mdev, "split-brain");
                return C_MASK;
@@ -2538,6 +2542,16 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
                }
        }
 
+       if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
+               if (hg == 0)
+                       dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
+               else
+                       dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
+                                drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
+                                abs(hg) >= 2 ? "full" : "bit-map based");
+               return C_MASK;
+       }
+
        if (abs(hg) >= 2) {
                dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
                if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake"))
@@ -2585,7 +2599,7 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
        struct p_protocol *p = (struct p_protocol *)h;
        int header_size, data_size;
        int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
-       int p_want_lose, p_two_primaries;
+       int p_want_lose, p_two_primaries, cf;
        char p_integrity_alg[SHARED_SECRET_MAX] = "";
 
        header_size = sizeof(*p) - sizeof(*h);
@@ -2598,8 +2612,14 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
        p_after_sb_0p   = be32_to_cpu(p->after_sb_0p);
        p_after_sb_1p   = be32_to_cpu(p->after_sb_1p);
        p_after_sb_2p   = be32_to_cpu(p->after_sb_2p);
-       p_want_lose     = be32_to_cpu(p->want_lose);
        p_two_primaries = be32_to_cpu(p->two_primaries);
+       cf              = be32_to_cpu(p->conn_flags);
+       p_want_lose = cf & CF_WANT_LOSE;
+
+       clear_bit(CONN_DRY_RUN, &mdev->flags);
+
+       if (cf & CF_DRY_RUN)
+               set_bit(CONN_DRY_RUN, &mdev->flags);
 
        if (p_proto != mdev->net_conf->wire_protocol) {
                dev_err(DEV, "incompatible communication protocols\n");
@@ -3118,13 +3138,16 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
 
                put_ldev(mdev);
                if (nconn == C_MASK) {
+                       nconn = C_CONNECTED;
                        if (mdev->state.disk == D_NEGOTIATING) {
                                drbd_force_state(mdev, NS(disk, D_DISKLESS));
-                               nconn = C_CONNECTED;
                        } else if (peer_state.disk == D_NEGOTIATING) {
                                dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
                                peer_state.disk = D_DISKLESS;
+                               real_peer_disk = D_DISKLESS;
                        } else {
+                               if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
+                                       return FALSE;
                                D_ASSERT(oconn == C_WF_REPORT_PARAMS);
                                drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
                                return FALSE;
@@ -3594,10 +3617,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
 
        /* asender does not clean up anything. it must not interfere, either */
        drbd_thread_stop(&mdev->asender);
-
-       mutex_lock(&mdev->data.mutex);
        drbd_free_sock(mdev);
-       mutex_unlock(&mdev->data.mutex);
 
        spin_lock_irq(&mdev->req_lock);
        _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
@@ -4054,6 +4074,8 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h)
 {
        /* restore idle timeout */
        mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
+       if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
+               wake_up(&mdev->misc_wait);
 
        return TRUE;
 }
index b453c2b..44bf6d1 100644 (file)
@@ -938,7 +938,8 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 
                if (eq) {
                        drbd_set_in_sync(mdev, e->sector, e->size);
-                       mdev->rs_same_csum++;
+                       /* rs_same_csums unit is BM_BLOCK_SIZE */
+                       mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT;
                        ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
                } else {
                        inc_rs_pending(mdev);
@@ -1288,6 +1289,14 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
        return retcode;
 }
 
+static void ping_peer(struct drbd_conf *mdev)
+{
+       clear_bit(GOT_PING_ACK, &mdev->flags);
+       request_ping(mdev);
+       wait_event(mdev->misc_wait,
+                  test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
+}
+
 /**
  * drbd_start_resync() - Start the resync process
  * @mdev:      DRBD device.
@@ -1371,7 +1380,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                _drbd_pause_after(mdev);
        }
        write_unlock_irq(&global_state_lock);
-       drbd_state_unlock(mdev);
        put_ldev(mdev);
 
        if (r == SS_SUCCESS) {
@@ -1382,11 +1390,8 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 
                if (mdev->rs_total == 0) {
                        /* Peer still reachable? Beware of failing before-resync-target handlers! */
-                       request_ping(mdev);
-                       __set_current_state(TASK_INTERRUPTIBLE);
-                       schedule_timeout(mdev->net_conf->ping_timeo*HZ/9); /* 9 instead 10 */
+                       ping_peer(mdev);
                        drbd_resync_finished(mdev);
-                       return;
                }
 
                /* ns.conn may already be != mdev->state.conn,
@@ -1398,6 +1403,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 
                drbd_md_sync(mdev);
        }
+       drbd_state_unlock(mdev);
 }
 
 int drbd_worker(struct drbd_thread *thi)
index 8866ca3..71acf4e 100644 (file)
@@ -341,11 +341,11 @@ static int pcd_wait(struct pcd_unit *cd, int go, int stop, char *fun, char *msg)
               && (j++ < PCD_SPIN))
                udelay(PCD_DELAY);
 
-       if ((r & (IDE_ERR & stop)) || (j >= PCD_SPIN)) {
+       if ((r & (IDE_ERR & stop)) || (j > PCD_SPIN)) {
                s = read_reg(cd, 7);
                e = read_reg(cd, 1);
                p = read_reg(cd, 2);
-               if (j >= PCD_SPIN)
+               if (j > PCD_SPIN)
                        e |= 0x100;
                if (fun)
                        printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
index ddb4f9a..c059aab 100644 (file)
@@ -391,11 +391,11 @@ static int pf_wait(struct pf_unit *pf, int go, int stop, char *fun, char *msg)
               && (j++ < PF_SPIN))
                udelay(PF_SPIN_DEL);
 
-       if ((r & (STAT_ERR & stop)) || (j >= PF_SPIN)) {
+       if ((r & (STAT_ERR & stop)) || (j > PF_SPIN)) {
                s = read_reg(pf, 7);
                e = read_reg(pf, 1);
                p = read_reg(pf, 2);
-               if (j >= PF_SPIN)
+               if (j > PF_SPIN)
                        e |= 0x100;
                if (fun)
                        printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
index 1e4006e..bc5825f 100644 (file)
@@ -274,11 +274,11 @@ static int pt_wait(struct pt_unit *tape, int go, int stop, char *fun, char *msg)
               && (j++ < PT_SPIN))
                udelay(PT_SPIN_DEL);
 
-       if ((r & (STAT_ERR & stop)) || (j >= PT_SPIN)) {
+       if ((r & (STAT_ERR & stop)) || (j > PT_SPIN)) {
                s = read_reg(pi, 7);
                e = read_reg(pi, 1);
                p = read_reg(pi, 2);
-               if (j >= PT_SPIN)
+               if (j > PT_SPIN)
                        e |= 0x100;
                if (fun)
                        printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
index 3c64af0..653817c 100644 (file)
@@ -347,14 +347,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
        set_capacity(vblk->disk, cap);
 
        /* We can handle whatever the host told us to handle. */
-       blk_queue_max_phys_segments(q, vblk->sg_elems-2);
-       blk_queue_max_hw_segments(q, vblk->sg_elems-2);
+       blk_queue_max_segments(q, vblk->sg_elems-2);
 
        /* No need to bounce any requests */
        blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 
        /* No real sector limit. */
-       blk_queue_max_sectors(q, -1U);
+       blk_queue_max_hw_sectors(q, -1U);
 
        /* Host can optionally specify maximum segment size and number of
         * segments. */
index 83881df..a82ab3e 100644 (file)
@@ -2185,7 +2185,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
        blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
 
        gd->driverfs_dev = &sdp->sdev_gendev;
-       gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS;
+       gd->flags = GENHD_FL_EXT_DEVT;
        if (sdp->removable)
                gd->flags |= GENHD_FL_REMOVABLE;
 
index e1f9221..e7bf6ca 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -554,7 +554,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
                                        .bi_rw = bio->bi_rw,
                                };
 
-                               if (q->merge_bvec_fn(q, &bvm, prev) < len) {
+                               if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
                                        prev->bv_len -= len;
                                        return 0;
                                }
@@ -607,7 +607,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
                 * merge_bvec_fn() returns number of bytes it can accept
                 * at this offset
                 */
-               if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
+               if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
                        bvec->bv_page = NULL;
                        bvec->bv_len = 0;
                        bvec->bv_offset = 0;
index 76fc4d5..6841eff 100644 (file)
@@ -553,108 +553,85 @@ select_queue:
        return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block **psb)
+static void unpin_sb_for_writeback(struct super_block *sb)
 {
-       struct super_block *sb = *psb;
-
-       if (sb) {
-               up_read(&sb->s_umount);
-               put_super(sb);
-               *psb = NULL;
-       }
+       up_read(&sb->s_umount);
+       put_super(sb);
 }
 
+enum sb_pin_state {
+       SB_PINNED,
+       SB_NOT_PINNED,
+       SB_PIN_FAILED
+};
+
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
- *
- * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
- * 1 if we failed.
  */
-static int pin_sb_for_writeback(struct writeback_control *wbc,
-                               struct inode *inode, struct super_block **psb)
+static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
+                                             struct super_block *sb)
 {
-       struct super_block *sb = inode->i_sb;
-
-       /*
-        * If this sb is already pinned, nothing more to do. If not and
-        * *psb is non-NULL, unpin the old one first
-        */
-       if (sb == *psb)
-               return 0;
-       else if (*psb)
-               unpin_sb_for_writeback(psb);
-
        /*
         * Caller must already hold the ref for this
         */
        if (wbc->sync_mode == WB_SYNC_ALL) {
                WARN_ON(!rwsem_is_locked(&sb->s_umount));
-               return 0;
+               return SB_NOT_PINNED;
        }
-
        spin_lock(&sb_lock);
        sb->s_count++;
        if (down_read_trylock(&sb->s_umount)) {
                if (sb->s_root) {
                        spin_unlock(&sb_lock);
-                       goto pinned;
+                       return SB_PINNED;
                }
                /*
                 * umounted, drop rwsem again and fall through to failure
                 */
                up_read(&sb->s_umount);
        }
-
        sb->s_count--;
        spin_unlock(&sb_lock);
-       return 1;
-pinned:
-       *psb = sb;
-       return 0;
+       return SB_PIN_FAILED;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-                               struct writeback_control *wbc)
+/*
+ * Write a portion of b_io inodes which belong to @sb.
+ * If @wbc->sb != NULL, then find and write all such
+ * inodes. Otherwise write only ones which go sequentially
+ * in reverse order.
+ * Return 1, if the caller writeback routine should be
+ * interrupted. Otherwise return 0.
+ */
+static int writeback_sb_inodes(struct super_block *sb,
+                              struct bdi_writeback *wb,
+                              struct writeback_control *wbc)
 {
-       struct super_block *sb = wbc->sb, *pin_sb = NULL;
-       const unsigned long start = jiffies;    /* livelock avoidance */
-
-       spin_lock(&inode_lock);
-
-       if (!wbc->for_kupdate || list_empty(&wb->b_io))
-               queue_io(wb, wbc->older_than_this);
-
        while (!list_empty(&wb->b_io)) {
-               struct inode *inode = list_entry(wb->b_io.prev,
-                                               struct inode, i_list);
                long pages_skipped;
-
-               /*
-                * super block given and doesn't match, skip this inode
-                */
-               if (sb && sb != inode->i_sb) {
+               struct inode *inode = list_entry(wb->b_io.prev,
+                                                struct inode, i_list);
+               if (wbc->sb && sb != inode->i_sb) {
+                       /* super block given and doesn't
+                          match, skip this inode */
                        redirty_tail(inode);
                        continue;
                }
-
+               if (sb != inode->i_sb)
+                       /* finish with this superblock */
+                       return 0;
                if (inode->i_state & (I_NEW | I_WILL_FREE)) {
                        requeue_io(inode);
                        continue;
                }
-
                /*
                 * Was this inode dirtied after sync_sb_inodes was called?
                 * This keeps sync from extra jobs and livelock.
                 */
-               if (inode_dirtied_after(inode, start))
-                       break;
-
-               if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
-                       requeue_io(inode);
-                       continue;
-               }
+               if (inode_dirtied_after(inode, wbc->wb_start))
+                       return 1;
 
                BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
                __iget(inode);
@@ -673,14 +650,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                spin_lock(&inode_lock);
                if (wbc->nr_to_write <= 0) {
                        wbc->more_io = 1;
-                       break;
+                       return 1;
                }
                if (!list_empty(&wb->b_more_io))
                        wbc->more_io = 1;
        }
+       /* b_io is empty */
+       return 1;
+}
+
+static void writeback_inodes_wb(struct bdi_writeback *wb,
+                               struct writeback_control *wbc)
+{
+       int ret = 0;
 
-       unpin_sb_for_writeback(&pin_sb);
+       wbc->wb_start = jiffies; /* livelock avoidance */
+       spin_lock(&inode_lock);
+       if (!wbc->for_kupdate || list_empty(&wb->b_io))
+               queue_io(wb, wbc->older_than_this);
+
+       while (!list_empty(&wb->b_io)) {
+               struct inode *inode = list_entry(wb->b_io.prev,
+                                                struct inode, i_list);
+               struct super_block *sb = inode->i_sb;
+               enum sb_pin_state state;
+
+               if (wbc->sb && sb != wbc->sb) {
+                       /* super block given and doesn't
+                          match, skip this inode */
+                       redirty_tail(inode);
+                       continue;
+               }
+               state = pin_sb_for_writeback(wbc, sb);
+
+               if (state == SB_PIN_FAILED) {
+                       requeue_io(inode);
+                       continue;
+               }
+               ret = writeback_sb_inodes(sb, wb, wbc);
 
+               if (state == SB_PINNED)
+                       unpin_sb_for_writeback(sb);
+               if (ret)
+                       break;
+       }
        spin_unlock(&inode_lock);
        /* Leave any unwritten inodes on b_io */
 }
index ebd22db..41551c9 100644 (file)
@@ -921,26 +921,7 @@ extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
-
-/* Temporary compatibility wrapper */
-static inline void blk_queue_max_sectors(struct request_queue *q, unsigned int max)
-{
-       blk_queue_max_hw_sectors(q, max);
-}
-
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
-
-static inline void blk_queue_max_phys_segments(struct request_queue *q, unsigned short max)
-{
-       blk_queue_max_segments(q, max);
-}
-
-static inline void blk_queue_max_hw_segments(struct request_queue *q, unsigned short max)
-{
-       blk_queue_max_segments(q, max);
-}
-
-
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
                unsigned int max_discard_sectors);
@@ -1030,11 +1011,6 @@ static inline int sb_issue_discard(struct super_block *sb,
 
 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
 
-#define MAX_PHYS_SEGMENTS 128
-#define MAX_HW_SEGMENTS 128
-#define SAFE_MAX_SECTORS 255
-#define MAX_SEGMENT_SIZE       65536
-
 enum blk_default_limits {
        BLK_MAX_SEGMENTS        = 128,
        BLK_SAFE_MAX_SECTORS    = 255,
index 7896227..4341b1a 100644 (file)
@@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.7"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 91
+#define PRO_VERSION_MAX 92
 
 
 enum drbd_io_error_p {
index a4d82f8..f7431a4 100644 (file)
@@ -12,7 +12,7 @@
 #endif
 
 NL_PACKET(primary, 1,
-       NL_BIT(         1,      T_MAY_IGNORE,   overwrite_peer)
+       NL_BIT(         1,      T_MAY_IGNORE,   primary_force)
 )
 
 NL_PACKET(secondary, 2, )
@@ -63,6 +63,7 @@ NL_PACKET(net_conf, 5,
        NL_BIT(         41,     T_MAY_IGNORE,   always_asbp)
        NL_BIT(         61,     T_MAY_IGNORE,   no_cork)
        NL_BIT(         62,     T_MANDATORY,    auto_sndbuf_size)
+       NL_BIT(         70,     T_MANDATORY,    dry_run)
 )
 
 NL_PACKET(disconnect, 6, )
index 56b5051..5f2f4c4 100644 (file)
@@ -109,7 +109,7 @@ struct hd_struct {
 };
 
 #define GENHD_FL_REMOVABLE                     1
-#define GENHD_FL_DRIVERFS                      2
+/* 2 is unused */
 #define GENHD_FL_MEDIA_CHANGE_NOTIFY           4
 #define GENHD_FL_CD                            8
 #define GENHD_FL_UP                            16
diff --git a/include/linux/lcm.h b/include/linux/lcm.h
new file mode 100644 (file)
index 0000000..7bf01d7
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef _LCM_H
+#define _LCM_H
+
+#include <linux/compiler.h>
+
+unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__;
+
+#endif /* _LCM_H */
index 76e8903..36520de 100644 (file)
@@ -34,6 +34,9 @@ struct writeback_control {
        enum writeback_sync_modes sync_mode;
        unsigned long *older_than_this; /* If !NULL, only write back inodes
                                           older than this */
+       unsigned long wb_start;         /* Time writeback_inodes_wb was
+                                          called. This is needed to avoid
+                                          extra jobs and livelock */
        long nr_to_write;               /* Write this many pages, and decrement
                                           this for each page written */
        long pages_skipped;             /* Pages which were not written */
index 5fb7273..d870a91 100644 (file)
@@ -40,6 +40,16 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
                  __entry->nr_sector, __entry->errors)
 );
 
+/**
+ * block_rq_abort - abort block operation request
+ * @q: queue containing the block operation request
+ * @rq: block IO operation request
+ *
+ * Called immediately after pending block IO operation request @rq in
+ * queue @q is aborted. The fields in the operation request @rq
+ * can be examined to determine which device and sectors the pending
+ * operation would access.
+ */
 DEFINE_EVENT(block_rq_with_error, block_rq_abort,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
@@ -47,6 +57,15 @@ DEFINE_EVENT(block_rq_with_error, block_rq_abort,
        TP_ARGS(q, rq)
 );
 
+/**
+ * block_rq_requeue - place block IO request back on a queue
+ * @q: queue holding operation
+ * @rq: block IO operation request
+ *
+ * The block operation request @rq is being placed back into queue
+ * @q.  For some reason the request was not completed and needs to be
+ * put back in the queue.
+ */
 DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
@@ -54,6 +73,17 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
        TP_ARGS(q, rq)
 );
 
+/**
+ * block_rq_complete - block IO operation completed by device driver
+ * @q: queue containing the block operation request
+ * @rq: block IO operation request
+ *
+ * The block_rq_complete tracepoint event indicates that some portion
+ * of the operation request has been completed by the device driver.  If
+ * @rq->bio is %NULL, then there is absolutely no additional work to
+ * do for the request. If @rq->bio is non-NULL then there is
+ * additional work required to complete the request.
+ */
 DEFINE_EVENT(block_rq_with_error, block_rq_complete,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
@@ -95,6 +125,16 @@ DECLARE_EVENT_CLASS(block_rq,
                  __entry->nr_sector, __entry->comm)
 );
 
+/**
+ * block_rq_insert - insert block operation request into queue
+ * @q: target queue
+ * @rq: block IO operation request
+ *
+ * Called immediately before block operation request @rq is inserted
+ * into queue @q.  The fields in the operation request @rq struct can
+ * be examined to determine which device and sectors the pending
+ * operation would access.
+ */
 DEFINE_EVENT(block_rq, block_rq_insert,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
@@ -102,6 +142,14 @@ DEFINE_EVENT(block_rq, block_rq_insert,
        TP_ARGS(q, rq)
 );
 
+/**
+ * block_rq_issue - issue pending block IO request operation to device driver
+ * @q: queue holding operation
+ * @rq: block IO operation request
+ *
+ * Called when block operation request @rq from queue @q is sent to a
+ * device driver for processing.
+ */
 DEFINE_EVENT(block_rq, block_rq_issue,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
@@ -109,6 +157,17 @@ DEFINE_EVENT(block_rq, block_rq_issue,
        TP_ARGS(q, rq)
 );
 
+/**
+ * block_bio_bounce - used bounce buffer when processing block operation
+ * @q: queue holding the block operation
+ * @bio: block operation
+ *
+ * A bounce buffer was used to handle the block operation @bio in @q.
+ * This occurs when hardware limitations prevent a direct transfer of
+ * data between the @bio data memory area and the IO device.  Use of a
+ * bounce buffer requires extra copying of data and decreases
+ * performance.
+ */
 TRACE_EVENT(block_bio_bounce,
 
        TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -138,6 +197,14 @@ TRACE_EVENT(block_bio_bounce,
                  __entry->nr_sector, __entry->comm)
 );
 
+/**
+ * block_bio_complete - completed all work on the block operation
+ * @q: queue holding the block operation
+ * @bio: block operation completed
+ *
+ * This tracepoint indicates there is no further work to do on this
+ * block IO operation @bio.
+ */
 TRACE_EVENT(block_bio_complete,
 
        TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -193,6 +260,14 @@ DECLARE_EVENT_CLASS(block_bio,
                  __entry->nr_sector, __entry->comm)
 );
 
+/**
+ * block_bio_backmerge - merging block operation to the end of an existing operation
+ * @q: queue holding operation
+ * @bio: new block operation to merge
+ *
+ * Merging block request @bio to the end of an existing block request
+ * in queue @q.
+ */
 DEFINE_EVENT(block_bio, block_bio_backmerge,
 
        TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -200,6 +275,14 @@ DEFINE_EVENT(block_bio, block_bio_backmerge,
        TP_ARGS(q, bio)
 );
 
+/**
+ * block_bio_frontmerge - merging block operation to the beginning of an existing operation
+ * @q: queue holding operation
+ * @bio: new block operation to merge
+ *
+ * Merging block IO operation @bio to the beginning of an existing block
+ * operation in queue @q.
+ */
 DEFINE_EVENT(block_bio, block_bio_frontmerge,
 
        TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -207,6 +290,13 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge,
        TP_ARGS(q, bio)
 );
 
+/**
+ * block_bio_queue - putting new block IO operation in queue
+ * @q: queue holding operation
+ * @bio: new block operation
+ *
+ * About to place the block IO operation @bio into queue @q.
+ */
 DEFINE_EVENT(block_bio, block_bio_queue,
 
        TP_PROTO(struct request_queue *q, struct bio *bio),
@@ -243,6 +333,15 @@ DECLARE_EVENT_CLASS(block_get_rq,
                  __entry->nr_sector, __entry->comm)
 );
 
+/**
+ * block_getrq - get a free request entry in queue for block IO operations
+ * @q: queue for operations
+ * @bio: pending block IO operation
+ * @rw: low bit indicates a read (%0) or a write (%1)
+ *
+ * A request struct for queue @q has been allocated to handle the
+ * block IO operation @bio.
+ */
 DEFINE_EVENT(block_get_rq, block_getrq,
 
        TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
@@ -250,6 +349,17 @@ DEFINE_EVENT(block_get_rq, block_getrq,
        TP_ARGS(q, bio, rw)
 );
 
+/**
+ * block_sleeprq - waiting to get a free request entry in queue for block IO operation
+ * @q: queue for operation
+ * @bio: pending block IO operation
+ * @rw: low bit indicates a read (%0) or a write (%1)
+ *
+ * In the case where a request struct cannot be provided for queue @q
+ * the process needs to wait for a request struct to become
+ * available.  This tracepoint event is generated each time the
+ * process goes to sleep waiting for a request struct to become available.
+ */
 DEFINE_EVENT(block_get_rq, block_sleeprq,
 
        TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
@@ -257,6 +367,14 @@ DEFINE_EVENT(block_get_rq, block_sleeprq,
        TP_ARGS(q, bio, rw)
 );
 
+/**
+ * block_plug - keep operation requests in request queue
+ * @q: request queue to plug
+ *
+ * Plug the request queue @q.  Do not allow block operation requests
+ * to be sent to the device driver. Instead, accumulate requests in
+ * the queue to improve throughput performance of the block device.
+ */
 TRACE_EVENT(block_plug,
 
        TP_PROTO(struct request_queue *q),
@@ -293,6 +411,13 @@ DECLARE_EVENT_CLASS(block_unplug,
        TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
 );
 
+/**
+ * block_unplug_timer - timed release of operation requests in queue to device driver
+ * @q: request queue to unplug
+ *
+ * Unplug the request queue @q because a timer expired and allow block
+ * operation requests to be sent to the device driver.
+ */
 DEFINE_EVENT(block_unplug, block_unplug_timer,
 
        TP_PROTO(struct request_queue *q),
@@ -300,6 +425,13 @@ DEFINE_EVENT(block_unplug, block_unplug_timer,
        TP_ARGS(q)
 );
 
+/**
+ * block_unplug_io - release of operation requests in request queue
+ * @q: request queue to unplug
+ *
+ * Unplug request queue @q because device driver is scheduled to work
+ * on elements in the request queue.
+ */
 DEFINE_EVENT(block_unplug, block_unplug_io,
 
        TP_PROTO(struct request_queue *q),
@@ -307,6 +439,17 @@ DEFINE_EVENT(block_unplug, block_unplug_io,
        TP_ARGS(q)
 );
 
+/**
+ * block_split - split a single bio struct into two bio structs
+ * @q: queue containing the bio
+ * @bio: block operation being split
+ * @new_sector: The starting sector for the new bio
+ *
+ * The bio request @bio in request queue @q needs to be split into two
+ * bio requests. The newly created @bio request starts at
+ * @new_sector. This split may be required due to hardware limitations
+ * such as an operation crossing device boundaries in a RAID system.
+ */
 TRACE_EVENT(block_split,
 
        TP_PROTO(struct request_queue *q, struct bio *bio,
@@ -337,6 +480,16 @@ TRACE_EVENT(block_split,
                  __entry->comm)
 );
 
+/**
+ * block_remap - map request for a partition to the raw device
+ * @q: queue holding the operation
+ * @bio: revised operation
+ * @dev: device for the operation
+ * @from: original sector for the operation
+ *
+ * An operation for a partition on a block device has been mapped to the
+ * raw block device.
+ */
 TRACE_EVENT(block_remap,
 
        TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
@@ -370,6 +523,17 @@ TRACE_EVENT(block_remap,
                  (unsigned long long)__entry->old_sector)
 );
 
+/**
+ * block_rq_remap - map request for a block operation request
+ * @q: queue holding the operation
+ * @rq: block IO operation request
+ * @dev: device for the operation
+ * @from: original sector for the operation
+ *
+ * The block operation request @rq in @q has been remapped.  The block
+ * operation request @rq holds the current information and @from holds
+ * the original sector.
+ */
 TRACE_EVENT(block_rq_remap,
 
        TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
index 2e152ae..0d40152 100644 (file)
@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
         bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-        string_helpers.o gcd.o list_sort.o
+        string_helpers.o gcd.o lcm.o list_sort.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644 (file)
index 0000000..157cd88
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
+#include <linux/kernel.h>
+#include <linux/gcd.h>
+#include <linux/module.h>
+
+/*
+ * Least common multiple. If either argument is zero the other is returned.
+ */
+unsigned long lcm(unsigned long a, unsigned long b)
+{
+       if (!a || !b)
+               return a ? a : b;
+       /* Divide first: a * b can wrap even when the result itself fits. */
+       return (a / gcd(a, b)) * b;
+}
+EXPORT_SYMBOL_GPL(lcm);