[XFS] Add infrastructure for tracking I/O completions
authorChristoph Hellwig <hch@sgi.com>
Fri, 2 Sep 2005 06:58:49 +0000 (16:58 +1000)
committerNathan Scott <nathans@sgi.com>
Fri, 2 Sep 2005 06:58:49 +0000 (16:58 +1000)
SGI-PV: 934766
SGI-Modid: xfs-linux:xfs-kern:196856a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_linux.h
fs/xfs/linux-2.6/xfs_super.c

index b55cb7f..ed98c7a 100644 (file)
@@ -104,22 +104,24 @@ xfs_page_trace(
 #define xfs_page_trace(tag, inode, page, mask)
 #endif
 
-void
-linvfs_unwritten_done(
-       struct buffer_head      *bh,
-       int                     uptodate)
+/*
+ * Schedule IO completion handling on an xfsdatad if this was
+ * the final hold on this ioend.
+ */
+STATIC void
+xfs_finish_ioend(
+       xfs_ioend_t             *ioend)
 {
-       xfs_buf_t               *pb = (xfs_buf_t *)bh->b_private;
+       if (atomic_dec_and_test(&ioend->io_remaining))
+               queue_work(xfsdatad_workqueue, &ioend->io_work);
+}
 
-       ASSERT(buffer_unwritten(bh));
-       bh->b_end_io = NULL;
-       clear_buffer_unwritten(bh);
-       if (!uptodate)
-               pagebuf_ioerror(pb, EIO);
-       if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-               pagebuf_iodone(pb, 1, 1);
-       }
-       end_buffer_async_write(bh, uptodate);
+STATIC void
+xfs_destroy_ioend(
+       xfs_ioend_t             *ioend)
+{
+       vn_iowake(ioend->io_vnode);
+       mempool_free(ioend, xfs_ioend_pool);
 }
 
 /*
@@ -127,20 +129,66 @@ linvfs_unwritten_done(
  * to written extents (buffered IO).
  */
 STATIC void
-linvfs_unwritten_convert(
-       xfs_buf_t       *bp)
+xfs_end_bio_unwritten(
+       void                    *data)
 {
-       vnode_t         *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
-       int             error;
+       xfs_ioend_t             *ioend = data;
+       vnode_t                 *vp = ioend->io_vnode;
+       xfs_off_t               offset = ioend->io_offset;
+       size_t                  size = ioend->io_size;
+       int                     error;
+
+       if (ioend->io_uptodate)
+               VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
+       xfs_destroy_ioend(ioend);
+}
+
+/*
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
+ */
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+       struct inode            *inode)
+{
+       xfs_ioend_t             *ioend;
 
-       BUG_ON(atomic_read(&bp->pb_hold) < 1);
-       VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
-                       BMAPI_UNWRITTEN, NULL, NULL, error);
-       XFS_BUF_SET_FSPRIVATE(bp, NULL);
-       XFS_BUF_CLR_IODONE_FUNC(bp);
-       XFS_BUF_UNDATAIO(bp);
-       vn_iowake(vp);
-       pagebuf_iodone(bp, 0, 0);
+       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
+
+       /*
+        * Set the count to 1 initially, which will prevent an I/O
+        * completion callback that happens before we have started
+        * all the I/O from calling the completion routine too early.
+        */
+       atomic_set(&ioend->io_remaining, 1);
+       ioend->io_uptodate = 1; /* cleared if any I/O fails */
+       ioend->io_vnode = LINVFS_GET_VP(inode);
+       atomic_inc(&ioend->io_vnode->v_iocount);
+       ioend->io_offset = 0;
+       ioend->io_size = 0;
+
+       INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+
+       return ioend;
+}
+
+void
+linvfs_unwritten_done(
+       struct buffer_head      *bh,
+       int                     uptodate)
+{
+       xfs_ioend_t             *ioend = bh->b_private;
+
+       ASSERT(buffer_unwritten(bh));
+       bh->b_end_io = NULL;
+       clear_buffer_unwritten(bh);
+       if (!uptodate)
+               ioend->io_uptodate = 0;
+
+       xfs_finish_ioend(ioend);
+       end_buffer_async_write(bh, uptodate);
 }
 
 /*
@@ -255,7 +303,7 @@ xfs_probe_unwritten_page(
        struct address_space    *mapping,
        pgoff_t                 index,
        xfs_iomap_t             *iomapp,
-       xfs_buf_t               *pb,
+       xfs_ioend_t             *ioend,
        unsigned long           max_offset,
        unsigned long           *fsbs,
        unsigned int            bbits)
@@ -283,7 +331,7 @@ xfs_probe_unwritten_page(
                                break;
                        xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
                        set_buffer_unwritten_io(bh);
-                       bh->b_private = pb;
+                       bh->b_private = ioend;
                        p_offset += bh->b_size;
                        (*fsbs)++;
                } while ((bh = bh->b_this_page) != head);
@@ -434,27 +482,15 @@ xfs_map_unwritten(
 {
        struct buffer_head      *bh = curr;
        xfs_iomap_t             *tmp;
-       xfs_buf_t               *pb;
-       loff_t                  offset, size;
+       xfs_ioend_t             *ioend;
+       loff_t                  offset;
        unsigned long           nblocks = 0;
 
        offset = start_page->index;
        offset <<= PAGE_CACHE_SHIFT;
        offset += p_offset;
 
-       /* get an "empty" pagebuf to manage IO completion
-        * Proper values will be set before returning */
-       pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
-       if (!pb)
-               return -EAGAIN;
-
-       atomic_inc(&LINVFS_GET_VP(inode)->v_iocount);
-
-       /* Set the count to 1 initially, this will stop an I/O
-        * completion callout which happens before we have started
-        * all the I/O from calling pagebuf_iodone too early.
-        */
-       atomic_set(&pb->pb_io_remaining, 1);
+       ioend = xfs_alloc_ioend(inode);
 
        /* First map forwards in the page consecutive buffers
         * covering this unwritten extent
@@ -467,12 +503,12 @@ xfs_map_unwritten(
                        break;
                xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
                set_buffer_unwritten_io(bh);
-               bh->b_private = pb;
+               bh->b_private = ioend;
                p_offset += bh->b_size;
                nblocks++;
        } while ((bh = bh->b_this_page) != head);
 
-       atomic_add(nblocks, &pb->pb_io_remaining);
+       atomic_add(nblocks, &ioend->io_remaining);
 
        /* If we reached the end of the page, map forwards in any
         * following pages which are also covered by this extent.
@@ -489,13 +525,13 @@ xfs_map_unwritten(
                tloff = min(tlast, tloff);
                for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
                        page = xfs_probe_unwritten_page(mapping,
-                                               tindex, iomapp, pb,
+                                               tindex, iomapp, ioend,
                                                PAGE_CACHE_SIZE, &bs, bbits);
                        if (!page)
                                break;
                        nblocks += bs;
-                       atomic_add(bs, &pb->pb_io_remaining);
-                       xfs_convert_page(inode, page, iomapp, wbc, pb,
+                       atomic_add(bs, &ioend->io_remaining);
+                       xfs_convert_page(inode, page, iomapp, wbc, ioend,
                                                        startio, all_bh);
                        /* stop if converting the next page might add
                         * enough blocks that the corresponding byte
@@ -507,12 +543,12 @@ xfs_map_unwritten(
                if (tindex == tlast &&
                    (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
                        page = xfs_probe_unwritten_page(mapping,
-                                                       tindex, iomapp, pb,
+                                                       tindex, iomapp, ioend,
                                                        pg_offset, &bs, bbits);
                        if (page) {
                                nblocks += bs;
-                               atomic_add(bs, &pb->pb_io_remaining);
-                               xfs_convert_page(inode, page, iomapp, wbc, pb,
+                               atomic_add(bs, &ioend->io_remaining);
+                               xfs_convert_page(inode, page, iomapp, wbc, ioend,
                                                        startio, all_bh);
                                if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
                                        goto enough;
@@ -521,21 +557,9 @@ xfs_map_unwritten(
        }
 
 enough:
-       size = nblocks;         /* NB: using 64bit number here */
-       size <<= block_bits;    /* convert fsb's to byte range */
-
-       XFS_BUF_DATAIO(pb);
-       XFS_BUF_ASYNC(pb);
-       XFS_BUF_SET_SIZE(pb, size);
-       XFS_BUF_SET_COUNT(pb, size);
-       XFS_BUF_SET_OFFSET(pb, offset);
-       XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
-       XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
-
-       if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-               pagebuf_iodone(pb, 1, 1);
-       }
-
+       ioend->io_size = (xfs_off_t)nblocks << block_bits;
+       ioend->io_offset = offset;
+       xfs_finish_ioend(ioend);
        return 0;
 }
 
index 58286b1..fba40cb 100644 (file)
@@ -67,7 +67,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int);
 STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
 
 STATIC struct workqueue_struct *xfslogd_workqueue;
-STATIC struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsdatad_workqueue;
 
 /*
  * Pagebuf debugging
index 42dc5e4..1c63fd3 100644 (file)
 #include <xfs_stats.h>
 #include <xfs_sysctl.h>
 #include <xfs_iops.h>
+#include <xfs_aops.h>
 #include <xfs_super.h>
 #include <xfs_globals.h>
 #include <xfs_fs_subr.h>
index d2c8a11..1a0bcbb 100644 (file)
 #include <linux/namei.h>
 #include <linux/init.h>
 #include <linux/mount.h>
+#include <linux/mempool.h>
 #include <linux/writeback.h>
 
 STATIC struct quotactl_ops linvfs_qops;
 STATIC struct super_operations linvfs_sops;
-STATIC kmem_zone_t *linvfs_inode_zone;
+STATIC kmem_zone_t *xfs_vnode_zone;
+STATIC kmem_zone_t *xfs_ioend_zone;
+mempool_t *xfs_ioend_pool;
 
 STATIC struct xfs_mount_args *
 xfs_args_allocate(
@@ -281,8 +284,7 @@ linvfs_alloc_inode(
 {
        vnode_t                 *vp;
 
-       vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, 
-                kmem_flags_convert(KM_SLEEP));
+       vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP));
        if (!vp)
                return NULL;
        return LINVFS_GET_IP(vp);
@@ -292,11 +294,11 @@ STATIC void
 linvfs_destroy_inode(
        struct inode            *inode)
 {
-       kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode));
+       kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
 }
 
 STATIC void
-init_once(
+linvfs_inode_init_once(
        void                    *data,
        kmem_cache_t            *cachep,
        unsigned long           flags)
@@ -309,21 +311,41 @@ init_once(
 }
 
 STATIC int
-init_inodecache( void )
+linvfs_init_zones(void)
 {
-       linvfs_inode_zone = kmem_cache_create("linvfs_icache",
+       xfs_vnode_zone = kmem_cache_create("xfs_vnode",
                                sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
-                               init_once, NULL);
-       if (linvfs_inode_zone == NULL)
-               return -ENOMEM;
+                               linvfs_inode_init_once, NULL);
+       if (!xfs_vnode_zone)
+               goto out;
+
+       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+       if (!xfs_ioend_zone)
+               goto out_destroy_vnode_zone;
+
+       xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
+                       mempool_alloc_slab, mempool_free_slab,
+                       xfs_ioend_zone);
+       if (!xfs_ioend_pool)
+               goto out_free_ioend_zone;
+
        return 0;
+
+
+ out_free_ioend_zone:
+       kmem_zone_destroy(xfs_ioend_zone);
+ out_destroy_vnode_zone:
+       kmem_zone_destroy(xfs_vnode_zone);
+ out:
+       return -ENOMEM;
 }
 
 STATIC void
-destroy_inodecache( void )
+linvfs_destroy_zones(void)
 {
-       if (kmem_cache_destroy(linvfs_inode_zone))
-               printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
+       mempool_destroy(xfs_ioend_pool);
+       kmem_zone_destroy(xfs_vnode_zone);
+       kmem_zone_destroy(xfs_ioend_zone);
 }
 
 /*
@@ -873,9 +895,9 @@ init_xfs_fs( void )
 
        ktrace_init(64);
 
-       error = init_inodecache();
+       error = linvfs_init_zones();
        if (error < 0)
-               goto undo_inodecache;
+               goto undo_zones;
 
        error = pagebuf_init();
        if (error < 0)
@@ -896,9 +918,9 @@ undo_register:
        pagebuf_terminate();
 
 undo_pagebuf:
-       destroy_inodecache();
+       linvfs_destroy_zones();
 
-undo_inodecache:
+undo_zones:
        return error;
 }
 
@@ -910,7 +932,7 @@ exit_xfs_fs( void )
        unregister_filesystem(&xfs_fs_type);
        xfs_cleanup();
        pagebuf_terminate();
-       destroy_inodecache();
+       linvfs_destroy_zones();
        ktrace_uninit();
 }