xfs: improve metadata I/O merging in the elevator
authorDave Chinner <dgc@sgi.com>
Tue, 24 Nov 2009 18:03:15 +0000 (18:03 +0000)
committerAlex Elder <aelder@sgi.com>
Wed, 16 Dec 2009 19:41:19 +0000 (13:41 -0600)
Change all async metadata buffers to use [READ|WRITE]_META I/O types
so that the I/O doesn't get issued immediately. This allows merging of
adjacent metadata requests but still prioritises them over bulk data.
This shows a 10-15% improvement in sequential create speed of small
files.

Don't include the log buffers in this classification - leave them as
sync types so they are issued immediately.

Signed-off-by: Dave Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_buf.h
fs/xfs/xfs_log.c
include/linux/fs.h

index b4c7d42..162359b 100644 (file)
@@ -1149,10 +1149,14 @@ _xfs_buf_ioapply(
        if (bp->b_flags & XBF_ORDERED) {
                ASSERT(!(bp->b_flags & XBF_READ));
                rw = WRITE_BARRIER;
-       } else if (bp->b_flags & _XBF_RUN_QUEUES) {
+       } else if (bp->b_flags & XBF_LOG_BUFFER) {
                ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
                bp->b_flags &= ~_XBF_RUN_QUEUES;
                rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
+       } else if (bp->b_flags & _XBF_RUN_QUEUES) {
+               ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
+               bp->b_flags &= ~_XBF_RUN_QUEUES;
+               rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
        } else {
                rw = (bp->b_flags & XBF_WRITE) ? WRITE :
                     (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
index a509f4a..a34c7b5 100644 (file)
@@ -55,6 +55,7 @@ typedef enum {
        XBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
        XBF_ORDERED = (1 << 11),    /* use ordered writes                  */
        XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead             */
+       XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log   */
 
        /* flags used only as arguments to access routines */
        XBF_LOCK = (1 << 14),       /* lock requested                      */
index 4cb1792..600b5b0 100644 (file)
@@ -1441,6 +1441,7 @@ xlog_sync(xlog_t          *log,
        XFS_BUF_ZEROFLAGS(bp);
        XFS_BUF_BUSY(bp);
        XFS_BUF_ASYNC(bp);
+       bp->b_flags |= XBF_LOG_BUFFER;
        /*
         * Do an ordered write for the log block.
         * Its unnecessary to flush the first split block in the log wrap case.
@@ -1478,6 +1479,7 @@ xlog_sync(xlog_t          *log,
                XFS_BUF_ZEROFLAGS(bp);
                XFS_BUF_BUSY(bp);
                XFS_BUF_ASYNC(bp);
+               bp->b_flags |= XBF_LOG_BUFFER;
                if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
                        XFS_BUF_ORDERED(bp);
                dptr = XFS_BUF_PTR(bp);
index b23a701..cf7fc8a 100644 (file)
@@ -152,6 +152,7 @@ struct inodes_stat_t {
 #define WRITE_SYNC_PLUG        (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define WRITE_SYNC     (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
 #define WRITE_ODIRECT_PLUG     (WRITE | (1 << BIO_RW_SYNCIO))
+#define WRITE_META     (WRITE | (1 << BIO_RW_META))
 #define SWRITE_SYNC_PLUG       \
                        (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define SWRITE_SYNC    (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))