xfs: enable background pushing of the CIL

author Dave Chinner <dchinner@redhat.com>

Mon, 17 May 2010 05:52:13 +0000 (15:52 +1000)

committer Alex Elder <aelder@sgi.com>

Mon, 24 May 2010 15:38:20 +0000 (10:38 -0500)
author Dave Chinner <dchinner@redhat.com>
Mon, 17 May 2010 05:52:13 +0000 (15:52 +1000)
committer Alex Elder <aelder@sgi.com>
Mon, 24 May 2010 15:38:20 +0000 (10:38 -0500)
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index 53abd6b..9b21f80 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -336,6 +336,7 @@ xfs_log_commit_cil(
  {
         struct log              *log = mp->m_log;
         int                     log_flags = 0;
+       int                     push = 0;
  
         if (flags & XFS_TRANS_RELEASE_LOG_RES)
                 log_flags = XFS_LOG_REL_PERM_RESERV;
@@ -365,8 +366,20 @@ xfs_log_commit_cil(
         xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
         xfs_trans_unreserve_and_mod_sb(tp);
  
-       /* background commit is allowed again */
+       /* check for background commit before unlock */
+       if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
+               push = 1;
         up_read(&log->l_cilp->xc_ctx_lock);
+
+       /*
+        * We need to push CIL every so often so we don't cache more than we
+        * can fit in the log. The limit really is that a checkpoint can't be
+        * more than half the log (the current checkpoint is not allowed to
+        * overwrite the previous checkpoint), but commit latency and memory
+        * usage limit this to a smaller size in most cases.
+        */
+       if (push)
+               xlog_cil_push(log, 0);
         return 0;
  }
  
@@ -429,18 +442,25 @@ xlog_cil_push(
         if (!cil)
                 return 0;
  
-       /* XXX: don't sleep for background? */
         new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
         new_ctx->ticket = xlog_cil_ticket_alloc(log);
  
-       /* lock out transaction commit */
-       down_write(&cil->xc_ctx_lock);
+       /* lock out transaction commit, but don't block on background push */
+       if (!down_write_trylock(&cil->xc_ctx_lock)) {
+               if (!push_now)
+                       goto out_free_ticket;
+               down_write(&cil->xc_ctx_lock);
+       }
         ctx = cil->xc_ctx;
  
         /* check if we've anything to push */
         if (list_empty(&cil->xc_cil))
                 goto out_skip;
  
+       /* check for spurious background flush */
+       if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+               goto out_skip;
+
         /*
          * pull all the log vectors off the items in the CIL, and
          * remove the items from the CIL. We don't need the CIL lock
@@ -584,6 +604,7 @@ restart:
  
  out_skip:
         up_write(&cil->xc_ctx_lock);
+out_free_ticket:
         xfs_log_ticket_put(new_ctx->ticket);
         kmem_free(new_ctx);
         return 0;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h

index 48d9208..8c07261 100644 (file)
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -425,6 +425,51 @@ struct xfs_cil {
  };
  
  /*
+ * The amount of log space we allow the CIL to aggregate is difficult to size.
+ * Whatever we choose, we have to make sure we can get a reservation for the
+ * log space effectively, that it is large enough to capture sufficient
+ * relogging to reduce log buffer IO significantly, but it is not too large for
+ * the log or induces too much latency when writing out through the iclogs. We
+ * track both space consumed and the number of vectors in the checkpoint
+ * context, so we need to decide which to use for limiting.
+ *
+ * Every log buffer we write out during a push needs a header reserved, which
+ * is at least one sector and more for v2 logs. Hence we need a reservation of
+ * at least 512 bytes per 32k of log space just for the LR headers. That means
+ * 16KB of reservation per megabyte of delayed logging space we will consume,
+ * plus various headers.  The number of headers will vary based on the num of
+ * io vectors, so limiting on a specific number of vectors is going to result
+ * in transactions of varying size. IOWs, it is more consistent to track and
+ * limit space consumed in the log rather than by the number of objects being
+ * logged in order to prevent checkpoint ticket overruns.
+ *
+ * Further, use of static reservations through the log grant mechanism is
+ * problematic. It introduces a lot of complexity (e.g. reserve grant vs write
+ * grant) and a significant deadlock potential because regranting write space
+ * can block on log pushes. Hence if we have to regrant log space during a log
+ * push, we can deadlock.
+ *
+ * However, we can avoid this by use of a dynamic "reservation stealing"
+ * technique during transaction commit whereby unused reservation space in the
+ * transaction ticket is transferred to the CIL ctx commit ticket to cover the
+ * space needed by the checkpoint transaction. This means that we never need to
+ * specifically reserve space for the CIL checkpoint transaction, nor do we
+ * need to regrant space once the checkpoint completes. This also means the
+ * checkpoint transaction ticket is specific to the checkpoint context, rather
+ * than the CIL itself.
+ *
+ * With dynamic reservations, we can basically make up arbitrary limits for the
+ * checkpoint size so long as they don't violate any other size rules.  Hence
+ * the initial maximum size for the checkpoint transaction will be set to a
+ * quarter of the log or 8MB, whichever is smaller. 8MB is an arbitrary limit
+ * right now based on the latency of writing out a large amount of data through
+ * the circular iclog buffers.
+ */
+
+#define XLOG_CIL_SPACE_LIMIT(log)      \
+       (min(((log)->l_logsize >> 2), (8 * 1024 * 1024)))
+
+/*
   * The reservation head lsn is not made up of a cycle number and block number.
   * Instead, it uses a cycle number and byte number.  Logs don't expect to
   * overflow 31 bits worth of byte offset, so using a byte number will mean
author	Dave Chinner <dchinner@redhat.com>
	Mon, 17 May 2010 05:52:13 +0000 (15:52 +1000)
committer	Alex Elder <aelder@sgi.com>
	Mon, 24 May 2010 15:38:20 +0000 (10:38 -0500)
fs/xfs/xfs_log_cil.c		patch \| blob \| history
fs/xfs/xfs_log_priv.h		patch \| blob \| history