Merge branch 'alpha-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88...

[safe/jmp/linux-2.6] / fs / jbd / commit.c
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c

index 29e62d9..28a9dda 100644 (file)
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -1,5 +1,5 @@
  /*
- * linux/fs/commit.c
+ * linux/fs/jbd/commit.c
   *
   * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
   *
@@ -17,10 +17,9 @@
  #include <linux/fs.h>
  #include <linux/jbd.h>
  #include <linux/errno.h>
-#include <linux/slab.h>
  #include <linux/mm.h>
  #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
+#include <linux/bio.h>
  
  /*
   * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -37,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
  
  /*
   * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
   * After the transaction commits, these pages are left on the LRU, with no
   * ->mapping, and with attached buffers.  These pages are trivially reclaimable
   * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -46,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
   * So here, we have a buffer which has just come off the forget list.  Look to
   * see if we can strip all buffers from the backing page.
   *
- * Called under lock_journal(), and possibly under journal_datalist_lock.  The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock.  The caller provided us with a ref
+ * against the buffer, and we drop that here.
   */
  static void release_buffer_page(struct buffer_head *bh)
  {
@@ -64,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
                 goto nope;
  
         /* OK, it's a truncated page */
-       if (TestSetPageLocked(page))
+       if (!trylock_page(page))
                 goto nope;
  
         page_cache_get(page);
@@ -79,6 +78,19 @@ nope:
  }
  
  /*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+       if (buffer_freed(bh)) {
+               clear_buffer_freed(bh);
+               release_buffer_page(bh);
+       } else
+               put_bh(bh);
+}
+
+/*
   * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
   * held.  For ranking reasons we must trylock.  If we lose, schedule away and
   * return 0.  j_list_lock is dropped in this case.
@@ -105,7 +117,8 @@ static int journal_write_commit_record(journal_t *journal,
  {
         struct journal_head *descriptor;
         struct buffer_head *bh;
-       int i, ret;
+       journal_header_t *header;
+       int ret;
         int barrier_done = 0;
  
         if (is_journal_aborted(journal))
@@ -117,13 +130,10 @@ static int journal_write_commit_record(journal_t *journal,
  
         bh = jh2bh(descriptor);
  
-       /* AKPM: buglet - add `i' to tmp! */
-       for (i = 0; i < bh->b_size; i += 512) {
-               journal_header_t *tmp = (journal_header_t*)bh->b_data;
-               tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
-               tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
-               tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
-       }
+       header = (journal_header_t *)(bh->b_data);
+       header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
+       header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
+       header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
  
         JBUFFER_TRACE(descriptor, "write commit block");
         set_buffer_dirty(bh);
@@ -132,6 +142,8 @@ static int journal_write_commit_record(journal_t *journal,
                 barrier_done = 1;
         }
         ret = sync_dirty_buffer(bh);
+       if (barrier_done)
+               clear_buffer_ordered(bh);
         /* is it possible for another commit to fail at roughly
          * the same time as this one?  If so, we don't want to
          * trust the barrier flag in the super, but instead want
@@ -149,7 +161,6 @@ static int journal_write_commit_record(journal_t *journal,
                 spin_unlock(&journal->j_state_lock);
  
                 /* And try again, without the barrier */
-               clear_buffer_ordered(bh);
                 set_buffer_uptodate(bh);
                 set_buffer_dirty(bh);
                 ret = sync_dirty_buffer(bh);
@@ -160,6 +171,128 @@ static int journal_write_commit_record(journal_t *journal,
         return (ret == -EIO);
  }
  
+static void journal_do_submit_data(struct buffer_head **wbuf, int bufs,
+                                  int write_op)
+{
+       int i;
+
+       for (i = 0; i < bufs; i++) {
+               wbuf[i]->b_end_io = end_buffer_write_sync;
+               /* We use-up our safety reference in submit_bh() */
+               submit_bh(write_op, wbuf[i]);
+       }
+}
+
+/*
+ *  Submit all the data buffers to disk
+ */
+static int journal_submit_data_buffers(journal_t *journal,
+                                      transaction_t *commit_transaction,
+                                      int write_op)
+{
+       struct journal_head *jh;
+       struct buffer_head *bh;
+       int locked;
+       int bufs = 0;
+       struct buffer_head **wbuf = journal->j_wbuf;
+       int err = 0;
+
+       /*
+        * Whenever we unlock the journal and sleep, things can get added
+        * onto ->t_sync_datalist, so we have to keep looping back to
+        * write_out_data until we *know* that the list is empty.
+        *
+        * Cleanup any flushed data buffers from the data list.  Even in
+        * abort mode, we want to flush this out as soon as possible.
+        */
+write_out_data:
+       cond_resched();
+       spin_lock(&journal->j_list_lock);
+
+       while (commit_transaction->t_sync_datalist) {
+               jh = commit_transaction->t_sync_datalist;
+               bh = jh2bh(jh);
+               locked = 0;
+
+               /* Get reference just to make sure buffer does not disappear
+                * when we are forced to drop various locks */
+               get_bh(bh);
+               /* If the buffer is dirty, we need to submit IO and hence
+                * we need the buffer lock. We try to lock the buffer without
+                * blocking. If we fail, we need to drop j_list_lock and do
+                * blocking lock_buffer().
+                */
+               if (buffer_dirty(bh)) {
+                       if (!trylock_buffer(bh)) {
+                               BUFFER_TRACE(bh, "needs blocking lock");
+                               spin_unlock(&journal->j_list_lock);
+                               /* Write out all data to prevent deadlocks */
+                               journal_do_submit_data(wbuf, bufs, write_op);
+                               bufs = 0;
+                               lock_buffer(bh);
+                               spin_lock(&journal->j_list_lock);
+                       }
+                       locked = 1;
+               }
+               /* We have to get bh_state lock. Again out of order, sigh. */
+               if (!inverted_lock(journal, bh)) {
+                       jbd_lock_bh_state(bh);
+                       spin_lock(&journal->j_list_lock);
+               }
+               /* Someone already cleaned up the buffer? */
+               if (!buffer_jbd(bh) || bh2jh(bh) != jh
+                       || jh->b_transaction != commit_transaction
+                       || jh->b_jlist != BJ_SyncData) {
+                       jbd_unlock_bh_state(bh);
+                       if (locked)
+                               unlock_buffer(bh);
+                       BUFFER_TRACE(bh, "already cleaned up");
+                       release_data_buffer(bh);
+                       continue;
+               }
+               if (locked && test_clear_buffer_dirty(bh)) {
+                       BUFFER_TRACE(bh, "needs writeout, adding to array");
+                       wbuf[bufs++] = bh;
+                       __journal_file_buffer(jh, commit_transaction,
+                                               BJ_Locked);
+                       jbd_unlock_bh_state(bh);
+                       if (bufs == journal->j_wbufsize) {
+                               spin_unlock(&journal->j_list_lock);
+                               journal_do_submit_data(wbuf, bufs, write_op);
+                               bufs = 0;
+                               goto write_out_data;
+                       }
+               } else if (!locked && buffer_locked(bh)) {
+                       __journal_file_buffer(jh, commit_transaction,
+                                               BJ_Locked);
+                       jbd_unlock_bh_state(bh);
+                       put_bh(bh);
+               } else {
+                       BUFFER_TRACE(bh, "writeout complete: unfile");
+                       if (unlikely(!buffer_uptodate(bh)))
+                               err = -EIO;
+                       __journal_unfile_buffer(jh);
+                       jbd_unlock_bh_state(bh);
+                       if (locked)
+                               unlock_buffer(bh);
+                       journal_remove_journal_head(bh);
+                       /* One for our safety reference, other for
+                        * journal_remove_journal_head() */
+                       put_bh(bh);
+                       release_data_buffer(bh);
+               }
+
+               if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
+                       spin_unlock(&journal->j_list_lock);
+                       goto write_out_data;
+               }
+       }
+       spin_unlock(&journal->j_list_lock);
+       journal_do_submit_data(wbuf, bufs, write_op);
+
+       return err;
+}
+
  /*
   * journal_commit_transaction
   *
@@ -174,7 +307,9 @@ void journal_commit_transaction(journal_t *journal)
         int bufs;
         int flags;
         int err;
-       unsigned long blocknr;
+       unsigned int blocknr;
+       ktime_t start_time;
+       u64 commit_time;
         char *tagp = NULL;
         journal_header_t *header;
         journal_block_tag_t *tag = NULL;
@@ -182,6 +317,7 @@ void journal_commit_transaction(journal_t *journal)
         int first_tag = 0;
         int tag_flag;
         int i;
+       int write_op = WRITE;
  
         /*
          * First job: lock down the current transaction and wait for
@@ -214,6 +350,13 @@ void journal_commit_transaction(journal_t *journal)
         spin_lock(&journal->j_state_lock);
         commit_transaction->t_state = T_LOCKED;
  
+       /*
+        * Use plugged writes here, since we want to submit several before
+        * we unplug the device. We don't do explicit unplugging in here,
+        * instead we rely on sync_buffer() doing the unplug for us.
+        */
+       if (commit_transaction->t_synchronous_commit)
+               write_op = WRITE_SYNC_PLUG;
         spin_lock(&commit_transaction->t_handle_lock);
         while (commit_transaction->t_updates) {
                 DEFINE_WAIT(wait);
@@ -261,7 +404,7 @@ void journal_commit_transaction(journal_t *journal)
                         struct buffer_head *bh = jh2bh(jh);
  
                         jbd_lock_bh_state(bh);
-                       kfree(jh->b_committed_data);
+                       jbd_free(jh->b_committed_data, bh->b_size);
                         jh->b_committed_data = NULL;
                         jbd_unlock_bh_state(bh);
                 }
@@ -287,6 +430,7 @@ void journal_commit_transaction(journal_t *journal)
         commit_transaction->t_state = T_FLUSH;
         journal->j_committing_transaction = commit_transaction;
         journal->j_running_transaction = NULL;
+       start_time = ktime_get();
         commit_transaction->t_log_start = journal->j_head;
         wake_up(&journal->j_wait_transaction_locked);
         spin_unlock(&journal->j_state_lock);
@@ -294,99 +438,16 @@ void journal_commit_transaction(journal_t *journal)
         jbd_debug (3, "JBD: commit phase 2\n");
  
         /*
-        * First, drop modified flag: all accesses to the buffers
-        * will be tracked for a new trasaction only -bzzz
-        */
-       spin_lock(&journal->j_list_lock);
-       if (commit_transaction->t_buffers) {
-               new_jh = jh = commit_transaction->t_buffers->b_tnext;
-               do {
-                       J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
-                                       new_jh->b_modified == 0);
-                       new_jh->b_modified = 0;
-                       new_jh = new_jh->b_tnext;
-               } while (new_jh != jh);
-       }
-       spin_unlock(&journal->j_list_lock);
-
-       /*
          * Now start flushing things to disk, in the order they appear
          * on the transaction lists.  Data blocks go first.
          */
-
-       err = 0;
-       /*
-        * Whenever we unlock the journal and sleep, things can get added
-        * onto ->t_sync_datalist, so we have to keep looping back to
-        * write_out_data until we *know* that the list is empty.
-        */
-       bufs = 0;
-       /*
-        * Cleanup any flushed data buffers from the data list.  Even in
-        * abort mode, we want to flush this out as soon as possible.
-        */
-write_out_data:
-       cond_resched();
-       spin_lock(&journal->j_list_lock);
-
-       while (commit_transaction->t_sync_datalist) {
-               struct buffer_head *bh;
-
-               jh = commit_transaction->t_sync_datalist;
-               commit_transaction->t_sync_datalist = jh->b_tnext;
-               bh = jh2bh(jh);
-               if (buffer_locked(bh)) {
-                       BUFFER_TRACE(bh, "locked");
-                       if (!inverted_lock(journal, bh))
-                               goto write_out_data;
-                       __journal_temp_unlink_buffer(jh);
-                       __journal_file_buffer(jh, commit_transaction,
-                                               BJ_Locked);
-                       jbd_unlock_bh_state(bh);
-                       if (lock_need_resched(&journal->j_list_lock)) {
-                               spin_unlock(&journal->j_list_lock);
-                               goto write_out_data;
-                       }
-               } else {
-                       if (buffer_dirty(bh)) {
-                               BUFFER_TRACE(bh, "start journal writeout");
-                               get_bh(bh);
-                               wbuf[bufs++] = bh;
-                               if (bufs == journal->j_wbufsize) {
-                                       jbd_debug(2, "submit %d writes\n",
-                                                       bufs);
-                                       spin_unlock(&journal->j_list_lock);
-                                       ll_rw_block(SWRITE, bufs, wbuf);
-                                       journal_brelse_array(wbuf, bufs);
-                                       bufs = 0;
-                                       goto write_out_data;
-                               }
-                       } else {
-                               BUFFER_TRACE(bh, "writeout complete: unfile");
-                               if (!inverted_lock(journal, bh))
-                                       goto write_out_data;
-                               __journal_unfile_buffer(jh);
-                               jbd_unlock_bh_state(bh);
-                               journal_remove_journal_head(bh);
-                               put_bh(bh);
-                               if (lock_need_resched(&journal->j_list_lock)) {
-                                       spin_unlock(&journal->j_list_lock);
-                                       goto write_out_data;
-                               }
-                       }
-               }
-       }
-
-       if (bufs) {
-               spin_unlock(&journal->j_list_lock);
-               ll_rw_block(SWRITE, bufs, wbuf);
-               journal_brelse_array(wbuf, bufs);
-               spin_lock(&journal->j_list_lock);
-       }
+       err = journal_submit_data_buffers(journal, commit_transaction,
+                                         write_op);
  
         /*
          * Wait for all previously submitted IO to complete.
          */
+       spin_lock(&journal->j_list_lock);
         while (commit_transaction->t_locked_list) {
                 struct buffer_head *bh;
  
@@ -396,16 +457,29 @@ write_out_data:
                 if (buffer_locked(bh)) {
                         spin_unlock(&journal->j_list_lock);
                         wait_on_buffer(bh);
-                       if (unlikely(!buffer_uptodate(bh)))
-                               err = -EIO;
                         spin_lock(&journal->j_list_lock);
                 }
+               if (unlikely(!buffer_uptodate(bh))) {
+                       if (!trylock_page(bh->b_page)) {
+                               spin_unlock(&journal->j_list_lock);
+                               lock_page(bh->b_page);
+                               spin_lock(&journal->j_list_lock);
+                       }
+                       if (bh->b_page->mapping)
+                               set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+                       unlock_page(bh->b_page);
+                       SetPageError(bh->b_page);
+                       err = -EIO;
+               }
                 if (!inverted_lock(journal, bh)) {
                         put_bh(bh);
                         spin_lock(&journal->j_list_lock);
                         continue;
                 }
-               if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
+               if (buffer_jbd(bh) && bh2jh(bh) == jh &&
+                   jh->b_transaction == commit_transaction &&
+                   jh->b_jlist == BJ_Locked) {
                         __journal_unfile_buffer(jh);
                         jbd_unlock_bh_state(bh);
                         journal_remove_journal_head(bh);
@@ -413,17 +487,23 @@ write_out_data:
                 } else {
                         jbd_unlock_bh_state(bh);
                 }
-               put_bh(bh);
+               release_data_buffer(bh);
                 cond_resched_lock(&journal->j_list_lock);
         }
         spin_unlock(&journal->j_list_lock);
  
-       if (err)
-               __journal_abort_hard(journal);
+       if (err) {
+               char b[BDEVNAME_SIZE];
  
-       journal_write_revoke_records(journal, commit_transaction);
+               printk(KERN_WARNING
+                       "JBD: Detected IO errors while flushing file data "
+                       "on %s\n", bdevname(journal->j_fs_dev, b));
+               if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
+                       journal_abort(journal, err);
+               err = 0;
+       }
  
-       jbd_debug(3, "JBD: commit phase 2\n");
+       journal_write_revoke_records(journal, commit_transaction, write_op);
  
         /*
          * If we found any dirty or locked buffers, then we should have
@@ -440,7 +520,12 @@ write_out_data:
          * transaction!  Now comes the tricky part: we need to write out
          * metadata.  Loop over the transaction's entire buffer list:
          */
+       spin_lock(&journal->j_state_lock);
         commit_transaction->t_state = T_COMMIT;
+       spin_unlock(&journal->j_state_lock);
+
+       J_ASSERT(commit_transaction->t_nr_buffers <=
+                commit_transaction->t_outstanding_credits);
  
         descriptor = NULL;
         bufs = 0;
@@ -451,9 +536,10 @@ write_out_data:
                 jh = commit_transaction->t_buffers;
  
                 /* If we're in abort mode, we just un-journal the buffer and
-                  release it for background writing. */
+                  release it. */
  
                 if (is_journal_aborted(journal)) {
+                       clear_buffer_jbddirty(jh2bh(jh));
                         JBUFFER_TRACE(jh, "journal is aborting: refile");
                         journal_refile_buffer(journal, jh);
                         /* If that was the last one, we need to clean up
@@ -477,7 +563,7 @@ write_out_data:
  
                         descriptor = journal_get_descriptor_buffer(journal);
                         if (!descriptor) {
-                               __journal_abort_hard(journal);
+                               journal_abort(journal, -EIO);
                                 continue;
                         }
  
@@ -510,7 +596,7 @@ write_out_data:
                    and repeat this loop: we'll fall into the
                    refile-on-abort condition above. */
                 if (err) {
-                       __journal_abort_hard(journal);
+                       journal_abort(journal, err);
                         continue;
                 }
  
@@ -587,7 +673,7 @@ start_journal_io:
                                 clear_buffer_dirty(bh);
                                 set_buffer_uptodate(bh);
                                 bh->b_end_io = journal_end_buffer_io_sync;
-                               submit_bh(WRITE, bh);
+                               submit_bh(write_op, bh);
                         }
                         cond_resched();
  
@@ -695,13 +781,22 @@ wait_for_iobuf:
                 /* AKPM: bforget here */
         }
  
+       if (err)
+               journal_abort(journal, err);
+
         jbd_debug(3, "JBD: commit phase 6\n");
  
+       /* All metadata is written, now write commit record and do cleanup */
+       spin_lock(&journal->j_state_lock);
+       J_ASSERT(commit_transaction->t_state == T_COMMIT);
+       commit_transaction->t_state = T_COMMIT_RECORD;
+       spin_unlock(&journal->j_state_lock);
+
         if (journal_write_commit_record(journal, commit_transaction))
                 err = -EIO;
  
         if (err)
-               __journal_abort_hard(journal);
+               journal_abort(journal, err);
  
         /* End of a transaction!  Finally, we can do checkpoint
             processing: any buffers committed as a result of this
@@ -745,14 +840,14 @@ restart_loop:
                  * Otherwise, we can just throw away the frozen data now.
                  */
                 if (jh->b_committed_data) {
-                       kfree(jh->b_committed_data);
+                       jbd_free(jh->b_committed_data, bh->b_size);
                         jh->b_committed_data = NULL;
                         if (jh->b_frozen_data) {
                                 jh->b_committed_data = jh->b_frozen_data;
                                 jh->b_frozen_data = NULL;
                         }
                 } else if (jh->b_frozen_data) {
-                       kfree(jh->b_frozen_data);
+                       jbd_free(jh->b_frozen_data, bh->b_size);
                         jh->b_frozen_data = NULL;
                 }
  
@@ -772,12 +867,12 @@ restart_loop:
                 /* A buffer which has been freed while still being
                  * journaled by a previous transaction may end up still
                  * being dirty here, but we want to avoid writing back
-                * that buffer in the future now that the last use has
-                * been committed.  That's not only a performance gain,
-                * it also stops aliasing problems if the buffer is left
-                * behind for writeback and gets reallocated for another
+                * that buffer in the future after the "add to orphan"
+                * operation has been committed.  That's not only a performance
+                * gain, it also stops aliasing problems if the buffer is
+                * left behind for writeback and gets reallocated for another
                  * use in a different page. */
-               if (buffer_freed(bh)) {
+               if (buffer_freed(bh) && !jh->b_next_transaction) {
                         clear_buffer_freed(bh);
                         clear_buffer_jbddirty(bh);
                 }
@@ -785,25 +880,38 @@ restart_loop:
                 if (buffer_jbddirty(bh)) {
                         JBUFFER_TRACE(jh, "add to new checkpointing trans");
                         __journal_insert_checkpoint(jh, commit_transaction);
+                       if (is_journal_aborted(journal))
+                               clear_buffer_jbddirty(bh);
                         JBUFFER_TRACE(jh, "refile for checkpoint writeback");
                         __journal_refile_buffer(jh);
                         jbd_unlock_bh_state(bh);
                 } else {
                         J_ASSERT_BH(bh, !buffer_dirty(bh));
-                       J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-                       __journal_unfile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
-                       journal_remove_journal_head(bh);  /* needs a brelse */
-                       release_buffer_page(bh);
+                       /* A buffer on the BJ_Forget list that is not jbddirty
+                        * has been freed by this transaction and hence it
+                        * could not have been reallocated until this
+                        * transaction has committed. *BUT* it could be
+                        * reallocated once we have written all the data to
+                        * disk and before we process the buffer on BJ_Forget
+                        * list. */
+                       JBUFFER_TRACE(jh, "refile or unfile freed buffer");
+                       __journal_refile_buffer(jh);
+                       if (!jh->b_transaction) {
+                               jbd_unlock_bh_state(bh);
+                                /* needs a brelse */
+                               journal_remove_journal_head(bh);
+                               release_buffer_page(bh);
+                       } else
+                               jbd_unlock_bh_state(bh);
                 }
                 cond_resched_lock(&journal->j_list_lock);
         }
         spin_unlock(&journal->j_list_lock);
         /*
-        * This is a bit sleazy.  We borrow j_list_lock to protect
-        * journal->j_committing_transaction in __journal_remove_checkpoint.
-        * Really, __journal_remove_checkpoint should be using j_state_lock but
-        * it's a bit hassle to hold that across __journal_remove_checkpoint
+        * This is a bit sleazy.  We use j_list_lock to protect transition
+        * of a transaction into T_FINISHED state and calling
+        * __journal_drop_transaction(). Otherwise we could race with
+        * other checkpointing code processing the transaction...
          */
         spin_lock(&journal->j_state_lock);
         spin_lock(&journal->j_list_lock);
@@ -821,12 +929,24 @@ restart_loop:
  
         jbd_debug(3, "JBD: commit phase 8\n");
  
-       J_ASSERT(commit_transaction->t_state == T_COMMIT);
+       J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD);
  
         commit_transaction->t_state = T_FINISHED;
         J_ASSERT(commit_transaction == journal->j_committing_transaction);
         journal->j_commit_sequence = commit_transaction->t_tid;
         journal->j_committing_transaction = NULL;
+       commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
+
+       /*
+        * weight the commit time higher than the average time so we don't
+        * react too strongly to vast changes in commit time
+        */
+       if (likely(journal->j_average_commit_time))
+               journal->j_average_commit_time = (commit_time*3 +
+                               journal->j_average_commit_time) / 4;
+       else
+               journal->j_average_commit_time = commit_time;
+
         spin_unlock(&journal->j_state_lock);
  
         if (commit_transaction->t_checkpoint_list == NULL &&