ext4, jbd2: Add barriers for file systems with external journals

author Theodore Ts'o <tytso@mit.edu>

Wed, 23 Dec 2009 11:52:08 +0000 (06:52 -0500)

committer Theodore Ts'o <tytso@mit.edu>

Wed, 23 Dec 2009 11:52:08 +0000 (06:52 -0500)
author Theodore Ts'o <tytso@mit.edu>
Wed, 23 Dec 2009 11:52:08 +0000 (06:52 -0500)
committer Theodore Ts'o <tytso@mit.edu>
Wed, 23 Dec 2009 11:52:08 +0000 (06:52 -0500)
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c

index 0b22497..98bd140 100644 (file)
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
                 return ext4_force_commit(inode->i_sb);
  
         commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
-       if (jbd2_log_start_commit(journal, commit_tid))
+       if (jbd2_log_start_commit(journal, commit_tid)) {
+               /*
+                * When the journal is on a different device than the
+                * fs data disk, we need to issue the barrier in
+                * writeback mode.  (In ordered mode, the jbd2 layer
+                * will take care of issuing the barrier.  In
+                * data=journal, all of the data blocks are written to
+                * the journal device.)
+                */
+               if (ext4_should_writeback_data(inode) &&
+                   (journal->j_fs_dev != journal->j_dev) &&
+                   (journal->j_flags & JBD2_BARRIER))
+                       blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
                 jbd2_log_wait_commit(journal, commit_tid);
-       else if (journal->j_flags & JBD2_BARRIER)
+       } else if (journal->j_flags & JBD2_BARRIER)
                 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
         return ret;
  }
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c

index ca0f5eb..8868493 100644 (file)
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
  #include <linux/jbd2.h>
  #include <linux/errno.h>
  #include <linux/slab.h>
+#include <linux/blkdev.h>
  #include <trace/events/jbd2.h>
  
  /*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
         journal->j_tail_sequence = first_tid;
         journal->j_tail = blocknr;
         spin_unlock(&journal->j_state_lock);
+
+       /*
+        * If there is an external journal, we need to make sure that
+        * any data blocks that were recently written out --- perhaps
+        * by jbd2_log_do_checkpoint() --- are flushed out before we
+        * drop the transactions from the external journal.  It's
+        * unlikely this will be necessary, especially with an
+        * appropriately sized journal, but we need this to guarantee
+        * correctness.  Fortunately jbd2_cleanup_journal_tail()
+        * doesn't get called all that often.
+        */
+       if ((journal->j_fs_dev != journal->j_dev) &&
+           (journal->j_flags & JBD2_BARRIER))
+               blkdev_issue_flush(journal->j_fs_dev, NULL);
         if (!(journal->j_flags & JBD2_ABORT))
                 jbd2_journal_update_superblock(journal, 1);
         return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c

index 6a10238..1bc74b6 100644 (file)
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
                         ret = err;
                 spin_lock(&journal->j_list_lock);
                 J_ASSERT(jinode->i_transaction == commit_transaction);
+               commit_transaction->t_flushed_data_blocks = 1;
                 jinode->i_flags &= ~JI_COMMIT_RUNNING;
                 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
         }
@@ -708,8 +709,17 @@ start_journal_io:
                 }
         }
  
-       /* Done it all: now write the commit record asynchronously. */
+       /* 
+        * If the journal is not located on the file system device,
+        * then we must flush the file system device before we issue
+        * the commit record.
+        */
+       if (commit_transaction->t_flushed_data_blocks &&
+           (journal->j_fs_dev != journal->j_dev) &&
+           (journal->j_flags & JBD2_BARRIER))
+               blkdev_issue_flush(journal->j_fs_dev, NULL);
  
+       /* Done it all: now write the commit record asynchronously. */
         if (JBD2_HAS_INCOMPAT_FEATURE(journal,
                                       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
                 err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
                         blkdev_issue_flush(journal->j_dev, NULL);
         }
  
-       /*
-        * This is the right place to wait for data buffers both for ASYNC
-        * and !ASYNC commit. If commit is ASYNC, we need to wait only after
-        * the commit block went to disk (which happens above). If commit is
-        * SYNC, we need to wait for data buffers before we start writing
-        * commit block, which happens below in such setting.
-        */
         err = journal_finish_inode_data_buffers(journal, commit_transaction);
         if (err) {
                 printk(KERN_WARNING
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index f1011f7..638ce45 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -653,6 +653,7 @@ struct transaction_s
          * waiting for it to finish.
          */
         unsigned int t_synchronous_commit:1;
+       unsigned int t_flushed_data_blocks:1;
  
         /*
          * For use by the filesystem to store fs-specific data
author	Theodore Ts'o <tytso@mit.edu>
	Wed, 23 Dec 2009 11:52:08 +0000 (06:52 -0500)
committer	Theodore Ts'o <tytso@mit.edu>
	Wed, 23 Dec 2009 11:52:08 +0000 (06:52 -0500)
fs/ext4/fsync.c		patch \| blob \| history
fs/jbd2/checkpoint.c		patch \| blob \| history
fs/jbd2/commit.c		patch \| blob \| history
include/linux/jbd2.h		patch \| blob \| history