fix setattr error handling in sysfs, configfs
[safe/jmp/linux-2.6] / fs / ext4 / fsync.c
index 5afe437..592adf2 100644 (file)
 #include <linux/writeback.h>
 #include <linux/jbd2.h>
 #include <linux/blkdev.h>
-#include <linux/marker.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
+#include <trace/events/ext4.h>
+
+/*
+ * If we're not journaling and this is a just-created file, we have to
+ * sync our parent directory (if it was freshly created) since
+ * otherwise it will only be written by writeback, leaving a huge
+ * window during which a crash may lose the file.  This may apply for
+ * the parent directory's parent as well, and so on recursively, if
+ * they are also freshly created.
+ *
+ * The walk starts at @inode.  For as long as the current inode is
+ * non-NULL and has EXT4_STATE_NEWENTRY set, the flag is cleared, the
+ * parent is looked up through the first dentry on the inode's i_dentry
+ * list, and the parent's mapping buffers are synced.  The parent then
+ * becomes the current inode, so the walk ends at the first ancestor
+ * that does not carry the flag, or as soon as no parent inode can be
+ * reached.
+ *
+ * Nothing is returned: whatever sync_mapping_buffers() reports is
+ * discarded, so a failure to write a parent directory is not passed
+ * back to the caller.
+ *
+ * NOTE(review): the i_dentry list is not tested for emptiness inside
+ * the loop; the caller in this file checks list_empty() only for the
+ * starting inode.  list_entry() is presumably plain pointer arithmetic
+ * that never yields NULL, in which case the !dentry test below cannot
+ * detect an empty list -- confirm.
+ */
+static void ext4_sync_parent(struct inode *inode)
+{
+       struct dentry *dentry = NULL;
+
+       while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
+               ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
+               dentry = list_entry(inode->i_dentry.next,
+                                   struct dentry, d_alias);
+               if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
+                       break;  /* no parent inode reachable: stop the walk */
+               inode = dentry->d_parent->d_inode;      /* step up one level */
+               sync_mapping_buffers(inode->i_mapping); /* result is ignored */
+       }
+}
+
 /*
  * akpm: A new design for ext4_sync_file().
  *
  *
  * What we do is just kick off a commit and wait on it.  This will snapshot the
  * inode to disk.
+ *
+ * i_mutex lock is held when entering and exiting this function
  */
 
-int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
+int ext4_sync_file(struct file *file, int datasync)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = file->f_mapping->host;
+       struct ext4_inode_info *ei = EXT4_I(inode);
        journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
-       int ret = 0;
+       int ret;
+       tid_t commit_tid;
 
        J_ASSERT(ext4_journal_current_handle() == NULL);
 
-       trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
-                  inode->i_sb->s_id, datasync, inode->i_ino,
-                  dentry->d_parent->d_inode->i_ino);
+       trace_ext4_sync_file(file, datasync);
+
+       if (inode->i_sb->s_flags & MS_RDONLY)
+               return 0;
+
+       ret = flush_completed_IO(inode);
+       if (ret < 0)
+               return ret;
+
+       if (!journal) {
+               ret = generic_file_fsync(file, datasync);
+               if (!ret && !list_empty(&inode->i_dentry))
+                       ext4_sync_parent(inode);
+               return ret;
+       }
 
        /*
-        * data=writeback:
+        * data=writeback,ordered:
         *  The caller's filemap_fdatawrite()/wait will sync the data.
-        *  sync_inode() will sync the metadata
-        *
-        * data=ordered:
-        *  The caller's filemap_fdatawrite() will write the data and
-        *  sync_inode() will write the inode if it is dirty.  Then the caller's
-        *  filemap_fdatawait() will wait on the pages.
+        *  Metadata is in the journal; we wait for the proper transaction
+        *  to commit here.
         *
         * data=journal:
         *  filemap_fdatawrite won't do anything (the buffers are clean).
@@ -74,27 +111,27 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
         *  (they were dirtied by commit).  But that's OK - the blocks are
         *  safe in-journal, which is all fsync() needs to ensure.
         */
-       if (ext4_should_journal_data(inode)) {
-               ret = ext4_force_commit(inode->i_sb);
-               goto out;
-       }
-
-       if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-               goto out;
+       if (ext4_should_journal_data(inode))
+               return ext4_force_commit(inode->i_sb);
 
-       /*
-        * The VFS has written the file data.  If the inode is unaltered
-        * then we need not start a commit.
-        */
-       if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
-               struct writeback_control wbc = {
-                       .sync_mode = WB_SYNC_ALL,
-                       .nr_to_write = 0, /* sys_fsync did this */
-               };
-               ret = sync_inode(inode, &wbc);
-               if (journal && (journal->j_flags & JBD2_BARRIER))
-                       blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
-       }
-out:
+       commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
+       if (jbd2_log_start_commit(journal, commit_tid)) {
+               /*
+                * When the journal is on a different device than the
+                * fs data disk, we need to issue the barrier in
+                * writeback mode.  (In ordered mode, the jbd2 layer
+                * will take care of issuing the barrier.  In
+                * data=journal, all of the data blocks are written to
+                * the journal device.)
+                */
+               if (ext4_should_writeback_data(inode) &&
+                   (journal->j_fs_dev != journal->j_dev) &&
+                   (journal->j_flags & JBD2_BARRIER))
+                       blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
+                                       NULL, BLKDEV_IFL_WAIT);
+               ret = jbd2_log_wait_commit(journal, commit_tid);
+       } else if (journal->j_flags & JBD2_BARRIER)
+               blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+                       BLKDEV_IFL_WAIT);
        return ret;
 }