vfs: Make sys_sync() use fsync_super() (version 4)

author Jan Kara <jack@suse.cz>
Mon, 27 Apr 2009 14:43:51 +0000 (16:43 +0200)
committer Al Viro <viro@zeniv.linux.org.uk>
Fri, 12 Jun 2009 01:36:03 +0000 (21:36 -0400)
diff --git a/fs/block_dev.c b/fs/block_dev.c

index fe47f72..4b6a3b9 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -176,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                                 iov, offset, nr_segs, blkdev_get_blocks, NULL);
  }
  
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+       if (!bdev)
+               return 0;
+       if (!wait)
+               return filemap_flush(bdev->bd_inode->i_mapping);
+       return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
  /*
   * Write out and wait upon all the dirty data associated with a block
   * device via its mapping.  Does not take the superblock lock.
   */
  int sync_blockdev(struct block_device *bdev)
  {
-       int ret = 0;
-
-       if (bdev)
-               ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-       return ret;
+       return __sync_blockdev(bdev, 1);
  }
  EXPORT_SYMBOL(sync_blockdev);
  
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c

index 91013ff..e0fb2e7 100644 (file)
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -679,55 +679,6 @@ void sync_inodes_sb(struct super_block *sb, int wait)
  }
  
  /**
- * sync_inodes - writes all inodes to disk
- * @wait: wait for completion
- *
- * sync_inodes() goes through each super block's dirty inode list, writes the
- * inodes out, waits on the writeout and puts the inodes back on the normal
- * list.
- *
- * This is for sys_sync().  fsync_dev() uses the same algorithm.  The subtle
- * part of the sync functions is that the blockdev "superblock" is processed
- * last.  This is because the write_inode() function of a typical fs will
- * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
- * What we want to do is to perform all that dirtying first, and then write
- * back all those inode blocks via the blockdev mapping in one sweep.  So the
- * additional (somewhat redundant) sync_blockdev() calls here are to make
- * sure that really happens.  Because if we call sync_inodes_sb(wait=1) with
- * outstanding dirty inodes, the writeback goes block-at-a-time within the
- * filesystem's write_inode().  This is extremely slow.
- */
-static void __sync_inodes(int wait)
-{
-       struct super_block *sb;
-
-       spin_lock(&sb_lock);
-restart:
-       list_for_each_entry(sb, &super_blocks, s_list) {
-               sb->s_count++;
-               spin_unlock(&sb_lock);
-               down_read(&sb->s_umount);
-               if (sb->s_root) {
-                       sync_inodes_sb(sb, wait);
-                       sync_blockdev(sb->s_bdev);
-               }
-               up_read(&sb->s_umount);
-               spin_lock(&sb_lock);
-               if (__put_super_and_need_restart(sb))
-                       goto restart;
-       }
-       spin_unlock(&sb_lock);
-}
-
-void sync_inodes(int wait)
-{
-       __sync_inodes(0);
-
-       if (wait)
-               __sync_inodes(1);
-}
-
-/**
   * write_inode_now     -       write an inode to disk
   * @inode: inode to write to disk
   * @sync: whether the write should be synchronous or not
diff --git a/fs/internal.h b/fs/internal.h

index 343a537..dbec3cc 100644 (file)
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
         return sb == blockdev_superblock;
  }
  
+extern int __sync_blockdev(struct block_device *bdev, int wait);
+
  #else
  static inline void bdev_cache_init(void)
  {
@@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
  {
         return 0;
  }
+
+static inline int __sync_blockdev(struct block_device *bdev, int wait)
+{
+       return 0;
+}
  #endif
  
  /*
@@ -71,12 +78,3 @@ extern void chroot_fs_refs(struct path *, struct path *);
   * file_table.c
   */
  extern void mark_files_ro(struct super_block *);
-
-/*
- * super.c
- */
-#ifdef CONFIG_BLOCK
-extern void sync_blockdevs(void);
-#else
-static inline void sync_blockdevs(void) { }
-#endif
diff --git a/fs/super.c b/fs/super.c

index 8dbe1ea..c8ce5ed 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -284,23 +284,23 @@ EXPORT_SYMBOL(lock_super);
  EXPORT_SYMBOL(unlock_super);
  
  /*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.  Requires a second blkdev
- * flush by the caller to complete the operation.
+ * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
+ * just dirties buffers with inodes so we have to submit IO for these buffers
+ * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
+ * case write_inode() functions do sync_dirty_buffer() and thus effectively
+ * write one block at a time.
   */
-static int __fsync_super(struct super_block *sb)
+static int __fsync_super(struct super_block *sb, int wait)
  {
-       sync_inodes_sb(sb, 0);
         vfs_dq_sync(sb);
-       sync_inodes_sb(sb, 1);
+       sync_inodes_sb(sb, wait);
         lock_super(sb);
         if (sb->s_dirt && sb->s_op->write_super)
                 sb->s_op->write_super(sb);
         unlock_super(sb);
         if (sb->s_op->sync_fs)
-               sb->s_op->sync_fs(sb, 1);
-       return sync_blockdev(sb->s_bdev);
+               sb->s_op->sync_fs(sb, wait);
+       return __sync_blockdev(sb->s_bdev, wait);
  }
  
  /*
@@ -310,7 +310,12 @@ static int __fsync_super(struct super_block *sb)
   */
  int fsync_super(struct super_block *sb)
  {
-       return __fsync_super(sb);
+       int ret;
+
+       ret = __fsync_super(sb, 0);
+       if (ret < 0)
+               return ret;
+       return __fsync_super(sb, 1);
  }
  EXPORT_SYMBOL_GPL(fsync_super);
  
@@ -469,20 +474,18 @@ restart:
  }
  
  /*
- * Call the ->sync_fs super_op against all filesystems which are r/w and
- * which implement it.
+ * Sync all the data for all the filesystems (called by sys_sync() and
+ * emergency sync)
   *
   * This operation is careful to avoid the livelock which could easily happen
- * if two or more filesystems are being continuously dirtied.  s_need_sync_fs
+ * if two or more filesystems are being continuously dirtied.  s_need_sync
   * is used only here.  We set it against all filesystems and then clear it as
   * we sync them.  So redirtied filesystems are skipped.
   *
   * But if process A is currently running sync_filesystems and then process B
- * calls sync_filesystems as well, process B will set all the s_need_sync_fs
+ * calls sync_filesystems as well, process B will set all the s_need_sync
   * flags again, which will cause process A to resync everything.  Fix that with
   * a local mutex.
- *
- * (Fabian) Avoid sync_fs with clean fs & wait mode 0
   */
  void sync_filesystems(int wait)
  {
@@ -492,25 +495,23 @@ void sync_filesystems(int wait)
         mutex_lock(&mutex);             /* Could be down_interruptible */
         spin_lock(&sb_lock);
         list_for_each_entry(sb, &super_blocks, s_list) {
-               if (!sb->s_op->sync_fs)
-                       continue;
                 if (sb->s_flags & MS_RDONLY)
                         continue;
-               sb->s_need_sync_fs = 1;
+               sb->s_need_sync = 1;
         }
  
  restart:
         list_for_each_entry(sb, &super_blocks, s_list) {
-               if (!sb->s_need_sync_fs)
+               if (!sb->s_need_sync)
                         continue;
-               sb->s_need_sync_fs = 0;
+               sb->s_need_sync = 0;
                 if (sb->s_flags & MS_RDONLY)
                         continue;       /* hm.  Was remounted r/o meanwhile */
                 sb->s_count++;
                 spin_unlock(&sb_lock);
                 down_read(&sb->s_umount);
                 if (sb->s_root)
-                       sb->s_op->sync_fs(sb, wait);
+                       __fsync_super(sb, wait);
                 up_read(&sb->s_umount);
                 /* restart only when sb is no longer on the list */
                 spin_lock(&sb_lock);
@@ -521,33 +522,6 @@ restart:
         mutex_unlock(&mutex);
  }
  
-#ifdef CONFIG_BLOCK
-/*
- *  Sync all block devices underlying some superblock
- */
-void sync_blockdevs(void)
-{
-       struct super_block *sb;
-
-       spin_lock(&sb_lock);
-restart:
-       list_for_each_entry(sb, &super_blocks, s_list) {
-               if (!sb->s_bdev)
-                       continue;
-               sb->s_count++;
-               spin_unlock(&sb_lock);
-               down_read(&sb->s_umount);
-               if (sb->s_root)
-                       sync_blockdev(sb->s_bdev);
-               up_read(&sb->s_umount);
-               spin_lock(&sb_lock);
-               if (__put_super_and_need_restart(sb))
-                       goto restart;
-       }
-       spin_unlock(&sb_lock);
-}
-#endif
-
  /**
   *     get_super - get the superblock of a device
   *     @bdev: device to get the superblock for
diff --git a/fs/sync.c b/fs/sync.c

index 631fd5a..be0798c 100644 (file)
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -18,35 +18,24 @@
  #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
                         SYNC_FILE_RANGE_WAIT_AFTER)
  
-/*
- * sync everything.  Start out by waking pdflush, because that writes back
- * all queues in parallel.
- */
-static void do_sync(unsigned long wait)
+SYSCALL_DEFINE0(sync)
  {
-       wakeup_pdflush(0);
-       sync_inodes(0);         /* All mappings, inodes and their blockdevs */
-       vfs_dq_sync(NULL);
-       sync_inodes(wait);      /* Mappings, inodes and blockdevs, again. */
-       sync_supers();          /* Write the superblocks */
-       sync_filesystems(0);    /* Start syncing the filesystems */
-       sync_filesystems(wait); /* Waitingly sync the filesystems */
-       sync_blockdevs();
-       if (!wait)
-               printk("Emergency Sync complete\n");
+       sync_filesystems(0);
+       sync_filesystems(1);
         if (unlikely(laptop_mode))
                 laptop_sync_completion();
-}
-
-SYSCALL_DEFINE0(sync)
-{
-       do_sync(1);
         return 0;
  }
  
  static void do_sync_work(struct work_struct *work)
  {
-       do_sync(0);
+       /*
+        * Sync twice to reduce the possibility we skipped some inodes / pages
+        * because they were temporarily locked
+        */
+       sync_filesystems(0);
+       sync_filesystems(0);
+       printk("Emergency Sync complete\n");
         kfree(work);
  }
  
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 41a9907..f00df65 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1321,7 +1321,7 @@ struct super_block {
         struct rw_semaphore     s_umount;
         struct mutex            s_lock;
         int                     s_count;
-       int                     s_need_sync_fs;
+       int                     s_need_sync;
         atomic_t                s_active;
  #ifdef CONFIG_SECURITY
         void                    *s_security;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h

index 9344547..3224820 100644 (file)
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -79,7 +79,6 @@ struct writeback_control {
  void writeback_inodes(struct writeback_control *wbc);
  int inode_wait(void *);
  void sync_inodes_sb(struct super_block *, int wait);
-void sync_inodes(int wait);
  
  /* writeback.h requires fs.h; it, too, is not included from here. */
  static inline void wait_on_inode(struct inode *inode)
author	Jan Kara <jack@suse.cz>
	Mon, 27 Apr 2009 14:43:51 +0000 (16:43 +0200)
committer	Al Viro <viro@zeniv.linux.org.uk>
	Fri, 12 Jun 2009 01:36:03 +0000 (21:36 -0400)
fs/block_dev.c		patch | blob | history
fs/fs-writeback.c		patch | blob | history
fs/internal.h		patch | blob | history
fs/super.c		patch | blob | history
fs/sync.c		patch | blob | history
include/linux/fs.h		patch | blob | history
include/linux/writeback.h		patch | blob | history