Merge branch 'linus' into cont_syslog
[safe/jmp/linux-2.6] / fs / fs-writeback.c
index 781a322..1d1088f 100644 (file)
@@ -42,9 +42,9 @@ struct wb_writeback_args {
        long nr_pages;
        struct super_block *sb;
        enum writeback_sync_modes sync_mode;
-       int for_kupdate:1;
-       int range_cyclic:1;
-       int for_background:1;
+       unsigned int for_kupdate:1;
+       unsigned int range_cyclic:1;
+       unsigned int for_background:1;
 };
 
 /*
@@ -398,11 +398,11 @@ static void inode_wait_for_writeback(struct inode *inode)
        wait_queue_head_t *wqh;
 
        wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-       do {
+        while (inode->i_state & I_SYNC) {
                spin_unlock(&inode_lock);
                __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
                spin_lock(&inode_lock);
-       } while (inode->i_state & I_SYNC);
+       }
 }
 
 /*
@@ -452,11 +452,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
        BUG_ON(inode->i_state & I_SYNC);
 
-       /* Set I_SYNC, reset I_DIRTY */
-       dirty = inode->i_state & I_DIRTY;
+       /* Set I_SYNC, reset I_DIRTY_PAGES */
        inode->i_state |= I_SYNC;
-       inode->i_state &= ~I_DIRTY;
-
+       inode->i_state &= ~I_DIRTY_PAGES;
        spin_unlock(&inode_lock);
 
        ret = do_writepages(mapping, wbc);
@@ -472,6 +470,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                        ret = err;
        }
 
+       /*
+        * Some filesystems may redirty the inode during the writeback
+        * due to delalloc, clear dirty metadata flags right before
+        * write_inode()
+        */
+       spin_lock(&inode_lock);
+       dirty = inode->i_state & I_DIRTY;
+       inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+       spin_unlock(&inode_lock);
        /* Don't write the inode if only I_DIRTY_PAGES was set */
        if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
                int err = write_inode(inode, wbc);
@@ -554,108 +561,85 @@ select_queue:
        return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block **psb)
+static void unpin_sb_for_writeback(struct super_block *sb)
 {
-       struct super_block *sb = *psb;
-
-       if (sb) {
-               up_read(&sb->s_umount);
-               put_super(sb);
-               *psb = NULL;
-       }
+       up_read(&sb->s_umount);
+       put_super(sb);
 }
 
+enum sb_pin_state {
+       SB_PINNED,
+       SB_NOT_PINNED,
+       SB_PIN_FAILED
+};
+
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
- *
- * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
- * 1 if we failed.
  */
-static int pin_sb_for_writeback(struct writeback_control *wbc,
-                               struct inode *inode, struct super_block **psb)
+static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
+                                             struct super_block *sb)
 {
-       struct super_block *sb = inode->i_sb;
-
-       /*
-        * If this sb is already pinned, nothing more to do. If not and
-        * *psb is non-NULL, unpin the old one first
-        */
-       if (sb == *psb)
-               return 0;
-       else if (*psb)
-               unpin_sb_for_writeback(psb);
-
        /*
         * Caller must already hold the ref for this
         */
        if (wbc->sync_mode == WB_SYNC_ALL) {
                WARN_ON(!rwsem_is_locked(&sb->s_umount));
-               return 0;
+               return SB_NOT_PINNED;
        }
-
        spin_lock(&sb_lock);
        sb->s_count++;
        if (down_read_trylock(&sb->s_umount)) {
                if (sb->s_root) {
                        spin_unlock(&sb_lock);
-                       goto pinned;
+                       return SB_PINNED;
                }
                /*
                 * umounted, drop rwsem again and fall through to failure
                 */
                up_read(&sb->s_umount);
        }
-
        sb->s_count--;
        spin_unlock(&sb_lock);
-       return 1;
-pinned:
-       *psb = sb;
-       return 0;
+       return SB_PIN_FAILED;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-                               struct writeback_control *wbc)
+/*
+ * Write a portion of b_io inodes which belong to @sb.
+ * If @wbc->sb != NULL, then find and write all such
+ * inodes. Otherwise write only ones which go sequentially
+ * in reverse order.
+ * Return 1, if the caller writeback routine should be
+ * interrupted. Otherwise return 0.
+ */
+static int writeback_sb_inodes(struct super_block *sb,
+                              struct bdi_writeback *wb,
+                              struct writeback_control *wbc)
 {
-       struct super_block *sb = wbc->sb, *pin_sb = NULL;
-       const unsigned long start = jiffies;    /* livelock avoidance */
-
-       spin_lock(&inode_lock);
-
-       if (!wbc->for_kupdate || list_empty(&wb->b_io))
-               queue_io(wb, wbc->older_than_this);
-
        while (!list_empty(&wb->b_io)) {
-               struct inode *inode = list_entry(wb->b_io.prev,
-                                               struct inode, i_list);
                long pages_skipped;
-
-               /*
-                * super block given and doesn't match, skip this inode
-                */
-               if (sb && sb != inode->i_sb) {
+               struct inode *inode = list_entry(wb->b_io.prev,
+                                                struct inode, i_list);
+               if (wbc->sb && sb != inode->i_sb) {
+                       /* super block given and doesn't
+                          match, skip this inode */
                        redirty_tail(inode);
                        continue;
                }
-
+               if (sb != inode->i_sb)
+                       /* finish with this superblock */
+                       return 0;
                if (inode->i_state & (I_NEW | I_WILL_FREE)) {
                        requeue_io(inode);
                        continue;
                }
-
                /*
                 * Was this inode dirtied after sync_sb_inodes was called?
                 * This keeps sync from extra jobs and livelock.
                 */
-               if (inode_dirtied_after(inode, start))
-                       break;
-
-               if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
-                       requeue_io(inode);
-                       continue;
-               }
+               if (inode_dirtied_after(inode, wbc->wb_start))
+                       return 1;
 
                BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
                __iget(inode);
@@ -674,14 +658,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                spin_lock(&inode_lock);
                if (wbc->nr_to_write <= 0) {
                        wbc->more_io = 1;
-                       break;
+                       return 1;
                }
                if (!list_empty(&wb->b_more_io))
                        wbc->more_io = 1;
        }
+       /* b_io is empty */
+       return 1;
+}
+
+static void writeback_inodes_wb(struct bdi_writeback *wb,
+                               struct writeback_control *wbc)
+{
+       int ret = 0;
 
-       unpin_sb_for_writeback(&pin_sb);
+       wbc->wb_start = jiffies; /* livelock avoidance */
+       spin_lock(&inode_lock);
+       if (!wbc->for_kupdate || list_empty(&wb->b_io))
+               queue_io(wb, wbc->older_than_this);
 
+       while (!list_empty(&wb->b_io)) {
+               struct inode *inode = list_entry(wb->b_io.prev,
+                                                struct inode, i_list);
+               struct super_block *sb = inode->i_sb;
+               enum sb_pin_state state;
+
+               if (wbc->sb && sb != wbc->sb) {
+                       /* super block given and doesn't
+                          match, skip this inode */
+                       redirty_tail(inode);
+                       continue;
+               }
+               state = pin_sb_for_writeback(wbc, sb);
+
+               if (state == SB_PIN_FAILED) {
+                       requeue_io(inode);
+                       continue;
+               }
+               ret = writeback_sb_inodes(sb, wb, wbc);
+
+               if (state == SB_PINNED)
+                       unpin_sb_for_writeback(sb);
+               if (ret)
+                       break;
+       }
        spin_unlock(&inode_lock);
        /* Leave any unwritten inodes on b_io */
 }
@@ -839,6 +859,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
        unsigned long expired;
        long nr_pages;
 
+       /*
+        * When set to zero, disable periodic writeback
+        */
+       if (!dirty_writeback_interval)
+               return 0;
+
        expired = wb->last_old_flush +
                        msecs_to_jiffies(dirty_writeback_interval * 10);
        if (time_before(jiffies, expired))
@@ -934,8 +960,17 @@ int bdi_writeback_task(struct bdi_writeback *wb)
                                break;
                }
 
-               wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-               schedule_timeout_interruptible(wait_jiffies);
+               if (dirty_writeback_interval) {
+                       wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
+                       schedule_timeout_interruptible(wait_jiffies);
+               } else {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       if (list_empty_careful(&wb->bdi->work_list) &&
+                           !kthread_should_stop())
+                               schedule();
+                       __set_current_state(TASK_RUNNING);
+               }
+
                try_to_freeze();
        }