Add btrfs_end_transaction_throttle to force writers to wait for pending commits
author Chris Mason <chris.mason@oracle.com>
Wed, 25 Jun 2008 20:01:31 +0000 (16:01 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:03 +0000 (11:04 -0400)
The existing throttle mechanism was often not sufficient to prevent
new writers from coming in and making a given transaction run forever.
This adds an explicit wait at the end of most operations so they will
allow the current transaction to close.

There is no wait inside file_write, inode updates, or cow filling, all of
which have different deadlock possibilities.

This is a temporary measure until better asynchronous commit support is
added.  This code leads to stalls as it waits for data=ordered
writeback, and it really needs to be fixed.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h

index 3cc480b..52569b5 100644 (file)
@@ -1672,24 +1672,6 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
 }
 
-void btrfs_throttle(struct btrfs_root *root)
-{
-#if 0
-       struct backing_dev_info *bdi;
-
-       bdi = &root->fs_info->bdi;
-       if (atomic_read(&root->fs_info->throttles) &&
-           bdi_write_congested(bdi)) {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
-               congestion_wait(WRITE, HZ/20);
-#else
-               blk_congestion_wait(WRITE, HZ/20);
-#endif
-
-       }
-#endif
-}
-
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 {
        /*
index 2bc64fe..deff6b4 100644 (file)
@@ -70,7 +70,6 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf);
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
 u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
 void btrfs_csum_final(u32 crc, char *result);
-void btrfs_throttle(struct btrfs_root *root);
 int btrfs_open_device(struct btrfs_device *dev);
 int btrfs_verify_block_csum(struct btrfs_root *root,
                            struct extent_buffer *buf);
index b7f8f92..ece221c 100644 (file)
@@ -934,7 +934,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
                if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
                        btrfs_btree_balance_dirty(root, 1);
-               btrfs_throttle(root);
                cond_resched();
        }
 out:
index cf27b59..bbba335 100644 (file)
@@ -855,10 +855,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
                btrfs_del_ordered_inode(inode, 1);
        }
 
-       btrfs_end_transaction(trans, root);
+       btrfs_end_transaction_throttle(trans, root);
 fail:
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
        return ret;
 }
 
@@ -889,10 +888,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        }
 
        nr = trans->blocks_used;
-       ret = btrfs_end_transaction(trans, root);
+       ret = btrfs_end_transaction_throttle(trans, root);
 fail:
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
 
        if (ret && !err)
                err = ret;
@@ -1871,14 +1869,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
        btrfs_update_inode_block_group(trans, dir);
 out_unlock:
        nr = trans->blocks_used;
-       btrfs_end_transaction(trans, root);
+       btrfs_end_transaction_throttle(trans, root);
 fail:
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
        }
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
        return err;
 }
 
@@ -1936,14 +1933,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        btrfs_update_inode_block_group(trans, dir);
 out_unlock:
        nr = trans->blocks_used;
-       btrfs_end_transaction(trans, root);
+       btrfs_end_transaction_throttle(trans, root);
 fail:
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
        }
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
        return err;
 }
 
@@ -1985,14 +1981,13 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                drop_inode = 1;
 
        nr = trans->blocks_used;
-       btrfs_end_transaction(trans, root);
+       btrfs_end_transaction_throttle(trans, root);
 fail:
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
        }
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
        return err;
 }
 
@@ -2055,13 +2050,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 out_fail:
        nr = trans->blocks_used;
-       btrfs_end_transaction(trans, root);
+       btrfs_end_transaction_throttle(trans, root);
 
 out_unlock:
        if (drop_on_err)
                iput(inode);
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
        return err;
 }
 
@@ -2587,10 +2581,9 @@ static void btrfs_truncate(struct inode *inode)
        btrfs_update_inode(trans, root, inode);
        nr = trans->blocks_used;
 
-       ret = btrfs_end_transaction(trans, root);
+       ret = btrfs_end_transaction_throttle(trans, root);
        BUG_ON(ret);
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
 }
 
 /*
@@ -2912,14 +2905,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 
 out_unlock:
        nr = trans->blocks_used;
-       btrfs_end_transaction(trans, root);
+       btrfs_end_transaction_throttle(trans, root);
 out_fail:
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
        }
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
        return err;
 }
 
index 6002eb6..026039a 100644 (file)
@@ -164,7 +164,6 @@ fail:
                ret = err;
 fail_commit:
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
        return ret;
 }
 
@@ -206,7 +205,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
 
 fail_unlock:
        btrfs_btree_balance_dirty(root, nr);
-       btrfs_throttle(root);
        return ret;
 }
 
index 5a1ee06..69ed5f8 100644 (file)
@@ -130,8 +130,27 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
        return h;
 }
 
-int btrfs_end_transaction(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root)
+static noinline int wait_for_commit(struct btrfs_root *root,
+                                   struct btrfs_transaction *commit)
+{
+       DEFINE_WAIT(wait);
+       mutex_lock(&root->fs_info->trans_mutex);
+       while(!commit->commit_done) {
+               prepare_to_wait(&commit->commit_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               if (commit->commit_done)
+                       break;
+               mutex_unlock(&root->fs_info->trans_mutex);
+               schedule();
+               mutex_lock(&root->fs_info->trans_mutex);
+       }
+       mutex_unlock(&root->fs_info->trans_mutex);
+       finish_wait(&commit->commit_wait, &wait);
+       return 0;
+}
+
+static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root, int throttle)
 {
        struct btrfs_transaction *cur_trans;
 
@@ -140,8 +159,18 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans,
        WARN_ON(cur_trans != trans->transaction);
        WARN_ON(cur_trans->num_writers < 1);
        cur_trans->num_writers--;
+
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
+
+       if (cur_trans->in_commit && throttle) {
+               int ret;
+               mutex_unlock(&root->fs_info->trans_mutex);
+               ret = wait_for_commit(root, cur_trans);
+               BUG_ON(ret);
+               mutex_lock(&root->fs_info->trans_mutex);
+       }
+
        put_transaction(cur_trans);
        mutex_unlock(&root->fs_info->trans_mutex);
        memset(trans, 0, sizeof(*trans));
@@ -149,6 +178,18 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+int btrfs_end_transaction(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root)
+{
+       return __btrfs_end_transaction(trans, root, 0);
+}
+
+int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root)
+{
+       return __btrfs_end_transaction(trans, root, 1);
+}
+
 
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root)
@@ -240,25 +281,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-static noinline int wait_for_commit(struct btrfs_root *root,
-                                   struct btrfs_transaction *commit)
-{
-       DEFINE_WAIT(wait);
-       mutex_lock(&root->fs_info->trans_mutex);
-       while(!commit->commit_done) {
-               prepare_to_wait(&commit->commit_wait, &wait,
-                               TASK_UNINTERRUPTIBLE);
-               if (commit->commit_done)
-                       break;
-               mutex_unlock(&root->fs_info->trans_mutex);
-               schedule();
-               mutex_lock(&root->fs_info->trans_mutex);
-       }
-       mutex_unlock(&root->fs_info->trans_mutex);
-       finish_wait(&commit->commit_wait, &wait);
-       return 0;
-}
-
 struct dirty_root {
        struct list_head list;
        struct btrfs_root *root;
@@ -680,6 +702,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             root->fs_info->btree_inode->i_mapping, GFP_NOFS);
 
        trans->transaction->in_commit = 1;
+printk("trans %Lu in commit\n", trans->transid);
        cur_trans = trans->transaction;
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
                prev_trans = list_entry(cur_trans->list.prev,
@@ -760,6 +783,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        kfree(pinned_copy);
 
        cur_trans->commit_done = 1;
+printk("trans %Lu done in commit\n", cur_trans->transid);
        root->fs_info->last_trans_committed = cur_trans->transid;
        wake_up(&cur_trans->commit_wait);
        put_transaction(cur_trans);
index c3172dd..52559b5 100644 (file)
@@ -101,4 +101,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
 int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
+int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root);
 #endif