Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 11 Oct 2009 18:23:13 +0000 (11:23 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 11 Oct 2009 18:23:13 +0000 (11:23 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: fix file clone ioctl for bookend extents
  Btrfs: fix uninit compiler warning in cow_file_range_nocow
  Btrfs: constify dentry_operations
  Btrfs: optimize back reference update during btrfs_drop_snapshot
  Btrfs: remove negative dentry when deleting subvolume
  Btrfs: optimize fsync for the single writer case
  Btrfs: async delalloc flushing under space pressure
  Btrfs: release delalloc reservations on extent item insertion
  Btrfs: delay clearing EXTENT_DELALLOC for compressed extents
  Btrfs: cleanup extent_clear_unlock_delalloc flags
  Btrfs: fix possible softlockup in the allocator
  Btrfs: fix deadlock on async thread startup

fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ordered-data.c
fs/btrfs/tree-log.c

diff --combined fs/btrfs/ctree.h
@@@ -691,14 -691,17 +691,17 @@@ struct btrfs_space_info 
  
        struct list_head list;
  
+       /* for controlling how we free up space for allocations */
+       wait_queue_head_t allocate_wait;
+       wait_queue_head_t flush_wait;
+       int allocating_chunk;
+       int flushing;
        /* for block groups in our same type */
        struct list_head block_groups;
        spinlock_t lock;
        struct rw_semaphore groups_sem;
        atomic_t caching_threads;
-       int allocating_chunk;
-       wait_queue_head_t wait;
  };
  
  /*
@@@ -907,6 -910,7 +910,7 @@@ struct btrfs_fs_info 
         * A third pool does submit_bio to avoid deadlocking with the other
         * two
         */
+       struct btrfs_workers generic_worker;
        struct btrfs_workers workers;
        struct btrfs_workers delalloc_workers;
        struct btrfs_workers endio_workers;
        struct btrfs_workers endio_meta_write_workers;
        struct btrfs_workers endio_write_workers;
        struct btrfs_workers submit_workers;
+       struct btrfs_workers enospc_workers;
        /*
         * fixup workers take dirty pages that didn't properly go through
         * the cow mechanism and make them safe to write.  It happens
@@@ -1005,6 -1010,8 +1010,8 @@@ struct btrfs_root 
        atomic_t log_commit[2];
        unsigned long log_transid;
        unsigned long log_batch;
+       pid_t log_start_pid;
+       bool log_multiple_pids;
  
        u64 objectid;
        u64 last_trans;
@@@ -2323,7 -2330,7 +2330,7 @@@ int btrfs_orphan_del(struct btrfs_trans
  void btrfs_orphan_cleanup(struct btrfs_root *root);
  int btrfs_cont_expand(struct inode *inode, loff_t size);
  int btrfs_invalidate_inodes(struct btrfs_root *root);
- extern struct dentry_operations btrfs_dentry_operations;
+ extern const struct dentry_operations btrfs_dentry_operations;
  
  /* ioctl.c */
  long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@@ -2335,7 -2342,7 +2342,7 @@@ int btrfs_sync_file(struct file *file, 
  int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                            int skip_pinned);
  int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
 -extern struct file_operations btrfs_file_operations;
 +extern const struct file_operations btrfs_file_operations;
  int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode,
                       u64 start, u64 end, u64 locked_end,
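
The new flush_wait/flushing pair in btrfs_space_info gates delalloc flushing so only one task does the work while everyone else sleeps; the kernel's wait_on_flush() additionally rechecks the space counters on each wakeup so waiters can bail out as soon as enough space frees up. A rough userspace analogue of the gate, with pthreads standing in for the kernel waitqueue (all names illustrative, not btrfs API):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct space_info {
	pthread_mutex_t lock;
	pthread_cond_t flush_wait;   /* plays the role of info->flush_wait */
	bool flushing;               /* plays the role of info->flushing   */
};

static void do_flush(struct space_info *info)
{
	/* stand-in for btrfs_start_delalloc_inodes() + ordered-extent wait */
	printf("flushing\n");
}

static void flush_once(struct space_info *info)
{
	pthread_mutex_lock(&info->lock);
	if (info->flushing) {
		/* someone else is already flushing: just wait for them */
		while (info->flushing)
			pthread_cond_wait(&info->flush_wait, &info->lock);
		pthread_mutex_unlock(&info->lock);
		return;
	}
	info->flushing = true;
	pthread_mutex_unlock(&info->lock);

	do_flush(info);

	pthread_mutex_lock(&info->lock);
	info->flushing = false;
	/* corresponds to wake_up(&info->flush_wait) in the patch */
	pthread_cond_broadcast(&info->flush_wait);
	pthread_mutex_unlock(&info->lock);
}

int main(void)
{
	struct space_info info = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.flush_wait = PTHREAD_COND_INITIALIZER,
		.flushing = false,
	};
	flush_once(&info);
	return 0;
}
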
diff --combined fs/btrfs/disk-io.c
@@@ -773,7 -773,7 +773,7 @@@ static void btree_invalidatepage(struc
        }
  }
  
 -static struct address_space_operations btree_aops = {
 +static const struct address_space_operations btree_aops = {
        .readpage       = btree_readpage,
        .writepage      = btree_writepage,
        .writepages     = btree_writepages,
@@@ -822,14 -822,16 +822,14 @@@ struct extent_buffer *btrfs_find_create
  
  int btrfs_write_tree_block(struct extent_buffer *buf)
  {
 -      return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start,
 -                                    buf->start + buf->len - 1, WB_SYNC_ALL);
 +      return filemap_fdatawrite_range(buf->first_page->mapping, buf->start,
 +                                      buf->start + buf->len - 1);
  }
  
  int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
  {
 -      return btrfs_wait_on_page_writeback_range(buf->first_page->mapping,
 -                                buf->start >> PAGE_CACHE_SHIFT,
 -                                (buf->start + buf->len - 1) >>
 -                                 PAGE_CACHE_SHIFT);
 +      return filemap_fdatawait_range(buf->first_page->mapping,
 +                                     buf->start, buf->start + buf->len - 1);
  }
  
  struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
@@@ -1371,7 -1373,6 +1371,7 @@@ static int setup_bdi(struct btrfs_fs_in
  {
        int err;
  
 +      bdi->name = "btrfs";
        bdi->capabilities = BDI_CAP_MAP_COPY;
        err = bdi_init(bdi);
        if (err)
@@@ -1640,7 -1641,6 +1640,7 @@@ struct btrfs_root *open_ctree(struct su
  
        sb->s_blocksize = 4096;
        sb->s_blocksize_bits = blksize_bits(4096);
 +      sb->s_bdi = &fs_info->bdi;
  
        fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
        fs_info->btree_inode->i_nlink = 1;
                err = -EINVAL;
                goto fail_iput;
        }
- printk("thread pool is %d\n", fs_info->thread_pool_size);
-       /*
-        * we need to start all the end_io workers up front because the
-        * queue work function gets called at interrupt time, and so it
-        * cannot dynamically grow.
-        */
+       btrfs_init_workers(&fs_info->generic_worker,
+                          "genwork", 1, NULL);
        btrfs_init_workers(&fs_info->workers, "worker",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
  
        btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
  
        btrfs_init_workers(&fs_info->submit_workers, "submit",
                           min_t(u64, fs_devices->num_devices,
-                          fs_info->thread_pool_size));
+                          fs_info->thread_pool_size),
+                          &fs_info->generic_worker);
+       btrfs_init_workers(&fs_info->enospc_workers, "enospc",
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
  
        /* a higher idle thresh on the submit workers makes it much more
         * likely that bios will be sent down in a sane order to the
        fs_info->delalloc_workers.idle_thresh = 2;
        fs_info->delalloc_workers.ordered = 1;
  
-       btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
+       btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1,
+                          &fs_info->generic_worker);
        btrfs_init_workers(&fs_info->endio_workers, "endio",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
        btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
        btrfs_init_workers(&fs_info->endio_meta_write_workers,
-                          "endio-meta-write", fs_info->thread_pool_size);
+                          "endio-meta-write", fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
        btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
  
        /*
         * endios are largely parallel and should have a very
        fs_info->endio_write_workers.idle_thresh = 2;
        fs_info->endio_meta_write_workers.idle_thresh = 2;
  
-       fs_info->endio_workers.atomic_worker_start = 1;
-       fs_info->endio_meta_workers.atomic_worker_start = 1;
-       fs_info->endio_write_workers.atomic_worker_start = 1;
-       fs_info->endio_meta_write_workers.atomic_worker_start = 1;
        btrfs_start_workers(&fs_info->workers, 1);
+       btrfs_start_workers(&fs_info->generic_worker, 1);
        btrfs_start_workers(&fs_info->submit_workers, 1);
        btrfs_start_workers(&fs_info->delalloc_workers, 1);
        btrfs_start_workers(&fs_info->fixup_workers, 1);
        btrfs_start_workers(&fs_info->endio_meta_workers, 1);
        btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
        btrfs_start_workers(&fs_info->endio_write_workers, 1);
+       btrfs_start_workers(&fs_info->enospc_workers, 1);
  
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@@ -2012,6 -2018,7 +2018,7 @@@ fail_chunk_root
        free_extent_buffer(chunk_root->node);
        free_extent_buffer(chunk_root->commit_root);
  fail_sb_buffer:
+       btrfs_stop_workers(&fs_info->generic_worker);
        btrfs_stop_workers(&fs_info->fixup_workers);
        btrfs_stop_workers(&fs_info->delalloc_workers);
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
+       btrfs_stop_workers(&fs_info->enospc_workers);
  fail_iput:
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        iput(fs_info->btree_inode);
@@@ -2437,6 -2445,7 +2445,7 @@@ int close_ctree(struct btrfs_root *root
  
        iput(fs_info->btree_inode);
  
+       btrfs_stop_workers(&fs_info->generic_worker);
        btrfs_stop_workers(&fs_info->fixup_workers);
        btrfs_stop_workers(&fs_info->delalloc_workers);
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
+       btrfs_stop_workers(&fs_info->enospc_workers);
  
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
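
Worth noting in the open_ctree() hunks above: every pool initialized now gets a matching btrfs_stop_workers() on both the fail_sb_buffer error path and in close_ctree(), including the new generic_worker and enospc_workers. The general shape of that cascading-label teardown idiom looks like the sketch below (open_ctree() collapses several labels into one; these names are made up for illustration):

#include <stdio.h>

struct workers { const char *name; };

static int start_workers(struct workers *w, const char *name)
{
	w->name = name;
	printf("started %s\n", name);
	return 0;   /* nonzero on failure */
}

static void stop_workers(struct workers *w)
{
	printf("stopped %s\n", w->name);
}

static int open_tree(void)
{
	struct workers generic, endio, enospc;
	int err;

	if ((err = start_workers(&generic, "genwork")))
		goto fail;
	if ((err = start_workers(&endio, "endio")))
		goto fail_generic;
	if ((err = start_workers(&enospc, "enospc")))
		goto fail_endio;
	return 0;

fail_endio:                     /* labels unwind in reverse start order */
	stop_workers(&endio);
fail_generic:
	stop_workers(&generic);
fail:
	return err;
}

int main(void)
{
	return open_tree();
}
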
diff --combined fs/btrfs/extent-tree.c
@@@ -1572,8 -1572,7 +1572,8 @@@ static int remove_extent_backref(struc
  static void btrfs_issue_discard(struct block_device *bdev,
                                u64 start, u64 len)
  {
 -      blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
 +      blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
 +                           DISCARD_FL_BARRIER);
  }
  #endif
  
@@@ -2824,14 -2823,17 +2824,17 @@@ int btrfs_unreserve_metadata_for_delall
                                           num_items);
  
        spin_lock(&meta_sinfo->lock);
-       if (BTRFS_I(inode)->delalloc_reserved_extents <=
-           BTRFS_I(inode)->delalloc_extents) {
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       if (BTRFS_I(inode)->reserved_extents <=
+           BTRFS_I(inode)->outstanding_extents) {
+               spin_unlock(&BTRFS_I(inode)->accounting_lock);
                spin_unlock(&meta_sinfo->lock);
                return 0;
        }
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
  
-       BTRFS_I(inode)->delalloc_reserved_extents--;
-       BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
+       BTRFS_I(inode)->reserved_extents--;
+       BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
  
        if (meta_sinfo->bytes_delalloc < num_bytes) {
                bug = true;
@@@ -2864,6 -2866,107 +2867,107 @@@ static void check_force_delalloc(struc
                meta_sinfo->force_delalloc = 0;
  }
  
+ struct async_flush {
+       struct btrfs_root *root;
+       struct btrfs_space_info *info;
+       struct btrfs_work work;
+ };
+ static noinline void flush_delalloc_async(struct btrfs_work *work)
+ {
+       struct async_flush *async;
+       struct btrfs_root *root;
+       struct btrfs_space_info *info;
+       async = container_of(work, struct async_flush, work);
+       root = async->root;
+       info = async->info;
+       btrfs_start_delalloc_inodes(root);
+       wake_up(&info->flush_wait);
+       btrfs_wait_ordered_extents(root, 0);
+       spin_lock(&info->lock);
+       info->flushing = 0;
+       spin_unlock(&info->lock);
+       wake_up(&info->flush_wait);
+       kfree(async);
+ }
+ static void wait_on_flush(struct btrfs_space_info *info)
+ {
+       DEFINE_WAIT(wait);
+       u64 used;
+       while (1) {
+               prepare_to_wait(&info->flush_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               spin_lock(&info->lock);
+               if (!info->flushing) {
+                       spin_unlock(&info->lock);
+                       break;
+               }
+               used = info->bytes_used + info->bytes_reserved +
+                       info->bytes_pinned + info->bytes_readonly +
+                       info->bytes_super + info->bytes_root +
+                       info->bytes_may_use + info->bytes_delalloc;
+               if (used < info->total_bytes) {
+                       spin_unlock(&info->lock);
+                       break;
+               }
+               spin_unlock(&info->lock);
+               schedule();
+       }
+       finish_wait(&info->flush_wait, &wait);
+ }
+ static void flush_delalloc(struct btrfs_root *root,
+                                struct btrfs_space_info *info)
+ {
+       struct async_flush *async;
+       bool wait = false;
+       spin_lock(&info->lock);
+       if (!info->flushing) {
+               info->flushing = 1;
+               init_waitqueue_head(&info->flush_wait);
+       } else {
+               wait = true;
+       }
+       spin_unlock(&info->lock);
+       if (wait) {
+               wait_on_flush(info);
+               return;
+       }
+       async = kzalloc(sizeof(*async), GFP_NOFS);
+       if (!async)
+               goto flush;
+       async->root = root;
+       async->info = info;
+       async->work.func = flush_delalloc_async;
+       btrfs_queue_worker(&root->fs_info->enospc_workers,
+                          &async->work);
+       wait_on_flush(info);
+       return;
+ flush:
+       btrfs_start_delalloc_inodes(root);
+       btrfs_wait_ordered_extents(root, 0);
+       spin_lock(&info->lock);
+       info->flushing = 0;
+       spin_unlock(&info->lock);
+       wake_up(&info->flush_wait);
+ }
  static int maybe_allocate_chunk(struct btrfs_root *root,
                                 struct btrfs_space_info *info)
  {
        if (!info->allocating_chunk) {
                info->force_alloc = 1;
                info->allocating_chunk = 1;
-               init_waitqueue_head(&info->wait);
+               init_waitqueue_head(&info->allocate_wait);
        } else {
                wait = true;
        }
        spin_unlock(&info->lock);
  
        if (wait) {
-               wait_event(info->wait,
+               wait_event(info->allocate_wait,
                           !info->allocating_chunk);
                return 1;
        }
@@@ -2923,7 -3026,7 +3027,7 @@@ out
        spin_lock(&info->lock);
        info->allocating_chunk = 0;
        spin_unlock(&info->lock);
-       wake_up(&info->wait);
+       wake_up(&info->allocate_wait);
  
        if (ret)
                return 0;
@@@ -2981,21 -3084,20 +3085,20 @@@ again
                        filemap_flush(inode->i_mapping);
                        goto again;
                } else if (flushed == 3) {
-                       btrfs_start_delalloc_inodes(root);
-                       btrfs_wait_ordered_extents(root, 0);
+                       flush_delalloc(root, meta_sinfo);
                        goto again;
                }
                spin_lock(&meta_sinfo->lock);
                meta_sinfo->bytes_delalloc -= num_bytes;
                spin_unlock(&meta_sinfo->lock);
                printk(KERN_ERR "enospc, has %d, reserved %d\n",
-                      BTRFS_I(inode)->delalloc_extents,
-                      BTRFS_I(inode)->delalloc_reserved_extents);
+                      BTRFS_I(inode)->outstanding_extents,
+                      BTRFS_I(inode)->reserved_extents);
                dump_space_info(meta_sinfo, 0, 0);
                return -ENOSPC;
        }
  
-       BTRFS_I(inode)->delalloc_reserved_extents++;
+       BTRFS_I(inode)->reserved_extents++;
        check_force_delalloc(meta_sinfo);
        spin_unlock(&meta_sinfo->lock);
  
@@@ -3094,8 -3196,7 +3197,7 @@@ again
                }
  
                if (retries == 2) {
-                       btrfs_start_delalloc_inodes(root);
-                       btrfs_wait_ordered_extents(root, 0);
+                       flush_delalloc(root, meta_sinfo);
                        goto again;
                }
                spin_lock(&meta_sinfo->lock);
@@@ -4029,6 -4130,7 +4131,7 @@@ static noinline int find_free_extent(st
        int loop = 0;
        bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
+       bool failed_alloc = false;
  
        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@@ -4233,14 -4335,23 +4336,23 @@@ refill_cluster
  
                offset = btrfs_find_space_for_alloc(block_group, search_start,
                                                    num_bytes, empty_size);
-               if (!offset && (cached || (!cached &&
-                                          loop == LOOP_CACHING_NOWAIT))) {
-                       goto loop;
-               } else if (!offset && (!cached &&
-                                      loop > LOOP_CACHING_NOWAIT)) {
+               /*
+                * If we didn't find a chunk, and we haven't failed on this
+                * block group before, and this block group is in the middle of
+                * caching and we are ok with waiting, then go ahead and wait
+                * for progress to be made, and set failed_alloc to true.
+                *
+                * If failed_alloc is true then we've already waited on this
+                * block group once and should move on to the next block group.
+                */
+               if (!offset && !failed_alloc && !cached &&
+                   loop > LOOP_CACHING_NOWAIT) {
                        wait_block_group_cache_progress(block_group,
-                                       num_bytes + empty_size);
+                                               num_bytes + empty_size);
+                       failed_alloc = true;
                        goto have_block_group;
+               } else if (!offset) {
+                       goto loop;
                }
  checks:
                search_start = stripe_align(root, offset);
                break;
  loop:
                failed_cluster_refill = false;
+               failed_alloc = false;
                btrfs_put_block_group(block_group);
        }
        up_read(&space_info->groups_sem);
@@@ -4799,6 -4911,7 +4912,7 @@@ static noinline void reada_walk_down(st
        u64 bytenr;
        u64 generation;
        u64 refs;
+       u64 flags;
        u64 last = 0;
        u32 nritems;
        u32 blocksize;
                    generation <= root->root_key.offset)
                        continue;
  
+               /* We don't lock the tree block, it's OK to be racy here */
+               ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+                                              &refs, &flags);
+               BUG_ON(ret);
+               BUG_ON(refs == 0);
                if (wc->stage == DROP_REFERENCE) {
                        if (refs == 1)
                                goto reada;
  
+                       if (wc->level == 1 &&
+                           (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+                               continue;
                        if (!wc->update_ref ||
                            generation <= root->root_key.offset)
                                continue;
                                                  &wc->update_progress);
                        if (ret < 0)
                                continue;
+               } else {
+                       if (wc->level == 1 &&
+                           (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+                               continue;
                }
  reada:
                ret = readahead_tree_block(root, bytenr, blocksize,
  static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct btrfs_path *path,
-                                  struct walk_control *wc)
+                                  struct walk_control *wc, int lookup_info)
  {
        int level = wc->level;
        struct extent_buffer *eb = path->nodes[level];
         * when reference count of tree block is 1, it won't increase
         * again. once full backref flag is set, we never clear it.
         */
-       if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
-           (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+       if (lookup_info &&
+           ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+            (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
                BUG_ON(!path->locks[level]);
                ret = btrfs_lookup_extent_info(trans, root,
                                               eb->start, eb->len,
  static noinline int do_walk_down(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
-                                struct walk_control *wc)
+                                struct walk_control *wc, int *lookup_info)
  {
        u64 bytenr;
        u64 generation;
         * for the subtree
         */
        if (wc->stage == UPDATE_BACKREF &&
-           generation <= root->root_key.offset)
+           generation <= root->root_key.offset) {
+               *lookup_info = 1;
                return 1;
+       }
  
        bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
        blocksize = btrfs_level_size(root, level - 1);
        btrfs_tree_lock(next);
        btrfs_set_lock_blocking(next);
  
-       if (wc->stage == DROP_REFERENCE) {
-               ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-                                              &wc->refs[level - 1],
-                                              &wc->flags[level - 1]);
-               BUG_ON(ret);
-               BUG_ON(wc->refs[level - 1] == 0);
+       ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+                                      &wc->refs[level - 1],
+                                      &wc->flags[level - 1]);
+       BUG_ON(ret);
+       BUG_ON(wc->refs[level - 1] == 0);
+       *lookup_info = 0;
  
+       if (wc->stage == DROP_REFERENCE) {
                if (wc->refs[level - 1] > 1) {
+                       if (level == 1 &&
+                           (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+                               goto skip;
                        if (!wc->update_ref ||
                            generation <= root->root_key.offset)
                                goto skip;
                        wc->stage = UPDATE_BACKREF;
                        wc->shared_level = level - 1;
                }
+       } else {
+               if (level == 1 &&
+                   (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+                       goto skip;
        }
  
        if (!btrfs_buffer_uptodate(next, generation)) {
                btrfs_tree_unlock(next);
                free_extent_buffer(next);
                next = NULL;
+               *lookup_info = 1;
        }
  
        if (!next) {
  skip:
        wc->refs[level - 1] = 0;
        wc->flags[level - 1] = 0;
+       if (wc->stage == DROP_REFERENCE) {
+               if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+                       parent = path->nodes[level]->start;
+               } else {
+                       BUG_ON(root->root_key.objectid !=
+                              btrfs_header_owner(path->nodes[level]));
+                       parent = 0;
+               }
  
-       if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
-               parent = path->nodes[level]->start;
-       } else {
-               BUG_ON(root->root_key.objectid !=
-                      btrfs_header_owner(path->nodes[level]));
-               parent = 0;
+               ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+                                       root->root_key.objectid, level - 1, 0);
+               BUG_ON(ret);
        }
-       ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-                               root->root_key.objectid, level - 1, 0);
-       BUG_ON(ret);
        btrfs_tree_unlock(next);
        free_extent_buffer(next);
+       *lookup_info = 1;
        return 1;
  }
  
@@@ -5164,6 -5299,7 +5300,7 @@@ static noinline int walk_down_tree(stru
                                   struct walk_control *wc)
  {
        int level = wc->level;
+       int lookup_info = 1;
        int ret;
  
        while (level >= 0) {
                    btrfs_header_nritems(path->nodes[level]))
                        break;
  
-               ret = walk_down_proc(trans, root, path, wc);
+               ret = walk_down_proc(trans, root, path, wc, lookup_info);
                if (ret > 0)
                        break;
  
                if (level == 0)
                        break;
  
-               ret = do_walk_down(trans, root, path, wc);
+               ret = do_walk_down(trans, root, path, wc, &lookup_info);
                if (ret > 0) {
                        path->slots[level]++;
                        continue;
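
The async flush added above is the standard embedded-work pattern: struct async_flush embeds a btrfs_work, the worker pool only ever sees the work pointer, and flush_delalloc_async() recovers its enclosing context with container_of(). A self-contained sketch of that pattern (the queueing is faked with a direct call, and the names are illustrative rather than the btrfs_workers API):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work {
	void (*func)(struct work *work);
};

struct async_flush {
	int space_info_id;      /* stand-in for the root/info pointers */
	struct work work;       /* embedded; queued by address          */
};

static void flush_async(struct work *work)
{
	/* recover the enclosing async_flush from the embedded work */
	struct async_flush *async =
		container_of(work, struct async_flush, work);

	printf("flushing space info %d\n", async->space_info_id);
	free(async);
}

int main(void)
{
	struct async_flush *async = calloc(1, sizeof(*async));

	if (!async)
		return 1;
	async->space_info_id = 42;
	async->work.func = flush_async;
	async->work.func(&async->work);   /* a worker thread would do this */
	return 0;
}
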
diff --combined fs/btrfs/file.c
@@@ -878,7 -878,8 +878,8 @@@ again
                        btrfs_put_ordered_extent(ordered);
  
                clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
-                                 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
+                                 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
+                                 EXTENT_DO_ACCOUNTING,
                                  GFP_NOFS);
                unlock_extent(&BTRFS_I(inode)->io_tree,
                              start_pos, last_pos - 1, GFP_NOFS);
@@@ -1022,8 -1023,9 +1023,8 @@@ static ssize_t btrfs_file_write(struct 
                }
  
                if (will_write) {
 -                      btrfs_fdatawrite_range(inode->i_mapping, pos,
 -                                             pos + write_bytes - 1,
 -                                             WB_SYNC_ALL);
 +                      filemap_fdatawrite_range(inode->i_mapping, pos,
 +                                               pos + write_bytes - 1);
                } else {
                        balance_dirty_pages_ratelimited_nr(inode->i_mapping,
                                                           num_pages);
@@@ -1201,7 -1203,7 +1202,7 @@@ out
        return ret > 0 ? EIO : ret;
  }
  
 -static struct vm_operations_struct btrfs_file_vm_ops = {
 +static const struct vm_operations_struct btrfs_file_vm_ops = {
        .fault          = filemap_fault,
        .page_mkwrite   = btrfs_page_mkwrite,
  };
@@@ -1213,7 -1215,7 +1214,7 @@@ static int btrfs_file_mmap(struct file  
        return 0;
  }
  
 -struct file_operations btrfs_file_operations = {
 +const struct file_operations btrfs_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
diff --combined fs/btrfs/inode.c
@@@ -55,14 -55,14 +55,14 @@@ struct btrfs_iget_args 
        struct btrfs_root *root;
  };
  
 -static struct inode_operations btrfs_dir_inode_operations;
 -static struct inode_operations btrfs_symlink_inode_operations;
 -static struct inode_operations btrfs_dir_ro_inode_operations;
 -static struct inode_operations btrfs_special_inode_operations;
 -static struct inode_operations btrfs_file_inode_operations;
 -static struct address_space_operations btrfs_aops;
 -static struct address_space_operations btrfs_symlink_aops;
 -static struct file_operations btrfs_dir_file_operations;
 +static const struct inode_operations btrfs_dir_inode_operations;
 +static const struct inode_operations btrfs_symlink_inode_operations;
 +static const struct inode_operations btrfs_dir_ro_inode_operations;
 +static const struct inode_operations btrfs_special_inode_operations;
 +static const struct inode_operations btrfs_file_inode_operations;
 +static const struct address_space_operations btrfs_aops;
 +static const struct address_space_operations btrfs_symlink_aops;
 +static const struct file_operations btrfs_dir_file_operations;
  static struct extent_io_ops btrfs_extent_io_ops;
  
  static struct kmem_cache *btrfs_inode_cachep;
@@@ -424,9 -424,12 +424,12 @@@ again
                         * and free up our temp pages.
                         */
                        extent_clear_unlock_delalloc(inode,
-                                                    &BTRFS_I(inode)->io_tree,
-                                                    start, end, NULL, 1, 0,
-                                                    0, 1, 1, 1, 0);
+                            &BTRFS_I(inode)->io_tree,
+                            start, end, NULL,
+                            EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
+                            EXTENT_CLEAR_DELALLOC |
+                            EXTENT_CLEAR_ACCOUNTING |
+                            EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
                        ret = 0;
                        goto free_pages_out;
                }
@@@ -637,11 -640,14 +640,14 @@@ static noinline int submit_compressed_e
                 * clear dirty, set writeback and unlock the pages.
                 */
                extent_clear_unlock_delalloc(inode,
-                                            &BTRFS_I(inode)->io_tree,
-                                            async_extent->start,
-                                            async_extent->start +
-                                            async_extent->ram_size - 1,
-                                            NULL, 1, 1, 0, 1, 1, 0, 0);
+                               &BTRFS_I(inode)->io_tree,
+                               async_extent->start,
+                               async_extent->start +
+                               async_extent->ram_size - 1,
+                               NULL, EXTENT_CLEAR_UNLOCK_PAGE |
+                               EXTENT_CLEAR_UNLOCK |
+                               EXTENT_CLEAR_DELALLOC |
+                               EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
  
                ret = btrfs_submit_compressed_write(inode,
                                    async_extent->start,
@@@ -712,9 -718,15 +718,15 @@@ static noinline int cow_file_range(stru
                                            start, end, 0, NULL);
                if (ret == 0) {
                        extent_clear_unlock_delalloc(inode,
-                                                    &BTRFS_I(inode)->io_tree,
-                                                    start, end, NULL, 1, 1,
-                                                    1, 1, 1, 1, 0);
+                                    &BTRFS_I(inode)->io_tree,
+                                    start, end, NULL,
+                                    EXTENT_CLEAR_UNLOCK_PAGE |
+                                    EXTENT_CLEAR_UNLOCK |
+                                    EXTENT_CLEAR_DELALLOC |
+                                    EXTENT_CLEAR_ACCOUNTING |
+                                    EXTENT_CLEAR_DIRTY |
+                                    EXTENT_SET_WRITEBACK |
+                                    EXTENT_END_WRITEBACK);
                        *nr_written = *nr_written +
                             (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
                        *page_started = 1;
        btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
  
        while (disk_num_bytes > 0) {
+               unsigned long op;
                cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent);
                ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
                                           root->sectorsize, 0, alloc_hint,
                 * Do set the Private2 bit so we know this page was properly
                 * setup for writepage
                 */
+               op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0;
+               op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
+                       EXTENT_SET_PRIVATE2;
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                             start, start + ram_size - 1,
-                                            locked_page, unlock, 1,
-                                            1, 0, 0, 0, 1);
+                                            locked_page, op);
                disk_num_bytes -= cur_alloc_size;
                num_bytes -= cur_alloc_size;
                alloc_hint = ins.objectid + ins.offset;
@@@ -864,8 -881,8 +881,8 @@@ static int cow_file_range_async(struct 
        u64 cur_end;
        int limit = 10 * 1024 * 1024;
  
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
-                        EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
+                        1, 0, NULL, GFP_NOFS);
        while (start < end) {
                async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
                async_cow->inode = inode;
@@@ -1006,6 -1023,7 +1023,7 @@@ next_slot
  
                if (found_key.offset > cur_offset) {
                        extent_end = found_key.offset;
+                       extent_type = 0;
                        goto out_check;
                }
  
@@@ -1112,8 -1130,10 +1130,10 @@@ out_check
                BUG_ON(ret);
  
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                       cur_offset, cur_offset + num_bytes - 1,
-                                       locked_page, 1, 1, 1, 0, 0, 0, 1);
+                               cur_offset, cur_offset + num_bytes - 1,
+                               locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
+                               EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
+                               EXTENT_SET_PRIVATE2);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
@@@ -1178,15 -1198,17 +1198,17 @@@ static int btrfs_split_extent_hook(stru
                                        root->fs_info->max_extent);
  
                /*
-                * if we break a large extent up then leave delalloc_extents be,
-                * since we've already accounted for the large extent.
+                * if we break a large extent up then leave outstanding_extents
+                * be, since we've already accounted for the large extent.
                 */
                if (div64_u64(new_size + root->fs_info->max_extent - 1,
                              root->fs_info->max_extent) < num_extents)
                        return 0;
        }
  
-       BTRFS_I(inode)->delalloc_extents++;
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       BTRFS_I(inode)->outstanding_extents++;
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
  
        return 0;
  }
@@@ -1217,7 -1239,9 +1239,9 @@@ static int btrfs_merge_extent_hook(stru
  
        /* we're not bigger than the max, unreserve the space and go */
        if (new_size <= root->fs_info->max_extent) {
-               BTRFS_I(inode)->delalloc_extents--;
+               spin_lock(&BTRFS_I(inode)->accounting_lock);
+               BTRFS_I(inode)->outstanding_extents--;
+               spin_unlock(&BTRFS_I(inode)->accounting_lock);
                return 0;
        }
  
                      root->fs_info->max_extent) > num_extents)
                return 0;
  
-       BTRFS_I(inode)->delalloc_extents--;
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       BTRFS_I(inode)->outstanding_extents--;
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
  
        return 0;
  }
@@@ -1253,7 -1279,9 +1279,9 @@@ static int btrfs_set_bit_hook(struct in
        if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
  
-               BTRFS_I(inode)->delalloc_extents++;
+               spin_lock(&BTRFS_I(inode)->accounting_lock);
+               BTRFS_I(inode)->outstanding_extents++;
+               spin_unlock(&BTRFS_I(inode)->accounting_lock);
                btrfs_delalloc_reserve_space(root, inode, end - start + 1);
                spin_lock(&root->fs_info->delalloc_lock);
                BTRFS_I(inode)->delalloc_bytes += end - start + 1;
@@@ -1281,8 -1309,12 +1309,12 @@@ static int btrfs_clear_bit_hook(struct 
        if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
  
-               BTRFS_I(inode)->delalloc_extents--;
-               btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+               if (bits & EXTENT_DO_ACCOUNTING) {
+                       spin_lock(&BTRFS_I(inode)->accounting_lock);
+                       BTRFS_I(inode)->outstanding_extents--;
+                       spin_unlock(&BTRFS_I(inode)->accounting_lock);
+                       btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+               }
  
                spin_lock(&root->fs_info->delalloc_lock);
                if (state->end - state->start + 1 >
@@@ -3598,12 -3630,14 +3630,14 @@@ static int btrfs_dentry_delete(struct d
  {
        struct btrfs_root *root;
  
-       if (!dentry->d_inode)
-               return 0;
+       if (!dentry->d_inode && !IS_ROOT(dentry))
+               dentry = dentry->d_parent;
  
-       root = BTRFS_I(dentry->d_inode)->root;
-       if (btrfs_root_refs(&root->root_item) == 0)
-               return 1;
+       if (dentry->d_inode) {
+               root = BTRFS_I(dentry->d_inode)->root;
+               if (btrfs_root_refs(&root->root_item) == 0)
+                       return 1;
+       }
        return 0;
  }
  
@@@ -4808,7 -4842,8 +4842,8 @@@ static void btrfs_invalidatepage(struc
                 */
                clear_extent_bit(tree, page_start, page_end,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
+                                EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
+                                NULL, GFP_NOFS);
                /*
                 * whoever cleared the private bit is responsible
                 * for the finish_ordered_io
                lock_extent(tree, page_start, page_end, GFP_NOFS);
        }
        clear_extent_bit(tree, page_start, page_end,
-                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
-                1, 1, NULL, GFP_NOFS);
+                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
+                EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS);
        __btrfs_releasepage(page, GFP_NOFS);
  
        ClearPageChecked(page);
@@@ -4917,7 -4952,8 +4952,8 @@@ again
         * prepare_pages in the normal write path.
         */
        clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
-                         EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS);
+                         EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
+                         GFP_NOFS);
  
        ret = btrfs_set_extent_delalloc(inode, page_start, page_end);
        if (ret) {
@@@ -5065,8 -5101,9 +5101,9 @@@ struct inode *btrfs_alloc_inode(struct 
                return NULL;
        ei->last_trans = 0;
        ei->logged_trans = 0;
-       ei->delalloc_extents = 0;
-       ei->delalloc_reserved_extents = 0;
+       ei->outstanding_extents = 0;
+       ei->reserved_extents = 0;
+       spin_lock_init(&ei->accounting_lock);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
        INIT_LIST_HEAD(&ei->i_orphan);
        INIT_LIST_HEAD(&ei->ordered_operations);
@@@ -5692,7 -5729,7 +5729,7 @@@ static int btrfs_permission(struct inod
        return generic_permission(inode, mask, btrfs_check_acl);
  }
  
 -static struct inode_operations btrfs_dir_inode_operations = {
 +static const struct inode_operations btrfs_dir_inode_operations = {
        .getattr        = btrfs_getattr,
        .lookup         = btrfs_lookup,
        .create         = btrfs_create,
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
  };
 -static struct inode_operations btrfs_dir_ro_inode_operations = {
 +static const struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
        .permission     = btrfs_permission,
  };
  
 -static struct file_operations btrfs_dir_file_operations = {
 +static const struct file_operations btrfs_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .readdir        = btrfs_real_readdir,
@@@ -5753,7 -5790,7 +5790,7 @@@ static struct extent_io_ops btrfs_exten
   *
   * For now we're avoiding this by dropping bmap.
   */
 -static struct address_space_operations btrfs_aops = {
 +static const struct address_space_operations btrfs_aops = {
        .readpage       = btrfs_readpage,
        .writepage      = btrfs_writepage,
        .writepages     = btrfs_writepages,
        .invalidatepage = btrfs_invalidatepage,
        .releasepage    = btrfs_releasepage,
        .set_page_dirty = btrfs_set_page_dirty,
 +      .error_remove_page = generic_error_remove_page,
  };
  
 -static struct address_space_operations btrfs_symlink_aops = {
 +static const struct address_space_operations btrfs_symlink_aops = {
        .readpage       = btrfs_readpage,
        .writepage      = btrfs_writepage,
        .invalidatepage = btrfs_invalidatepage,
        .releasepage    = btrfs_releasepage,
  };
  
 -static struct inode_operations btrfs_file_inode_operations = {
 +static const struct inode_operations btrfs_file_inode_operations = {
        .truncate       = btrfs_truncate,
        .getattr        = btrfs_getattr,
        .setattr        = btrfs_setattr,
        .fallocate      = btrfs_fallocate,
        .fiemap         = btrfs_fiemap,
  };
 -static struct inode_operations btrfs_special_inode_operations = {
 +static const struct inode_operations btrfs_special_inode_operations = {
        .getattr        = btrfs_getattr,
        .setattr        = btrfs_setattr,
        .permission     = btrfs_permission,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
  };
 -static struct inode_operations btrfs_symlink_inode_operations = {
 +static const struct inode_operations btrfs_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
        .removexattr    = btrfs_removexattr,
  };
  
- struct dentry_operations btrfs_dentry_operations = {
const struct dentry_operations btrfs_dentry_operations = {
        .d_delete       = btrfs_dentry_delete,
  };
diff --combined fs/btrfs/ordered-data.c
@@@ -306,6 -306,12 +306,12 @@@ int btrfs_remove_ordered_extent(struct 
        tree->last = NULL;
        set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
  
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       BTRFS_I(inode)->outstanding_extents--;
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
+       btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
+                                             inode, 1);
        spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
        list_del_init(&entry->root_extent_list);
  
@@@ -458,7 -464,7 +464,7 @@@ void btrfs_start_ordered_extent(struct 
         * start IO on any dirty ones so the wait doesn't stall waiting
         * for pdflush to find them
         */
 -      btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL);
 +      filemap_fdatawrite_range(inode->i_mapping, start, end);
        if (wait) {
                wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
                                                 &entry->flags));
@@@ -488,15 -494,17 +494,15 @@@ again
        /* start IO across the range first to instantiate any delalloc
         * extents
         */
 -      btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
 +      filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
  
        /* The compression code will leave pages locked but return from
         * writepage without setting the page writeback.  Starting again
         * with WB_SYNC_ALL will end up waiting for the IO to actually start.
         */
 -      btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
 +      filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
  
 -      btrfs_wait_on_page_writeback_range(inode->i_mapping,
 -                                         start >> PAGE_CACHE_SHIFT,
 -                                         orig_end >> PAGE_CACHE_SHIFT);
 +      filemap_fdatawait_range(inode->i_mapping, start, orig_end);
  
        end = orig_end;
        found = 0;
@@@ -714,6 -722,90 +720,6 @@@ out
  }
  
  
 -/**
 - * taken from mm/filemap.c because it isn't exported
 - *
 - * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
 - * @mapping:  address space structure to write
 - * @start:    offset in bytes where the range starts
 - * @end:      offset in bytes where the range ends (inclusive)
 - * @sync_mode:        enable synchronous operation
 - *
 - * Start writeback against all of a mapping's dirty pages that lie
 - * within the byte offsets <start, end> inclusive.
 - *
 - * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
 - * opposed to a regular memory cleansing writeback.  The difference between
 - * these two operations is that if a dirty page/buffer is encountered, it must
 - * be waited upon, and not just skipped over.
 - */
 -int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
 -                         loff_t end, int sync_mode)
 -{
 -      struct writeback_control wbc = {
 -              .sync_mode = sync_mode,
 -              .nr_to_write = mapping->nrpages * 2,
 -              .range_start = start,
 -              .range_end = end,
 -              .for_writepages = 1,
 -      };
 -      return btrfs_writepages(mapping, &wbc);
 -}
 -
 -/**
 - * taken from mm/filemap.c because it isn't exported
 - *
 - * wait_on_page_writeback_range - wait for writeback to complete
 - * @mapping:  target address_space
 - * @start:    beginning page index
 - * @end:      ending page index
 - *
 - * Wait for writeback to complete against pages indexed by start->end
 - * inclusive
 - */
 -int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
 -                                     pgoff_t start, pgoff_t end)
 -{
 -      struct pagevec pvec;
 -      int nr_pages;
 -      int ret = 0;
 -      pgoff_t index;
 -
 -      if (end < start)
 -              return 0;
 -
 -      pagevec_init(&pvec, 0);
 -      index = start;
 -      while ((index <= end) &&
 -                      (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
 -                      PAGECACHE_TAG_WRITEBACK,
 -                      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
 -              unsigned i;
 -
 -              for (i = 0; i < nr_pages; i++) {
 -                      struct page *page = pvec.pages[i];
 -
 -                      /* until radix tree lookup accepts end_index */
 -                      if (page->index > end)
 -                              continue;
 -
 -                      wait_on_page_writeback(page);
 -                      if (PageError(page))
 -                              ret = -EIO;
 -              }
 -              pagevec_release(&pvec);
 -              cond_resched();
 -      }
 -
 -      /* Check for outstanding write errors */
 -      if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
 -              ret = -ENOSPC;
 -      if (test_and_clear_bit(AS_EIO, &mapping->flags))
 -              ret = -EIO;
 -
 -      return ret;
 -}
 -
  /*
   * add a given inode to the list of inodes that must be fully on
   * disk before a transaction commit finishes.
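
With the copied mm/filemap.c helpers removed above, the byte-range write-and-wait is done with the now-exported filemap_fdatawrite_range()/filemap_fdatawait_range(). If a userspace analogue helps, the closest is sync_file_range(2), which also starts and waits for writeback on a byte range (the semantics are not identical; this is only an illustration):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/sync-demo", O_RDWR | O_CREAT, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* kick off writeback for bytes [0, 4096), then wait for it */
	if (sync_file_range(fd, 0, 4096, SYNC_FILE_RANGE_WRITE) ||
	    sync_file_range(fd, 0, 4096, SYNC_FILE_RANGE_WAIT_AFTER))
		perror("sync_file_range");
	close(fd);
	return 0;
}
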
diff --combined fs/btrfs/tree-log.c
@@@ -137,11 -137,20 +137,20 @@@ static int start_log_trans(struct btrfs
  
        mutex_lock(&root->log_mutex);
        if (root->log_root) {
+               if (!root->log_start_pid) {
+                       root->log_start_pid = current->pid;
+                       root->log_multiple_pids = false;
+               } else if (root->log_start_pid != current->pid) {
+                       root->log_multiple_pids = true;
+               }
                root->log_batch++;
                atomic_inc(&root->log_writers);
                mutex_unlock(&root->log_mutex);
                return 0;
        }
+       root->log_multiple_pids = false;
+       root->log_start_pid = current->pid;
        mutex_lock(&root->fs_info->tree_log_mutex);
        if (!root->fs_info->log_root_tree) {
                ret = btrfs_init_log_root_tree(trans, root->fs_info);
@@@ -1985,7 -1994,7 +1994,7 @@@ int btrfs_sync_log(struct btrfs_trans_h
        if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
                wait_log_commit(trans, root, root->log_transid - 1);
  
-       while (1) {
+       while (root->log_multiple_pids) {
                unsigned long batch = root->log_batch;
                mutex_unlock(&root->log_mutex);
                schedule_timeout_uninterruptible(1);
        root->log_batch = 0;
        root->log_transid++;
        log->log_transid = root->log_transid;
+       root->log_start_pid = 0;
        smp_mb();
        /*
         * log tree has been flushed to disk, new modifications of
@@@ -2605,7 -2615,7 +2615,7 @@@ static noinline int copy_items(struct b
                                                                extent);
                                cs = btrfs_file_extent_offset(src, extent);
                                cl = btrfs_file_extent_num_bytes(src,
 -                                                              extent);;
 +                                                              extent);
                                if (btrfs_file_extent_compression(src,
                                                                  extent)) {
                                        cs = 0;
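
The tree-log changes implement "optimize fsync for the single writer case": the first pid to start a log transaction is recorded in log_start_pid, any different pid flips log_multiple_pids, and btrfs_sync_log() only runs its batch-and-sleep loop when that flag is set. The detection reduces to the following (an illustrative userspace reduction, not the kernel code):

#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>

struct log_root {
	pid_t log_start_pid;
	bool log_multiple_pids;
};

static void start_log_trans(struct log_root *root, pid_t pid)
{
	if (!root->log_start_pid) {
		root->log_start_pid = pid;        /* first writer this cycle */
		root->log_multiple_pids = false;
	} else if (root->log_start_pid != pid) {
		root->log_multiple_pids = true;   /* a second writer joined  */
	}
}

int main(void)
{
	struct log_root root = { 0 };

	start_log_trans(&root, 100);
	start_log_trans(&root, 100);   /* same writer: stays single-pid */
	printf("multiple: %d\n", root.log_multiple_pids);   /* 0 */
	start_log_trans(&root, 200);   /* different pid joins the log   */
	printf("multiple: %d\n", root.log_multiple_pids);   /* 1 */
	return 0;
}
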