Btrfs: Add fallocate support v2
author Yan Zheng <zheng.yan@oracle.com>
Thu, 30 Oct 2008 18:25:28 +0000 (14:25 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 30 Oct 2008 18:25:28 +0000 (14:25 -0400)
This patch updates btrfs-progs for fallocate support.

fallocate is a little different in Btrfs because we need to tell the
COW system that a given preallocated extent doesn't need to be
cow'd as long as there are no snapshots of it.  This leverages the
-o nodatacow checks.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_map.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/tree-log.c

index 8bf6a08..d5ba3d1 100644 (file)
@@ -462,8 +462,9 @@ struct btrfs_root_item {
        u8 level;
 } __attribute__ ((__packed__));
 
-#define BTRFS_FILE_EXTENT_REG 0
-#define BTRFS_FILE_EXTENT_INLINE 1
+#define BTRFS_FILE_EXTENT_INLINE 0
+#define BTRFS_FILE_EXTENT_REG 1
+#define BTRFS_FILE_EXTENT_PREALLOC 2
 
 struct btrfs_file_extent_item {
        /*
@@ -868,6 +869,7 @@ struct btrfs_root {
 #define BTRFS_INODE_NODATACOW          (1 << 1)
 #define BTRFS_INODE_READONLY           (1 << 2)
 #define BTRFS_INODE_NOCOMPRESS         (1 << 3)
+#define BTRFS_INODE_PREALLOC           (1 << 4)
 #define btrfs_clear_flag(inode, flag)  (BTRFS_I(inode)->flags &= \
                                         ~BTRFS_INODE_##flag)
 #define btrfs_set_flag(inode, flag)    (BTRFS_I(inode)->flags |= \
@@ -1924,6 +1926,9 @@ extern struct file_operations btrfs_file_operations;
 int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode,
                       u64 start, u64 end, u64 inline_limit, u64 *hint_block);
+int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
 
 /* tree-defrag.c */
index 1eb69a9..8af3952 100644 (file)
@@ -2147,6 +2147,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
        total_needed += empty_size;
 
        block_group = btrfs_lookup_block_group(root->fs_info, search_start);
+       if (!block_group)
+               block_group = btrfs_lookup_first_block_group(root->fs_info,
+                                                            search_start);
        space_info = __find_space_info(root->fs_info, data);
 
        down_read(&space_info->groups_sem);
@@ -3426,9 +3429,7 @@ walk_down:
 next:
                level--;
                btrfs_release_path(extent_root, path);
-               if (need_resched()) {
-                       cond_resched();
-               }
+               cond_resched();
        }
        /* reached lowest level */
        ret = 1;
@@ -3539,9 +3540,7 @@ found:
                }
 
                btrfs_release_path(extent_root, path);
-               if (need_resched()) {
-                       cond_resched();
-               }
+               cond_resched();
        }
        /* reached max tree level, but no tree root found. */
        BUG();
@@ -3654,8 +3653,9 @@ static int noinline get_new_locations(struct inode *reloc_inode,
                exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
                exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
                                                                           fi);
-               WARN_ON(exts[nr].offset > 0);
-               WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
+               BUG_ON(exts[nr].offset > 0);
+               BUG_ON(exts[nr].compression || exts[nr].encryption);
+               BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
 
                cur_pos += exts[nr].num_bytes;
                nr++;
@@ -3709,6 +3709,7 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
        u32 nritems;
        int nr_scaned = 0;
        int extent_locked = 0;
+       int extent_type;
        int ret;
 
        memcpy(&key, leaf_key, sizeof(key));
@@ -3781,8 +3782,9 @@ next:
                }
                fi = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_file_extent_item);
-               if ((btrfs_file_extent_type(leaf, fi) !=
-                    BTRFS_FILE_EXTENT_REG) ||
+               extent_type = btrfs_file_extent_type(leaf, fi);
+               if ((extent_type != BTRFS_FILE_EXTENT_REG &&
+                    extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
                    (btrfs_file_extent_disk_bytenr(leaf, fi) !=
                     extent_key->objectid)) {
                        path->slots[0]++;
@@ -3865,16 +3867,10 @@ next:
 
                if (nr_extents == 1) {
                        /* update extent pointer in place */
-                       btrfs_set_file_extent_generation(leaf, fi,
-                                               trans->transid);
                        btrfs_set_file_extent_disk_bytenr(leaf, fi,
                                                new_extents[0].disk_bytenr);
                        btrfs_set_file_extent_disk_num_bytes(leaf, fi,
                                                new_extents[0].disk_num_bytes);
-                       btrfs_set_file_extent_ram_bytes(leaf, fi,
-                                               new_extents[0].ram_bytes);
-                       ext_offset += new_extents[0].offset;
-                       btrfs_set_file_extent_offset(leaf, fi, ext_offset);
                        btrfs_mark_buffer_dirty(leaf);
 
                        btrfs_drop_extent_cache(inode, key.offset,
@@ -3901,6 +3897,8 @@ next:
                        btrfs_release_path(root, path);
                        key.offset += num_bytes;
                } else {
+                       BUG_ON(1);
+#if 0
                        u64 alloc_hint;
                        u64 extent_len;
                        int i;
@@ -3977,6 +3975,7 @@ next:
                                        break;
                        }
                        BUG_ON(i >= nr_extents);
+#endif
                }
 
                if (extent_locked) {
@@ -4156,15 +4155,10 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
                ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
                ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
 
-               btrfs_set_file_extent_generation(leaf, fi, trans->transid);
-               btrfs_set_file_extent_ram_bytes(leaf, fi,
-                                               new_extent->ram_bytes);
                btrfs_set_file_extent_disk_bytenr(leaf, fi,
                                                new_extent->disk_bytenr);
                btrfs_set_file_extent_disk_num_bytes(leaf, fi,
                                                new_extent->disk_num_bytes);
-               new_extent->offset += btrfs_file_extent_offset(leaf, fi);
-               btrfs_set_file_extent_offset(leaf, fi, new_extent->offset);
                btrfs_mark_buffer_dirty(leaf);
 
                ret = btrfs_inc_extent_ref(trans, root,
@@ -4625,12 +4619,15 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root,
                         */
                        if (!new_extents) {
                                u64 group_start = group->key.objectid;
+                               new_extents = kmalloc(sizeof(*new_extents),
+                                                     GFP_NOFS);
+                               nr_extents = 1;
                                ret = get_new_locations(reloc_inode,
                                                        extent_key,
-                                                       group_start, 0,
+                                                       group_start, 1,
                                                        &new_extents,
                                                        &nr_extents);
-                               if (ret < 0)
+                               if (ret)
                                        goto out;
                        }
                        btrfs_record_root_in_trans(found_root);
@@ -4762,7 +4759,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
        btrfs_set_inode_generation(leaf, item, 1);
        btrfs_set_inode_size(leaf, item, size);
        btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
-       btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM);
+       btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM |
+                                         BTRFS_INODE_NOCOMPRESS);
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(root, path);
 out:
@@ -4835,6 +4833,7 @@ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
        struct inode *reloc_inode;
        struct btrfs_block_group_cache *block_group;
        struct btrfs_key key;
+       u64 skipped;
        u64 cur_byte;
        u64 total_found;
        u32 nritems;
@@ -4864,6 +4863,7 @@ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
        btrfs_start_delalloc_inodes(info->tree_root);
        btrfs_wait_ordered_extents(info->tree_root, 0);
 again:
+       skipped = 0;
        total_found = 0;
        progress = 0;
        key.objectid = block_group->key.objectid;
@@ -4926,6 +4926,8 @@ next:
                ret = relocate_one_extent(root, path, &key, block_group,
                                          reloc_inode, pass);
                BUG_ON(ret < 0);
+               if (ret > 0)
+                       skipped++;
 
                key.objectid = cur_byte;
                key.type = 0;
@@ -4944,6 +4946,11 @@ next:
                printk("btrfs found %llu extents in pass %d\n",
                       (unsigned long long)total_found, pass);
                pass++;
+               if (total_found == skipped && pass > 2) {
+                       iput(reloc_inode);
+                       reloc_inode = create_reloc_inode(info, block_group);
+                       pass = 0;
+               }
                goto again;
        }
 
@@ -5011,17 +5018,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
                block_group = rb_entry(n, struct btrfs_block_group_cache,
                                       cache_node);
-
-               spin_unlock(&info->block_group_cache_lock);
-               btrfs_remove_free_space_cache(block_group);
-               spin_lock(&info->block_group_cache_lock);
-
                rb_erase(&block_group->cache_node,
                         &info->block_group_cache_tree);
+               spin_unlock(&info->block_group_cache_lock);
+
+               btrfs_remove_free_space_cache(block_group);
                down_write(&block_group->space_info->groups_sem);
                list_del(&block_group->list);
                up_write(&block_group->space_info->groups_sem);
                kfree(block_group);
+
+               spin_lock(&info->block_group_cache_lock);
        }
        spin_unlock(&info->block_group_cache_lock);
        return 0;
index 65a0583..eb3c12e 100644 (file)
@@ -2015,6 +2015,8 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur);
                }
                bdev = em->bdev;
                block_start = em->block_start;
+               if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+                       block_start = EXTENT_MAP_HOLE;
                free_extent_map(em);
                em = NULL;
 
@@ -2769,14 +2771,18 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
        struct inode *inode = mapping->host;
        u64 start = iblock << inode->i_blkbits;
        sector_t sector = 0;
+       size_t blksize = (1 << inode->i_blkbits);
        struct extent_map *em;
 
-       em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
+       lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
+                   GFP_NOFS);
+       em = get_extent(inode, NULL, 0, start, blksize, 0);
+       unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
+                     GFP_NOFS);
        if (!em || IS_ERR(em))
                return 0;
 
-       if (em->block_start == EXTENT_MAP_INLINE ||
-           em->block_start == EXTENT_MAP_HOLE)
+       if (em->block_start > EXTENT_MAP_LAST_BYTE)
                goto out;
 
        sector = (em->block_start + start - em->start) >> inode->i_blkbits;
index e693e1b..accfeda 100644 (file)
@@ -12,6 +12,7 @@
 #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
 #define EXTENT_FLAG_COMPRESSED 1
 #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
+#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
 
 struct extent_map {
        struct rb_node rb_node;
index 1a0510a..238a8e2 100644 (file)
@@ -381,7 +381,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
        int keep;
        int slot;
        int bookend;
-       int found_type;
+       int found_type = 0;
        int found_extent;
        int found_inline;
        int recow;
@@ -442,7 +442,8 @@ next_slot:
                                                                  extent);
                        other_encoding = btrfs_file_extent_other_encoding(leaf,
                                                                  extent);
-                       if (found_type == BTRFS_FILE_EXTENT_REG) {
+                       if (found_type == BTRFS_FILE_EXTENT_REG ||
+                           found_type == BTRFS_FILE_EXTENT_PREALLOC) {
                                extent_end =
                                     btrfs_file_extent_disk_bytenr(leaf,
                                                                   extent);
@@ -609,8 +610,7 @@ next_slot:
                         */
                        btrfs_set_file_extent_ram_bytes(leaf, extent,
                                                        ram_bytes);
-                       btrfs_set_file_extent_type(leaf, extent,
-                                                  BTRFS_FILE_EXTENT_REG);
+                       btrfs_set_file_extent_type(leaf, extent, found_type);
 
                        btrfs_mark_buffer_dirty(path->nodes[0]);
 
@@ -661,6 +661,243 @@ out:
        return ret;
 }
 
+static int extent_mergeable(struct extent_buffer *leaf, int slot,
+                           u64 objectid, u64 bytenr, u64 *start, u64 *end)
+{ /* returns 1 when the item at slot passes every check below, else 0 */
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_key key;
+       u64 extent_end;
+
+       if (slot < 0 || slot >= btrfs_header_nritems(leaf)) /* slot is outside this leaf */
+               return 0;
+
+       btrfs_item_key_to_cpu(leaf, &key, slot);
+       if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) /* not an EXTENT_DATA item of objectid */
+               return 0;
+
+       fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+       if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
+           btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
+           btrfs_file_extent_compression(leaf, fi) ||
+           btrfs_file_extent_encryption(leaf, fi) ||
+           btrfs_file_extent_other_encoding(leaf, fi)) /* only a plain REG extent on the same disk bytenr qualifies */
+               return 0;
+
+       extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+       if ((*start && *start != key.offset) || (*end && *end != extent_end)) /* a non-zero start or end passed in must match this item; zero means no constraint */
+               return 0;
+
+       *start = key.offset; /* report the item's file range back to the caller */
+       *end = extent_end;
+       return 1;
+}
+
+/*
+ * Mark the extent in the file range [start, end) as written; 'end' is
+ * exclusive. The range must lie inside a single pre-allocated extent
+ * (anything else trips a BUG_ON). The extent type changes from
+ * 'pre-allocated' to 'regular'; if only part of the extent is written
+ * it is split into two or three items. Always returns 0.
+ */
+int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct inode *inode, u64 start, u64 end)
+{
+       struct extent_buffer *leaf;
+       struct btrfs_path *path;
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_key key;
+       u64 bytenr;
+       u64 num_bytes;
+       u64 extent_end;
+       u64 extent_offset;
+       u64 other_start;
+       u64 other_end;
+       u64 split = start; /* current split point: only ever start or end */
+       u64 locked_end = end; /* grows if the extent's tail gets locked below */
+       int extent_type;
+       int split_end = 1; /* cleared once nothing remains to split at end */
+       int ret;
+
+       btrfs_drop_extent_cache(inode, start, end - 1, 0); /* cached mappings for the range are dropped first */
+
+       path = btrfs_alloc_path();
+       BUG_ON(!path);
+again:
+       key.objectid = inode->i_ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       if (split == start)
+               key.offset = split;
+       else
+               key.offset = split - 1; /* split == end here: look up byte end - 1 */
+
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1); /* NOTE(review): ret < 0 is never checked */
+       if (ret > 0 && path->slots[0] > 0) /* presumably no exact key match: step back one item -- confirm */
+               path->slots[0]--;
+
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+       BUG_ON(key.objectid != inode->i_ino ||
+              key.type != BTRFS_EXTENT_DATA_KEY);
+       fi = btrfs_item_ptr(leaf, path->slots[0],
+                           struct btrfs_file_extent_item);
+       extent_type = btrfs_file_extent_type(leaf, fi);
+       BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC);
+       extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+       BUG_ON(key.offset > start || extent_end < end); /* the range must lie inside this one extent */
+
+       bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+       num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+       extent_offset = btrfs_file_extent_offset(leaf, fi);
+
+       if (key.offset == start) /* extent begins at start, so the only split point is end */
+               split = end;
+
+       if (key.offset == start && extent_end == end) { /* the whole item is written: no split needed */
+               int del_nr = 0;
+               int del_slot = 0;
+               u64 leaf_owner = btrfs_header_owner(leaf);
+               u64 leaf_gen = btrfs_header_generation(leaf);
+               other_start = end;
+               other_end = 0;
+               if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
+                                    bytenr, &other_start, &other_end)) { /* following item continues the same disk extent */
+                       extent_end = other_end;
+                       del_slot = path->slots[0] + 1;
+                       del_nr++;
+                       ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+                                               leaf->start, leaf_owner,
+                                               leaf_gen, inode->i_ino, 0);
+                       BUG_ON(ret);
+               }
+               other_start = 0;
+               other_end = start;
+               if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
+                                    bytenr, &other_start, &other_end)) { /* preceding item ends where this one starts */
+                       key.offset = other_start;
+                       del_slot = path->slots[0];
+                       del_nr++;
+                       ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+                                               leaf->start, leaf_owner,
+                                               leaf_gen, inode->i_ino, 0);
+                       BUG_ON(ret);
+               }
+               split_end = 0;
+               if (del_nr == 0) { /* nothing merged: just flip the type in place */
+                       btrfs_set_file_extent_type(leaf, fi,
+                                                  BTRFS_FILE_EXTENT_REG);
+                       goto done;
+               }
+
+               fi = btrfs_item_ptr(leaf, del_slot - 1,
+                                   struct btrfs_file_extent_item); /* the surviving item is the one just before del_slot */
+               btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
+               btrfs_set_file_extent_num_bytes(leaf, fi,
+                                               extent_end - key.offset);
+               btrfs_mark_buffer_dirty(leaf);
+
+               ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+               BUG_ON(ret);
+               goto done;
+       } else if (split == start) { /* key.offset < start: the written range begins inside the item */
+               if (locked_end < extent_end) { /* also lock the item's tail beyond end */
+                       ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
+                                       locked_end, extent_end - 1, GFP_NOFS);
+                       if (!ret) { /* presumably the trylock failed: drop the path, block on the lock, retry -- confirm */
+                               btrfs_release_path(root, path);
+                               lock_extent(&BTRFS_I(inode)->io_tree,
+                                       locked_end, extent_end - 1, GFP_NOFS);
+                               locked_end = extent_end;
+                               goto again;
+                       }
+                       locked_end = extent_end;
+               }
+               btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); /* trim the existing item to [key.offset, start) */
+               extent_offset += split - key.offset;
+       } else  { /* split == end and the item begins at start */
+               BUG_ON(key.offset != start);
+               btrfs_set_file_extent_offset(leaf, fi, extent_offset +
+                                            split - key.offset);
+               btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
+               key.offset = split;
+               btrfs_set_item_key_safe(trans, root, path, &key); /* existing item now covers [end, old extent_end) */
+               extent_end = split; /* the piece handled below is [start, end) */
+       }
+
+       if (extent_end == end) { /* the piece starting at start ends exactly at end: it is the written part */
+               split_end = 0;
+               extent_type = BTRFS_FILE_EXTENT_REG;
+       }
+       if (extent_end == end && split == start) { /* instead of inserting, try growing the next item back to start */
+               other_start = end;
+               other_end = 0;
+               if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
+                                    bytenr, &other_start, &other_end)) {
+                       path->slots[0]++;
+                       fi = btrfs_item_ptr(leaf, path->slots[0],
+                                           struct btrfs_file_extent_item);
+                       key.offset = split;
+                       btrfs_set_item_key_safe(trans, root, path, &key);
+                       btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+                       btrfs_set_file_extent_num_bytes(leaf, fi,
+                                                       other_end - split);
+                       goto done;
+               }
+       }
+       if (extent_end == end && split == end) { /* instead of inserting, try growing the previous item up to end */
+               other_start = 0;
+               other_end = start;
+               if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
+                                    bytenr, &other_start, &other_end)) {
+                       path->slots[0]--;
+                       fi = btrfs_item_ptr(leaf, path->slots[0],
+                                           struct btrfs_file_extent_item);
+                       btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
+                                                       other_start);
+                       goto done;
+               }
+       }
+
+       btrfs_mark_buffer_dirty(leaf);
+       btrfs_release_path(root, path);
+
+       key.offset = start; /* no merge was possible: insert a new item for the piece starting at start */
+       ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
+       BUG_ON(ret);
+
+       leaf = path->nodes[0];
+       fi = btrfs_item_ptr(leaf, path->slots[0],
+                           struct btrfs_file_extent_item);
+       btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+       btrfs_set_file_extent_type(leaf, fi, extent_type);
+       btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
+       btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
+       btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+       btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
+       btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+       btrfs_set_file_extent_compression(leaf, fi, 0);
+       btrfs_set_file_extent_encryption(leaf, fi, 0);
+       btrfs_set_file_extent_other_encoding(leaf, fi, 0);
+
+       ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
+                                  leaf->start, root->root_key.objectid,
+                                  trans->transid, inode->i_ino); /* the new item is one more reference to the same disk extent */
+       BUG_ON(ret);
+done:
+       btrfs_mark_buffer_dirty(leaf);
+       btrfs_release_path(root, path);
+       if (split_end && split == start) { /* a split at end is still pending: run one more pass */
+               split = end;
+               goto again;
+       }
+       if (locked_end > end) { /* release the extra tail range locked above */
+               unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
+                             GFP_NOFS);
+       }
+       btrfs_free_path(path);
+       return 0;
+}
+
 /*
  * this gets pages into the page cache and locks them down, it also properly
  * waits for data=ordered extents to finish before allowing the pages to be
index 3e6f056..789c376 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/version.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
+#include <linux/falloc.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -587,7 +588,7 @@ free_pages_out:
  * blocks on disk
  */
 static int run_delalloc_nocow(struct inode *inode, struct page *locked_page,
-                             u64 start, u64 end, int *page_started)
+                             u64 start, u64 end, int *page_started, int force)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
@@ -602,6 +603,7 @@ static int run_delalloc_nocow(struct inode *inode, struct page *locked_page,
        u64 num_bytes;
        int extent_type;
        int ret;
+       int type;
        int nocow;
        int check_prev = 1;
 
@@ -654,7 +656,8 @@ next_slot:
                                    struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(leaf, fi);
 
-               if (extent_type == BTRFS_FILE_EXTENT_REG) {
+               if (extent_type == BTRFS_FILE_EXTENT_REG ||
+                   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
                        struct btrfs_block_group_cache *block_group;
                        disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                        extent_end = found_key.offset +
@@ -669,6 +672,8 @@ next_slot:
                                goto out_check;
                        if (disk_bytenr == 0)
                                goto out_check;
+                       if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
+                               goto out_check;
                        if (btrfs_cross_ref_exist(trans, root, disk_bytenr))
                                goto out_check;
                        block_group = btrfs_lookup_block_group(root->fs_info,
@@ -709,10 +714,39 @@ out_check:
 
                disk_bytenr += cur_offset - found_key.offset;
                num_bytes = min(end + 1, extent_end) - cur_offset;
+               if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+                       struct extent_map *em;
+                       struct extent_map_tree *em_tree;
+                       em_tree = &BTRFS_I(inode)->extent_tree;
+                       em = alloc_extent_map(GFP_NOFS);
+                       em->start = cur_offset;
+                       em->len = num_bytes;
+                       em->block_len = num_bytes;
+                       em->block_start = disk_bytenr;
+                       em->bdev = root->fs_info->fs_devices->latest_bdev;
+                       set_bit(EXTENT_FLAG_PINNED, &em->flags);
+                       while (1) {
+                               spin_lock(&em_tree->lock);
+                               ret = add_extent_mapping(em_tree, em);
+                               spin_unlock(&em_tree->lock);
+                               if (ret != -EEXIST) {
+                                       free_extent_map(em);
+                                       break;
+                               }
+                               btrfs_drop_extent_cache(inode, em->start,
+                                               em->start + em->len - 1, 0);
+                       }
+                       type = BTRFS_ORDERED_PREALLOC;
+               } else {
+                       type = BTRFS_ORDERED_NOCOW;
+               }
 
                ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
-                                              num_bytes, num_bytes,
-                                              BTRFS_ORDERED_NOCOW);
+                                              num_bytes, num_bytes, type);
+               BUG_ON(ret);
+               extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
+                                       cur_offset, cur_offset + num_bytes - 1,
+                                       locked_page, 0, 0, 0);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
@@ -745,7 +779,10 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
        if (btrfs_test_opt(root, NODATACOW) ||
            btrfs_test_flag(inode, NODATACOW))
                ret = run_delalloc_nocow(inode, locked_page, start, end,
-                                        page_started);
+                                        page_started, 0);
+       else if (btrfs_test_flag(inode, PREALLOC))
+               ret = run_delalloc_nocow(inode, locked_page, start, end,
+                                        page_started, 1);
        else
                ret = cow_file_range(inode, locked_page, start, end,
                                     page_started);
@@ -1006,6 +1043,63 @@ int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
        return -EAGAIN;
 }
 
+static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
+                                      struct inode *inode, u64 file_pos,
+                                      u64 disk_bytenr, u64 disk_num_bytes,
+                                      u64 num_bytes, u64 ram_bytes,
+                                      u8 compression, u8 encryption,
+                                      u16 other_encoding, int extent_type)
+{ /* inserts one file extent item at file_pos; always returns 0, failures trip BUG_ON */
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       struct btrfs_key ins;
+       u64 hint; /* handed to btrfs_drop_extents as hint_block; not read in this function */
+       int ret;
+
+       path = btrfs_alloc_path();
+       BUG_ON(!path);
+
+       ret = btrfs_drop_extents(trans, root, inode, file_pos,
+                                file_pos + num_bytes, file_pos, &hint); /* clear the file range file_pos .. file_pos + num_bytes first */
+       BUG_ON(ret);
+
+       ins.objectid = inode->i_ino; /* key of the new EXTENT_DATA item: (inode, file_pos) */
+       ins.offset = file_pos;
+       ins.type = BTRFS_EXTENT_DATA_KEY;
+       ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
+       BUG_ON(ret);
+       leaf = path->nodes[0];
+       fi = btrfs_item_ptr(leaf, path->slots[0],
+                           struct btrfs_file_extent_item);
+       btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+       btrfs_set_file_extent_type(leaf, fi, extent_type);
+       btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
+       btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
+       btrfs_set_file_extent_offset(leaf, fi, 0); /* the item starts at offset 0 of the disk extent */
+       btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+       btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
+       btrfs_set_file_extent_compression(leaf, fi, compression);
+       btrfs_set_file_extent_encryption(leaf, fi, encryption);
+       btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
+       btrfs_mark_buffer_dirty(leaf);
+
+       inode_add_bytes(inode, num_bytes);
+       btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); /* end argument is inclusive here, hence the - 1 */
+
+       ins.objectid = disk_bytenr; /* reuse 'ins' as the disk extent key: (bytenr, EXTENT_ITEM, length) */
+       ins.offset = disk_num_bytes;
+       ins.type = BTRFS_EXTENT_ITEM_KEY;
+       ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
+                                         root->root_key.objectid,
+                                         trans->transid, inode->i_ino, &ins);
+       BUG_ON(ret);
+
+       btrfs_free_path(path);
+       return 0;
+}
+
 /* as ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
@@ -1016,12 +1110,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        struct btrfs_trans_handle *trans;
        struct btrfs_ordered_extent *ordered_extent;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-       struct btrfs_file_extent_item *extent_item;
-       struct btrfs_path *path = NULL;
-       struct extent_buffer *leaf;
-       u64 alloc_hint = 0;
-       struct list_head list;
-       struct btrfs_key ins;
+       int compressed = 0;
        int ret;
 
        ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
@@ -1035,67 +1124,30 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
                goto nocow;
 
-       path = btrfs_alloc_path();
-       BUG_ON(!path);
-
        lock_extent(io_tree, ordered_extent->file_offset,
                    ordered_extent->file_offset + ordered_extent->len - 1,
                    GFP_NOFS);
 
-       INIT_LIST_HEAD(&list);
-
-       ret = btrfs_drop_extents(trans, root, inode,
-                                ordered_extent->file_offset,
-                                ordered_extent->file_offset +
-                                ordered_extent->len,
-                                ordered_extent->file_offset, &alloc_hint);
-       BUG_ON(ret);
-
-       ins.objectid = inode->i_ino;
-       ins.offset = ordered_extent->file_offset;
-       ins.type = BTRFS_EXTENT_DATA_KEY;
-       ret = btrfs_insert_empty_item(trans, root, path, &ins,
-                                     sizeof(*extent_item));
-       BUG_ON(ret);
-       leaf = path->nodes[0];
-       extent_item = btrfs_item_ptr(leaf, path->slots[0],
-                                    struct btrfs_file_extent_item);
-       btrfs_set_file_extent_generation(leaf, extent_item, trans->transid);
-       btrfs_set_file_extent_type(leaf, extent_item, BTRFS_FILE_EXTENT_REG);
-       btrfs_set_file_extent_disk_bytenr(leaf, extent_item,
-                                         ordered_extent->start);
-       btrfs_set_file_extent_disk_num_bytes(leaf, extent_item,
-                                            ordered_extent->disk_len);
-       btrfs_set_file_extent_offset(leaf, extent_item, 0);
-
        if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
-               btrfs_set_file_extent_compression(leaf, extent_item, 1);
-       else
-               btrfs_set_file_extent_compression(leaf, extent_item, 0);
-       btrfs_set_file_extent_encryption(leaf, extent_item, 0);
-       btrfs_set_file_extent_other_encoding(leaf, extent_item, 0);
-
-       /* ram bytes = extent_num_bytes for now */
-       btrfs_set_file_extent_num_bytes(leaf, extent_item,
-                                       ordered_extent->len);
-       btrfs_set_file_extent_ram_bytes(leaf, extent_item,
-                                       ordered_extent->len);
-       btrfs_mark_buffer_dirty(leaf);
-
-       btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
-                               ordered_extent->file_offset +
-                               ordered_extent->len - 1, 0);
-
-       ins.objectid = ordered_extent->start;
-       ins.offset = ordered_extent->disk_len;
-       ins.type = BTRFS_EXTENT_ITEM_KEY;
-       ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
-                                         root->root_key.objectid,
-                                         trans->transid, inode->i_ino, &ins);
-       BUG_ON(ret);
-       btrfs_release_path(root, path);
-
-       inode_add_bytes(inode, ordered_extent->len);
+               compressed = 1;
+       if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
+               BUG_ON(compressed);
+               ret = btrfs_mark_extent_written(trans, root, inode,
+                                               ordered_extent->file_offset,
+                                               ordered_extent->file_offset +
+                                               ordered_extent->len);
+               BUG_ON(ret);
+       } else {
+               ret = insert_reserved_file_extent(trans, inode,
+                                               ordered_extent->file_offset,
+                                               ordered_extent->start,
+                                               ordered_extent->disk_len,
+                                               ordered_extent->len,
+                                               ordered_extent->len,
+                                               compressed, 0, 0,
+                                               BTRFS_FILE_EXTENT_REG);
+               BUG_ON(ret);
+       }
        unlock_extent(io_tree, ordered_extent->file_offset,
                    ordered_extent->file_offset + ordered_extent->len - 1,
                    GFP_NOFS);
@@ -1115,8 +1167,6 @@ nocow:
        btrfs_put_ordered_extent(ordered_extent);
 
        btrfs_end_transaction(trans, root);
-       if (path)
-               btrfs_free_path(path);
        return 0;
 }
 
@@ -3488,7 +3538,8 @@ again:
        found_type = btrfs_file_extent_type(leaf, item);
        extent_start = found_key.offset;
        compressed = btrfs_file_extent_compression(leaf, item);
-       if (found_type == BTRFS_FILE_EXTENT_REG) {
+       if (found_type == BTRFS_FILE_EXTENT_REG ||
+           found_type == BTRFS_FILE_EXTENT_PREALLOC) {
                extent_end = extent_start +
                       btrfs_file_extent_num_bytes(leaf, item);
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
@@ -3521,7 +3572,8 @@ again:
                goto not_found_em;
        }
 
-       if (found_type == BTRFS_FILE_EXTENT_REG) {
+       if (found_type == BTRFS_FILE_EXTENT_REG ||
+           found_type == BTRFS_FILE_EXTENT_PREALLOC) {
                em->start = extent_start;
                em->len = extent_end - extent_start;
                bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
@@ -3538,6 +3590,8 @@ again:
                        bytenr += btrfs_file_extent_offset(leaf, item);
                        em->block_start = bytenr;
                        em->block_len = em->len;
+                       if (found_type == BTRFS_FILE_EXTENT_PREALLOC)
+                               set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
                }
                goto insert;
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
@@ -3969,6 +4023,7 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry,
        if (error)
                return error;
 
+       atomic_inc(&inode->i_count);
        d_instantiate(dentry, inode);
        return 0;
 }
@@ -4318,6 +4373,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        inode->i_op = &btrfs_symlink_inode_operations;
        inode->i_mapping->a_ops = &btrfs_symlink_aops;
        inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+       inode_set_bytes(inode, name_len);
        btrfs_i_size_write(inode, name_len - 1);
        err = btrfs_update_inode(trans, root, inode);
        if (err)
@@ -4335,6 +4391,130 @@ out_fail:
        return err;
 }
 
+/*
+ * Preallocate disk space for the file range [start, end) of @inode.
+ *
+ * Joins a transaction, then repeatedly reserves an extent of at most
+ * fs_info->max_extent bytes and inserts a BTRFS_FILE_EXTENT_PREALLOC file
+ * extent item for it until the whole range is covered.  @alloc_hint is
+ * passed to the allocator and advanced past each extent that was reserved.
+ *
+ * If at least one extent was inserted, the inode gets a fresh ctime and the
+ * PREALLOC flag, and i_size is raised to the end of the allocated range
+ * unless FALLOC_FL_KEEP_SIZE is set in @mode.
+ *
+ * Returns 0 when the whole range was allocated, otherwise the error from
+ * btrfs_reserve_extent().  That error is reported even when earlier
+ * iterations already allocated part of the range.
+ */
+static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
+                              u64 alloc_hint, int mode)
+{
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_key ins;
+       u64 alloc_size;
+       u64 cur_offset = start;
+       u64 num_bytes = end - start;
+       int ret = 0;
+       int err;
+
+       trans = btrfs_join_transaction(root, 1);
+       BUG_ON(!trans);
+       btrfs_set_trans_block_group(trans, inode);
+
+       while (num_bytes > 0) {
+               alloc_size = min(num_bytes, root->fs_info->max_extent);
+               ret = btrfs_reserve_extent(trans, root, alloc_size,
+                                          root->sectorsize, 0, alloc_hint,
+                                          (u64)-1, &ins, 1);
+               if (ret) {
+                       WARN_ON(1);
+                       goto out;
+               }
+               /* ins.objectid/ins.offset hold the reserved disk start/size */
+               err = insert_reserved_file_extent(trans, inode,
+                                                 cur_offset, ins.objectid,
+                                                 ins.offset, ins.offset,
+                                                 ins.offset, 0, 0, 0,
+                                                 BTRFS_FILE_EXTENT_PREALLOC);
+               BUG_ON(err);
+               num_bytes -= ins.offset;
+               cur_offset += ins.offset;
+               alloc_hint = ins.objectid + ins.offset;
+       }
+out:
+       if (cur_offset > start) {
+               inode->i_ctime = CURRENT_TIME;
+               btrfs_set_flag(inode, PREALLOC);
+               if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+                   cur_offset > i_size_read(inode))
+                       btrfs_i_size_write(inode, cur_offset);
+               /*
+                * Keep the inode update result out of 'ret' so that a
+                * reservation failure after partial progress still reaches
+                * the caller.
+                */
+               err = btrfs_update_inode(trans, root, inode);
+               BUG_ON(err);
+       }
+
+       btrfs_end_transaction(trans, root);
+       return ret;
+}
+
+/*
+ * Preallocate extents for every hole in the sector-aligned range that
+ * covers [offset, offset + len) of @inode.
+ *
+ * Runs with i_mutex held and, once no ordered extent overlaps the range,
+ * with the range locked in the inode's io_tree.  @mode is passed through to
+ * prealloc_file_range(), which checks FALLOC_FL_KEEP_SIZE.
+ *
+ * Returns 0 on success, a non-zero value from btrfs_cont_expand(), or a
+ * negative value from prealloc_file_range().
+ */
+static long btrfs_fallocate(struct inode *inode, int mode,
+                           loff_t offset, loff_t len)
+{
+       u64 cur_offset;
+       u64 last_byte;
+       u64 alloc_start;
+       u64 alloc_end;
+       u64 alloc_hint = 0;
+       u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+       struct extent_map *em;
+       int ret;
+
+       /* round the start down and the end up to sector boundaries */
+       alloc_start = offset & ~mask;
+       alloc_end =  (offset + len + mask) & ~mask;
+
+       mutex_lock(&inode->i_mutex);
+       /*
+        * The range starts beyond the current i_size.
+        * NOTE(review): btrfs_cont_expand() presumably fills the gap up to
+        * alloc_start with hole extents; confirm against its definition.
+        */
+       if (alloc_start > inode->i_size) {
+               ret = btrfs_cont_expand(inode, alloc_start);
+               if (ret)
+                       goto out;
+       }
+
+       /*
+        * Lock the range, then look for an ordered extent overlapping it.
+        * If one is found, drop the lock, wait for the ordered range and
+        * retry.  The loop exits with the range still locked.
+        */
+       while (1) {
+               struct btrfs_ordered_extent *ordered;
+               lock_extent(&BTRFS_I(inode)->io_tree, alloc_start,
+                           alloc_end - 1, GFP_NOFS);
+               ordered = btrfs_lookup_first_ordered_extent(inode,
+                                                           alloc_end - 1);
+               if (ordered &&
+                   ordered->file_offset + ordered->len > alloc_start &&
+                   ordered->file_offset < alloc_end) {
+                       btrfs_put_ordered_extent(ordered);
+                       unlock_extent(&BTRFS_I(inode)->io_tree,
+                                     alloc_start, alloc_end - 1, GFP_NOFS);
+                       btrfs_wait_ordered_range(inode, alloc_start,
+                                                alloc_end - alloc_start);
+               } else {
+                       if (ordered)
+                               btrfs_put_ordered_extent(ordered);
+                       break;
+               }
+       }
+
+       /*
+        * Walk the extent maps covering the range one at a time.  Only maps
+        * whose block_start is EXTENT_MAP_HOLE are preallocated; any other
+        * mapping is skipped over.
+        */
+       cur_offset = alloc_start;
+       while (1) {
+               em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+                                     alloc_end - cur_offset, 0);
+               BUG_ON(IS_ERR(em) || !em);
+               /* end of this step: end of the map, clamped and sector aligned */
+               last_byte = min(extent_map_end(em), alloc_end);
+               last_byte = (last_byte + mask) & ~mask;
+               if (em->block_start == EXTENT_MAP_HOLE) {
+                       ret = prealloc_file_range(inode, cur_offset,
+                                       last_byte, alloc_hint, mode);
+                       if (ret < 0) {
+                               free_extent_map(em);
+                               break;
+                       }
+               }
+               /*
+                * Use this map's block_start as the allocation hint for the
+                * next hole when it is at or below EXTENT_MAP_LAST_BYTE.
+                */
+               if (em->block_start <= EXTENT_MAP_LAST_BYTE)
+                       alloc_hint = em->block_start;
+               free_extent_map(em);
+
+               cur_offset = last_byte;
+               if (cur_offset >= alloc_end) {
+                       ret = 0;
+                       break;
+               }
+       }
+       unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1,
+                     GFP_NOFS);
+out:
+       mutex_unlock(&inode->i_mutex);
+       return ret;
+}
+
 static int btrfs_set_page_dirty(struct page *page)
 {
        return __set_page_dirty_nobuffers(page);
@@ -4421,6 +4601,7 @@ static struct inode_operations btrfs_file_inode_operations = {
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
+       .fallocate      = btrfs_fallocate,
 };
 static struct inode_operations btrfs_special_inode_operations = {
        .getattr        = btrfs_getattr,
index 7f915d4..9ff2b4e 100644 (file)
@@ -724,7 +724,8 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd)
                        extent = btrfs_item_ptr(leaf, slot,
                                                struct btrfs_file_extent_item);
                        found_type = btrfs_file_extent_type(leaf, extent);
-                       if (found_type == BTRFS_FILE_EXTENT_REG) {
+                       if (found_type == BTRFS_FILE_EXTENT_REG ||
+                           found_type == BTRFS_FILE_EXTENT_PREALLOC) {
                                u64 ds = btrfs_file_extent_disk_bytenr(leaf,
                                                                       extent);
                                u64 dl = btrfs_file_extent_disk_num_bytes(leaf,
index e7317c8..370bb42 100644 (file)
@@ -182,7 +182,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        entry->len = len;
        entry->disk_len = disk_len;
        entry->inode = inode;
-       if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_COMPRESSED)
+       if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
                set_bit(type, &entry->flags);
 
        /* one ref for the tree */
@@ -339,7 +339,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
                ordered = list_entry(cur, struct btrfs_ordered_extent,
                                     root_extent_list);
                if (nocow_only &&
-                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
+                   !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
                        list_move(&ordered->root_extent_list,
                                  &root->fs_info->ordered_extents);
                        cond_resched_lock(&root->fs_info->ordered_extent_lock);
index e6d9bc5..260bf95 100644 (file)
@@ -68,6 +68,8 @@ struct btrfs_ordered_sum {
 
 #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */
 
+#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
+
 struct btrfs_ordered_extent {
        /* logical offset in the file */
        u64 file_offset;
@@ -132,7 +134,7 @@ int btrfs_remove_ordered_extent(struct inode *inode,
 int btrfs_dec_test_ordered_pending(struct inode *inode,
                                       u64 file_offset, u64 io_size);
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-                            u64 start, u64 len, u64 disk_len, int type);
+                            u64 start, u64 len, u64 disk_len, int tyep);
 int btrfs_add_ordered_sum(struct inode *inode,
                          struct btrfs_ordered_extent *entry,
                          struct btrfs_ordered_sum *sum);
index e0201c3..be4fc30 100644 (file)
@@ -442,7 +442,8 @@ insert:
 
                fi = (struct btrfs_file_extent_item *)dst_ptr;
                extent_type = btrfs_file_extent_type(path->nodes[0], fi);
-               if (extent_type == BTRFS_FILE_EXTENT_REG) {
+               if (extent_type == BTRFS_FILE_EXTENT_REG ||
+                   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
                        struct btrfs_key ins;
                        ins.objectid = btrfs_file_extent_disk_bytenr(
                                                        path->nodes[0], fi);
@@ -538,7 +539,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
        item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
        found_type = btrfs_file_extent_type(eb, item);
 
-       if (found_type == BTRFS_FILE_EXTENT_REG)
+       if (found_type == BTRFS_FILE_EXTENT_REG ||
+           found_type == BTRFS_FILE_EXTENT_PREALLOC)
                extent_end = start + btrfs_file_extent_num_bytes(eb, item);
        else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                size = btrfs_file_extent_inline_len(eb, item);
@@ -562,7 +564,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
        ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
                                       start, 0);
 
-       if (ret == 0 && found_type == BTRFS_FILE_EXTENT_REG) {
+       if (ret == 0 &&
+           (found_type == BTRFS_FILE_EXTENT_REG ||
+            found_type == BTRFS_FILE_EXTENT_PREALLOC)) {
                struct btrfs_file_extent_item cmp1;
                struct btrfs_file_extent_item cmp2;
                struct btrfs_file_extent_item *existing;
@@ -2522,7 +2526,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                                struct btrfs_file_extent_item);
 
                        found_type = btrfs_file_extent_type(src, extent);
-                       if (found_type == BTRFS_FILE_EXTENT_REG) {
+                       if (found_type == BTRFS_FILE_EXTENT_REG ||
+                           found_type == BTRFS_FILE_EXTENT_PREALLOC) {
                                u64 ds = btrfs_file_extent_disk_bytenr(src,
                                                                   extent);
                                u64 dl = btrfs_file_extent_disk_num_bytes(src,