Btrfs: add compat ioctl
[safe/jmp/linux-2.6] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
15 #include <linux/compat.h>
16 #include "ctree.h"
17 #include "disk-io.h"
18 #include "transaction.h"
19 #include "btrfs_inode.h"
20 #include "ioctl.h"
21 #include "print-tree.h"
22
/*
 * Lookup arguments handed to iget5_locked(): an in-memory inode is
 * identified by its inode number together with the root it lives in.
 */
23 struct btrfs_iget_args {
24         u64 ino;
25         struct btrfs_root *root;
26 };
27
/* NOTE(review): presumably the filesystem magic number; its use is not visible here - confirm */
28 #define BTRFS_SUPER_MAGIC 0x9123682E
29
/*
 * Tentative definitions of the operation tables, so that the functions
 * below can take their addresses (presumably the initialized definitions
 * follow further down in the file - confirm).
 */
30 static struct inode_operations btrfs_dir_inode_operations;
31 static struct inode_operations btrfs_symlink_inode_operations;
32 static struct inode_operations btrfs_dir_ro_inode_operations;
33 static struct super_operations btrfs_super_ops;
34 static struct file_operations btrfs_dir_file_operations;
35 static struct inode_operations btrfs_file_inode_operations;
36 static struct address_space_operations btrfs_aops;
37 static struct address_space_operations btrfs_symlink_aops;
38 static struct file_operations btrfs_file_operations;
39
/*
 * Prototypes for helpers that are called (from btrfs_truncate_page())
 * before their definitions appear.
 */
40 static int drop_extents(struct btrfs_trans_handle *trans,
41                           struct btrfs_root *root,
42                           struct inode *inode,
43                           u64 start, u64 end, u64 *hint_block);
44 static int btrfs_get_block(struct inode *inode, sector_t iblock,
45                            struct buffer_head *result, int create);
46
47
/*
 * Table indexed by the file-type bits of an inode mode shifted down by
 * S_SHIFT (i.e. (mode & S_IFMT) >> S_SHIFT), giving the matching
 * BTRFS_FT_* type constant.
 */
48 #define S_SHIFT 12
49 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
50         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
51         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
52         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
53         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
54         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
55         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
56         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
57 };
58
59 static void btrfs_read_locked_inode(struct inode *inode)
60 {
61         struct btrfs_path *path;
62         struct btrfs_inode_item *inode_item;
63         struct btrfs_root *root = BTRFS_I(inode)->root;
64         struct btrfs_key location;
65         struct btrfs_block_group_cache *alloc_group;
66         u64 alloc_group_block;
67         int ret;
68
69         path = btrfs_alloc_path();
70         BUG_ON(!path);
71         btrfs_init_path(path);
72         mutex_lock(&root->fs_info->fs_mutex);
73
74         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
75         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
76         if (ret) {
77                 btrfs_free_path(path);
78                 goto make_bad;
79         }
80         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
81                                   path->slots[0],
82                                   struct btrfs_inode_item);
83
84         inode->i_mode = btrfs_inode_mode(inode_item);
85         inode->i_nlink = btrfs_inode_nlink(inode_item);
86         inode->i_uid = btrfs_inode_uid(inode_item);
87         inode->i_gid = btrfs_inode_gid(inode_item);
88         inode->i_size = btrfs_inode_size(inode_item);
89         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
90         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
91         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
92         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
93         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
94         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
95         inode->i_blocks = btrfs_inode_nblocks(inode_item);
96         inode->i_generation = btrfs_inode_generation(inode_item);
97         alloc_group_block = btrfs_inode_block_group(inode_item);
98         ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
99                                      (void **)&alloc_group,
100                                      alloc_group_block, 1);
101         BUG_ON(!ret);
102         BTRFS_I(inode)->block_group = alloc_group;
103
104         btrfs_free_path(path);
105         inode_item = NULL;
106
107         mutex_unlock(&root->fs_info->fs_mutex);
108
109         switch (inode->i_mode & S_IFMT) {
110 #if 0
111         default:
112                 init_special_inode(inode, inode->i_mode,
113                                    btrfs_inode_rdev(inode_item));
114                 break;
115 #endif
116         case S_IFREG:
117                 inode->i_mapping->a_ops = &btrfs_aops;
118                 inode->i_fop = &btrfs_file_operations;
119                 inode->i_op = &btrfs_file_inode_operations;
120                 break;
121         case S_IFDIR:
122                 inode->i_fop = &btrfs_dir_file_operations;
123                 if (root == root->fs_info->tree_root)
124                         inode->i_op = &btrfs_dir_ro_inode_operations;
125                 else
126                         inode->i_op = &btrfs_dir_inode_operations;
127                 break;
128         case S_IFLNK:
129                 inode->i_op = &btrfs_symlink_inode_operations;
130                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
131                 break;
132         }
133         return;
134
135 make_bad:
136         btrfs_release_path(root, path);
137         btrfs_free_path(path);
138         mutex_unlock(&root->fs_info->fs_mutex);
139         make_bad_inode(inode);
140 }
141
142 static void fill_inode_item(struct btrfs_inode_item *item,
143                             struct inode *inode)
144 {
145         btrfs_set_inode_uid(item, inode->i_uid);
146         btrfs_set_inode_gid(item, inode->i_gid);
147         btrfs_set_inode_size(item, inode->i_size);
148         btrfs_set_inode_mode(item, inode->i_mode);
149         btrfs_set_inode_nlink(item, inode->i_nlink);
150         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
151         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
152         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
153         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
154         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
155         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
156         btrfs_set_inode_nblocks(item, inode->i_blocks);
157         btrfs_set_inode_generation(item, inode->i_generation);
158         btrfs_set_inode_block_group(item,
159                                     BTRFS_I(inode)->block_group->key.objectid);
160 }
161
162 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
163                               struct btrfs_root *root,
164                               struct inode *inode)
165 {
166         struct btrfs_inode_item *inode_item;
167         struct btrfs_path *path;
168         int ret;
169
170         path = btrfs_alloc_path();
171         BUG_ON(!path);
172         btrfs_init_path(path);
173         ret = btrfs_lookup_inode(trans, root, path,
174                                  &BTRFS_I(inode)->location, 1);
175         if (ret) {
176                 if (ret > 0)
177                         ret = -ENOENT;
178                 goto failed;
179         }
180
181         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
182                                   path->slots[0],
183                                   struct btrfs_inode_item);
184
185         fill_inode_item(inode_item, inode);
186         btrfs_mark_buffer_dirty(path->nodes[0]);
187         ret = 0;
188 failed:
189         btrfs_release_path(root, path);
190         btrfs_free_path(path);
191         return ret;
192 }
193
194
195 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
196                               struct btrfs_root *root,
197                               struct inode *dir,
198                               struct dentry *dentry)
199 {
200         struct btrfs_path *path;
201         const char *name = dentry->d_name.name;
202         int name_len = dentry->d_name.len;
203         int ret = 0;
204         u64 objectid;
205         struct btrfs_dir_item *di;
206
207         path = btrfs_alloc_path();
208         BUG_ON(!path);
209         btrfs_init_path(path);
210         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
211                                     name, name_len, -1);
212         if (IS_ERR(di)) {
213                 ret = PTR_ERR(di);
214                 goto err;
215         }
216         if (!di) {
217                 ret = -ENOENT;
218                 goto err;
219         }
220         objectid = btrfs_disk_key_objectid(&di->location);
221         ret = btrfs_delete_one_dir_name(trans, root, path, di);
222         BUG_ON(ret);
223         btrfs_release_path(root, path);
224
225         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
226                                          objectid, name, name_len, -1);
227         if (IS_ERR(di)) {
228                 ret = PTR_ERR(di);
229                 goto err;
230         }
231         if (!di) {
232                 ret = -ENOENT;
233                 goto err;
234         }
235         ret = btrfs_delete_one_dir_name(trans, root, path, di);
236         BUG_ON(ret);
237
238         dentry->d_inode->i_ctime = dir->i_ctime;
239 err:
240         btrfs_free_path(path);
241         if (!ret) {
242                 dir->i_size -= name_len * 2;
243                 btrfs_update_inode(trans, root, dir);
244                 drop_nlink(dentry->d_inode);
245                 btrfs_update_inode(trans, root, dentry->d_inode);
246                 dir->i_sb->s_dirt = 1;
247         }
248         return ret;
249 }
250
251 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
252 {
253         struct btrfs_root *root;
254         struct btrfs_trans_handle *trans;
255         int ret;
256
257         root = BTRFS_I(dir)->root;
258         mutex_lock(&root->fs_info->fs_mutex);
259         trans = btrfs_start_transaction(root, 1);
260         btrfs_set_trans_block_group(trans, dir);
261         ret = btrfs_unlink_trans(trans, root, dir, dentry);
262         btrfs_end_transaction(trans, root);
263         mutex_unlock(&root->fs_info->fs_mutex);
264         btrfs_btree_balance_dirty(root);
265         return ret;
266 }
267
268 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
269 {
270         struct inode *inode = dentry->d_inode;
271         int err;
272         int ret;
273         struct btrfs_root *root = BTRFS_I(dir)->root;
274         struct btrfs_path *path;
275         struct btrfs_key key;
276         struct btrfs_trans_handle *trans;
277         struct btrfs_key found_key;
278         int found_type;
279         struct btrfs_leaf *leaf;
280         char *goodnames = "..";
281
282         path = btrfs_alloc_path();
283         BUG_ON(!path);
284         btrfs_init_path(path);
285         mutex_lock(&root->fs_info->fs_mutex);
286         trans = btrfs_start_transaction(root, 1);
287         btrfs_set_trans_block_group(trans, dir);
288         key.objectid = inode->i_ino;
289         key.offset = (u64)-1;
290         key.flags = (u32)-1;
291         while(1) {
292                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
293                 if (ret < 0) {
294                         err = ret;
295                         goto out;
296                 }
297                 BUG_ON(ret == 0);
298                 if (path->slots[0] == 0) {
299                         err = -ENOENT;
300                         goto out;
301                 }
302                 path->slots[0]--;
303                 leaf = btrfs_buffer_leaf(path->nodes[0]);
304                 btrfs_disk_key_to_cpu(&found_key,
305                                       &leaf->items[path->slots[0]].key);
306                 found_type = btrfs_key_type(&found_key);
307                 if (found_key.objectid != inode->i_ino) {
308                         err = -ENOENT;
309                         goto out;
310                 }
311                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
312                      found_type != BTRFS_DIR_INDEX_KEY) ||
313                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
314                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
315                         err = -ENOTEMPTY;
316                         goto out;
317                 }
318                 ret = btrfs_del_item(trans, root, path);
319                 BUG_ON(ret);
320
321                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
322                         break;
323                 btrfs_release_path(root, path);
324         }
325         ret = 0;
326         btrfs_release_path(root, path);
327
328         /* now the directory is empty */
329         err = btrfs_unlink_trans(trans, root, dir, dentry);
330         if (!err) {
331                 inode->i_size = 0;
332         }
333 out:
334         btrfs_release_path(root, path);
335         btrfs_free_path(path);
336         mutex_unlock(&root->fs_info->fs_mutex);
337         ret = btrfs_end_transaction(trans, root);
338         btrfs_btree_balance_dirty(root);
339         if (ret && !err)
340                 err = ret;
341         return err;
342 }
343
344 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
345                             struct btrfs_root *root,
346                             struct inode *inode)
347 {
348         struct btrfs_path *path;
349         int ret;
350
351         clear_inode(inode);
352
353         path = btrfs_alloc_path();
354         BUG_ON(!path);
355         btrfs_init_path(path);
356         ret = btrfs_lookup_inode(trans, root, path,
357                                  &BTRFS_I(inode)->location, -1);
358         BUG_ON(ret);
359         ret = btrfs_del_item(trans, root, path);
360         BUG_ON(ret);
361         btrfs_free_path(path);
362         return ret;
363 }
364
365 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
366                            u64 objectid)
367 {
368         struct btrfs_node *node;
369         int i;
370         int nritems;
371         u64 item_objectid;
372         u64 blocknr;
373         int slot;
374         int ret;
375
376         if (!path->nodes[1])
377                 return;
378         node = btrfs_buffer_node(path->nodes[1]);
379         slot = path->slots[1];
380         if (slot == 0)
381                 return;
382         nritems = btrfs_header_nritems(&node->header);
383         for (i = slot - 1; i >= 0; i--) {
384                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
385                 if (item_objectid != objectid)
386                         break;
387                 blocknr = btrfs_node_blockptr(node, i);
388                 ret = readahead_tree_block(root, blocknr);
389                 if (ret)
390                         break;
391         }
392 }
393
/*
 * Drop the items of @inode that lie at or beyond inode->i_size, inside
 * transaction @trans.
 *
 * The tree is searched repeatedly from the largest possible key of the
 * inode.  Csum, dir item, dir index and extent data items are handled;
 * any other item type, or an item of another objectid, ends the walk.
 * Items whose key offset is >= i_size are deleted, and the disk extent
 * behind a deleted non-inline file extent is freed.  A non-inline extent
 * that straddles i_size is shortened instead and the walk stops there.
 * inode->i_blocks is adjusted as extents shrink or go away.
 *
 * Returns 0, or the negative error from btrfs_search_slot().  The super
 * block is marked dirty in either case.
 */
394 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
395                                    struct btrfs_root *root,
396                                    struct inode *inode)
397 {
398         int ret;
399         struct btrfs_path *path;
400         struct btrfs_key key;
401         struct btrfs_disk_key *found_key;
402         u32 found_type;
403         struct btrfs_leaf *leaf;
404         struct btrfs_file_extent_item *fi;
405         u64 extent_start = 0;
406         u64 extent_num_blocks = 0;
407         u64 item_end = 0;
408         int found_extent;
409         int del_item;
410
411         path = btrfs_alloc_path();
412         BUG_ON(!path);
413         /* FIXME, add redo link to tree so we don't leak on crash */
        /* search key: the largest possible key for this inode */
414         key.objectid = inode->i_ino;
415         key.offset = (u64)-1;
416         key.flags = (u32)-1;
417         while(1) {
418                 btrfs_init_path(path);
419                 fi = NULL;
420                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
421                 if (ret < 0) {
422                         goto error;
423                 }
                /* no exact match: look at the item just before the slot */
424                 if (ret > 0) {
425                         BUG_ON(path->slots[0] == 0);
426                         path->slots[0]--;
427                 }
428                 reada_truncate(root, path, inode->i_ino);
429                 leaf = btrfs_buffer_leaf(path->nodes[0]);
430                 found_key = &leaf->items[path->slots[0]].key;
431                 found_type = btrfs_disk_key_type(found_key);
432                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
433                         break;
434                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
435                     found_type != BTRFS_DIR_ITEM_KEY &&
436                     found_type != BTRFS_DIR_INDEX_KEY &&
437                     found_type != BTRFS_EXTENT_DATA_KEY)
438                         break;
                /*
                 * item_end: the key offset, plus the extent length in bytes
                 * for a non-inline file extent
                 */
439                 item_end = btrfs_disk_key_offset(found_key);
440                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
441                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
442                                             path->slots[0],
443                                             struct btrfs_file_extent_item);
444                         if (btrfs_file_extent_type(fi) !=
445                             BTRFS_FILE_EXTENT_INLINE) {
446                                 item_end += btrfs_file_extent_num_blocks(fi) <<
447                                                 inode->i_blkbits;
448                         }
449                 }
450                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
451                         ret = btrfs_csum_truncate(trans, root, path,
452                                                   inode->i_size);
453                         BUG_ON(ret);
454                 }
                /*
                 * item ends before the new size: retry the search with the
                 * next lower key type, or stop when the type is already 0
                 */
455                 if (item_end < inode->i_size) {
456                         if (found_type) {
457                                 btrfs_set_key_type(&key, found_type - 1);
458                                 continue;
459                         }
460                         break;
461                 }
                /* delete only items that start at or after the new size */
462                 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
463                         del_item = 1;
464                 else
465                         del_item = 0;
466                 found_extent = 0;
467
468                 if (found_type == BTRFS_EXTENT_DATA_KEY &&
469                            btrfs_file_extent_type(fi) !=
470                            BTRFS_FILE_EXTENT_INLINE) {
471                         u64 num_dec;
472                         if (!del_item) {
                                /* extent straddles i_size: shorten it in place */
473                                 u64 orig_num_blocks =
474                                         btrfs_file_extent_num_blocks(fi);
475                                 extent_num_blocks = inode->i_size -
476                                         btrfs_disk_key_offset(found_key) +
477                                         root->blocksize - 1;
478                                 extent_num_blocks >>= inode->i_blkbits;
479                                 btrfs_set_file_extent_num_blocks(fi,
480                                                          extent_num_blocks);
481                                 inode->i_blocks -= (orig_num_blocks -
482                                         extent_num_blocks) << 3;
483                                 mark_buffer_dirty(path->nodes[0]);
484                         } else {
                                /*
                                 * remember the disk extent so it can be freed
                                 * once the item has been deleted below
                                 */
485                                 extent_start =
486                                         btrfs_file_extent_disk_blocknr(fi);
487                                 extent_num_blocks =
488                                         btrfs_file_extent_disk_num_blocks(fi);
489                                 /* FIXME blocksize != 4096 */
490                                 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
491                                 if (extent_start != 0) {
492                                         found_extent = 1;
493                                         inode->i_blocks -= num_dec;
494                                 }
495                         }
496                 }
497                 if (del_item) {
498                         ret = btrfs_del_item(trans, root, path);
499                         BUG_ON(ret);
500                 } else {
501                         break;
502                 }
503                 btrfs_release_path(root, path);
504                 if (found_extent) {
505                         ret = btrfs_free_extent(trans, root, extent_start,
506                                                 extent_num_blocks, 0);
507                         BUG_ON(ret);
508                 }
509         }
510         ret = 0;
511 error:
512         btrfs_release_path(root, path);
513         btrfs_free_path(path);
514         inode->i_sb->s_dirt = 1;
515         return ret;
516 }
517
518 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
519 {
520         struct inode *inode = mapping->host;
521         unsigned blocksize = 1 << inode->i_blkbits;
522         pgoff_t index = from >> PAGE_CACHE_SHIFT;
523         unsigned offset = from & (PAGE_CACHE_SIZE-1);
524         struct page *page;
525         char *kaddr;
526         int ret = 0;
527         struct btrfs_root *root = BTRFS_I(inode)->root;
528         u64 alloc_hint;
529         struct btrfs_key ins;
530         struct btrfs_trans_handle *trans;
531
532         if ((offset & (blocksize - 1)) == 0)
533                 goto out;
534
535         ret = -ENOMEM;
536         page = grab_cache_page(mapping, index);
537         if (!page)
538                 goto out;
539
540         if (!PageUptodate(page)) {
541                 ret = mpage_readpage(page, btrfs_get_block);
542                 lock_page(page);
543                 if (!PageUptodate(page)) {
544                         ret = -EIO;
545                         goto out;
546                 }
547         }
548         mutex_lock(&root->fs_info->fs_mutex);
549         trans = btrfs_start_transaction(root, 1);
550         btrfs_set_trans_block_group(trans, inode);
551
552         ret = drop_extents(trans, root, inode, page->index << PAGE_CACHE_SHIFT,
553                            (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint);
554         BUG_ON(ret);
555         ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
556                                  alloc_hint, (u64)-1, &ins, 1);
557         BUG_ON(ret);
558         ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
559                                        page->index << PAGE_CACHE_SHIFT,
560                                        ins.objectid, 1, 1);
561         BUG_ON(ret);
562         SetPageChecked(page);
563         kaddr = kmap(page);
564         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
565         flush_dcache_page(page);
566         btrfs_csum_file_block(trans, root, inode->i_ino,
567                               page->index << PAGE_CACHE_SHIFT,
568                               kaddr, PAGE_CACHE_SIZE);
569         kunmap(page);
570         btrfs_end_transaction(trans, root);
571         mutex_unlock(&root->fs_info->fs_mutex);
572
573         set_page_dirty(page);
574         unlock_page(page);
575         page_cache_release(page);
576 out:
577         return ret;
578 }
579
580 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
581 {
582         struct inode *inode = dentry->d_inode;
583         int err;
584
585         err = inode_change_ok(inode, attr);
586         if (err)
587                 return err;
588
589         if (S_ISREG(inode->i_mode) &&
590             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
591                 struct btrfs_trans_handle *trans;
592                 struct btrfs_root *root = BTRFS_I(inode)->root;
593                 u64 mask = root->blocksize - 1;
594                 u64 pos = (inode->i_size + mask) & ~mask;
595                 u64 hole_size;
596
597                 if (attr->ia_size <= pos)
598                         goto out;
599
600                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
601
602                 hole_size = (attr->ia_size - pos + mask) & ~mask;
603                 hole_size >>= inode->i_blkbits;
604
605                 mutex_lock(&root->fs_info->fs_mutex);
606                 trans = btrfs_start_transaction(root, 1);
607                 btrfs_set_trans_block_group(trans, inode);
608                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
609                                                pos, 0, 0, hole_size);
610                 BUG_ON(err);
611                 btrfs_end_transaction(trans, root);
612                 mutex_unlock(&root->fs_info->fs_mutex);
613         }
614 out:
615         err = inode_setattr(inode, attr);
616
617         return err;
618 }
619 static void btrfs_delete_inode(struct inode *inode)
620 {
621         struct btrfs_trans_handle *trans;
622         struct btrfs_root *root = BTRFS_I(inode)->root;
623         int ret;
624
625         truncate_inode_pages(&inode->i_data, 0);
626         if (is_bad_inode(inode)) {
627                 goto no_delete;
628         }
629         inode->i_size = 0;
630         mutex_lock(&root->fs_info->fs_mutex);
631         trans = btrfs_start_transaction(root, 1);
632         btrfs_set_trans_block_group(trans, inode);
633         ret = btrfs_truncate_in_trans(trans, root, inode);
634         BUG_ON(ret);
635         btrfs_free_inode(trans, root, inode);
636         btrfs_end_transaction(trans, root);
637         mutex_unlock(&root->fs_info->fs_mutex);
638         btrfs_btree_balance_dirty(root);
639         return;
640 no_delete:
641         clear_inode(inode);
642 }
643
644 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
645                                struct btrfs_key *location)
646 {
647         const char *name = dentry->d_name.name;
648         int namelen = dentry->d_name.len;
649         struct btrfs_dir_item *di;
650         struct btrfs_path *path;
651         struct btrfs_root *root = BTRFS_I(dir)->root;
652         int ret;
653
654         path = btrfs_alloc_path();
655         BUG_ON(!path);
656         btrfs_init_path(path);
657         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
658                                     namelen, 0);
659         if (!di || IS_ERR(di)) {
660                 location->objectid = 0;
661                 ret = 0;
662                 goto out;
663         }
664         btrfs_disk_key_to_cpu(location, &di->location);
665 out:
666         btrfs_release_path(root, path);
667         btrfs_free_path(path);
668         return ret;
669 }
670
671 static int fixup_tree_root_location(struct btrfs_root *root,
672                              struct btrfs_key *location,
673                              struct btrfs_root **sub_root)
674 {
675         struct btrfs_path *path;
676         struct btrfs_root_item *ri;
677
678         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
679                 return 0;
680         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
681                 return 0;
682
683         path = btrfs_alloc_path();
684         BUG_ON(!path);
685         mutex_lock(&root->fs_info->fs_mutex);
686
687         *sub_root = btrfs_read_fs_root(root->fs_info, location);
688         if (IS_ERR(*sub_root))
689                 return PTR_ERR(*sub_root);
690
691         ri = &(*sub_root)->root_item;
692         location->objectid = btrfs_root_dirid(ri);
693         location->flags = 0;
694         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
695         location->offset = 0;
696
697         btrfs_free_path(path);
698         mutex_unlock(&root->fs_info->fs_mutex);
699         return 0;
700 }
701
702 static int btrfs_init_locked_inode(struct inode *inode, void *p)
703 {
704         struct btrfs_iget_args *args = p;
705         inode->i_ino = args->ino;
706         BTRFS_I(inode)->root = args->root;
707         return 0;
708 }
709
710 static int btrfs_find_actor(struct inode *inode, void *opaque)
711 {
712         struct btrfs_iget_args *args = opaque;
713         return (args->ino == inode->i_ino &&
714                 args->root == BTRFS_I(inode)->root);
715 }
716
717 static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
718                                        struct btrfs_root *root)
719 {
720         struct inode *inode;
721         struct btrfs_iget_args args;
722         args.ino = objectid;
723         args.root = root;
724
725         inode = iget5_locked(s, objectid, btrfs_find_actor,
726                              btrfs_init_locked_inode,
727                              (void *)&args);
728         return inode;
729 }
730
731 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
732                                    struct nameidata *nd)
733 {
734         struct inode * inode;
735         struct btrfs_inode *bi = BTRFS_I(dir);
736         struct btrfs_root *root = bi->root;
737         struct btrfs_root *sub_root = root;
738         struct btrfs_key location;
739         int ret;
740
741         if (dentry->d_name.len > BTRFS_NAME_LEN)
742                 return ERR_PTR(-ENAMETOOLONG);
743         mutex_lock(&root->fs_info->fs_mutex);
744         ret = btrfs_inode_by_name(dir, dentry, &location);
745         mutex_unlock(&root->fs_info->fs_mutex);
746         if (ret < 0)
747                 return ERR_PTR(ret);
748         inode = NULL;
749         if (location.objectid) {
750                 ret = fixup_tree_root_location(root, &location, &sub_root);
751                 if (ret < 0)
752                         return ERR_PTR(ret);
753                 if (ret > 0)
754                         return ERR_PTR(-ENOENT);
755                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
756                                           sub_root);
757                 if (!inode)
758                         return ERR_PTR(-EACCES);
759                 if (inode->i_state & I_NEW) {
760                         if (sub_root != root) {
761 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
762                                 igrab(inode);
763                                 sub_root->inode = inode;
764                         }
765                         BTRFS_I(inode)->root = sub_root;
766                         memcpy(&BTRFS_I(inode)->location, &location,
767                                sizeof(location));
768                         btrfs_read_locked_inode(inode);
769                         unlock_new_inode(inode);
770                 }
771         }
772         return d_splice_alias(inode, dentry);
773 }
774
775 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
776                          u64 objectid)
777 {
778         struct btrfs_node *node;
779         int i;
780         u32 nritems;
781         u64 item_objectid;
782         u64 blocknr;
783         int slot;
784         int ret;
785
786         if (!path->nodes[1])
787                 return;
788         node = btrfs_buffer_node(path->nodes[1]);
789         slot = path->slots[1];
790         nritems = btrfs_header_nritems(&node->header);
791         for (i = slot + 1; i < nritems; i++) {
792                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
793                 if (item_objectid != objectid)
794                         break;
795                 blocknr = btrfs_node_blockptr(node, i);
796                 ret = readahead_tree_block(root, blocknr);
797                 if (ret)
798                         break;
799         }
800 }
801 static unsigned char btrfs_filetype_table[] = {
802         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
803 };
804
805 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
806 {
807         struct inode *inode = filp->f_path.dentry->d_inode;
808         struct btrfs_root *root = BTRFS_I(inode)->root;
809         struct btrfs_item *item;
810         struct btrfs_dir_item *di;
811         struct btrfs_key key;
812         struct btrfs_path *path;
813         int ret;
814         u32 nritems;
815         struct btrfs_leaf *leaf;
816         int slot;
817         int advance;
818         unsigned char d_type;
819         int over = 0;
820         u32 di_cur;
821         u32 di_total;
822         u32 di_len;
823         int key_type = BTRFS_DIR_INDEX_KEY;
824
825         /* FIXME, use a real flag for deciding about the key type */
826         if (root->fs_info->tree_root == root)
827                 key_type = BTRFS_DIR_ITEM_KEY;
828         mutex_lock(&root->fs_info->fs_mutex);
829         key.objectid = inode->i_ino;
830         key.flags = 0;
831         btrfs_set_key_type(&key, key_type);
832         key.offset = filp->f_pos;
833         path = btrfs_alloc_path();
834         btrfs_init_path(path);
835         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
836         if (ret < 0)
837                 goto err;
838         advance = 0;
839         reada_leaves(root, path, inode->i_ino);
840         while(1) {
841                 leaf = btrfs_buffer_leaf(path->nodes[0]);
842                 nritems = btrfs_header_nritems(&leaf->header);
843                 slot = path->slots[0];
844                 if (advance || slot >= nritems) {
845                         if (slot >= nritems -1) {
846                                 reada_leaves(root, path, inode->i_ino);
847                                 ret = btrfs_next_leaf(root, path);
848                                 if (ret)
849                                         break;
850                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
851                                 nritems = btrfs_header_nritems(&leaf->header);
852                                 slot = path->slots[0];
853                         } else {
854                                 slot++;
855                                 path->slots[0]++;
856                         }
857                 }
858                 advance = 1;
859                 item = leaf->items + slot;
860                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
861                         break;
862                 if (btrfs_disk_key_type(&item->key) != key_type)
863                         break;
864                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
865                         continue;
866                 filp->f_pos = btrfs_disk_key_offset(&item->key);
867                 advance = 1;
868                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
869                 di_cur = 0;
870                 di_total = btrfs_item_size(leaf->items + slot);
871                 while(di_cur < di_total) {
872                         d_type = btrfs_filetype_table[btrfs_dir_type(di)];
873                         over = filldir(dirent, (const char *)(di + 1),
874                                        btrfs_dir_name_len(di),
875                                        btrfs_disk_key_offset(&item->key),
876                                        btrfs_disk_key_objectid(&di->location),
877                                        d_type);
878                         if (over)
879                                 goto nopos;
880                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
881                         di_cur += di_len;
882                         di = (struct btrfs_dir_item *)((char *)di + di_len);
883                 }
884         }
885         filp->f_pos++;
886 nopos:
887         ret = 0;
888 err:
889         btrfs_release_path(root, path);
890         btrfs_free_path(path);
891         mutex_unlock(&root->fs_info->fs_mutex);
892         return ret;
893 }
894
895 static void btrfs_put_super (struct super_block * sb)
896 {
897         struct btrfs_root *root = btrfs_sb(sb);
898         int ret;
899
900         ret = close_ctree(root);
901         if (ret) {
902                 printk("close ctree returns %d\n", ret);
903         }
904         sb->s_fs_info = NULL;
905 }
906
907 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
908 {
909         struct inode * inode;
910         struct dentry * root_dentry;
911         struct btrfs_super_block *disk_super;
912         struct btrfs_root *tree_root;
913         struct btrfs_inode *bi;
914
915         sb->s_maxbytes = MAX_LFS_FILESIZE;
916         sb->s_magic = BTRFS_SUPER_MAGIC;
917         sb->s_op = &btrfs_super_ops;
918         sb->s_time_gran = 1;
919
920         tree_root = open_ctree(sb);
921
922         if (!tree_root) {
923                 printk("btrfs: open_ctree failed\n");
924                 return -EIO;
925         }
926         sb->s_fs_info = tree_root;
927         disk_super = tree_root->fs_info->disk_super;
928         printk("read in super total blocks %Lu root %Lu\n",
929                btrfs_super_total_blocks(disk_super),
930                btrfs_super_root_dir(disk_super));
931
932         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
933                                   tree_root);
934         bi = BTRFS_I(inode);
935         bi->location.objectid = inode->i_ino;
936         bi->location.offset = 0;
937         bi->location.flags = 0;
938         bi->root = tree_root;
939         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
940
941         if (!inode)
942                 return -ENOMEM;
943         if (inode->i_state & I_NEW) {
944                 btrfs_read_locked_inode(inode);
945                 unlock_new_inode(inode);
946         }
947
948         root_dentry = d_alloc_root(inode);
949         if (!root_dentry) {
950                 iput(inode);
951                 return -ENOMEM;
952         }
953         sb->s_root = root_dentry;
954         btrfs_transaction_queue_work(tree_root, HZ * 30);
955         return 0;
956 }
957
958 static int btrfs_write_inode(struct inode *inode, int wait)
959 {
960         struct btrfs_root *root = BTRFS_I(inode)->root;
961         struct btrfs_trans_handle *trans;
962         int ret = 0;
963
964         if (wait) {
965                 mutex_lock(&root->fs_info->fs_mutex);
966                 trans = btrfs_start_transaction(root, 1);
967                 btrfs_set_trans_block_group(trans, inode);
968                 ret = btrfs_commit_transaction(trans, root);
969                 mutex_unlock(&root->fs_info->fs_mutex);
970         }
971         return ret;
972 }
973
974 static void btrfs_dirty_inode(struct inode *inode)
975 {
976         struct btrfs_root *root = BTRFS_I(inode)->root;
977         struct btrfs_trans_handle *trans;
978
979         mutex_lock(&root->fs_info->fs_mutex);
980         trans = btrfs_start_transaction(root, 1);
981         btrfs_set_trans_block_group(trans, inode);
982         btrfs_update_inode(trans, root, inode);
983         btrfs_end_transaction(trans, root);
984         mutex_unlock(&root->fs_info->fs_mutex);
985         btrfs_btree_balance_dirty(root);
986 }
987
988 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
989                                      struct btrfs_root *root,
990                                      u64 objectid,
991                                      struct btrfs_block_group_cache *group,
992                                      int mode)
993 {
994         struct inode *inode;
995         struct btrfs_inode_item inode_item;
996         struct btrfs_key *location;
997         int ret;
998         int owner;
999
1000         inode = new_inode(root->fs_info->sb);
1001         if (!inode)
1002                 return ERR_PTR(-ENOMEM);
1003
1004         BTRFS_I(inode)->root = root;
1005         if (mode & S_IFDIR)
1006                 owner = 0;
1007         else
1008                 owner = 1;
1009         group = btrfs_find_block_group(root, group, 0, 0, owner);
1010         BTRFS_I(inode)->block_group = group;
1011
1012         inode->i_uid = current->fsuid;
1013         inode->i_gid = current->fsgid;
1014         inode->i_mode = mode;
1015         inode->i_ino = objectid;
1016         inode->i_blocks = 0;
1017         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1018         fill_inode_item(&inode_item, inode);
1019         location = &BTRFS_I(inode)->location;
1020         location->objectid = objectid;
1021         location->flags = 0;
1022         location->offset = 0;
1023         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1024
1025         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
1026         BUG_ON(ret);
1027
1028         insert_inode_hash(inode);
1029         return inode;
1030 }
1031
1032 static inline u8 btrfs_inode_type(struct inode *inode)
1033 {
1034         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1035 }
1036
1037 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1038                             struct dentry *dentry, struct inode *inode)
1039 {
1040         int ret;
1041         struct btrfs_key key;
1042         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1043         key.objectid = inode->i_ino;
1044         key.flags = 0;
1045         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1046         key.offset = 0;
1047
1048         ret = btrfs_insert_dir_item(trans, root,
1049                                     dentry->d_name.name, dentry->d_name.len,
1050                                     dentry->d_parent->d_inode->i_ino,
1051                                     &key, btrfs_inode_type(inode));
1052         if (ret == 0) {
1053                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1054                 ret = btrfs_update_inode(trans, root,
1055                                          dentry->d_parent->d_inode);
1056         }
1057         return ret;
1058 }
1059
1060 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1061                             struct dentry *dentry, struct inode *inode)
1062 {
1063         int err = btrfs_add_link(trans, dentry, inode);
1064         if (!err) {
1065                 d_instantiate(dentry, inode);
1066                 return 0;
1067         }
1068         if (err > 0)
1069                 err = -EEXIST;
1070         return err;
1071 }
1072
1073 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1074                         int mode, struct nameidata *nd)
1075 {
1076         struct btrfs_trans_handle *trans;
1077         struct btrfs_root *root = BTRFS_I(dir)->root;
1078         struct inode *inode;
1079         int err;
1080         int drop_inode = 0;
1081         u64 objectid;
1082
1083         mutex_lock(&root->fs_info->fs_mutex);
1084         trans = btrfs_start_transaction(root, 1);
1085         btrfs_set_trans_block_group(trans, dir);
1086
1087         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1088         if (err) {
1089                 err = -ENOSPC;
1090                 goto out_unlock;
1091         }
1092
1093         inode = btrfs_new_inode(trans, root, objectid,
1094                                 BTRFS_I(dir)->block_group, mode);
1095         err = PTR_ERR(inode);
1096         if (IS_ERR(inode))
1097                 goto out_unlock;
1098
1099         btrfs_set_trans_block_group(trans, inode);
1100         err = btrfs_add_nondir(trans, dentry, inode);
1101         if (err)
1102                 drop_inode = 1;
1103         else {
1104                 inode->i_mapping->a_ops = &btrfs_aops;
1105                 inode->i_fop = &btrfs_file_operations;
1106                 inode->i_op = &btrfs_file_inode_operations;
1107         }
1108         dir->i_sb->s_dirt = 1;
1109         btrfs_update_inode_block_group(trans, inode);
1110         btrfs_update_inode_block_group(trans, dir);
1111 out_unlock:
1112         btrfs_end_transaction(trans, root);
1113         mutex_unlock(&root->fs_info->fs_mutex);
1114
1115         if (drop_inode) {
1116                 inode_dec_link_count(inode);
1117                 iput(inode);
1118         }
1119         btrfs_btree_balance_dirty(root);
1120         return err;
1121 }
1122
1123 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1124                       struct dentry *dentry)
1125 {
1126         struct btrfs_trans_handle *trans;
1127         struct btrfs_root *root = BTRFS_I(dir)->root;
1128         struct inode *inode = old_dentry->d_inode;
1129         int err;
1130         int drop_inode = 0;
1131
1132         if (inode->i_nlink == 0)
1133                 return -ENOENT;
1134
1135         inc_nlink(inode);
1136         mutex_lock(&root->fs_info->fs_mutex);
1137         trans = btrfs_start_transaction(root, 1);
1138         btrfs_set_trans_block_group(trans, dir);
1139         atomic_inc(&inode->i_count);
1140         err = btrfs_add_nondir(trans, dentry, inode);
1141         if (err)
1142                 drop_inode = 1;
1143         dir->i_sb->s_dirt = 1;
1144         btrfs_update_inode_block_group(trans, dir);
1145         btrfs_update_inode(trans, root, inode);
1146
1147         btrfs_end_transaction(trans, root);
1148         mutex_unlock(&root->fs_info->fs_mutex);
1149
1150         if (drop_inode) {
1151                 inode_dec_link_count(inode);
1152                 iput(inode);
1153         }
1154         btrfs_btree_balance_dirty(root);
1155         return err;
1156 }
1157
1158 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1159                                 struct btrfs_root *root,
1160                                 u64 objectid, u64 dirid)
1161 {
1162         int ret;
1163         char buf[2];
1164         struct btrfs_key key;
1165
1166         buf[0] = '.';
1167         buf[1] = '.';
1168
1169         key.objectid = objectid;
1170         key.offset = 0;
1171         key.flags = 0;
1172         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1173
1174         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1175                                     &key, BTRFS_FT_DIR);
1176         if (ret)
1177                 goto error;
1178         key.objectid = dirid;
1179         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1180                                     &key, BTRFS_FT_DIR);
1181         if (ret)
1182                 goto error;
1183 error:
1184         return ret;
1185 }
1186
1187 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1188 {
1189         struct inode *inode;
1190         struct btrfs_trans_handle *trans;
1191         struct btrfs_root *root = BTRFS_I(dir)->root;
1192         int err = 0;
1193         int drop_on_err = 0;
1194         u64 objectid;
1195
1196         mutex_lock(&root->fs_info->fs_mutex);
1197         trans = btrfs_start_transaction(root, 1);
1198         btrfs_set_trans_block_group(trans, dir);
1199         if (IS_ERR(trans)) {
1200                 err = PTR_ERR(trans);
1201                 goto out_unlock;
1202         }
1203
1204         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1205         if (err) {
1206                 err = -ENOSPC;
1207                 goto out_unlock;
1208         }
1209
1210         inode = btrfs_new_inode(trans, root, objectid,
1211                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1212         if (IS_ERR(inode)) {
1213                 err = PTR_ERR(inode);
1214                 goto out_fail;
1215         }
1216         drop_on_err = 1;
1217         inode->i_op = &btrfs_dir_inode_operations;
1218         inode->i_fop = &btrfs_dir_file_operations;
1219         btrfs_set_trans_block_group(trans, inode);
1220
1221         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1222         if (err)
1223                 goto out_fail;
1224
1225         inode->i_size = 6;
1226         err = btrfs_update_inode(trans, root, inode);
1227         if (err)
1228                 goto out_fail;
1229         err = btrfs_add_link(trans, dentry, inode);
1230         if (err)
1231                 goto out_fail;
1232         d_instantiate(dentry, inode);
1233         drop_on_err = 0;
1234         dir->i_sb->s_dirt = 1;
1235         btrfs_update_inode_block_group(trans, inode);
1236         btrfs_update_inode_block_group(trans, dir);
1237
1238 out_fail:
1239         btrfs_end_transaction(trans, root);
1240 out_unlock:
1241         mutex_unlock(&root->fs_info->fs_mutex);
1242         if (drop_on_err)
1243                 iput(inode);
1244         btrfs_btree_balance_dirty(root);
1245         return err;
1246 }
1247
1248 static int btrfs_sync_file(struct file *file,
1249                            struct dentry *dentry, int datasync)
1250 {
1251         struct inode *inode = dentry->d_inode;
1252         struct btrfs_root *root = BTRFS_I(inode)->root;
1253         int ret;
1254         struct btrfs_trans_handle *trans;
1255
1256         mutex_lock(&root->fs_info->fs_mutex);
1257         trans = btrfs_start_transaction(root, 1);
1258         if (!trans) {
1259                 ret = -ENOMEM;
1260                 goto out;
1261         }
1262         ret = btrfs_commit_transaction(trans, root);
1263         mutex_unlock(&root->fs_info->fs_mutex);
1264 out:
1265         return ret > 0 ? EIO : ret;
1266 }
1267
1268 static int btrfs_sync_fs(struct super_block *sb, int wait)
1269 {
1270         struct btrfs_trans_handle *trans;
1271         struct btrfs_root *root;
1272         int ret;
1273         root = btrfs_sb(sb);
1274
1275         sb->s_dirt = 0;
1276         if (!wait) {
1277                 filemap_flush(root->fs_info->btree_inode->i_mapping);
1278                 return 0;
1279         }
1280         mutex_lock(&root->fs_info->fs_mutex);
1281         trans = btrfs_start_transaction(root, 1);
1282         ret = btrfs_commit_transaction(trans, root);
1283         sb->s_dirt = 0;
1284         BUG_ON(ret);
1285 printk("btrfs sync_fs\n");
1286         mutex_unlock(&root->fs_info->fs_mutex);
1287         return 0;
1288 }
1289
/*
 * Values for the "create" argument of btrfs_get_block_lock(), which tests
 * them with a bitwise AND:
 *   NO_CREATE - no bit set, only look up an existing mapping
 *   CREATE    - drop any extent covering the block and allocate a new one
 *   NO_DIRECT - fail with -EINVAL when the block lives in an inline extent
 */
#define BTRFS_GET_BLOCK_NO_CREATE 0
#define BTRFS_GET_BLOCK_CREATE 1
#define BTRFS_GET_BLOCK_NO_DIRECT 2
1293
/*
 * Map file block @iblock of @inode onto buffer head @result.
 *
 * @create is a mask of BTRFS_GET_BLOCK_* bits.  The function does not take
 * fs_mutex itself; the wrappers in this file (btrfs_get_block,
 * btrfs_get_block_bmap) take it around the call.
 *
 * Outline:
 *  - with BTRFS_GET_BLOCK_CREATE, a transaction is started and any extent
 *    covering the block is dropped first (this path also trips WARN_ON);
 *  - the file extent item at or before the block's byte offset is looked
 *    up;
 *  - a regular extent containing @iblock is mapped via
 *    btrfs_map_bh_to_logical(); a regular extent with disk block 0 leaves
 *    @result untouched;
 *  - an inline extent is copied into the buffer's page, the rest of the
 *    page is zeroed and the buffer is marked uptodate;
 *  - when nothing was found and CREATE is set, one block is allocated and
 *    a file extent item is inserted for it.
 *
 * Returns 0 or a negative error.  If a transaction was started, the
 * return value is the result of btrfs_end_transaction().
 */
static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
                           struct buffer_head *result, int create)
{
        int ret;
        int err = 0;
        u64 blocknr;
        u64 extent_start = 0;
        u64 extent_end = 0;
        u64 objectid = inode->i_ino;
        u32 found_type;
        u64 alloc_hint = 0;
        struct btrfs_path *path;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_file_extent_item *item;
        struct btrfs_leaf *leaf;
        struct btrfs_disk_key *found_key;
        struct btrfs_trans_handle *trans = NULL;

        path = btrfs_alloc_path();
        BUG_ON(!path);
        btrfs_init_path(path);
        if (create & BTRFS_GET_BLOCK_CREATE) {
                WARN_ON(1);
                /* this almost but not quite works */
                trans = btrfs_start_transaction(root, 1);
                if (!trans) {
                        err = -ENOMEM;
                        goto out;
                }
                /*
                 * NOTE(review): the shifts below are done in sector_t;
                 * if sector_t is 32 bits wide they can overflow before
                 * being widened to u64 -- confirm.
                 */
                ret = drop_extents(trans, root, inode,
                                   iblock << inode->i_blkbits,
                                   (iblock + 1) << inode->i_blkbits,
                                   &alloc_hint);
                BUG_ON(ret);
        }

        ret = btrfs_lookup_file_extent(NULL, root, path,
                                       inode->i_ino,
                                       iblock << inode->i_blkbits, 0);
        if (ret < 0) {
                err = ret;
                goto out;
        }

        /* no exact match: look at the item just before the search position */
        if (ret != 0) {
                if (path->slots[0] == 0) {
                        btrfs_release_path(root, path);
                        goto not_found;
                }
                path->slots[0]--;
        }

        item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
                              struct btrfs_file_extent_item);
        leaf = btrfs_buffer_leaf(path->nodes[0]);
        blocknr = btrfs_file_extent_disk_blocknr(item);
        blocknr += btrfs_file_extent_offset(item);

        /* are we inside the extent that was found? */
        found_key = &leaf->items[path->slots[0]].key;
        found_type = btrfs_disk_key_type(found_key);
        if (btrfs_disk_key_objectid(found_key) != objectid ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
                extent_end = 0;
                extent_start = 0;
                goto not_found;
        }
        /* found_type is reused: from here on it holds the extent type */
        found_type = btrfs_file_extent_type(item);
        extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
        if (found_type == BTRFS_FILE_EXTENT_REG) {
                /* convert the byte offset of the key into a block number */
                extent_start = extent_start >> inode->i_blkbits;
                extent_end = extent_start + btrfs_file_extent_num_blocks(item);
                err = 0;
                /* disk block 0: return without mapping the buffer */
                if (btrfs_file_extent_disk_blocknr(item) == 0)
                        goto out;
                if (iblock >= extent_start && iblock < extent_end) {
                        btrfs_map_bh_to_logical(root, result, blocknr +
                                                iblock - extent_start);
                        goto out;
                }
                /* block lies outside this extent: fall through to not_found */
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                char *ptr;
                char *map;
                u32 size;

                if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
                        err = -EINVAL;
                        goto out;
                }
                size = btrfs_file_extent_inline_len(leaf->items +
                                                    path->slots[0]);
                extent_end = (extent_start + size) >> inode->i_blkbits;
                extent_start >>= inode->i_blkbits;
                /*
                 * NOTE(review): the upper bound is inclusive here but
                 * exclusive in the regular extent case -- confirm intent.
                 */
                if (iblock < extent_start || iblock > extent_end) {
                        goto not_found;
                }
                /* copy the inline data into the page and zero the remainder */
                ptr = btrfs_file_extent_inline_start(item);
                map = kmap(result->b_page);
                memcpy(map, ptr, size);
                memset(map + size, 0, PAGE_CACHE_SIZE - size);
                flush_dcache_page(result->b_page);
                kunmap(result->b_page);
                set_buffer_uptodate(result);
                SetPageChecked(result->b_page);
                btrfs_map_bh_to_logical(root, result, 0);
                /*
                 * NOTE(review): control falls through to not_found, so
                 * with CREATE set a new block is still allocated -- confirm.
                 */
        }
not_found:
        if (create & BTRFS_GET_BLOCK_CREATE) {
                struct btrfs_key ins;
                ret = btrfs_alloc_extent(trans, root, inode->i_ino,
                                         1, alloc_hint, (u64)-1,
                                         &ins, 1);
                BUG_ON(ret);
                ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
                                               iblock << inode->i_blkbits,
                                               ins.objectid, ins.offset,
                                               ins.offset);
                BUG_ON(ret);
                SetPageChecked(result->b_page);
                btrfs_map_bh_to_logical(root, result, ins.objectid);
        }
out:
        /* NOTE(review): this overwrites any error stored in err earlier */
        if (trans)
                err = btrfs_end_transaction(trans, root);
        btrfs_free_path(path);
        return err;
}
1421
1422 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1423                            struct buffer_head *result, int create)
1424 {
1425         int err;
1426         struct btrfs_root *root = BTRFS_I(inode)->root;
1427         mutex_lock(&root->fs_info->fs_mutex);
1428         err = btrfs_get_block_lock(inode, iblock, result, create);
1429         mutex_unlock(&root->fs_info->fs_mutex);
1430         return err;
1431 }
1432
1433 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1434                            struct buffer_head *result, int create)
1435 {
1436         struct btrfs_root *root = BTRFS_I(inode)->root;
1437         mutex_lock(&root->fs_info->fs_mutex);
1438         btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
1439         mutex_unlock(&root->fs_info->fs_mutex);
1440         return 0;
1441 }
1442
1443 static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1444 {
1445         return generic_block_bmap(as, block, btrfs_get_block_bmap);
1446 }
1447
1448 static int btrfs_prepare_write(struct file *file, struct page *page,
1449                                unsigned from, unsigned to)
1450 {
1451         return block_prepare_write(page, from, to, btrfs_get_block);
1452 }
1453
1454 static void btrfs_write_super(struct super_block *sb)
1455 {
1456         sb->s_dirt = 0;
1457 }
1458
1459 static int btrfs_readpage(struct file *file, struct page *page)
1460 {
1461         return mpage_readpage(page, btrfs_get_block);
1462 }
1463
/*
 * Write out all dirty buffers of a locked page.
 *
 * @inode: inode owning the page
 * @page:  page to write; must be locked on entry (enforced with BUG_ON)
 *         and is unlocked before the function returns
 * @wbc:   writeback control; sync_mode and nonblocking decide whether a
 *         contended buffer lock is waited for or the page is redirtied
 *
 * Returns 0, or the error from btrfs_get_block() after the recovery path
 * has submitted the buffers that were already mapped.
 *
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by only looking at the buffer
 * state inside lock_buffer().
 *
 * If block_write_full_page() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
 * locked buffer.   This only can happen if someone has written the buffer
 * directly, with submit_bh().  At the address_space level PageWriteback
 * prevents this contention from occurring.
 */
static int __btrfs_write_full_page(struct inode *inode, struct page *page,
                                   struct writeback_control *wbc)
{
        int err;
        sector_t block;
        sector_t last_block;
        struct buffer_head *bh, *head;
        const unsigned blocksize = 1 << inode->i_blkbits;
        int nr_underway = 0;

        BUG_ON(!PageLocked(page));

        /* index of the block that holds the last byte of the file */
        last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;

        if (!page_has_buffers(page)) {
                create_empty_buffers(page, blocksize,
                                        (1 << BH_Dirty)|(1 << BH_Uptodate));
        }

        /*
         * Be very careful.  We have no exclusion from __set_page_dirty_buffers
         * here, and the (potentially unmapped) buffers may become dirty at
         * any time.  If a buffer becomes dirty here after we've inspected it
         * then we just miss that fact, and the page stays dirty.
         *
         * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
         * handle that here by just cleaning them.
         */

        /* file block number of the first buffer on this page */
        block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
        head = page_buffers(page);
        bh = head;

        /*
         * Get all the dirty buffers mapped to disk addresses and
         * handle any aliases from the underlying blockdev's mapping.
         */
        do {
                if (block > last_block) {
                        /*
                         * mapped buffers outside i_size will occur, because
                         * this page can be outside i_size when there is a
                         * truncate in progress.
                         */
                        /*
                         * The buffer was zeroed by block_write_full_page()
                         */
                        clear_buffer_dirty(bh);
                        set_buffer_uptodate(bh);
                } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
                        WARN_ON(bh->b_size != blocksize);
                        /* lookup only: create is passed as 0 */
                        err = btrfs_get_block(inode, block, bh, 0);
                        if (err) {
printk("writepage going to recovery err %d\n", err);
                                goto recover;
                        }
                        if (buffer_new(bh)) {
                                /* blockdev mappings never come here */
                                clear_buffer_new(bh);
                        }
                }
                bh = bh->b_this_page;
                block++;
        } while (bh != head);

        /* second pass: lock the mapped buffers and tag the dirty ones */
        do {
                if (!buffer_mapped(bh))
                        continue;
                /*
                 * If it's a fully non-blocking write attempt and we cannot
                 * lock the buffer then redirty the page.  Note that this can
                 * potentially cause a busy-wait loop from pdflush and kswapd
                 * activity, but those code paths have their own higher-level
                 * throttling.
                 */
                if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
                        lock_buffer(bh);
                } else if (test_set_buffer_locked(bh)) {
                        redirty_page_for_writepage(wbc, page);
                        continue;
                }
                /* buffers whose block number is 0 are never submitted */
                if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
                        mark_buffer_async_write(bh);
                } else {
                        unlock_buffer(bh);
                }
        } while ((bh = bh->b_this_page) != head);

        /*
         * The page and its buffers are protected by PageWriteback(), so we can
         * drop the bh refcounts early.
         */
        BUG_ON(PageWriteback(page));
        set_page_writeback(page);

        /* third pass: submit everything tagged for async write */
        do {
                /* b_this_page is sampled before the buffer is submitted */
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
                        submit_bh(WRITE, bh);
                        nr_underway++;
                }
                bh = next;
        } while (bh != head);
        unlock_page(page);

        err = 0;
done:
        if (nr_underway == 0) {
                /*
                 * The page was marked dirty, but the buffers were
                 * clean.  Someone wrote them back by hand with
                 * ll_rw_block/submit_bh.  A rare case.
                 */
                int uptodate = 1;
                do {
                        if (!buffer_uptodate(bh)) {
                                uptodate = 0;
                                break;
                        }
                        bh = bh->b_this_page;
                } while (bh != head);
                if (uptodate)
                        SetPageUptodate(page);
                /* nothing was submitted, so writeback is ended here */
                end_page_writeback(page);
        }
        return err;

recover:
        /*
         * ENOSPC, or some other error.  We may already have added some
         * blocks to the file, so we need to write these out to avoid
         * exposing stale data.
         * The page is currently locked and not marked for writeback
         */
        bh = head;
        /* Recovery: lock and submit the mapped buffers */
        do {
                if (buffer_mapped(bh) && buffer_dirty(bh)) {
                        lock_buffer(bh);
                        mark_buffer_async_write(bh);
                } else {
                        /*
                         * The buffer may have been set dirty during
                         * attachment to a dirty page.
                         */
                        clear_buffer_dirty(bh);
                }
        } while ((bh = bh->b_this_page) != head);
        SetPageError(page);
        BUG_ON(PageWriteback(page));
        set_page_writeback(page);
        do {
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
                        clear_buffer_dirty(bh);
                        submit_bh(WRITE, bh);
                        nr_underway++;
                }
                bh = next;
        } while (bh != head);
        unlock_page(page);
        /* err still holds the btrfs_get_block() failure */
        goto done;
}
1639
1640 /*
1641  * The generic ->writepage function for buffer-backed address_spaces
1642  */
1643 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1644 {
1645         struct inode * const inode = page->mapping->host;
1646         loff_t i_size = i_size_read(inode);
1647         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1648         unsigned offset;
1649         void *kaddr;
1650
1651         /* Is the page fully inside i_size? */
1652         if (page->index < end_index)
1653                 return __btrfs_write_full_page(inode, page, wbc);
1654
1655         /* Is the page fully outside i_size? (truncate in progress) */
1656         offset = i_size & (PAGE_CACHE_SIZE-1);
1657         if (page->index >= end_index+1 || !offset) {
1658                 /*
1659                  * The page may have dirty, unmapped buffers.  For example,
1660                  * they may have been added in ext3_writepage().  Make them
1661                  * freeable here, so the page does not leak.
1662                  */
1663                 block_invalidatepage(page, 0);
1664                 unlock_page(page);
1665                 return 0; /* don't care */
1666         }
1667
1668         /*
1669          * The page straddles i_size.  It must be zeroed out on each and every
1670          * writepage invokation because it may be mmapped.  "A file is mapped
1671          * in multiples of the page size.  For a file that is not a multiple of
1672          * the  page size, the remaining memory is zeroed when mapped, and
1673          * writes to that region are not written out to the file."
1674          */
1675         kaddr = kmap_atomic(page, KM_USER0);
1676         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1677         flush_dcache_page(page);
1678         kunmap_atomic(kaddr, KM_USER0);
1679         return __btrfs_write_full_page(inode, page, wbc);
1680 }
1681
1682 static void btrfs_truncate(struct inode *inode)
1683 {
1684         struct btrfs_root *root = BTRFS_I(inode)->root;
1685         int ret;
1686         struct btrfs_trans_handle *trans;
1687
1688         if (!S_ISREG(inode->i_mode))
1689                 return;
1690         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1691                 return;
1692
1693         btrfs_truncate_page(inode->i_mapping, inode->i_size);
1694
1695         mutex_lock(&root->fs_info->fs_mutex);
1696         trans = btrfs_start_transaction(root, 1);
1697         btrfs_set_trans_block_group(trans, inode);
1698
1699         /* FIXME, add redo link to tree so we don't leak on crash */
1700         ret = btrfs_truncate_in_trans(trans, root, inode);
1701         BUG_ON(ret);
1702         btrfs_update_inode(trans, root, inode);
1703         ret = btrfs_end_transaction(trans, root);
1704         BUG_ON(ret);
1705         mutex_unlock(&root->fs_info->fs_mutex);
1706         btrfs_btree_balance_dirty(root);
1707 }
1708
1709 static int btrfs_commit_write(struct file *file, struct page *page,
1710                               unsigned from, unsigned to)
1711 {
1712         struct inode *inode = page->mapping->host;
1713         struct buffer_head *bh;
1714         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1715
1716         SetPageUptodate(page);
1717         bh = page_buffers(page);
1718         set_buffer_uptodate(bh);
1719         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1720                 set_page_dirty(page);
1721         }
1722         if (pos > inode->i_size) {
1723                 i_size_write(inode, pos);
1724                 mark_inode_dirty(inode);
1725         }
1726         return 0;
1727 }
1728
1729 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1730                                 struct page **prepared_pages,
1731                                 const char __user * buf)
1732 {
1733         long page_fault = 0;
1734         int i;
1735         int offset = pos & (PAGE_CACHE_SIZE - 1);
1736
1737         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1738                 size_t count = min_t(size_t,
1739                                      PAGE_CACHE_SIZE - offset, write_bytes);
1740                 struct page *page = prepared_pages[i];
1741                 fault_in_pages_readable(buf, count);
1742
1743                 /* Copy data from userspace to the current page */
1744                 kmap(page);
1745                 page_fault = __copy_from_user(page_address(page) + offset,
1746                                               buf, count);
1747                 /* Flush processor's dcache for this page */
1748                 flush_dcache_page(page);
1749                 kunmap(page);
1750                 buf += count;
1751                 write_bytes -= count;
1752
1753                 if (page_fault)
1754                         break;
1755         }
1756         return page_fault ? -EFAULT : 0;
1757 }
1758
1759 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1760 {
1761         size_t i;
1762         for (i = 0; i < num_pages; i++) {
1763                 if (!pages[i])
1764                         break;
1765                 unlock_page(pages[i]);
1766                 mark_page_accessed(pages[i]);
1767                 page_cache_release(pages[i]);
1768         }
1769 }
1770 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1771                                    struct btrfs_root *root,
1772                                    struct file *file,
1773                                    struct page **pages,
1774                                    size_t num_pages,
1775                                    loff_t pos,
1776                                    size_t write_bytes)
1777 {
1778         int i;
1779         int offset;
1780         int err = 0;
1781         int ret;
1782         int this_write;
1783         struct inode *inode = file->f_path.dentry->d_inode;
1784         struct buffer_head *bh;
1785         struct btrfs_file_extent_item *ei;
1786
1787         for (i = 0; i < num_pages; i++) {
1788                 offset = pos & (PAGE_CACHE_SIZE -1);
1789                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1790                 /* FIXME, one block at a time */
1791
1792                 mutex_lock(&root->fs_info->fs_mutex);
1793                 trans = btrfs_start_transaction(root, 1);
1794                 btrfs_set_trans_block_group(trans, inode);
1795
1796                 bh = page_buffers(pages[i]);
1797                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1798                         struct btrfs_key key;
1799                         struct btrfs_path *path;
1800                         char *ptr;
1801                         u32 datasize;
1802
1803                         path = btrfs_alloc_path();
1804                         BUG_ON(!path);
1805                         key.objectid = inode->i_ino;
1806                         key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1807                         key.flags = 0;
1808                         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1809                         BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1810                         datasize = offset +
1811                                 btrfs_file_extent_calc_inline_size(write_bytes);
1812                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1813                                                       datasize);
1814                         BUG_ON(ret);
1815                         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1816                                path->slots[0], struct btrfs_file_extent_item);
1817                         btrfs_set_file_extent_generation(ei, trans->transid);
1818                         btrfs_set_file_extent_type(ei,
1819                                                    BTRFS_FILE_EXTENT_INLINE);
1820                         ptr = btrfs_file_extent_inline_start(ei);
1821                         btrfs_memcpy(root, path->nodes[0]->b_data,
1822                                      ptr, bh->b_data, offset + write_bytes);
1823                         mark_buffer_dirty(path->nodes[0]);
1824                         btrfs_free_path(path);
1825                 } else if (buffer_mapped(bh)) {
1826                         btrfs_csum_file_block(trans, root, inode->i_ino,
1827                                       pages[i]->index << PAGE_CACHE_SHIFT,
1828                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1829                         kunmap(pages[i]);
1830                 }
1831                 SetPageChecked(pages[i]);
1832                 // btrfs_update_inode_block_group(trans, inode);
1833                 ret = btrfs_end_transaction(trans, root);
1834                 BUG_ON(ret);
1835                 mutex_unlock(&root->fs_info->fs_mutex);
1836
1837                 ret = btrfs_commit_write(file, pages[i], offset,
1838                                          offset + this_write);
1839                 pos += this_write;
1840                 if (ret) {
1841                         err = ret;
1842                         goto failed;
1843                 }
1844                 WARN_ON(this_write > write_bytes);
1845                 write_bytes -= this_write;
1846         }
1847 failed:
1848         return err;
1849 }
1850
/*
 * Remove the parts of @inode's file extent items that overlap the byte
 * range [@start, @end).
 *
 * The extent items are walked starting from the one found for @start:
 *   - an extent that begins before @start is kept, and a regular extent
 *     has its block count shortened so that it ends at @start;
 *   - an extent that begins at or after @start is deleted and, when it
 *     points at real disk blocks, btrfs_free_extent() is called on them;
 *   - a regular extent that reaches past @end gets a new "bookend" item
 *     keyed at @end that refers to the tail of the old extent, and the
 *     walk stops there.
 * inode->i_blocks is adjusted along the way for extents with a non-zero
 * disk block number.
 *
 * @hint_block: set to the disk block number of each regular extent that
 *              is trimmed or deleted; left untouched otherwise.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the
 * negative value returned by btrfs_lookup_file_extent().  Most other
 * failures hit BUG_ON().
 */
static int drop_extents(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct inode *inode,
                          u64 start, u64 end, u64 *hint_block)
{
        int ret;
        struct btrfs_key key;
        struct btrfs_leaf *leaf;
        int slot;
        struct btrfs_file_extent_item *extent;
        u64 extent_end = 0;
        int keep;
        struct btrfs_file_extent_item old;
        struct btrfs_path *path;
        u64 search_start = start;
        int bookend;
        int found_type;
        int found_extent;
        int found_inline;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        while(1) {
                /* Each pass begins with a fresh lookup at search_start. */
                btrfs_release_path(root, path);
                ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
                                               search_start, -1);
                if (ret < 0)
                        goto out;
                if (ret > 0) {
                        /*
                         * No item with exactly this key: step back to the
                         * previous slot, or finish if there is none.
                         */
                        if (path->slots[0] == 0) {
                                ret = 0;
                                goto out;
                        }
                        path->slots[0]--;
                }
                keep = 0;
                bookend = 0;
                found_extent = 0;
                found_inline = 0;
                extent = NULL;
                leaf = btrfs_buffer_leaf(path->nodes[0]);
                slot = path->slots[0];
                btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
                /* Done once the item is past the range or not ours. */
                if (key.offset >= end || key.objectid != inode->i_ino) {
                        ret = 0;
                        goto out;
                }
                if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
                        ret = 0;
                        goto out;
                }
                extent = btrfs_item_ptr(leaf, slot,
                                        struct btrfs_file_extent_item);
                found_type = btrfs_file_extent_type(extent);
                /* Work out the file offset at which this extent ends. */
                if (found_type == BTRFS_FILE_EXTENT_REG) {
                        extent_end = key.offset +
                                (btrfs_file_extent_num_blocks(extent) <<
                                 inode->i_blkbits);
                        found_extent = 1;
                } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                        found_inline = 1;
                        extent_end = key.offset +
                             btrfs_file_extent_inline_len(leaf->items + slot);
                }

                /* Neither a regular nor an inline extent: stop. */
                if (!found_extent && !found_inline) {
                        ret = 0;
                        goto out;
                }

                /* The extent ends at or before the search point: stop. */
                if (search_start >= extent_end) {
                        ret = 0;
                        goto out;
                }

                /*
                 * Next lookup starts where this extent ends; for an inline
                 * extent that end is rounded up to root->blocksize.
                 */
                if (found_inline) {
                        u64 mask = root->blocksize - 1;
                        search_start = (extent_end + mask) & ~mask;
                } else
                        search_start = extent_end;

                /*
                 * The extent reaches past 'end', so its tail must survive
                 * as a bookend item.  Save a copy of the item and take an
                 * extra reference on its disk blocks before it is trimmed
                 * or deleted below.
                 */
                if (end < extent_end && end >= key.offset) {
                        if (found_extent) {
                                u64 disk_blocknr =
                                        btrfs_file_extent_disk_blocknr(extent);
                                u64 disk_num_blocks =
                                      btrfs_file_extent_disk_num_blocks(extent);
                                memcpy(&old, extent, sizeof(old));
                                if (disk_blocknr != 0) {
                                        ret = btrfs_inc_extent_ref(trans, root,
                                                 disk_blocknr, disk_num_blocks);
                                        BUG_ON(ret);
                                }
                        }
                        /* A bookend on an inline extent is not expected. */
                        WARN_ON(found_inline);
                        bookend = 1;
                }

                if (start > key.offset) {
                        u64 new_num;
                        u64 old_num;
                        /* truncate existing extent */
                        keep = 1;
                        WARN_ON(start & (root->blocksize - 1));
                        if (found_extent) {
                                new_num = (start - key.offset) >>
                                        inode->i_blkbits;
                                old_num = btrfs_file_extent_num_blocks(extent);
                                *hint_block =
                                        btrfs_file_extent_disk_blocknr(extent);
                                /*
                                 * '<< 3' turns blocks into i_blocks units;
                                 * presumably 512-byte sectors per 4096-byte
                                 * block -- TODO confirm.
                                 */
                                if (btrfs_file_extent_disk_blocknr(extent)) {
                                        inode->i_blocks -=
                                                (old_num - new_num) << 3;
                                }
                                btrfs_set_file_extent_num_blocks(extent,
                                                                 new_num);
                                mark_buffer_dirty(path->nodes[0]);
                        } else {
                                /* Inline extents are never trimmed here. */
                                WARN_ON(1);
                        }
                }
                if (!keep) {
                        /* The extent begins inside the range: delete it. */
                        u64 disk_blocknr = 0;
                        u64 disk_num_blocks = 0;
                        u64 extent_num_blocks = 0;
                        if (found_extent) {
                                /* Read these before the item goes away. */
                                disk_blocknr =
                                      btrfs_file_extent_disk_blocknr(extent);
                                disk_num_blocks =
                                      btrfs_file_extent_disk_num_blocks(extent);
                                extent_num_blocks =
                                      btrfs_file_extent_num_blocks(extent);
                                *hint_block =
                                        btrfs_file_extent_disk_blocknr(extent);
                        }
                        ret = btrfs_del_item(trans, root, path);
                        BUG_ON(ret);
                        btrfs_release_path(root, path);
                        extent = NULL;
                        if (found_extent && disk_blocknr != 0) {
                                inode->i_blocks -= extent_num_blocks << 3;
                                ret = btrfs_free_extent(trans, root,
                                                        disk_blocknr,
                                                        disk_num_blocks, 0);
                        }

                        BUG_ON(ret);
                        if (!bookend && search_start >= end) {
                                ret = 0;
                                goto out;
                        }
                        if (!bookend)
                                continue;
                }
                if (bookend && found_extent) {
                        /* create bookend */
                        struct btrfs_key ins;
                        ins.objectid = inode->i_ino;
                        ins.offset = end;
                        ins.flags = 0;
                        btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);

                        btrfs_release_path(root, path);
                        ret = btrfs_insert_empty_item(trans, root, path, &ins,
                                                      sizeof(*extent));
                        BUG_ON(ret);
                        extent = btrfs_item_ptr(
                                    btrfs_buffer_leaf(path->nodes[0]),
                                    path->slots[0],
                                    struct btrfs_file_extent_item);
                        /* Same disk extent as the saved copy 'old'. */
                        btrfs_set_file_extent_disk_blocknr(extent,
                                    btrfs_file_extent_disk_blocknr(&old));
                        btrfs_set_file_extent_disk_num_blocks(extent,
                                    btrfs_file_extent_disk_num_blocks(&old));

                        /* Skip the blocks that lay before 'end'. */
                        btrfs_set_file_extent_offset(extent,
                                    btrfs_file_extent_offset(&old) +
                                    ((end - key.offset) >> inode->i_blkbits));
                        WARN_ON(btrfs_file_extent_num_blocks(&old) <
                                (extent_end - end) >> inode->i_blkbits);
                        btrfs_set_file_extent_num_blocks(extent,
                                    (extent_end - end) >> inode->i_blkbits);

                        btrfs_set_file_extent_type(extent,
                                                   BTRFS_FILE_EXTENT_REG);
                        btrfs_set_file_extent_generation(extent,
                                    btrfs_file_extent_generation(&old));
                        btrfs_mark_buffer_dirty(path->nodes[0]);
                        if (btrfs_file_extent_disk_blocknr(&old) != 0) {
                                inode->i_blocks +=
                                      btrfs_file_extent_num_blocks(extent) << 3;
                        }
                        ret = 0;
                        goto out;
                }
        }
out:
        btrfs_free_path(path);
        return ret;
}
2052
2053 static int prepare_pages(struct btrfs_root *root,
2054                          struct file *file,
2055                          struct page **pages,
2056                          size_t num_pages,
2057                          loff_t pos,
2058                          unsigned long first_index,
2059                          unsigned long last_index,
2060                          size_t write_bytes,
2061                          u64 alloc_extent_start)
2062 {
2063         int i;
2064         unsigned long index = pos >> PAGE_CACHE_SHIFT;
2065         struct inode *inode = file->f_path.dentry->d_inode;
2066         int offset;
2067         int err = 0;
2068         int this_write;
2069         struct buffer_head *bh;
2070         struct buffer_head *head;
2071         loff_t isize = i_size_read(inode);
2072
2073         memset(pages, 0, num_pages * sizeof(struct page *));
2074
2075         for (i = 0; i < num_pages; i++) {
2076                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
2077                 if (!pages[i]) {
2078                         err = -ENOMEM;
2079                         goto failed_release;
2080                 }
2081                 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
2082                 wait_on_page_writeback(pages[i]);
2083                 offset = pos & (PAGE_CACHE_SIZE -1);
2084                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
2085                 if (!page_has_buffers(pages[i])) {
2086                         create_empty_buffers(pages[i],
2087                                              root->fs_info->sb->s_blocksize,
2088                                              (1 << BH_Uptodate));
2089                 }
2090                 head = page_buffers(pages[i]);
2091                 bh = head;
2092                 do {
2093                         err = btrfs_map_bh_to_logical(root, bh,
2094                                                       alloc_extent_start);
2095                         BUG_ON(err);
2096                         if (err)
2097                                 goto failed_truncate;
2098                         bh = bh->b_this_page;
2099                         if (alloc_extent_start)
2100                                 alloc_extent_start++;
2101                 } while (bh != head);
2102                 pos += this_write;
2103                 WARN_ON(this_write > write_bytes);
2104                 write_bytes -= this_write;
2105         }
2106         return 0;
2107
2108 failed_release:
2109         btrfs_drop_pages(pages, num_pages);
2110         return err;
2111
2112 failed_truncate:
2113         btrfs_drop_pages(pages, num_pages);
2114         if (pos > isize)
2115                 vmtruncate(inode, isize);
2116         return err;
2117 }
2118
2119 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
2120                                 size_t count, loff_t *ppos)
2121 {
2122         loff_t pos;
2123         size_t num_written = 0;
2124         int err = 0;
2125         int ret = 0;
2126         struct inode *inode = file->f_path.dentry->d_inode;
2127         struct btrfs_root *root = BTRFS_I(inode)->root;
2128         struct page *pages[8];
2129         struct page *pinned[2];
2130         unsigned long first_index;
2131         unsigned long last_index;
2132         u64 start_pos;
2133         u64 num_blocks;
2134         u64 alloc_extent_start;
2135         u64 hint_block;
2136         struct btrfs_trans_handle *trans;
2137         struct btrfs_key ins;
2138         pinned[0] = NULL;
2139         pinned[1] = NULL;
2140         if (file->f_flags & O_DIRECT)
2141                 return -EINVAL;
2142         pos = *ppos;
2143         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2144         current->backing_dev_info = inode->i_mapping->backing_dev_info;
2145         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
2146         if (err)
2147                 goto out;
2148         if (count == 0)
2149                 goto out;
2150         err = remove_suid(file->f_path.dentry);
2151         if (err)
2152                 goto out;
2153         file_update_time(file);
2154
2155         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
2156         num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
2157                         inode->i_blkbits;
2158
2159         mutex_lock(&inode->i_mutex);
2160         first_index = pos >> PAGE_CACHE_SHIFT;
2161         last_index = (pos + count) >> PAGE_CACHE_SHIFT;
2162
2163         if ((pos & (PAGE_CACHE_SIZE - 1))) {
2164                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
2165                 if (!PageUptodate(pinned[0])) {
2166                         ret = mpage_readpage(pinned[0], btrfs_get_block);
2167                         BUG_ON(ret);
2168                         wait_on_page_locked(pinned[0]);
2169                 } else {
2170                         unlock_page(pinned[0]);
2171                 }
2172         }
2173         if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
2174                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
2175                 if (!PageUptodate(pinned[1])) {
2176                         ret = mpage_readpage(pinned[1], btrfs_get_block);
2177                         BUG_ON(ret);
2178                         wait_on_page_locked(pinned[1]);
2179                 } else {
2180                         unlock_page(pinned[1]);
2181                 }
2182         }
2183
2184         mutex_lock(&root->fs_info->fs_mutex);
2185         trans = btrfs_start_transaction(root, 1);
2186         if (!trans) {
2187                 err = -ENOMEM;
2188                 mutex_unlock(&root->fs_info->fs_mutex);
2189                 goto out_unlock;
2190         }
2191         btrfs_set_trans_block_group(trans, inode);
2192         /* FIXME blocksize != 4096 */
2193         inode->i_blocks += num_blocks << 3;
2194         hint_block = 0;
2195         if (start_pos < inode->i_size) {
2196                 /* FIXME blocksize != pagesize */
2197                 ret = drop_extents(trans, root, inode,
2198                                    start_pos,
2199                                    (pos + count + root->blocksize -1) &
2200                                    ~((u64)root->blocksize - 1), &hint_block);
2201                 BUG_ON(ret);
2202         }
2203         if (inode->i_size < start_pos) {
2204                 u64 last_pos_in_file;
2205                 u64 hole_size;
2206                 u64 mask = root->blocksize - 1;
2207                 last_pos_in_file = (inode->i_size + mask) & ~mask;
2208                 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
2209                 hole_size >>= inode->i_blkbits;
2210                 if (last_pos_in_file < start_pos) {
2211                         ret = btrfs_insert_file_extent(trans, root,
2212                                                        inode->i_ino,
2213                                                        last_pos_in_file,
2214                                                        0, 0, hole_size);
2215                 }
2216                 BUG_ON(ret);
2217         }
2218         if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
2219             pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
2220                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
2221                                          num_blocks, hint_block, (u64)-1,
2222                                          &ins, 1);
2223                 BUG_ON(ret);
2224                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
2225                                        start_pos, ins.objectid, ins.offset,
2226                                        ins.offset);
2227                 BUG_ON(ret);
2228         } else {
2229                 ins.offset = 0;
2230                 ins.objectid = 0;
2231         }
2232         BUG_ON(ret);
2233         alloc_extent_start = ins.objectid;
2234         // btrfs_update_inode_block_group(trans, inode);
2235         ret = btrfs_end_transaction(trans, root);
2236         mutex_unlock(&root->fs_info->fs_mutex);
2237
2238         while(count > 0) {
2239                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
2240                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
2241                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
2242                                         PAGE_CACHE_SHIFT;
2243
2244                 memset(pages, 0, sizeof(pages));
2245                 ret = prepare_pages(root, file, pages, num_pages,
2246                                     pos, first_index, last_index,
2247                                     write_bytes, alloc_extent_start);
2248                 BUG_ON(ret);
2249
2250                 /* FIXME blocks != pagesize */
2251                 if (alloc_extent_start)
2252                         alloc_extent_start += num_pages;
2253                 ret = btrfs_copy_from_user(pos, num_pages,
2254                                            write_bytes, pages, buf);
2255                 BUG_ON(ret);
2256
2257                 ret = dirty_and_release_pages(NULL, root, file, pages,
2258                                               num_pages, pos, write_bytes);
2259                 BUG_ON(ret);
2260                 btrfs_drop_pages(pages, num_pages);
2261
2262                 buf += write_bytes;
2263                 count -= write_bytes;
2264                 pos += write_bytes;
2265                 num_written += write_bytes;
2266
2267                 balance_dirty_pages_ratelimited(inode->i_mapping);
2268                 btrfs_btree_balance_dirty(root);
2269                 cond_resched();
2270         }
2271 out_unlock:
2272         mutex_unlock(&inode->i_mutex);
2273 out:
2274         if (pinned[0])
2275                 page_cache_release(pinned[0]);
2276         if (pinned[1])
2277                 page_cache_release(pinned[1]);
2278         *ppos = pos;
2279         current->backing_dev_info = NULL;
2280         mark_inode_dirty(inode);
2281         return num_written ? num_written : err;
2282 }
2283
2284 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
2285                         unsigned long offset, unsigned long size)
2286 {
2287         char *kaddr;
2288         unsigned long left, count = desc->count;
2289         struct inode *inode = page->mapping->host;
2290
2291         if (size > count)
2292                 size = count;
2293
2294         if (!PageChecked(page)) {
2295                 /* FIXME, do it per block */
2296                 struct btrfs_root *root = BTRFS_I(inode)->root;
2297                 int ret;
2298                 struct buffer_head *bh;
2299
2300                 if (page_has_buffers(page)) {
2301                         bh = page_buffers(page);
2302                         if (!buffer_mapped(bh)) {
2303                                 SetPageChecked(page);
2304                                 goto checked;
2305                         }
2306                 }
2307
2308                 ret = btrfs_csum_verify_file_block(root,
2309                                   page->mapping->host->i_ino,
2310                                   page->index << PAGE_CACHE_SHIFT,
2311                                   kmap(page), PAGE_CACHE_SIZE);
2312                 if (ret) {
2313                         if (ret != -ENOENT) {
2314                                 printk("failed to verify ino %lu page %lu ret %d\n",
2315                                        page->mapping->host->i_ino,
2316                                        page->index, ret);
2317                                 memset(page_address(page), 1, PAGE_CACHE_SIZE);
2318                                 flush_dcache_page(page);
2319                         }
2320                 }
2321                 SetPageChecked(page);
2322                 kunmap(page);
2323         }
2324 checked:
2325         /*
2326          * Faults on the destination of a read are common, so do it before
2327          * taking the kmap.
2328          */
2329         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
2330                 kaddr = kmap_atomic(page, KM_USER0);
2331                 left = __copy_to_user_inatomic(desc->arg.buf,
2332                                                 kaddr + offset, size);
2333                 kunmap_atomic(kaddr, KM_USER0);
2334                 if (left == 0)
2335                         goto success;
2336         }
2337
2338         /* Do it the slow way */
2339         kaddr = kmap(page);
2340         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2341         kunmap(page);
2342
2343         if (left) {
2344                 size -= left;
2345                 desc->error = -EFAULT;
2346         }
2347 success:
2348         desc->count = count - size;
2349         desc->written += size;
2350         desc->arg.buf += size;
2351         return size;
2352 }
2353
2354 /**
2355  * btrfs_file_aio_read - filesystem read routine
2356  * @iocb:       kernel I/O control block
2357  * @iov:        io vector request
2358  * @nr_segs:    number of segments in the iovec
2359  * @pos:        current file position
2360  */
2361 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2362                                    unsigned long nr_segs, loff_t pos)
2363 {
2364         struct file *filp = iocb->ki_filp;
2365         ssize_t retval;
2366         unsigned long seg;
2367         size_t count;
2368         loff_t *ppos = &iocb->ki_pos;
2369
2370         count = 0;
2371         for (seg = 0; seg < nr_segs; seg++) {
2372                 const struct iovec *iv = &iov[seg];
2373
2374                 /*
2375                  * If any segment has a negative length, or the cumulative
2376                  * length ever wraps negative then return -EINVAL.
2377                  */
2378                 count += iv->iov_len;
2379                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2380                         return -EINVAL;
2381                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2382                         continue;
2383                 if (seg == 0)
2384                         return -EFAULT;
2385                 nr_segs = seg;
2386                 count -= iv->iov_len;   /* This segment is no good */
2387                 break;
2388         }
2389         retval = 0;
2390         if (count) {
2391                 for (seg = 0; seg < nr_segs; seg++) {
2392                         read_descriptor_t desc;
2393
2394                         desc.written = 0;
2395                         desc.arg.buf = iov[seg].iov_base;
2396                         desc.count = iov[seg].iov_len;
2397                         if (desc.count == 0)
2398                                 continue;
2399                         desc.error = 0;
2400                         do_generic_file_read(filp, ppos, &desc,
2401                                              btrfs_read_actor);
2402                         retval += desc.written;
2403                         if (desc.error) {
2404                                 retval = retval ?: desc.error;
2405                                 break;
2406                         }
2407                 }
2408         }
2409         return retval;
2410 }
2411
2412 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2413 {
2414         struct btrfs_trans_handle *trans;
2415         struct btrfs_key key;
2416         struct btrfs_root_item root_item;
2417         struct btrfs_inode_item *inode_item;
2418         struct buffer_head *subvol;
2419         struct btrfs_leaf *leaf;
2420         struct btrfs_root *new_root;
2421         struct inode *inode;
2422         struct inode *dir;
2423         int ret;
2424         u64 objectid;
2425         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2426
2427         mutex_lock(&root->fs_info->fs_mutex);
2428         trans = btrfs_start_transaction(root, 1);
2429         BUG_ON(!trans);
2430
2431         subvol = btrfs_alloc_free_block(trans, root, 0);
2432         if (subvol == NULL)
2433                 return -ENOSPC;
2434         leaf = btrfs_buffer_leaf(subvol);
2435         btrfs_set_header_nritems(&leaf->header, 0);
2436         btrfs_set_header_level(&leaf->header, 0);
2437         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2438         btrfs_set_header_generation(&leaf->header, trans->transid);
2439         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2440         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2441                sizeof(leaf->header.fsid));
2442         mark_buffer_dirty(subvol);
2443
2444         inode_item = &root_item.inode;
2445         memset(inode_item, 0, sizeof(*inode_item));
2446         btrfs_set_inode_generation(inode_item, 1);
2447         btrfs_set_inode_size(inode_item, 3);
2448         btrfs_set_inode_nlink(inode_item, 1);
2449         btrfs_set_inode_nblocks(inode_item, 1);
2450         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2451
2452         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2453         btrfs_set_root_refs(&root_item, 1);
2454         brelse(subvol);
2455         subvol = NULL;
2456
2457         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2458                                        0, &objectid);
2459         BUG_ON(ret);
2460
2461         btrfs_set_root_dirid(&root_item, new_dirid);
2462
2463         key.objectid = objectid;
2464         key.offset = 1;
2465         key.flags = 0;
2466         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2467         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2468                                 &root_item);
2469         BUG_ON(ret);
2470
2471         /*
2472          * insert the directory item
2473          */
2474         key.offset = (u64)-1;
2475         dir = root->fs_info->sb->s_root->d_inode;
2476         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2477                                     name, namelen, dir->i_ino, &key,
2478                                     BTRFS_FT_DIR);
2479         BUG_ON(ret);
2480
2481         ret = btrfs_commit_transaction(trans, root);
2482         BUG_ON(ret);
2483
2484         new_root = btrfs_read_fs_root(root->fs_info, &key);
2485         BUG_ON(!new_root);
2486
2487         trans = btrfs_start_transaction(new_root, 1);
2488         BUG_ON(!trans);
2489
2490         inode = btrfs_new_inode(trans, new_root, new_dirid,
2491                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2492         inode->i_op = &btrfs_dir_inode_operations;
2493         inode->i_fop = &btrfs_dir_file_operations;
2494
2495         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2496         BUG_ON(ret);
2497
2498         inode->i_nlink = 1;
2499         inode->i_size = 6;
2500         ret = btrfs_update_inode(trans, new_root, inode);
2501         BUG_ON(ret);
2502
2503         ret = btrfs_commit_transaction(trans, new_root);
2504         BUG_ON(ret);
2505
2506         iput(inode);
2507
2508         mutex_unlock(&root->fs_info->fs_mutex);
2509         btrfs_btree_balance_dirty(root);
2510         return 0;
2511 }
2512
2513 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2514 {
2515         struct btrfs_trans_handle *trans;
2516         struct btrfs_key key;
2517         struct btrfs_root_item new_root_item;
2518         int ret;
2519         u64 objectid;
2520
2521         if (!root->ref_cows)
2522                 return -EINVAL;
2523
2524         mutex_lock(&root->fs_info->fs_mutex);
2525         trans = btrfs_start_transaction(root, 1);
2526         BUG_ON(!trans);
2527
2528         ret = btrfs_update_inode(trans, root, root->inode);
2529         BUG_ON(ret);
2530
2531         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2532                                        0, &objectid);
2533         BUG_ON(ret);
2534
2535         memcpy(&new_root_item, &root->root_item,
2536                sizeof(new_root_item));
2537
2538         key.objectid = objectid;
2539         key.offset = 1;
2540         key.flags = 0;
2541         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2542         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2543
2544         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2545                                 &new_root_item);
2546         BUG_ON(ret);
2547
2548         /*
2549          * insert the directory item
2550          */
2551         key.offset = (u64)-1;
2552         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2553                                     name, namelen,
2554                                     root->fs_info->sb->s_root->d_inode->i_ino,
2555                                     &key, BTRFS_FT_DIR);
2556
2557         BUG_ON(ret);
2558
2559         ret = btrfs_inc_root_ref(trans, root);
2560         BUG_ON(ret);
2561
2562         ret = btrfs_commit_transaction(trans, root);
2563         BUG_ON(ret);
2564         mutex_unlock(&root->fs_info->fs_mutex);
2565         btrfs_btree_balance_dirty(root);
2566         return 0;
2567 }
2568
2569 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2570 {
2571         struct block_device *bdev;
2572         struct btrfs_path *path;
2573         struct super_block *sb = root->fs_info->sb;
2574         struct btrfs_root *dev_root = root->fs_info->dev_root;
2575         struct btrfs_trans_handle *trans;
2576         struct btrfs_device_item *dev_item;
2577         struct btrfs_key key;
2578         u16 item_size;
2579         u64 num_blocks;
2580         u64 new_blocks;
2581         u64 device_id;
2582         int ret;
2583
2584 printk("adding disk %s\n", name);
2585         path = btrfs_alloc_path();
2586         if (!path)
2587                 return -ENOMEM;
2588         num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2589         bdev = open_bdev_excl(name, O_RDWR, sb);
2590         if (IS_ERR(bdev)) {
2591                 ret = PTR_ERR(bdev);
2592 printk("open bdev excl failed ret %d\n", ret);
2593                 goto out_nolock;
2594         }
2595         set_blocksize(bdev, sb->s_blocksize);
2596         new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2597         key.objectid = num_blocks;
2598         key.offset = new_blocks;
2599         key.flags = 0;
2600         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2601
2602         mutex_lock(&dev_root->fs_info->fs_mutex);
2603         trans = btrfs_start_transaction(dev_root, 1);
2604         item_size = sizeof(*dev_item) + namelen;
2605 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2606         ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2607         if (ret) {
2608 printk("insert failed %d\n", ret);
2609                 close_bdev_excl(bdev);
2610                 if (ret > 0)
2611                         ret = -EEXIST;
2612                 goto out;
2613         }
2614         dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2615                                   path->slots[0], struct btrfs_device_item);
2616         btrfs_set_device_pathlen(dev_item, namelen);
2617         memcpy(dev_item + 1, name, namelen);
2618
2619         device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2620         btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2621         btrfs_set_device_id(dev_item, device_id);
2622         mark_buffer_dirty(path->nodes[0]);
2623
2624         ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2625                                      new_blocks);
2626
2627         if (!ret) {
2628                 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2629                                              num_blocks + new_blocks);
2630                 i_size_write(root->fs_info->btree_inode,
2631                              (num_blocks + new_blocks) <<
2632                              root->fs_info->btree_inode->i_blkbits);
2633         }
2634
2635 out:
2636         ret = btrfs_commit_transaction(trans, dev_root);
2637         BUG_ON(ret);
2638         mutex_unlock(&root->fs_info->fs_mutex);
2639 out_nolock:
2640         btrfs_free_path(path);
2641         btrfs_btree_balance_dirty(root);
2642
2643         return ret;
2644 }
2645
2646 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2647                        cmd, unsigned long arg)
2648 {
2649         struct btrfs_root *root = BTRFS_I(inode)->root;
2650         struct btrfs_ioctl_vol_args vol_args;
2651         int ret = 0;
2652         struct btrfs_dir_item *di;
2653         int namelen;
2654         struct btrfs_path *path;
2655         u64 root_dirid;
2656
2657         switch (cmd) {
2658         case BTRFS_IOC_SNAP_CREATE:
2659                 if (copy_from_user(&vol_args,
2660                                    (struct btrfs_ioctl_vol_args __user *)arg,
2661                                    sizeof(vol_args)))
2662                         return -EFAULT;
2663                 namelen = strlen(vol_args.name);
2664                 if (namelen > BTRFS_VOL_NAME_MAX)
2665                         return -EINVAL;
2666                 path = btrfs_alloc_path();
2667                 if (!path)
2668                         return -ENOMEM;
2669                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2670                 mutex_lock(&root->fs_info->fs_mutex);
2671                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2672                                     path, root_dirid,
2673                                     vol_args.name, namelen, 0);
2674                 mutex_unlock(&root->fs_info->fs_mutex);
2675                 btrfs_free_path(path);
2676                 if (di && !IS_ERR(di))
2677                         return -EEXIST;
2678
2679                 if (root == root->fs_info->tree_root)
2680                         ret = create_subvol(root, vol_args.name, namelen);
2681                 else
2682                         ret = create_snapshot(root, vol_args.name, namelen);
2683                 WARN_ON(ret);
2684                 break;
2685         case BTRFS_IOC_ADD_DISK:
2686                 if (copy_from_user(&vol_args,
2687                                    (struct btrfs_ioctl_vol_args __user *)arg,
2688                                    sizeof(vol_args)))
2689                         return -EFAULT;
2690                 namelen = strlen(vol_args.name);
2691                 if (namelen > BTRFS_VOL_NAME_MAX)
2692                         return -EINVAL;
2693                 vol_args.name[namelen] = '\0';
2694                 ret = add_disk(root, vol_args.name, namelen);
2695                 break;
2696         default:
2697                 return -ENOTTY;
2698         }
2699         return ret;
2700 }
2701
#ifdef CONFIG_COMPAT
/*
 * compat_ioctl entry point: convert the compat pointer argument with
 * compat_ptr() and forward to btrfs_ioctl() while holding the big kernel
 * lock.
 */
static long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
                               unsigned long arg)
{
        unsigned long native_arg = (unsigned long) compat_ptr(arg);
        int rc;

        lock_kernel();
        rc = btrfs_ioctl(file->f_path.dentry->d_inode, file, cmd, native_arg);
        unlock_kernel();

        return rc;
}
#endif
2715
/* slab cache for struct btrfs_inode; only used in this file */
static struct kmem_cache *btrfs_inode_cachep;

/* slab caches created in init_inodecache() and visible to other files */
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
2721
2722 /*
2723  * Called inside transaction, so use GFP_NOFS
2724  */
2725 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2726 {
2727         struct btrfs_inode *ei;
2728
2729         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2730         if (!ei)
2731                 return NULL;
2732         return &ei->vfs_inode;
2733 }
2734
2735 static void btrfs_destroy_inode(struct inode *inode)
2736 {
2737         WARN_ON(!list_empty(&inode->i_dentry));
2738         WARN_ON(inode->i_data.nrpages);
2739
2740         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2741 }
2742
2743 static void init_once(void * foo, struct kmem_cache * cachep,
2744                       unsigned long flags)
2745 {
2746         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2747
2748         if ((flags & (SLAB_CTOR_CONSTRUCTOR)) ==
2749             SLAB_CTOR_CONSTRUCTOR) {
2750                 inode_init_once(&ei->vfs_inode);
2751         }
2752 }
2753
2754 static int init_inodecache(void)
2755 {
2756         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2757                                              sizeof(struct btrfs_inode),
2758                                              0, (SLAB_RECLAIM_ACCOUNT|
2759                                                 SLAB_MEM_SPREAD),
2760                                              init_once, NULL);
2761         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2762                                              sizeof(struct btrfs_trans_handle),
2763                                              0, (SLAB_RECLAIM_ACCOUNT|
2764                                                 SLAB_MEM_SPREAD),
2765                                              NULL, NULL);
2766         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2767                                              sizeof(struct btrfs_transaction),
2768                                              0, (SLAB_RECLAIM_ACCOUNT|
2769                                                 SLAB_MEM_SPREAD),
2770                                              NULL, NULL);
2771         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2772                                              sizeof(struct btrfs_transaction),
2773                                              0, (SLAB_RECLAIM_ACCOUNT|
2774                                                 SLAB_MEM_SPREAD),
2775                                              NULL, NULL);
2776         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2777                                              256,
2778                                              0, (SLAB_RECLAIM_ACCOUNT|
2779                                                 SLAB_MEM_SPREAD |
2780                                                 SLAB_DESTROY_BY_RCU),
2781                                              NULL, NULL);
2782         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2783             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2784                 return -ENOMEM;
2785         return 0;
2786 }
2787
2788 static void destroy_inodecache(void)
2789 {
2790         kmem_cache_destroy(btrfs_inode_cachep);
2791         kmem_cache_destroy(btrfs_trans_handle_cachep);
2792         kmem_cache_destroy(btrfs_transaction_cachep);
2793         kmem_cache_destroy(btrfs_bit_radix_cachep);
2794         kmem_cache_destroy(btrfs_path_cachep);
2795 }
2796
2797 static int btrfs_get_sb(struct file_system_type *fs_type,
2798         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2799 {
2800         return get_sb_bdev(fs_type, flags, dev_name, data,
2801                            btrfs_fill_super, mnt);
2802 }
2803
2804 static int btrfs_getattr(struct vfsmount *mnt,
2805                          struct dentry *dentry, struct kstat *stat)
2806 {
2807         struct inode *inode = dentry->d_inode;
2808         generic_fillattr(inode, stat);
2809         stat->blksize = 256 * 1024;
2810         return 0;
2811 }
2812
2813 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2814 {
2815         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2816         struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2817
2818         buf->f_namelen = BTRFS_NAME_LEN;
2819         buf->f_blocks = btrfs_super_total_blocks(disk_super);
2820         buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2821         buf->f_bavail = buf->f_bfree;
2822         buf->f_bsize = dentry->d_sb->s_blocksize;
2823         buf->f_type = BTRFS_SUPER_MAGIC;
2824         return 0;
2825 }
2826
2827 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2828                            struct inode * new_dir,struct dentry *new_dentry)
2829 {
2830         struct btrfs_trans_handle *trans;
2831         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2832         struct inode *new_inode = new_dentry->d_inode;
2833         struct inode *old_inode = old_dentry->d_inode;
2834         struct timespec ctime = CURRENT_TIME;
2835         struct btrfs_path *path;
2836         struct btrfs_dir_item *di;
2837         int ret;
2838
2839         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2840             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2841                 return -ENOTEMPTY;
2842         }
2843         mutex_lock(&root->fs_info->fs_mutex);
2844         trans = btrfs_start_transaction(root, 1);
2845         btrfs_set_trans_block_group(trans, new_dir);
2846         path = btrfs_alloc_path();
2847         if (!path) {
2848                 ret = -ENOMEM;
2849                 goto out_fail;
2850         }
2851
2852         old_dentry->d_inode->i_nlink++;
2853         old_dir->i_ctime = old_dir->i_mtime = ctime;
2854         new_dir->i_ctime = new_dir->i_mtime = ctime;
2855         old_inode->i_ctime = ctime;
2856         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2857                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2858                 u64 old_parent_oid;
2859                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2860                                            "..", 2, -1);
2861                 if (IS_ERR(di)) {
2862                         ret = PTR_ERR(di);
2863                         goto out_fail;
2864                 }
2865                 if (!di) {
2866                         ret = -ENOENT;
2867                         goto out_fail;
2868                 }
2869                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2870                 ret = btrfs_del_item(trans, root, path);
2871                 if (ret) {
2872                         ret = -EIO;
2873                         goto out_fail;
2874                 }
2875                 btrfs_release_path(root, path);
2876
2877                 di = btrfs_lookup_dir_index_item(trans, root, path,
2878                                                  old_inode->i_ino,
2879                                                  old_parent_oid,
2880                                                  "..", 2, -1);
2881                 if (IS_ERR(di)) {
2882                         ret = PTR_ERR(di);
2883                         goto out_fail;
2884                 }
2885                 if (!di) {
2886                         ret = -ENOENT;
2887                         goto out_fail;
2888                 }
2889                 ret = btrfs_del_item(trans, root, path);
2890                 if (ret) {
2891                         ret = -EIO;
2892                         goto out_fail;
2893                 }
2894                 btrfs_release_path(root, path);
2895
2896                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2897                                             old_inode->i_ino, location,
2898                                             BTRFS_FT_DIR);
2899                 if (ret)
2900                         goto out_fail;
2901         }
2902
2903
2904         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2905         if (ret)
2906                 goto out_fail;
2907
2908         if (new_inode) {
2909                 new_inode->i_ctime = CURRENT_TIME;
2910                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2911                 if (ret)
2912                         goto out_fail;
2913                 if (S_ISDIR(new_inode->i_mode))
2914                         clear_nlink(new_inode);
2915                 else
2916                         drop_nlink(new_inode);
2917                 btrfs_update_inode(trans, root, new_inode);
2918         }
2919         ret = btrfs_add_link(trans, new_dentry, old_inode);
2920         if (ret)
2921                 goto out_fail;
2922
2923 out_fail:
2924         btrfs_free_path(path);
2925         btrfs_end_transaction(trans, root);
2926         mutex_unlock(&root->fs_info->fs_mutex);
2927         return ret;
2928 }
2929
2930 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2931                          const char *symname)
2932 {
2933         struct btrfs_trans_handle *trans;
2934         struct btrfs_root *root = BTRFS_I(dir)->root;
2935         struct btrfs_path *path;
2936         struct btrfs_key key;
2937         struct inode *inode;
2938         int err;
2939         int drop_inode = 0;
2940         u64 objectid;
2941         int name_len;
2942         int datasize;
2943         char *ptr;
2944         struct btrfs_file_extent_item *ei;
2945
2946         name_len = strlen(symname) + 1;
2947         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2948                 return -ENAMETOOLONG;
2949         mutex_lock(&root->fs_info->fs_mutex);
2950         trans = btrfs_start_transaction(root, 1);
2951         btrfs_set_trans_block_group(trans, dir);
2952
2953         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2954         if (err) {
2955                 err = -ENOSPC;
2956                 goto out_unlock;
2957         }
2958
2959         inode = btrfs_new_inode(trans, root, objectid,
2960                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2961         err = PTR_ERR(inode);
2962         if (IS_ERR(inode))
2963                 goto out_unlock;
2964
2965         btrfs_set_trans_block_group(trans, inode);
2966         err = btrfs_add_nondir(trans, dentry, inode);
2967         if (err)
2968                 drop_inode = 1;
2969         else {
2970                 inode->i_mapping->a_ops = &btrfs_aops;
2971                 inode->i_fop = &btrfs_file_operations;
2972                 inode->i_op = &btrfs_file_inode_operations;
2973         }
2974         dir->i_sb->s_dirt = 1;
2975         btrfs_update_inode_block_group(trans, inode);
2976         btrfs_update_inode_block_group(trans, dir);
2977         if (drop_inode)
2978                 goto out_unlock;
2979
2980         path = btrfs_alloc_path();
2981         BUG_ON(!path);
2982         key.objectid = inode->i_ino;
2983         key.offset = 0;
2984         key.flags = 0;
2985         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2986         datasize = btrfs_file_extent_calc_inline_size(name_len);
2987         err = btrfs_insert_empty_item(trans, root, path, &key,
2988                                       datasize);
2989         BUG_ON(err);
2990         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2991                path->slots[0], struct btrfs_file_extent_item);
2992         btrfs_set_file_extent_generation(ei, trans->transid);
2993         btrfs_set_file_extent_type(ei,
2994                                    BTRFS_FILE_EXTENT_INLINE);
2995         ptr = btrfs_file_extent_inline_start(ei);
2996         btrfs_memcpy(root, path->nodes[0]->b_data,
2997                      ptr, symname, name_len);
2998         mark_buffer_dirty(path->nodes[0]);
2999         btrfs_free_path(path);
3000         inode->i_op = &btrfs_symlink_inode_operations;
3001         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3002         inode->i_size = name_len - 1;
3003         btrfs_update_inode(trans, root, inode);
3004         err = 0;
3005
3006 out_unlock:
3007         btrfs_end_transaction(trans, root);
3008         mutex_unlock(&root->fs_info->fs_mutex);
3009
3010         if (drop_inode) {
3011                 inode_dec_link_count(inode);
3012                 iput(inode);
3013         }
3014         btrfs_btree_balance_dirty(root);
3015         return err;
3016 }
3017
3018 static struct file_system_type btrfs_fs_type = {
3019         .owner          = THIS_MODULE,
3020         .name           = "btrfs",
3021         .get_sb         = btrfs_get_sb,
3022         .kill_sb        = kill_block_super,
3023         .fs_flags       = FS_REQUIRES_DEV,
3024 };
3025
3026 static struct super_operations btrfs_super_ops = {
3027         .delete_inode   = btrfs_delete_inode,
3028         .put_super      = btrfs_put_super,
3029         .read_inode     = btrfs_read_locked_inode,
3030         .write_super    = btrfs_write_super,
3031         .sync_fs        = btrfs_sync_fs,
3032         .write_inode    = btrfs_write_inode,
3033         .dirty_inode    = btrfs_dirty_inode,
3034         .alloc_inode    = btrfs_alloc_inode,
3035         .destroy_inode  = btrfs_destroy_inode,
3036         .statfs         = btrfs_statfs,
3037 };
3038
3039 static struct inode_operations btrfs_dir_inode_operations = {
3040         .lookup         = btrfs_lookup,
3041         .create         = btrfs_create,
3042         .unlink         = btrfs_unlink,
3043         .link           = btrfs_link,
3044         .mkdir          = btrfs_mkdir,
3045         .rmdir          = btrfs_rmdir,
3046         .rename         = btrfs_rename,
3047         .symlink        = btrfs_symlink,
3048         .setattr        = btrfs_setattr,
3049 };
3050
3051 static struct inode_operations btrfs_dir_ro_inode_operations = {
3052         .lookup         = btrfs_lookup,
3053 };
3054
3055 static struct file_operations btrfs_dir_file_operations = {
3056         .llseek         = generic_file_llseek,
3057         .read           = generic_read_dir,
3058         .readdir        = btrfs_readdir,
3059         .ioctl          = btrfs_ioctl,
3060 #ifdef CONFIG_COMPAT
3061         .compat_ioctl   = btrfs_compat_ioctl,
3062 #endif
3063 };
3064
3065 static struct address_space_operations btrfs_aops = {
3066         .readpage       = btrfs_readpage,
3067         .writepage      = btrfs_writepage,
3068         .sync_page      = block_sync_page,
3069         .prepare_write  = btrfs_prepare_write,
3070         .commit_write   = btrfs_commit_write,
3071         .bmap           = btrfs_bmap,
3072 };
3073
3074 static struct address_space_operations btrfs_symlink_aops = {
3075         .readpage       = btrfs_readpage,
3076         .writepage      = btrfs_writepage,
3077 };
3078
3079 static struct inode_operations btrfs_file_inode_operations = {
3080         .truncate       = btrfs_truncate,
3081         .getattr        = btrfs_getattr,
3082         .setattr        = btrfs_setattr,
3083 };
3084
3085 static struct file_operations btrfs_file_operations = {
3086         .llseek         = generic_file_llseek,
3087         .read           = do_sync_read,
3088         .aio_read       = btrfs_file_aio_read,
3089         .write          = btrfs_file_write,
3090         .mmap           = generic_file_mmap,
3091         .open           = generic_file_open,
3092         .ioctl          = btrfs_ioctl,
3093         .fsync          = btrfs_sync_file,
3094 #ifdef CONFIG_COMPAT
3095         .compat_ioctl   = btrfs_compat_ioctl,
3096 #endif
3097 };
3098
3099 static struct inode_operations btrfs_symlink_inode_operations = {
3100         .readlink       = generic_readlink,
3101         .follow_link    = page_follow_link_light,
3102         .put_link       = page_put_link,
3103 };
3104
3105 static int __init init_btrfs_fs(void)
3106 {
3107         int err;
3108         printk("btrfs loaded!\n");
3109         btrfs_init_transaction_sys();
3110         err = init_inodecache();
3111         if (err)
3112                 return err;
3113         return register_filesystem(&btrfs_fs_type);
3114         destroy_inodecache();
3115         return err;
3116 }
3117
3118 static void __exit exit_btrfs_fs(void)
3119 {
3120         btrfs_exit_transaction_sys();
3121         destroy_inodecache();
3122         unregister_filesystem(&btrfs_fs_type);
3123         printk("btrfs unloaded\n");
3124 }
3125
3126 module_init(init_btrfs_fs)
3127 module_exit(exit_btrfs_fs)
3128
3129 MODULE_LICENSE("GPL");