a29a781b86c25d42ff2c35fff4404710450c11e2
[safe/jmp/linux-2.6] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
15 #include "ctree.h"
16 #include "disk-io.h"
17 #include "transaction.h"
18 #include "btrfs_inode.h"
19 #include "ioctl.h"
20
21 void btrfs_fsinfo_release(struct kobject *obj)
22 {
23         struct btrfs_fs_info *fsinfo = container_of(obj,
24                                             struct btrfs_fs_info, kobj);
25         kfree(fsinfo);
26 }
27
28 struct kobj_type btrfs_fsinfo_ktype = {
29         .release = btrfs_fsinfo_release,
30 };
31
32 struct btrfs_iget_args {
33         u64 ino;
34         struct btrfs_root *root;
35 };
36
37 decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL);
38
39 #define BTRFS_SUPER_MAGIC 0x9123682E
40
41 static struct inode_operations btrfs_dir_inode_operations;
42 static struct inode_operations btrfs_dir_ro_inode_operations;
43 static struct super_operations btrfs_super_ops;
44 static struct file_operations btrfs_dir_file_operations;
45 static struct inode_operations btrfs_file_inode_operations;
46 static struct address_space_operations btrfs_aops;
47 static struct file_operations btrfs_file_operations;
48
49 static void btrfs_read_locked_inode(struct inode *inode)
50 {
51         struct btrfs_path *path;
52         struct btrfs_inode_item *inode_item;
53         struct btrfs_root *root = BTRFS_I(inode)->root;
54         struct btrfs_key location;
55         struct btrfs_block_group_cache *alloc_group;
56         u64 alloc_group_block;
57         int ret;
58
59         path = btrfs_alloc_path();
60         BUG_ON(!path);
61         btrfs_init_path(path);
62         mutex_lock(&root->fs_info->fs_mutex);
63
64         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
65         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
66         if (ret) {
67                 btrfs_free_path(path);
68                 goto make_bad;
69         }
70         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
71                                   path->slots[0],
72                                   struct btrfs_inode_item);
73
74         inode->i_mode = btrfs_inode_mode(inode_item);
75         inode->i_nlink = btrfs_inode_nlink(inode_item);
76         inode->i_uid = btrfs_inode_uid(inode_item);
77         inode->i_gid = btrfs_inode_gid(inode_item);
78         inode->i_size = btrfs_inode_size(inode_item);
79         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
80         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
81         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
82         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
83         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
84         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
85         inode->i_blocks = btrfs_inode_nblocks(inode_item);
86         inode->i_generation = btrfs_inode_generation(inode_item);
87         alloc_group_block = btrfs_inode_block_group(inode_item);
88         ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
89                                      (void **)&alloc_group,
90                                      alloc_group_block, 1);
91         BUG_ON(!ret);
92         BTRFS_I(inode)->block_group = alloc_group;
93
94         btrfs_free_path(path);
95         inode_item = NULL;
96
97         mutex_unlock(&root->fs_info->fs_mutex);
98
99         switch (inode->i_mode & S_IFMT) {
100 #if 0
101         default:
102                 init_special_inode(inode, inode->i_mode,
103                                    btrfs_inode_rdev(inode_item));
104                 break;
105 #endif
106         case S_IFREG:
107                 inode->i_mapping->a_ops = &btrfs_aops;
108                 inode->i_fop = &btrfs_file_operations;
109                 inode->i_op = &btrfs_file_inode_operations;
110                 break;
111         case S_IFDIR:
112                 inode->i_fop = &btrfs_dir_file_operations;
113                 if (root == root->fs_info->tree_root)
114                         inode->i_op = &btrfs_dir_ro_inode_operations;
115                 else
116                         inode->i_op = &btrfs_dir_inode_operations;
117                 break;
118         case S_IFLNK:
119                 // inode->i_op = &page_symlink_inode_operations;
120                 break;
121         }
122         return;
123
124 make_bad:
125         btrfs_release_path(root, path);
126         btrfs_free_path(path);
127         mutex_unlock(&root->fs_info->fs_mutex);
128         make_bad_inode(inode);
129 }
130
131 static void fill_inode_item(struct btrfs_inode_item *item,
132                             struct inode *inode)
133 {
134         btrfs_set_inode_uid(item, inode->i_uid);
135         btrfs_set_inode_gid(item, inode->i_gid);
136         btrfs_set_inode_size(item, inode->i_size);
137         btrfs_set_inode_mode(item, inode->i_mode);
138         btrfs_set_inode_nlink(item, inode->i_nlink);
139         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
140         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
141         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
142         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
143         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
144         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
145         btrfs_set_inode_nblocks(item, inode->i_blocks);
146         btrfs_set_inode_generation(item, inode->i_generation);
147         btrfs_set_inode_block_group(item,
148                                     BTRFS_I(inode)->block_group->key.objectid);
149 }
150
151
152 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
153                               struct btrfs_root *root,
154                               struct inode *inode)
155 {
156         struct btrfs_inode_item *inode_item;
157         struct btrfs_path *path;
158         int ret;
159
160         path = btrfs_alloc_path();
161         BUG_ON(!path);
162         btrfs_init_path(path);
163         ret = btrfs_lookup_inode(trans, root, path,
164                                  &BTRFS_I(inode)->location, 1);
165         if (ret) {
166                 if (ret > 0)
167                         ret = -ENOENT;
168                 goto failed;
169         }
170
171         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
172                                   path->slots[0],
173                                   struct btrfs_inode_item);
174
175         fill_inode_item(inode_item, inode);
176         btrfs_mark_buffer_dirty(path->nodes[0]);
177         ret = 0;
178 failed:
179         btrfs_release_path(root, path);
180         btrfs_free_path(path);
181         return ret;
182 }
183
184
185 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
186                               struct btrfs_root *root,
187                               struct inode *dir,
188                               struct dentry *dentry)
189 {
190         struct btrfs_path *path;
191         const char *name = dentry->d_name.name;
192         int name_len = dentry->d_name.len;
193         int ret = 0;
194         u64 objectid;
195         struct btrfs_dir_item *di;
196
197         path = btrfs_alloc_path();
198         BUG_ON(!path);
199         btrfs_init_path(path);
200         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
201                                     name, name_len, -1);
202         if (IS_ERR(di)) {
203                 ret = PTR_ERR(di);
204                 goto err;
205         }
206         if (!di) {
207                 ret = -ENOENT;
208                 goto err;
209         }
210         objectid = btrfs_disk_key_objectid(&di->location);
211         ret = btrfs_delete_one_dir_name(trans, root, path, di);
212         BUG_ON(ret);
213         btrfs_release_path(root, path);
214
215         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
216                                          objectid, name, name_len, -1);
217         if (IS_ERR(di)) {
218                 ret = PTR_ERR(di);
219                 goto err;
220         }
221         if (!di) {
222                 ret = -ENOENT;
223                 goto err;
224         }
225         ret = btrfs_delete_one_dir_name(trans, root, path, di);
226         BUG_ON(ret);
227
228         dentry->d_inode->i_ctime = dir->i_ctime;
229 err:
230         btrfs_free_path(path);
231         if (!ret) {
232                 dir->i_size -= name_len * 2;
233                 btrfs_update_inode(trans, root, dir);
234                 drop_nlink(dentry->d_inode);
235                 btrfs_update_inode(trans, root, dentry->d_inode);
236                 dir->i_sb->s_dirt = 1;
237         }
238         return ret;
239 }
240
241 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
242 {
243         struct btrfs_root *root;
244         struct btrfs_trans_handle *trans;
245         int ret;
246
247         root = BTRFS_I(dir)->root;
248         mutex_lock(&root->fs_info->fs_mutex);
249         trans = btrfs_start_transaction(root, 1);
250         btrfs_set_trans_block_group(trans, dir);
251         ret = btrfs_unlink_trans(trans, root, dir, dentry);
252         btrfs_end_transaction(trans, root);
253         mutex_unlock(&root->fs_info->fs_mutex);
254         return ret;
255 }
256
257 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
258 {
259         struct inode *inode = dentry->d_inode;
260         int err;
261         int ret;
262         struct btrfs_root *root = BTRFS_I(dir)->root;
263         struct btrfs_path *path;
264         struct btrfs_key key;
265         struct btrfs_trans_handle *trans;
266         struct btrfs_key found_key;
267         int found_type;
268         struct btrfs_leaf *leaf;
269         char *goodnames = "..";
270
271         path = btrfs_alloc_path();
272         BUG_ON(!path);
273         btrfs_init_path(path);
274         mutex_lock(&root->fs_info->fs_mutex);
275         trans = btrfs_start_transaction(root, 1);
276         btrfs_set_trans_block_group(trans, dir);
277         key.objectid = inode->i_ino;
278         key.offset = (u64)-1;
279         key.flags = (u32)-1;
280         while(1) {
281                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
282                 if (ret < 0) {
283                         err = ret;
284                         goto out;
285                 }
286                 BUG_ON(ret == 0);
287                 if (path->slots[0] == 0) {
288                         err = -ENOENT;
289                         goto out;
290                 }
291                 path->slots[0]--;
292                 leaf = btrfs_buffer_leaf(path->nodes[0]);
293                 btrfs_disk_key_to_cpu(&found_key,
294                                       &leaf->items[path->slots[0]].key);
295                 found_type = btrfs_key_type(&found_key);
296                 if (found_key.objectid != inode->i_ino) {
297                         err = -ENOENT;
298                         goto out;
299                 }
300                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
301                      found_type != BTRFS_DIR_INDEX_KEY) ||
302                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
303                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
304                         err = -ENOTEMPTY;
305                         goto out;
306                 }
307                 ret = btrfs_del_item(trans, root, path);
308                 BUG_ON(ret);
309
310                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
311                         break;
312                 btrfs_release_path(root, path);
313         }
314         ret = 0;
315         btrfs_release_path(root, path);
316
317         /* now the directory is empty */
318         err = btrfs_unlink_trans(trans, root, dir, dentry);
319         if (!err) {
320                 inode->i_size = 0;
321         }
322 out:
323         btrfs_release_path(root, path);
324         btrfs_free_path(path);
325         mutex_unlock(&root->fs_info->fs_mutex);
326         ret = btrfs_end_transaction(trans, root);
327         if (ret && !err)
328                 err = ret;
329         return err;
330 }
331
332 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
333                             struct btrfs_root *root,
334                             struct inode *inode)
335 {
336         struct btrfs_path *path;
337         int ret;
338
339         clear_inode(inode);
340
341         path = btrfs_alloc_path();
342         BUG_ON(!path);
343         btrfs_init_path(path);
344         ret = btrfs_lookup_inode(trans, root, path,
345                                  &BTRFS_I(inode)->location, -1);
346         BUG_ON(ret);
347         ret = btrfs_del_item(trans, root, path);
348         BUG_ON(ret);
349         btrfs_free_path(path);
350         return ret;
351 }
352
353 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
354                                    struct btrfs_root *root,
355                                    struct inode *inode)
356 {
357         int ret;
358         struct btrfs_path *path;
359         struct btrfs_key key;
360         struct btrfs_disk_key *found_key;
361         struct btrfs_leaf *leaf;
362         struct btrfs_file_extent_item *fi = NULL;
363         u64 extent_start = 0;
364         u64 extent_num_blocks = 0;
365         int found_extent;
366
367         path = btrfs_alloc_path();
368         BUG_ON(!path);
369         /* FIXME, add redo link to tree so we don't leak on crash */
370         key.objectid = inode->i_ino;
371         key.offset = (u64)-1;
372         key.flags = 0;
373         /*
374          * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys
375          * or extent data
376          */
377         btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
378         while(1) {
379                 btrfs_init_path(path);
380                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
381                 if (ret < 0) {
382                         goto error;
383                 }
384                 if (ret > 0) {
385                         BUG_ON(path->slots[0] == 0);
386                         path->slots[0]--;
387                 }
388                 leaf = btrfs_buffer_leaf(path->nodes[0]);
389                 found_key = &leaf->items[path->slots[0]].key;
390                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
391                         break;
392                 if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
393                     btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
394                         break;
395                 if (btrfs_disk_key_offset(found_key) < inode->i_size)
396                         break;
397                 found_extent = 0;
398                 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
399                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
400                                             path->slots[0],
401                                             struct btrfs_file_extent_item);
402                         if (btrfs_file_extent_type(fi) !=
403                             BTRFS_FILE_EXTENT_INLINE) {
404                                 extent_start =
405                                         btrfs_file_extent_disk_blocknr(fi);
406                                 extent_num_blocks =
407                                         btrfs_file_extent_disk_num_blocks(fi);
408                                 /* FIXME blocksize != 4096 */
409                                 inode->i_blocks -=
410                                         btrfs_file_extent_num_blocks(fi) << 3;
411                                 found_extent = 1;
412                         }
413                 }
414                 ret = btrfs_del_item(trans, root, path);
415                 BUG_ON(ret);
416                 btrfs_release_path(root, path);
417                 if (found_extent) {
418                         ret = btrfs_free_extent(trans, root, extent_start,
419                                                 extent_num_blocks, 0);
420                         BUG_ON(ret);
421                 }
422         }
423         ret = 0;
424 error:
425         btrfs_release_path(root, path);
426         btrfs_free_path(path);
427         inode->i_sb->s_dirt = 1;
428         return ret;
429 }
430
431 static void btrfs_delete_inode(struct inode *inode)
432 {
433         struct btrfs_trans_handle *trans;
434         struct btrfs_root *root = BTRFS_I(inode)->root;
435         int ret;
436
437         truncate_inode_pages(&inode->i_data, 0);
438         if (is_bad_inode(inode)) {
439                 goto no_delete;
440         }
441         inode->i_size = 0;
442         mutex_lock(&root->fs_info->fs_mutex);
443         trans = btrfs_start_transaction(root, 1);
444         btrfs_set_trans_block_group(trans, inode);
445         if (S_ISREG(inode->i_mode)) {
446                 ret = btrfs_truncate_in_trans(trans, root, inode);
447                 BUG_ON(ret);
448         }
449         btrfs_free_inode(trans, root, inode);
450         btrfs_end_transaction(trans, root);
451         mutex_unlock(&root->fs_info->fs_mutex);
452         return;
453 no_delete:
454         clear_inode(inode);
455 }
456
457 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
458                                struct btrfs_key *location)
459 {
460         const char *name = dentry->d_name.name;
461         int namelen = dentry->d_name.len;
462         struct btrfs_dir_item *di;
463         struct btrfs_path *path;
464         struct btrfs_root *root = BTRFS_I(dir)->root;
465         int ret;
466
467         path = btrfs_alloc_path();
468         BUG_ON(!path);
469         btrfs_init_path(path);
470         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
471                                     namelen, 0);
472         if (!di || IS_ERR(di)) {
473                 location->objectid = 0;
474                 ret = 0;
475                 goto out;
476         }
477         btrfs_disk_key_to_cpu(location, &di->location);
478 out:
479         btrfs_release_path(root, path);
480         btrfs_free_path(path);
481         return ret;
482 }
483
484 int fixup_tree_root_location(struct btrfs_root *root,
485                              struct btrfs_key *location,
486                              struct btrfs_root **sub_root)
487 {
488         struct btrfs_path *path;
489         struct btrfs_root_item *ri;
490
491         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
492                 return 0;
493         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
494                 return 0;
495
496         path = btrfs_alloc_path();
497         BUG_ON(!path);
498         mutex_lock(&root->fs_info->fs_mutex);
499
500         *sub_root = btrfs_read_fs_root(root->fs_info, location);
501         if (IS_ERR(*sub_root))
502                 return PTR_ERR(*sub_root);
503
504         ri = &(*sub_root)->root_item;
505         location->objectid = btrfs_root_dirid(ri);
506         location->flags = 0;
507         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
508         location->offset = 0;
509
510         btrfs_free_path(path);
511         mutex_unlock(&root->fs_info->fs_mutex);
512         return 0;
513 }
514
515 int btrfs_init_locked_inode(struct inode *inode, void *p)
516 {
517         struct btrfs_iget_args *args = p;
518         inode->i_ino = args->ino;
519         BTRFS_I(inode)->root = args->root;
520         return 0;
521 }
522
523 int btrfs_find_actor(struct inode *inode, void *opaque)
524 {
525         struct btrfs_iget_args *args = opaque;
526         return (args->ino == inode->i_ino &&
527                 args->root == BTRFS_I(inode)->root);
528 }
529
530 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
531                                 struct btrfs_root *root)
532 {
533         struct inode *inode;
534         struct btrfs_iget_args args;
535         args.ino = objectid;
536         args.root = root;
537
538         inode = iget5_locked(s, objectid, btrfs_find_actor,
539                              btrfs_init_locked_inode,
540                              (void *)&args);
541         return inode;
542 }
543
544 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
545                                    struct nameidata *nd)
546 {
547         struct inode * inode;
548         struct btrfs_inode *bi = BTRFS_I(dir);
549         struct btrfs_root *root = bi->root;
550         struct btrfs_root *sub_root = root;
551         struct btrfs_key location;
552         int ret;
553
554         if (dentry->d_name.len > BTRFS_NAME_LEN)
555                 return ERR_PTR(-ENAMETOOLONG);
556         mutex_lock(&root->fs_info->fs_mutex);
557         ret = btrfs_inode_by_name(dir, dentry, &location);
558         mutex_unlock(&root->fs_info->fs_mutex);
559         if (ret < 0)
560                 return ERR_PTR(ret);
561         inode = NULL;
562         if (location.objectid) {
563                 ret = fixup_tree_root_location(root, &location, &sub_root);
564                 if (ret < 0)
565                         return ERR_PTR(ret);
566                 if (ret > 0)
567                         return ERR_PTR(-ENOENT);
568                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
569                                           sub_root);
570                 if (!inode)
571                         return ERR_PTR(-EACCES);
572                 if (inode->i_state & I_NEW) {
573                         if (sub_root != root) {
574 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
575                                 igrab(inode);
576                                 sub_root->inode = inode;
577                         }
578                         BTRFS_I(inode)->root = sub_root;
579                         memcpy(&BTRFS_I(inode)->location, &location,
580                                sizeof(location));
581                         btrfs_read_locked_inode(inode);
582                         unlock_new_inode(inode);
583                 }
584         }
585         return d_splice_alias(inode, dentry);
586 }
587
588 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path)
589 {
590         struct btrfs_node *node;
591         int i;
592         int nritems;
593         u64 objectid;
594         u64 item_objectid;
595         u64 blocknr;
596         int slot;
597
598         if (!path->nodes[1])
599                 return;
600         node = btrfs_buffer_node(path->nodes[1]);
601         slot = path->slots[1];
602         objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key);
603         nritems = btrfs_header_nritems(&node->header);
604         for (i = slot; i < nritems; i++) {
605                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
606                 if (item_objectid != objectid)
607                         break;
608                 blocknr = btrfs_node_blockptr(node, i);
609                 readahead_tree_block(root, blocknr);
610         }
611 }
612
613 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
614 {
615         struct inode *inode = filp->f_path.dentry->d_inode;
616         struct btrfs_root *root = BTRFS_I(inode)->root;
617         struct btrfs_item *item;
618         struct btrfs_dir_item *di;
619         struct btrfs_key key;
620         struct btrfs_path *path;
621         int ret;
622         u32 nritems;
623         struct btrfs_leaf *leaf;
624         int slot;
625         int advance;
626         unsigned char d_type = DT_UNKNOWN;
627         int over = 0;
628         u32 di_cur;
629         u32 di_total;
630         u32 di_len;
631         int key_type = BTRFS_DIR_INDEX_KEY;
632
633         /* FIXME, use a real flag for deciding about the key type */
634         if (root->fs_info->tree_root == root)
635                 key_type = BTRFS_DIR_ITEM_KEY;
636         mutex_lock(&root->fs_info->fs_mutex);
637         key.objectid = inode->i_ino;
638         key.flags = 0;
639         btrfs_set_key_type(&key, key_type);
640         key.offset = filp->f_pos;
641         path = btrfs_alloc_path();
642         btrfs_init_path(path);
643         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
644         if (ret < 0)
645                 goto err;
646         advance = 0;
647         reada_leaves(root, path);
648         while(1) {
649                 leaf = btrfs_buffer_leaf(path->nodes[0]);
650                 nritems = btrfs_header_nritems(&leaf->header);
651                 slot = path->slots[0];
652                 if (advance || slot >= nritems) {
653                         if (slot >= nritems -1) {
654                                 ret = btrfs_next_leaf(root, path);
655                                 if (ret)
656                                         break;
657                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
658                                 nritems = btrfs_header_nritems(&leaf->header);
659                                 slot = path->slots[0];
660                                 if (path->slots[1] == 0)
661                                         reada_leaves(root, path);
662                         } else {
663                                 slot++;
664                                 path->slots[0]++;
665                         }
666                 }
667                 advance = 1;
668                 item = leaf->items + slot;
669                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
670                         break;
671                 if (btrfs_disk_key_type(&item->key) != key_type)
672                         break;
673                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
674                         continue;
675                 filp->f_pos = btrfs_disk_key_offset(&item->key);
676                 advance = 1;
677                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
678                 di_cur = 0;
679                 di_total = btrfs_item_size(leaf->items + slot);
680                 while(di_cur < di_total) {
681                         over = filldir(dirent, (const char *)(di + 1),
682                                        btrfs_dir_name_len(di),
683                                        btrfs_disk_key_offset(&item->key),
684                                        btrfs_disk_key_objectid(&di->location),
685                                        d_type);
686                         if (over)
687                                 goto nopos;
688                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
689                         di_cur += di_len;
690                         di = (struct btrfs_dir_item *)((char *)di + di_len);
691                 }
692         }
693         filp->f_pos++;
694 nopos:
695         ret = 0;
696 err:
697         btrfs_release_path(root, path);
698         btrfs_free_path(path);
699         mutex_unlock(&root->fs_info->fs_mutex);
700         return ret;
701 }
702
703 static void btrfs_put_super (struct super_block * sb)
704 {
705         struct btrfs_root *root = btrfs_sb(sb);
706         int ret;
707
708         ret = close_ctree(root);
709         if (ret) {
710                 printk("close ctree returns %d\n", ret);
711         }
712         sb->s_fs_info = NULL;
713 }
714
715 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
716 {
717         struct inode * inode;
718         struct dentry * root_dentry;
719         struct btrfs_super_block *disk_super;
720         struct btrfs_root *tree_root;
721         struct btrfs_inode *bi;
722
723         sb->s_maxbytes = MAX_LFS_FILESIZE;
724         sb->s_magic = BTRFS_SUPER_MAGIC;
725         sb->s_op = &btrfs_super_ops;
726         sb->s_time_gran = 1;
727
728         tree_root = open_ctree(sb);
729
730         if (!tree_root) {
731                 printk("btrfs: open_ctree failed\n");
732                 return -EIO;
733         }
734         sb->s_fs_info = tree_root;
735         disk_super = tree_root->fs_info->disk_super;
736         printk("read in super total blocks %Lu root %Lu\n",
737                btrfs_super_total_blocks(disk_super),
738                btrfs_super_root_dir(disk_super));
739
740         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
741                                   tree_root);
742         bi = BTRFS_I(inode);
743         bi->location.objectid = inode->i_ino;
744         bi->location.offset = 0;
745         bi->location.flags = 0;
746         bi->root = tree_root;
747         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
748
749         if (!inode)
750                 return -ENOMEM;
751         if (inode->i_state & I_NEW) {
752                 btrfs_read_locked_inode(inode);
753                 unlock_new_inode(inode);
754         }
755
756         root_dentry = d_alloc_root(inode);
757         if (!root_dentry) {
758                 iput(inode);
759                 return -ENOMEM;
760         }
761         sb->s_root = root_dentry;
762
763         return 0;
764 }
765
766 static int btrfs_write_inode(struct inode *inode, int wait)
767 {
768         struct btrfs_root *root = BTRFS_I(inode)->root;
769         struct btrfs_trans_handle *trans;
770         int ret = 0;
771
772         if (wait) {
773                 mutex_lock(&root->fs_info->fs_mutex);
774                 trans = btrfs_start_transaction(root, 1);
775                 btrfs_set_trans_block_group(trans, inode);
776                 ret = btrfs_commit_transaction(trans, root);
777                 mutex_unlock(&root->fs_info->fs_mutex);
778         }
779         return ret;
780 }
781
782 static void btrfs_dirty_inode(struct inode *inode)
783 {
784         struct btrfs_root *root = BTRFS_I(inode)->root;
785         struct btrfs_trans_handle *trans;
786
787         mutex_lock(&root->fs_info->fs_mutex);
788         trans = btrfs_start_transaction(root, 1);
789         btrfs_set_trans_block_group(trans, inode);
790         btrfs_update_inode(trans, root, inode);
791         btrfs_end_transaction(trans, root);
792         mutex_unlock(&root->fs_info->fs_mutex);
793 }
794
795 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
796                                      struct btrfs_root *root,
797                                      u64 objectid,
798                                      struct btrfs_block_group_cache *group,
799                                      int mode)
800 {
801         struct inode *inode;
802         struct btrfs_inode_item inode_item;
803         struct btrfs_key *location;
804         int ret;
805
806         inode = new_inode(root->fs_info->sb);
807         if (!inode)
808                 return ERR_PTR(-ENOMEM);
809
810         BTRFS_I(inode)->root = root;
811         group = btrfs_find_block_group(root, group, 0);
812         BTRFS_I(inode)->block_group = group;
813
814         inode->i_uid = current->fsuid;
815         inode->i_gid = current->fsgid;
816         inode->i_mode = mode;
817         inode->i_ino = objectid;
818         inode->i_blocks = 0;
819         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
820         fill_inode_item(&inode_item, inode);
821         location = &BTRFS_I(inode)->location;
822         location->objectid = objectid;
823         location->flags = 0;
824         location->offset = 0;
825         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
826
827         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
828         BUG_ON(ret);
829
830         insert_inode_hash(inode);
831         return inode;
832 }
833
834 static int btrfs_add_link(struct btrfs_trans_handle *trans,
835                             struct dentry *dentry, struct inode *inode)
836 {
837         int ret;
838         struct btrfs_key key;
839         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
840         key.objectid = inode->i_ino;
841         key.flags = 0;
842         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
843         key.offset = 0;
844
845         ret = btrfs_insert_dir_item(trans, root,
846                                     dentry->d_name.name, dentry->d_name.len,
847                                     dentry->d_parent->d_inode->i_ino,
848                                     &key, 0);
849         if (ret == 0) {
850                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
851                 ret = btrfs_update_inode(trans, root,
852                                          dentry->d_parent->d_inode);
853         }
854         return ret;
855 }
856
857 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
858                             struct dentry *dentry, struct inode *inode)
859 {
860         int err = btrfs_add_link(trans, dentry, inode);
861         if (!err) {
862                 d_instantiate(dentry, inode);
863                 return 0;
864         }
865         if (err > 0)
866                 err = -EEXIST;
867         return err;
868 }
869
870 static int btrfs_create(struct inode *dir, struct dentry *dentry,
871                         int mode, struct nameidata *nd)
872 {
873         struct btrfs_trans_handle *trans;
874         struct btrfs_root *root = BTRFS_I(dir)->root;
875         struct inode *inode;
876         int err;
877         int drop_inode = 0;
878         u64 objectid;
879
880         mutex_lock(&root->fs_info->fs_mutex);
881         trans = btrfs_start_transaction(root, 1);
882         btrfs_set_trans_block_group(trans, dir);
883
884         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
885         if (err) {
886                 err = -ENOSPC;
887                 goto out_unlock;
888         }
889
890         inode = btrfs_new_inode(trans, root, objectid,
891                                 BTRFS_I(dir)->block_group, mode);
892         err = PTR_ERR(inode);
893         if (IS_ERR(inode))
894                 goto out_unlock;
895
896         btrfs_set_trans_block_group(trans, inode);
897         err = btrfs_add_nondir(trans, dentry, inode);
898         if (err)
899                 drop_inode = 1;
900         else {
901                 inode->i_mapping->a_ops = &btrfs_aops;
902                 inode->i_fop = &btrfs_file_operations;
903                 inode->i_op = &btrfs_file_inode_operations;
904         }
905         dir->i_sb->s_dirt = 1;
906         btrfs_update_inode_block_group(trans, inode);
907         btrfs_update_inode_block_group(trans, dir);
908 out_unlock:
909         btrfs_end_transaction(trans, root);
910         mutex_unlock(&root->fs_info->fs_mutex);
911
912         if (drop_inode) {
913                 inode_dec_link_count(inode);
914                 iput(inode);
915         }
916         return err;
917 }
918
919 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
920                                 struct btrfs_root *root,
921                                 u64 objectid, u64 dirid)
922 {
923         int ret;
924         char buf[2];
925         struct btrfs_key key;
926
927         buf[0] = '.';
928         buf[1] = '.';
929
930         key.objectid = objectid;
931         key.offset = 0;
932         key.flags = 0;
933         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
934
935         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
936                                     &key, 1);
937         if (ret)
938                 goto error;
939         key.objectid = dirid;
940         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
941                                     &key, 1);
942         if (ret)
943                 goto error;
944 error:
945         return ret;
946 }
947
948 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
949 {
950         struct inode *inode;
951         struct btrfs_trans_handle *trans;
952         struct btrfs_root *root = BTRFS_I(dir)->root;
953         int err = 0;
954         int drop_on_err = 0;
955         u64 objectid;
956
957         mutex_lock(&root->fs_info->fs_mutex);
958         trans = btrfs_start_transaction(root, 1);
959         btrfs_set_trans_block_group(trans, dir);
960         if (IS_ERR(trans)) {
961                 err = PTR_ERR(trans);
962                 goto out_unlock;
963         }
964
965         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
966         if (err) {
967                 err = -ENOSPC;
968                 goto out_unlock;
969         }
970
971         inode = btrfs_new_inode(trans, root, objectid,
972                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
973         if (IS_ERR(inode)) {
974                 err = PTR_ERR(inode);
975                 goto out_fail;
976         }
977         drop_on_err = 1;
978         inode->i_op = &btrfs_dir_inode_operations;
979         inode->i_fop = &btrfs_dir_file_operations;
980         btrfs_set_trans_block_group(trans, inode);
981
982         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
983         if (err)
984                 goto out_fail;
985
986         inode->i_size = 6;
987         err = btrfs_update_inode(trans, root, inode);
988         if (err)
989                 goto out_fail;
990         err = btrfs_add_link(trans, dentry, inode);
991         if (err)
992                 goto out_fail;
993         d_instantiate(dentry, inode);
994         drop_on_err = 0;
995         dir->i_sb->s_dirt = 1;
996         btrfs_update_inode_block_group(trans, inode);
997         btrfs_update_inode_block_group(trans, dir);
998
999 out_fail:
1000         btrfs_end_transaction(trans, root);
1001 out_unlock:
1002         mutex_unlock(&root->fs_info->fs_mutex);
1003         if (drop_on_err)
1004                 iput(inode);
1005         return err;
1006 }
1007
1008 static int btrfs_sync_file(struct file *file,
1009                            struct dentry *dentry, int datasync)
1010 {
1011         struct inode *inode = dentry->d_inode;
1012         struct btrfs_root *root = BTRFS_I(inode)->root;
1013         int ret;
1014         struct btrfs_trans_handle *trans;
1015
1016         mutex_lock(&root->fs_info->fs_mutex);
1017         trans = btrfs_start_transaction(root, 1);
1018         if (!trans) {
1019                 ret = -ENOMEM;
1020                 goto out;
1021         }
1022         ret = btrfs_commit_transaction(trans, root);
1023         mutex_unlock(&root->fs_info->fs_mutex);
1024 out:
1025         return ret > 0 ? EIO : ret;
1026 }
1027
1028 static int btrfs_sync_fs(struct super_block *sb, int wait)
1029 {
1030         struct btrfs_trans_handle *trans;
1031         struct btrfs_root *root;
1032         int ret;
1033         root = btrfs_sb(sb);
1034
1035         sb->s_dirt = 0;
1036         if (!wait) {
1037                 filemap_flush(root->fs_info->btree_inode->i_mapping);
1038                 return 0;
1039         }
1040         mutex_lock(&root->fs_info->fs_mutex);
1041         trans = btrfs_start_transaction(root, 1);
1042         ret = btrfs_commit_transaction(trans, root);
1043         sb->s_dirt = 0;
1044         BUG_ON(ret);
1045 printk("btrfs sync_fs\n");
1046         mutex_unlock(&root->fs_info->fs_mutex);
1047         return 0;
1048 }
1049
1050 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1051                            struct buffer_head *result, int create)
1052 {
1053         int ret;
1054         int err = 0;
1055         u64 blocknr;
1056         u64 extent_start = 0;
1057         u64 extent_end = 0;
1058         u64 objectid = inode->i_ino;
1059         u32 found_type;
1060         struct btrfs_path *path;
1061         struct btrfs_root *root = BTRFS_I(inode)->root;
1062         struct btrfs_file_extent_item *item;
1063         struct btrfs_leaf *leaf;
1064         struct btrfs_disk_key *found_key;
1065
1066         path = btrfs_alloc_path();
1067         BUG_ON(!path);
1068         btrfs_init_path(path);
1069         if (create) {
1070                 WARN_ON(1);
1071         }
1072
1073         ret = btrfs_lookup_file_extent(NULL, root, path,
1074                                        inode->i_ino,
1075                                        iblock << inode->i_blkbits, 0);
1076         if (ret < 0) {
1077                 err = ret;
1078                 goto out;
1079         }
1080
1081         if (ret != 0) {
1082                 if (path->slots[0] == 0) {
1083                         btrfs_release_path(root, path);
1084                         goto out;
1085                 }
1086                 path->slots[0]--;
1087         }
1088
1089         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1090                               struct btrfs_file_extent_item);
1091         leaf = btrfs_buffer_leaf(path->nodes[0]);
1092         blocknr = btrfs_file_extent_disk_blocknr(item);
1093         blocknr += btrfs_file_extent_offset(item);
1094
1095         /* are we inside the extent that was found? */
1096         found_key = &leaf->items[path->slots[0]].key;
1097         found_type = btrfs_disk_key_type(found_key);
1098         if (btrfs_disk_key_objectid(found_key) != objectid ||
1099             found_type != BTRFS_EXTENT_DATA_KEY) {
1100                 extent_end = 0;
1101                 extent_start = 0;
1102                 btrfs_release_path(root, path);
1103                 goto out;
1104         }
1105         found_type = btrfs_file_extent_type(item);
1106         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1107         if (found_type == BTRFS_FILE_EXTENT_REG) {
1108                 extent_start = extent_start >> inode->i_blkbits;
1109                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1110                 if (iblock >= extent_start && iblock < extent_end) {
1111                         err = 0;
1112                         btrfs_map_bh_to_logical(root, result, blocknr +
1113                                                 iblock - extent_start);
1114                         goto out;
1115                 }
1116         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1117                 char *ptr;
1118                 char *map;
1119                 u32 size;
1120                 size = btrfs_file_extent_inline_len(leaf->items +
1121                                                     path->slots[0]);
1122                 extent_end = (extent_start + size) >> inode->i_blkbits;
1123                 extent_start >>= inode->i_blkbits;
1124                 if (iblock < extent_start || iblock > extent_end) {
1125                         goto out;
1126                 }
1127                 ptr = btrfs_file_extent_inline_start(item);
1128                 map = kmap(result->b_page);
1129                 memcpy(map, ptr, size);
1130                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1131                 flush_dcache_page(result->b_page);
1132                 kunmap(result->b_page);
1133                 set_buffer_uptodate(result);
1134                 SetPageChecked(result->b_page);
1135                 btrfs_map_bh_to_logical(root, result, 0);
1136         }
1137 out:
1138         btrfs_release_path(root, path);
1139         btrfs_free_path(path);
1140         return err;
1141 }
1142
1143 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1144                            struct buffer_head *result, int create)
1145 {
1146         int err;
1147         struct btrfs_root *root = BTRFS_I(inode)->root;
1148         mutex_lock(&root->fs_info->fs_mutex);
1149         err = btrfs_get_block_lock(inode, iblock, result, create);
1150         mutex_unlock(&root->fs_info->fs_mutex);
1151         return err;
1152 }
1153
1154 static int btrfs_prepare_write(struct file *file, struct page *page,
1155                                unsigned from, unsigned to)
1156 {
1157         return nobh_prepare_write(page, from, to, btrfs_get_block);
1158 }
1159
/*
 * Superblock write-out hook: performs a waiting sync of the filesystem.
 * The result of btrfs_sync_fs() is discarded.
 */
static void btrfs_write_super(struct super_block *sb)
{
        const int wait = 1;

        btrfs_sync_fs(sb, wait);
}
1164
1165 static int btrfs_readpage(struct file *file, struct page *page)
1166 {
1167         return mpage_readpage(page, btrfs_get_block);
1168 }
1169
1170 /*
1171  * While block_write_full_page is writing back the dirty buffers under
1172  * the page lock, whoever dirtied the buffers may decide to clean them
1173  * again at any time.  We handle that by only looking at the buffer
1174  * state inside lock_buffer().
1175  *
1176  * If block_write_full_page() is called for regular writeback
1177  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1178  * locked buffer.   This only can happen if someone has written the buffer
1179  * directly, with submit_bh().  At the address_space level PageWriteback
1180  * prevents this contention from occurring.
1181  */
1182 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1183                                    struct writeback_control *wbc)
1184 {
1185         int err;
1186         sector_t block;
1187         sector_t last_block;
1188         struct buffer_head *bh, *head;
1189         const unsigned blocksize = 1 << inode->i_blkbits;
1190         int nr_underway = 0;
1191
1192         BUG_ON(!PageLocked(page));
1193
1194         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1195
1196         if (!page_has_buffers(page)) {
1197                 create_empty_buffers(page, blocksize,
1198                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1199         }
1200
1201         /*
1202          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1203          * here, and the (potentially unmapped) buffers may become dirty at
1204          * any time.  If a buffer becomes dirty here after we've inspected it
1205          * then we just miss that fact, and the page stays dirty.
1206          *
1207          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1208          * handle that here by just cleaning them.
1209          */
1210
1211         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1212         head = page_buffers(page);
1213         bh = head;
1214
1215         /*
1216          * Get all the dirty buffers mapped to disk addresses and
1217          * handle any aliases from the underlying blockdev's mapping.
1218          */
1219         do {
1220                 if (block > last_block) {
1221                         /*
1222                          * mapped buffers outside i_size will occur, because
1223                          * this page can be outside i_size when there is a
1224                          * truncate in progress.
1225                          */
1226                         /*
1227                          * The buffer was zeroed by block_write_full_page()
1228                          */
1229                         clear_buffer_dirty(bh);
1230                         set_buffer_uptodate(bh);
1231                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1232                         WARN_ON(bh->b_size != blocksize);
1233                         err = btrfs_get_block(inode, block, bh, 0);
1234                         if (err)
1235                                 goto recover;
1236                         if (buffer_new(bh)) {
1237                                 /* blockdev mappings never come here */
1238                                 clear_buffer_new(bh);
1239                                 unmap_underlying_metadata(bh->b_bdev,
1240                                                         bh->b_blocknr);
1241                         }
1242                 }
1243                 bh = bh->b_this_page;
1244                 block++;
1245         } while (bh != head);
1246
1247         do {
1248                 if (!buffer_mapped(bh))
1249                         continue;
1250                 /*
1251                  * If it's a fully non-blocking write attempt and we cannot
1252                  * lock the buffer then redirty the page.  Note that this can
1253                  * potentially cause a busy-wait loop from pdflush and kswapd
1254                  * activity, but those code paths have their own higher-level
1255                  * throttling.
1256                  */
1257                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1258                         lock_buffer(bh);
1259                 } else if (test_set_buffer_locked(bh)) {
1260                         redirty_page_for_writepage(wbc, page);
1261                         continue;
1262                 }
1263                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1264                         mark_buffer_async_write(bh);
1265                 } else {
1266                         unlock_buffer(bh);
1267                 }
1268         } while ((bh = bh->b_this_page) != head);
1269
1270         /*
1271          * The page and its buffers are protected by PageWriteback(), so we can
1272          * drop the bh refcounts early.
1273          */
1274         BUG_ON(PageWriteback(page));
1275         set_page_writeback(page);
1276
1277         do {
1278                 struct buffer_head *next = bh->b_this_page;
1279                 if (buffer_async_write(bh)) {
1280                         submit_bh(WRITE, bh);
1281                         nr_underway++;
1282                 }
1283                 bh = next;
1284         } while (bh != head);
1285         unlock_page(page);
1286
1287         err = 0;
1288 done:
1289         if (nr_underway == 0) {
1290                 /*
1291                  * The page was marked dirty, but the buffers were
1292                  * clean.  Someone wrote them back by hand with
1293                  * ll_rw_block/submit_bh.  A rare case.
1294                  */
1295                 int uptodate = 1;
1296                 do {
1297                         if (!buffer_uptodate(bh)) {
1298                                 uptodate = 0;
1299                                 break;
1300                         }
1301                         bh = bh->b_this_page;
1302                 } while (bh != head);
1303                 if (uptodate)
1304                         SetPageUptodate(page);
1305                 end_page_writeback(page);
1306                 /*
1307                  * The page and buffer_heads can be released at any time from
1308                  * here on.
1309                  */
1310                 wbc->pages_skipped++;   /* We didn't write this page */
1311         }
1312         return err;
1313
1314 recover:
1315         /*
1316          * ENOSPC, or some other error.  We may already have added some
1317          * blocks to the file, so we need to write these out to avoid
1318          * exposing stale data.
1319          * The page is currently locked and not marked for writeback
1320          */
1321         bh = head;
1322         /* Recovery: lock and submit the mapped buffers */
1323         do {
1324                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1325                         lock_buffer(bh);
1326                         mark_buffer_async_write(bh);
1327                 } else {
1328                         /*
1329                          * The buffer may have been set dirty during
1330                          * attachment to a dirty page.
1331                          */
1332                         clear_buffer_dirty(bh);
1333                 }
1334         } while ((bh = bh->b_this_page) != head);
1335         SetPageError(page);
1336         BUG_ON(PageWriteback(page));
1337         set_page_writeback(page);
1338         do {
1339                 struct buffer_head *next = bh->b_this_page;
1340                 if (buffer_async_write(bh)) {
1341                         clear_buffer_dirty(bh);
1342                         submit_bh(WRITE, bh);
1343                         nr_underway++;
1344                 }
1345                 bh = next;
1346         } while (bh != head);
1347         unlock_page(page);
1348         goto done;
1349 }
1350
1351 /*
1352  * The generic ->writepage function for buffer-backed address_spaces
1353  */
1354 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1355 {
1356         struct inode * const inode = page->mapping->host;
1357         loff_t i_size = i_size_read(inode);
1358         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1359         unsigned offset;
1360         void *kaddr;
1361
1362         /* Is the page fully inside i_size? */
1363         if (page->index < end_index)
1364                 return __btrfs_write_full_page(inode, page, wbc);
1365
1366         /* Is the page fully outside i_size? (truncate in progress) */
1367         offset = i_size & (PAGE_CACHE_SIZE-1);
1368         if (page->index >= end_index+1 || !offset) {
1369                 /*
1370                  * The page may have dirty, unmapped buffers.  For example,
1371                  * they may have been added in ext3_writepage().  Make them
1372                  * freeable here, so the page does not leak.
1373                  */
1374                 block_invalidatepage(page, 0);
1375                 unlock_page(page);
1376                 return 0; /* don't care */
1377         }
1378
1379         /*
1380          * The page straddles i_size.  It must be zeroed out on each and every
1381          * writepage invokation because it may be mmapped.  "A file is mapped
1382          * in multiples of the page size.  For a file that is not a multiple of
1383          * the  page size, the remaining memory is zeroed when mapped, and
1384          * writes to that region are not written out to the file."
1385          */
1386         kaddr = kmap_atomic(page, KM_USER0);
1387         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1388         flush_dcache_page(page);
1389         kunmap_atomic(kaddr, KM_USER0);
1390         return __btrfs_write_full_page(inode, page, wbc);
1391 }
1392
1393 static void btrfs_truncate(struct inode *inode)
1394 {
1395         struct btrfs_root *root = BTRFS_I(inode)->root;
1396         int ret;
1397         struct btrfs_trans_handle *trans;
1398
1399         if (!S_ISREG(inode->i_mode))
1400                 return;
1401         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1402                 return;
1403
1404         nobh_truncate_page(inode->i_mapping, inode->i_size);
1405
1406         /* FIXME, add redo link to tree so we don't leak on crash */
1407         mutex_lock(&root->fs_info->fs_mutex);
1408         trans = btrfs_start_transaction(root, 1);
1409         btrfs_set_trans_block_group(trans, inode);
1410         ret = btrfs_truncate_in_trans(trans, root, inode);
1411         BUG_ON(ret);
1412         ret = btrfs_end_transaction(trans, root);
1413         BUG_ON(ret);
1414         mutex_unlock(&root->fs_info->fs_mutex);
1415         mark_inode_dirty(inode);
1416 }
1417
1418 /*
1419  * Make sure any changes to nobh_commit_write() are reflected in
1420  * nobh_truncate_page(), since it doesn't call commit_write().
1421  */
1422 static int btrfs_commit_write(struct file *file, struct page *page,
1423                               unsigned from, unsigned to)
1424 {
1425         struct inode *inode = page->mapping->host;
1426         struct buffer_head *bh;
1427         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1428
1429         SetPageUptodate(page);
1430         bh = page_buffers(page);
1431         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1432                 set_page_dirty(page);
1433         }
1434         if (pos > inode->i_size) {
1435                 i_size_write(inode, pos);
1436                 mark_inode_dirty(inode);
1437         }
1438         return 0;
1439 }
1440
1441 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1442                                 struct page **prepared_pages,
1443                                 const char __user * buf)
1444 {
1445         long page_fault = 0;
1446         int i;
1447         int offset = pos & (PAGE_CACHE_SIZE - 1);
1448
1449         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1450                 size_t count = min_t(size_t,
1451                                      PAGE_CACHE_SIZE - offset, write_bytes);
1452                 struct page *page = prepared_pages[i];
1453                 fault_in_pages_readable(buf, count);
1454
1455                 /* Copy data from userspace to the current page */
1456                 kmap(page);
1457                 page_fault = __copy_from_user(page_address(page) + offset,
1458                                               buf, count);
1459                 /* Flush processor's dcache for this page */
1460                 flush_dcache_page(page);
1461                 kunmap(page);
1462                 buf += count;
1463                 write_bytes -= count;
1464
1465                 if (page_fault)
1466                         break;
1467         }
1468         return page_fault ? -EFAULT : 0;
1469 }
1470
1471 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1472 {
1473         size_t i;
1474         for (i = 0; i < num_pages; i++) {
1475                 if (!pages[i])
1476                         break;
1477                 unlock_page(pages[i]);
1478                 mark_page_accessed(pages[i]);
1479                 page_cache_release(pages[i]);
1480         }
1481 }
1482 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1483                                    struct btrfs_root *root,
1484                                    struct file *file,
1485                                    struct page **pages,
1486                                    size_t num_pages,
1487                                    loff_t pos,
1488                                    size_t write_bytes)
1489 {
1490         int i;
1491         int offset;
1492         int err = 0;
1493         int ret;
1494         int this_write;
1495         struct inode *inode = file->f_path.dentry->d_inode;
1496         struct buffer_head *bh;
1497         struct btrfs_file_extent_item *ei;
1498
1499         for (i = 0; i < num_pages; i++) {
1500                 offset = pos & (PAGE_CACHE_SIZE -1);
1501                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1502                 /* FIXME, one block at a time */
1503
1504                 mutex_lock(&root->fs_info->fs_mutex);
1505                 trans = btrfs_start_transaction(root, 1);
1506                 btrfs_set_trans_block_group(trans, inode);
1507
1508                 bh = page_buffers(pages[i]);
1509                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1510                         struct btrfs_key key;
1511                         struct btrfs_path *path;
1512                         char *ptr;
1513                         u32 datasize;
1514
1515                         path = btrfs_alloc_path();
1516                         BUG_ON(!path);
1517                         key.objectid = inode->i_ino;
1518                         key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1519                         key.flags = 0;
1520                         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1521                         BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1522                         datasize = offset +
1523                                 btrfs_file_extent_calc_inline_size(write_bytes);
1524                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1525                                                       datasize);
1526                         BUG_ON(ret);
1527                         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1528                                path->slots[0], struct btrfs_file_extent_item);
1529                         btrfs_set_file_extent_generation(ei, trans->transid);
1530                         btrfs_set_file_extent_type(ei,
1531                                                    BTRFS_FILE_EXTENT_INLINE);
1532                         ptr = btrfs_file_extent_inline_start(ei);
1533                         memcpy(ptr, bh->b_data, offset + write_bytes);
1534                         mark_buffer_dirty(path->nodes[0]);
1535                         btrfs_free_path(path);
1536                 } else {
1537                         btrfs_csum_file_block(trans, root, inode->i_ino,
1538                                       pages[i]->index << PAGE_CACHE_SHIFT,
1539                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1540                         kunmap(pages[i]);
1541                 }
1542                 SetPageChecked(pages[i]);
1543                 btrfs_update_inode_block_group(trans, inode);
1544                 ret = btrfs_end_transaction(trans, root);
1545                 BUG_ON(ret);
1546                 mutex_unlock(&root->fs_info->fs_mutex);
1547
1548                 ret = btrfs_commit_write(file, pages[i], offset,
1549                                          offset + this_write);
1550                 pos += this_write;
1551                 if (ret) {
1552                         err = ret;
1553                         goto failed;
1554                 }
1555                 WARN_ON(this_write > write_bytes);
1556                 write_bytes -= this_write;
1557         }
1558 failed:
1559         return err;
1560 }
1561
1562 static int drop_extents(struct btrfs_trans_handle *trans,
1563                           struct btrfs_root *root,
1564                           struct inode *inode,
1565                           u64 start, u64 end)
1566 {
1567         int ret;
1568         struct btrfs_key key;
1569         struct btrfs_leaf *leaf;
1570         int slot;
1571         struct btrfs_file_extent_item *extent;
1572         u64 extent_end = 0;
1573         int keep;
1574         struct btrfs_file_extent_item old;
1575         struct btrfs_path *path;
1576         u64 search_start = start;
1577         int bookend;
1578         int found_type;
1579         int found_extent;
1580         int found_inline;
1581
1582         path = btrfs_alloc_path();
1583         if (!path)
1584                 return -ENOMEM;
1585         while(1) {
1586                 btrfs_release_path(root, path);
1587                 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1588                                                search_start, -1);
1589                 if (ret < 0)
1590                         goto out;
1591                 if (ret > 0) {
1592                         if (path->slots[0] == 0) {
1593                                 ret = 0;
1594                                 goto out;
1595                         }
1596                         path->slots[0]--;
1597                 }
1598                 keep = 0;
1599                 bookend = 0;
1600                 found_extent = 0;
1601                 found_inline = 0;
1602                 extent = NULL;
1603                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1604                 slot = path->slots[0];
1605                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
1606                 if (key.offset >= end || key.objectid != inode->i_ino) {
1607                         ret = 0;
1608                         goto out;
1609                 }
1610                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1611                         ret = 0;
1612                         goto out;
1613                 }
1614                 extent = btrfs_item_ptr(leaf, slot,
1615                                         struct btrfs_file_extent_item);
1616                 found_type = btrfs_file_extent_type(extent);
1617                 if (found_type == BTRFS_FILE_EXTENT_REG) {
1618                         extent_end = key.offset +
1619                                 (btrfs_file_extent_num_blocks(extent) <<
1620                                  inode->i_blkbits);
1621                         found_extent = 1;
1622                 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1623                         found_inline = 1;
1624                         extent_end = key.offset +
1625                              btrfs_file_extent_inline_len(leaf->items + slot);
1626                 }
1627
1628                 if (!found_extent && !found_inline) {
1629                         ret = 0;
1630                         goto out;
1631                 }
1632
1633                 if (search_start >= extent_end) {
1634                         ret = 0;
1635                         goto out;
1636                 }
1637
1638                 search_start = extent_end;
1639
1640                 if (end < extent_end && end >= key.offset) {
1641                         if (found_extent) {
1642                                 memcpy(&old, extent, sizeof(old));
1643                                 ret = btrfs_inc_extent_ref(trans, root,
1644                                       btrfs_file_extent_disk_blocknr(&old),
1645                                       btrfs_file_extent_disk_num_blocks(&old));
1646                                 BUG_ON(ret);
1647                         }
1648                         WARN_ON(found_inline);
1649                         bookend = 1;
1650                 }
1651
1652                 if (start > key.offset) {
1653                         u64 new_num;
1654                         u64 old_num;
1655                         /* truncate existing extent */
1656                         keep = 1;
1657                         WARN_ON(start & (root->blocksize - 1));
1658                         if (found_extent) {
1659                                 new_num = (start - key.offset) >>
1660                                         inode->i_blkbits;
1661                                 old_num = btrfs_file_extent_num_blocks(extent);
1662                                 inode->i_blocks -= (old_num - new_num) << 3;
1663                                 btrfs_set_file_extent_num_blocks(extent,
1664                                                                  new_num);
1665                                 mark_buffer_dirty(path->nodes[0]);
1666                         } else {
1667                                 WARN_ON(1);
1668                                 /*
1669                                 ret = btrfs_truncate_item(trans, root, path,
1670                                                           start - key.offset);
1671                                 BUG_ON(ret);
1672                                 */
1673                         }
1674                 }
1675                 if (!keep) {
1676                         u64 disk_blocknr = 0;
1677                         u64 disk_num_blocks = 0;
1678                         u64 extent_num_blocks = 0;
1679                         if (found_extent) {
1680                                 disk_blocknr =
1681                                       btrfs_file_extent_disk_blocknr(extent);
1682                                 disk_num_blocks =
1683                                       btrfs_file_extent_disk_num_blocks(extent);
1684                                 extent_num_blocks =
1685                                       btrfs_file_extent_num_blocks(extent);
1686                         }
1687                         ret = btrfs_del_item(trans, root, path);
1688                         BUG_ON(ret);
1689                         btrfs_release_path(root, path);
1690                         if (found_extent) {
1691                                 inode->i_blocks -=
1692                                 btrfs_file_extent_num_blocks(extent) << 3;
1693                                 ret = btrfs_free_extent(trans, root,
1694                                                         disk_blocknr,
1695                                                         disk_num_blocks, 0);
1696                         }
1697
1698                         BUG_ON(ret);
1699                         if (!bookend && search_start >= end) {
1700                                 ret = 0;
1701                                 goto out;
1702                         }
1703                         if (!bookend)
1704                                 continue;
1705                 }
1706                 if (bookend && found_extent) {
1707                         /* create bookend */
1708                         struct btrfs_key ins;
1709                         ins.objectid = inode->i_ino;
1710                         ins.offset = end;
1711                         ins.flags = 0;
1712                         btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1713
1714                         btrfs_release_path(root, path);
1715                         ret = btrfs_insert_empty_item(trans, root, path, &ins,
1716                                                       sizeof(*extent));
1717                         BUG_ON(ret);
1718                         extent = btrfs_item_ptr(
1719                                     btrfs_buffer_leaf(path->nodes[0]),
1720                                     path->slots[0],
1721                                     struct btrfs_file_extent_item);
1722                         btrfs_set_file_extent_disk_blocknr(extent,
1723                                     btrfs_file_extent_disk_blocknr(&old));
1724                         btrfs_set_file_extent_disk_num_blocks(extent,
1725                                     btrfs_file_extent_disk_num_blocks(&old));
1726
1727                         btrfs_set_file_extent_offset(extent,
1728                                     btrfs_file_extent_offset(&old) +
1729                                     ((end - key.offset) >> inode->i_blkbits));
1730                         WARN_ON(btrfs_file_extent_num_blocks(&old) <
1731                                 (end - key.offset) >> inode->i_blkbits);
1732                         btrfs_set_file_extent_num_blocks(extent,
1733                                     btrfs_file_extent_num_blocks(&old) -
1734                                     ((end - key.offset) >> inode->i_blkbits));
1735
1736                         btrfs_set_file_extent_type(extent,
1737                                                    BTRFS_FILE_EXTENT_REG);
1738                         btrfs_set_file_extent_generation(extent,
1739                                     btrfs_file_extent_generation(&old));
1740                         btrfs_mark_buffer_dirty(path->nodes[0]);
1741                         inode->i_blocks +=
1742                                 btrfs_file_extent_num_blocks(extent) << 3;
1743                         ret = 0;
1744                         goto out;
1745                 }
1746         }
1747 out:
1748         btrfs_free_path(path);
1749         return ret;
1750 }
1751
1752 static int prepare_pages(struct btrfs_root *root,
1753                          struct file *file,
1754                          struct page **pages,
1755                          size_t num_pages,
1756                          loff_t pos,
1757                          unsigned long first_index,
1758                          unsigned long last_index,
1759                          size_t write_bytes,
1760                          u64 alloc_extent_start)
1761 {
1762         int i;
1763         unsigned long index = pos >> PAGE_CACHE_SHIFT;
1764         struct inode *inode = file->f_path.dentry->d_inode;
1765         int offset;
1766         int err = 0;
1767         int this_write;
1768         struct buffer_head *bh;
1769         struct buffer_head *head;
1770         loff_t isize = i_size_read(inode);
1771
1772         memset(pages, 0, num_pages * sizeof(struct page *));
1773
1774         for (i = 0; i < num_pages; i++) {
1775                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1776                 if (!pages[i]) {
1777                         err = -ENOMEM;
1778                         goto failed_release;
1779                 }
1780                 offset = pos & (PAGE_CACHE_SIZE -1);
1781                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1782                 create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
1783                                      (1 << BH_Uptodate));
1784                 head = page_buffers(pages[i]);
1785                 bh = head;
1786                 do {
1787                         err = btrfs_map_bh_to_logical(root, bh,
1788                                                       alloc_extent_start);
1789                         BUG_ON(err);
1790                         if (err)
1791                                 goto failed_truncate;
1792                         bh = bh->b_this_page;
1793                         if (alloc_extent_start)
1794                                 alloc_extent_start++;
1795                 } while (bh != head);
1796                 pos += this_write;
1797                 WARN_ON(this_write > write_bytes);
1798                 write_bytes -= this_write;
1799         }
1800         return 0;
1801
1802 failed_release:
1803         btrfs_drop_pages(pages, num_pages);
1804         return err;
1805
1806 failed_truncate:
1807         btrfs_drop_pages(pages, num_pages);
1808         if (pos > isize)
1809                 vmtruncate(inode, isize);
1810         return err;
1811 }
1812
1813 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1814                                 size_t count, loff_t *ppos)
1815 {
1816         loff_t pos;
1817         size_t num_written = 0;
1818         int err = 0;
1819         int ret = 0;
1820         struct inode *inode = file->f_path.dentry->d_inode;
1821         struct btrfs_root *root = BTRFS_I(inode)->root;
1822         struct page *pages[8];
1823         struct page *pinned[2] = { NULL, NULL };
1824         unsigned long first_index;
1825         unsigned long last_index;
1826         u64 start_pos;
1827         u64 num_blocks;
1828         u64 alloc_extent_start;
1829         struct btrfs_trans_handle *trans;
1830         struct btrfs_key ins;
1831
1832         if (file->f_flags & O_DIRECT)
1833                 return -EINVAL;
1834         pos = *ppos;
1835         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1836         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1837         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1838         if (err)
1839                 goto out;
1840         if (count == 0)
1841                 goto out;
1842         err = remove_suid(file->f_path.dentry);
1843         if (err)
1844                 goto out;
1845         file_update_time(file);
1846
1847         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1848         num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1849                         inode->i_blkbits;
1850
1851         mutex_lock(&inode->i_mutex);
1852         first_index = pos >> PAGE_CACHE_SHIFT;
1853         last_index = (pos + count) >> PAGE_CACHE_SHIFT;
1854
1855         if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1856             (pos & (PAGE_CACHE_SIZE - 1))) {
1857                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1858                 if (!PageUptodate(pinned[0])) {
1859                         ret = mpage_readpage(pinned[0], btrfs_get_block);
1860                         BUG_ON(ret);
1861                 } else {
1862                         unlock_page(pinned[0]);
1863                 }
1864         }
1865         if (first_index != last_index &&
1866             (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1867             (count & (PAGE_CACHE_SIZE - 1))) {
1868                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1869                 if (!PageUptodate(pinned[1])) {
1870                         ret = mpage_readpage(pinned[1], btrfs_get_block);
1871                         BUG_ON(ret);
1872                 } else {
1873                         unlock_page(pinned[1]);
1874                 }
1875         }
1876
1877         mutex_lock(&root->fs_info->fs_mutex);
1878         trans = btrfs_start_transaction(root, 1);
1879         if (!trans) {
1880                 err = -ENOMEM;
1881                 mutex_unlock(&root->fs_info->fs_mutex);
1882                 goto out_unlock;
1883         }
1884         btrfs_set_trans_block_group(trans, inode);
1885         /* FIXME blocksize != 4096 */
1886         inode->i_blocks += num_blocks << 3;
1887         if (start_pos < inode->i_size) {
1888                 /* FIXME blocksize != pagesize */
1889                 ret = drop_extents(trans, root, inode,
1890                                    start_pos,
1891                                    (pos + count + root->blocksize -1) &
1892                                    ~((u64)root->blocksize - 1));
1893                 BUG_ON(ret);
1894         }
1895         if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1896             pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
1897                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1898                                          num_blocks, 1, (u64)-1, &ins);
1899                 BUG_ON(ret);
1900                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1901                                        start_pos, ins.objectid, ins.offset);
1902                 BUG_ON(ret);
1903         } else {
1904                 ins.offset = 0;
1905                 ins.objectid = 0;
1906         }
1907         BUG_ON(ret);
1908         alloc_extent_start = ins.objectid;
1909         btrfs_update_inode_block_group(trans, inode);
1910         ret = btrfs_end_transaction(trans, root);
1911         mutex_unlock(&root->fs_info->fs_mutex);
1912
1913         while(count > 0) {
1914                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1915                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1916                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1917                                         PAGE_CACHE_SHIFT;
1918
1919                 memset(pages, 0, sizeof(pages));
1920                 ret = prepare_pages(root, file, pages, num_pages,
1921                                     pos, first_index, last_index,
1922                                     write_bytes, alloc_extent_start);
1923                 BUG_ON(ret);
1924
1925                 /* FIXME blocks != pagesize */
1926                 if (alloc_extent_start)
1927                         alloc_extent_start += num_pages;
1928                 ret = btrfs_copy_from_user(pos, num_pages,
1929                                            write_bytes, pages, buf);
1930                 BUG_ON(ret);
1931
1932                 ret = dirty_and_release_pages(NULL, root, file, pages,
1933                                               num_pages, pos, write_bytes);
1934                 BUG_ON(ret);
1935                 btrfs_drop_pages(pages, num_pages);
1936
1937                 buf += write_bytes;
1938                 count -= write_bytes;
1939                 pos += write_bytes;
1940                 num_written += write_bytes;
1941
1942                 balance_dirty_pages_ratelimited(inode->i_mapping);
1943                 cond_resched();
1944         }
1945 out_unlock:
1946         mutex_unlock(&inode->i_mutex);
1947 out:
1948         if (pinned[0])
1949                 page_cache_release(pinned[0]);
1950         if (pinned[1])
1951                 page_cache_release(pinned[1]);
1952         *ppos = pos;
1953         current->backing_dev_info = NULL;
1954         mark_inode_dirty(inode);
1955         return num_written ? num_written : err;
1956 }
1957
1958 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
1959                         unsigned long offset, unsigned long size)
1960 {
1961         char *kaddr;
1962         unsigned long left, count = desc->count;
1963         struct inode *inode = page->mapping->host;
1964
1965         if (size > count)
1966                 size = count;
1967
1968         if (!PageChecked(page)) {
1969                 /* FIXME, do it per block */
1970                 struct btrfs_root *root = BTRFS_I(inode)->root;
1971
1972                 int ret = btrfs_csum_verify_file_block(root,
1973                                   page->mapping->host->i_ino,
1974                                   page->index << PAGE_CACHE_SHIFT,
1975                                   kmap(page), PAGE_CACHE_SIZE);
1976                 if (ret) {
1977                         printk("failed to verify ino %lu page %lu\n",
1978                                page->mapping->host->i_ino,
1979                                page->index);
1980                         memset(page_address(page), 0, PAGE_CACHE_SIZE);
1981                 }
1982                 SetPageChecked(page);
1983                 kunmap(page);
1984         }
1985         /*
1986          * Faults on the destination of a read are common, so do it before
1987          * taking the kmap.
1988          */
1989         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1990                 kaddr = kmap_atomic(page, KM_USER0);
1991                 left = __copy_to_user_inatomic(desc->arg.buf,
1992                                                 kaddr + offset, size);
1993                 kunmap_atomic(kaddr, KM_USER0);
1994                 if (left == 0)
1995                         goto success;
1996         }
1997
1998         /* Do it the slow way */
1999         kaddr = kmap(page);
2000         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2001         kunmap(page);
2002
2003         if (left) {
2004                 size -= left;
2005                 desc->error = -EFAULT;
2006         }
2007 success:
2008         desc->count = count - size;
2009         desc->written += size;
2010         desc->arg.buf += size;
2011         return size;
2012 }
2013
2014 /**
2015  * btrfs_file_aio_read - filesystem read routine
2016  * @iocb:       kernel I/O control block
2017  * @iov:        io vector request
2018  * @nr_segs:    number of segments in the iovec
2019  * @pos:        current file position
2020  */
2021 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2022                                    unsigned long nr_segs, loff_t pos)
2023 {
2024         struct file *filp = iocb->ki_filp;
2025         ssize_t retval;
2026         unsigned long seg;
2027         size_t count;
2028         loff_t *ppos = &iocb->ki_pos;
2029
2030         count = 0;
2031         for (seg = 0; seg < nr_segs; seg++) {
2032                 const struct iovec *iv = &iov[seg];
2033
2034                 /*
2035                  * If any segment has a negative length, or the cumulative
2036                  * length ever wraps negative then return -EINVAL.
2037                  */
2038                 count += iv->iov_len;
2039                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2040                         return -EINVAL;
2041                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2042                         continue;
2043                 if (seg == 0)
2044                         return -EFAULT;
2045                 nr_segs = seg;
2046                 count -= iv->iov_len;   /* This segment is no good */
2047                 break;
2048         }
2049         retval = 0;
2050         if (count) {
2051                 for (seg = 0; seg < nr_segs; seg++) {
2052                         read_descriptor_t desc;
2053
2054                         desc.written = 0;
2055                         desc.arg.buf = iov[seg].iov_base;
2056                         desc.count = iov[seg].iov_len;
2057                         if (desc.count == 0)
2058                                 continue;
2059                         desc.error = 0;
2060                         do_generic_file_read(filp, ppos, &desc,
2061                                              btrfs_read_actor);
2062                         retval += desc.written;
2063                         if (desc.error) {
2064                                 retval = retval ?: desc.error;
2065                                 break;
2066                         }
2067                 }
2068         }
2069         return retval;
2070 }
2071
2072 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2073 {
2074         struct btrfs_trans_handle *trans;
2075         struct btrfs_key key;
2076         struct btrfs_root_item root_item;
2077         struct btrfs_inode_item *inode_item;
2078         struct buffer_head *subvol;
2079         struct btrfs_leaf *leaf;
2080         struct btrfs_root *new_root;
2081         struct inode *inode;
2082         struct inode *dir;
2083         int ret;
2084         u64 objectid;
2085         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2086
2087         mutex_lock(&root->fs_info->fs_mutex);
2088         trans = btrfs_start_transaction(root, 1);
2089         BUG_ON(!trans);
2090
2091         subvol = btrfs_alloc_free_block(trans, root, 0);
2092         if (subvol == NULL)
2093                 return -ENOSPC;
2094         leaf = btrfs_buffer_leaf(subvol);
2095         btrfs_set_header_nritems(&leaf->header, 0);
2096         btrfs_set_header_level(&leaf->header, 0);
2097         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2098         btrfs_set_header_generation(&leaf->header, trans->transid);
2099         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2100         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2101                sizeof(leaf->header.fsid));
2102         mark_buffer_dirty(subvol);
2103
2104         inode_item = &root_item.inode;
2105         memset(inode_item, 0, sizeof(*inode_item));
2106         btrfs_set_inode_generation(inode_item, 1);
2107         btrfs_set_inode_size(inode_item, 3);
2108         btrfs_set_inode_nlink(inode_item, 1);
2109         btrfs_set_inode_nblocks(inode_item, 1);
2110         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2111
2112         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2113         btrfs_set_root_refs(&root_item, 1);
2114         brelse(subvol);
2115         subvol = NULL;
2116
2117         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2118                                        0, &objectid);
2119         BUG_ON(ret);
2120
2121         btrfs_set_root_dirid(&root_item, new_dirid);
2122
2123         key.objectid = objectid;
2124         key.offset = 1;
2125         key.flags = 0;
2126         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2127         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2128                                 &root_item);
2129         BUG_ON(ret);
2130
2131         /*
2132          * insert the directory item
2133          */
2134         key.offset = (u64)-1;
2135         dir = root->fs_info->sb->s_root->d_inode;
2136         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2137                                     name, namelen, dir->i_ino, &key, 0);
2138         BUG_ON(ret);
2139
2140         ret = btrfs_commit_transaction(trans, root);
2141         BUG_ON(ret);
2142
2143         new_root = btrfs_read_fs_root(root->fs_info, &key);
2144         BUG_ON(!new_root);
2145
2146         trans = btrfs_start_transaction(new_root, 1);
2147         BUG_ON(!trans);
2148
2149         inode = btrfs_new_inode(trans, new_root, new_dirid,
2150                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2151         inode->i_op = &btrfs_dir_inode_operations;
2152         inode->i_fop = &btrfs_dir_file_operations;
2153
2154         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2155         BUG_ON(ret);
2156
2157         inode->i_nlink = 1;
2158         inode->i_size = 6;
2159         ret = btrfs_update_inode(trans, new_root, inode);
2160         BUG_ON(ret);
2161
2162         ret = btrfs_commit_transaction(trans, new_root);
2163         BUG_ON(ret);
2164
2165         iput(inode);
2166
2167         mutex_unlock(&root->fs_info->fs_mutex);
2168         return 0;
2169 }
2170
2171 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2172 {
2173         struct btrfs_trans_handle *trans;
2174         struct btrfs_key key;
2175         struct btrfs_root_item new_root_item;
2176         int ret;
2177         u64 objectid;
2178
2179         if (!root->ref_cows)
2180                 return -EINVAL;
2181
2182         mutex_lock(&root->fs_info->fs_mutex);
2183         trans = btrfs_start_transaction(root, 1);
2184         BUG_ON(!trans);
2185
2186         ret = btrfs_update_inode(trans, root, root->inode);
2187         BUG_ON(ret);
2188
2189         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2190                                        0, &objectid);
2191         BUG_ON(ret);
2192
2193         memcpy(&new_root_item, &root->root_item,
2194                sizeof(new_root_item));
2195
2196         key.objectid = objectid;
2197         key.offset = 1;
2198         key.flags = 0;
2199         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2200         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2201
2202         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2203                                 &new_root_item);
2204         BUG_ON(ret);
2205
2206         /*
2207          * insert the directory item
2208          */
2209         key.offset = (u64)-1;
2210         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2211                                     name, namelen,
2212                                     root->fs_info->sb->s_root->d_inode->i_ino,
2213                                     &key, 0);
2214
2215         BUG_ON(ret);
2216
2217         ret = btrfs_inc_root_ref(trans, root);
2218         BUG_ON(ret);
2219
2220         ret = btrfs_commit_transaction(trans, root);
2221         BUG_ON(ret);
2222         mutex_unlock(&root->fs_info->fs_mutex);
2223         return 0;
2224 }
2225
2226 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2227 {
2228         struct block_device *bdev;
2229         struct btrfs_path *path;
2230         struct super_block *sb = root->fs_info->sb;
2231         struct btrfs_root *dev_root = root->fs_info->dev_root;
2232         struct btrfs_trans_handle *trans;
2233         struct btrfs_device_item *dev_item;
2234         struct btrfs_key key;
2235         u16 item_size;
2236         u64 num_blocks;
2237         u64 new_blocks;
2238         u64 device_id;
2239         int ret;
2240
2241 printk("adding disk %s\n", name);
2242         path = btrfs_alloc_path();
2243         if (!path)
2244                 return -ENOMEM;
2245         num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2246         bdev = open_bdev_excl(name, O_RDWR, sb);
2247         if (IS_ERR(bdev)) {
2248                 ret = PTR_ERR(bdev);
2249 printk("open bdev excl failed ret %d\n", ret);
2250                 goto out_nolock;
2251         }
2252         set_blocksize(bdev, sb->s_blocksize);
2253         new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2254         key.objectid = num_blocks;
2255         key.offset = new_blocks;
2256         key.flags = 0;
2257         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2258
2259         mutex_lock(&dev_root->fs_info->fs_mutex);
2260         trans = btrfs_start_transaction(dev_root, 1);
2261         item_size = sizeof(*dev_item) + namelen;
2262 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2263         ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2264         if (ret) {
2265 printk("insert failed %d\n", ret);
2266                 close_bdev_excl(bdev);
2267                 if (ret > 0)
2268                         ret = -EEXIST;
2269                 goto out;
2270         }
2271         dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2272                                   path->slots[0], struct btrfs_device_item);
2273         btrfs_set_device_pathlen(dev_item, namelen);
2274         memcpy(dev_item + 1, name, namelen);
2275
2276         device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2277         btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2278         btrfs_set_device_id(dev_item, device_id);
2279         mark_buffer_dirty(path->nodes[0]);
2280
2281         ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2282                                      new_blocks);
2283
2284         if (!ret) {
2285                 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2286                                              num_blocks + new_blocks);
2287                 i_size_write(root->fs_info->btree_inode,
2288                              (num_blocks + new_blocks) <<
2289                              root->fs_info->btree_inode->i_blkbits);
2290         }
2291
2292 out:
2293         ret = btrfs_commit_transaction(trans, dev_root);
2294         BUG_ON(ret);
2295         mutex_unlock(&root->fs_info->fs_mutex);
2296 out_nolock:
2297         btrfs_free_path(path);
2298
2299         return ret;
2300 }
2301
2302 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2303                        cmd, unsigned long arg)
2304 {
2305         struct btrfs_root *root = BTRFS_I(inode)->root;
2306         struct btrfs_ioctl_vol_args vol_args;
2307         int ret = 0;
2308         struct btrfs_dir_item *di;
2309         int namelen;
2310         struct btrfs_path *path;
2311         u64 root_dirid;
2312
2313         switch (cmd) {
2314         case BTRFS_IOC_SNAP_CREATE:
2315                 if (copy_from_user(&vol_args,
2316                                    (struct btrfs_ioctl_vol_args __user *)arg,
2317                                    sizeof(vol_args)))
2318                         return -EFAULT;
2319                 namelen = strlen(vol_args.name);
2320                 if (namelen > BTRFS_VOL_NAME_MAX)
2321                         return -EINVAL;
2322                 path = btrfs_alloc_path();
2323                 if (!path)
2324                         return -ENOMEM;
2325                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2326                 mutex_lock(&root->fs_info->fs_mutex);
2327                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2328                                     path, root_dirid,
2329                                     vol_args.name, namelen, 0);
2330                 mutex_unlock(&root->fs_info->fs_mutex);
2331                 btrfs_free_path(path);
2332                 if (di && !IS_ERR(di))
2333                         return -EEXIST;
2334
2335                 if (root == root->fs_info->tree_root)
2336                         ret = create_subvol(root, vol_args.name, namelen);
2337                 else
2338                         ret = create_snapshot(root, vol_args.name, namelen);
2339                 WARN_ON(ret);
2340                 break;
2341         case BTRFS_IOC_ADD_DISK:
2342                 if (copy_from_user(&vol_args,
2343                                    (struct btrfs_ioctl_vol_args __user *)arg,
2344                                    sizeof(vol_args)))
2345                         return -EFAULT;
2346                 namelen = strlen(vol_args.name);
2347                 if (namelen > BTRFS_VOL_NAME_MAX)
2348                         return -EINVAL;
2349                 vol_args.name[namelen] = '\0';
2350                 ret = add_disk(root, vol_args.name, namelen);
2351                 break;
2352         default:
2353                 return -ENOTTY;
2354         }
2355         return ret;
2356 }
2357
/* slab cache for struct btrfs_inode, private to this file */
static struct kmem_cache *btrfs_inode_cachep;

/* slab caches with external linkage, created in init_inodecache() */
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
2363
2364 /*
2365  * Called inside transaction, so use GFP_NOFS
2366  */
2367 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2368 {
2369         struct btrfs_inode *ei;
2370
2371         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2372         if (!ei)
2373                 return NULL;
2374         return &ei->vfs_inode;
2375 }
2376
2377 static void btrfs_destroy_inode(struct inode *inode)
2378 {
2379         WARN_ON(!list_empty(&inode->i_dentry));
2380         WARN_ON(inode->i_data.nrpages);
2381
2382         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2383 }
2384
2385 static void init_once(void * foo, struct kmem_cache * cachep,
2386                       unsigned long flags)
2387 {
2388         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2389
2390         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2391             SLAB_CTOR_CONSTRUCTOR) {
2392                 inode_init_once(&ei->vfs_inode);
2393         }
2394 }
2395
2396 static int init_inodecache(void)
2397 {
2398         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2399                                              sizeof(struct btrfs_inode),
2400                                              0, (SLAB_RECLAIM_ACCOUNT|
2401                                                 SLAB_MEM_SPREAD),
2402                                              init_once, NULL);
2403         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2404                                              sizeof(struct btrfs_trans_handle),
2405                                              0, (SLAB_RECLAIM_ACCOUNT|
2406                                                 SLAB_MEM_SPREAD),
2407                                              NULL, NULL);
2408         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2409                                              sizeof(struct btrfs_transaction),
2410                                              0, (SLAB_RECLAIM_ACCOUNT|
2411                                                 SLAB_MEM_SPREAD),
2412                                              NULL, NULL);
2413         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2414                                              sizeof(struct btrfs_transaction),
2415                                              0, (SLAB_RECLAIM_ACCOUNT|
2416                                                 SLAB_MEM_SPREAD),
2417                                              NULL, NULL);
2418         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2419                                              256,
2420                                              0, (SLAB_RECLAIM_ACCOUNT|
2421                                                 SLAB_MEM_SPREAD |
2422                                                 SLAB_DESTROY_BY_RCU),
2423                                              NULL, NULL);
2424         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2425             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2426                 return -ENOMEM;
2427         return 0;
2428 }
2429
2430 static void destroy_inodecache(void)
2431 {
2432         kmem_cache_destroy(btrfs_inode_cachep);
2433         kmem_cache_destroy(btrfs_trans_handle_cachep);
2434         kmem_cache_destroy(btrfs_transaction_cachep);
2435         kmem_cache_destroy(btrfs_bit_radix_cachep);
2436         kmem_cache_destroy(btrfs_path_cachep);
2437 }
2438
2439 static int btrfs_get_sb(struct file_system_type *fs_type,
2440         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2441 {
2442         return get_sb_bdev(fs_type, flags, dev_name, data,
2443                            btrfs_fill_super, mnt);
2444 }
2445
2446
2447 static int btrfs_getattr(struct vfsmount *mnt,
2448                          struct dentry *dentry, struct kstat *stat)
2449 {
2450         struct inode *inode = dentry->d_inode;
2451         generic_fillattr(inode, stat);
2452         stat->blksize = 256 * 1024;
2453         return 0;
2454 }
2455
2456 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2457 {
2458         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2459         struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2460
2461         buf->f_namelen = BTRFS_NAME_LEN;
2462         buf->f_blocks = btrfs_super_total_blocks(disk_super);
2463         buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2464         buf->f_bavail = buf->f_bfree;
2465         buf->f_bsize = dentry->d_sb->s_blocksize;
2466         buf->f_type = BTRFS_SUPER_MAGIC;
2467         return 0;
2468 }
2469
2470 static struct file_system_type btrfs_fs_type = {
2471         .owner          = THIS_MODULE,
2472         .name           = "btrfs",
2473         .get_sb         = btrfs_get_sb,
2474         .kill_sb        = kill_block_super,
2475         .fs_flags       = FS_REQUIRES_DEV,
2476 };
2477
2478 static struct super_operations btrfs_super_ops = {
2479         .delete_inode   = btrfs_delete_inode,
2480         .put_super      = btrfs_put_super,
2481         .read_inode     = btrfs_read_locked_inode,
2482         .write_super    = btrfs_write_super,
2483         .sync_fs        = btrfs_sync_fs,
2484         .write_inode    = btrfs_write_inode,
2485         .dirty_inode    = btrfs_dirty_inode,
2486         .alloc_inode    = btrfs_alloc_inode,
2487         .destroy_inode  = btrfs_destroy_inode,
2488         .statfs         = btrfs_statfs,
2489 };
2490
2491 static struct inode_operations btrfs_dir_inode_operations = {
2492         .lookup         = btrfs_lookup,
2493         .create         = btrfs_create,
2494         .unlink         = btrfs_unlink,
2495         .mkdir          = btrfs_mkdir,
2496         .rmdir          = btrfs_rmdir,
2497 };
2498
/* Inode operations for directories that only support lookup. */
static struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
};
2502
2503 static struct file_operations btrfs_dir_file_operations = {
2504         .llseek         = generic_file_llseek,
2505         .read           = generic_read_dir,
2506         .readdir        = btrfs_readdir,
2507         .ioctl          = btrfs_ioctl,
2508 };
2509
2510 static struct address_space_operations btrfs_aops = {
2511         .readpage       = btrfs_readpage,
2512         .writepage      = btrfs_writepage,
2513         .sync_page      = block_sync_page,
2514         .prepare_write  = btrfs_prepare_write,
2515         .commit_write   = btrfs_commit_write,
2516 };
2517
2518 static struct inode_operations btrfs_file_inode_operations = {
2519         .truncate       = btrfs_truncate,
2520         .getattr        = btrfs_getattr,
2521 };
2522
2523 static struct file_operations btrfs_file_operations = {
2524         .llseek         = generic_file_llseek,
2525         .read           = do_sync_read,
2526         .aio_read       = btrfs_file_aio_read,
2527         .write          = btrfs_file_write,
2528         .mmap           = generic_file_mmap,
2529         .open           = generic_file_open,
2530         .ioctl          = btrfs_ioctl,
2531         .fsync          = btrfs_sync_file,
2532 };
2533
2534 static int __init init_btrfs_fs(void)
2535 {
2536         int err;
2537         printk("btrfs loaded!\n");
2538         err = init_inodecache();
2539         if (err)
2540                 return err;
2541         kset_set_kset_s(&btrfs_subsys, fs_subsys);
2542         err = subsystem_register(&btrfs_subsys);
2543         if (err)
2544                 goto out;
2545         return register_filesystem(&btrfs_fs_type);
2546 out:
2547         destroy_inodecache();
2548         return err;
2549 }
2550
2551 static void __exit exit_btrfs_fs(void)
2552 {
2553         destroy_inodecache();
2554         unregister_filesystem(&btrfs_fs_type);
2555         subsystem_unregister(&btrfs_subsys);
2556         printk("btrfs unloaded\n");
2557 }
2558
/* Module entry and exit points, and the module's license declaration. */
module_init(init_btrfs_fs)
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");