Btrfs: Add O_DIRECT read and write (writes == buffered + cache flush)
[safe/jmp/linux-2.6] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/bio.h>
20 #include <linux/buffer_head.h>
21 #include <linux/fs.h>
22 #include <linux/pagemap.h>
23 #include <linux/highmem.h>
24 #include <linux/time.h>
25 #include <linux/init.h>
26 #include <linux/string.h>
27 #include <linux/smp_lock.h>
28 #include <linux/backing-dev.h>
29 #include <linux/mpage.h>
30 #include <linux/swap.h>
31 #include <linux/writeback.h>
32 #include <linux/statfs.h>
33 #include <linux/compat.h>
34 #include <linux/bit_spinlock.h>
35 #include <linux/version.h>
36 #include <linux/xattr.h>
37 #include "ctree.h"
38 #include "disk-io.h"
39 #include "transaction.h"
40 #include "btrfs_inode.h"
41 #include "ioctl.h"
42 #include "print-tree.h"
43 #include "volumes.h"
44
/*
 * Pairs an inode number with the root it should be looked up in.
 * NOTE(review): presumably the argument bundle for the iget lookup path;
 * its users are outside this view -- confirm.
 */
45 struct btrfs_iget_args {
46         u64 ino;
47         struct btrfs_root *root;
48 };
49
/*
 * Operation tables used by this file, declared here without initializers
 * so the functions below can reference them.
 * NOTE(review): presumably defined with initializers further down in the
 * file, outside this view -- confirm.
 */
50 static struct inode_operations btrfs_dir_inode_operations;
51 static struct inode_operations btrfs_symlink_inode_operations;
52 static struct inode_operations btrfs_dir_ro_inode_operations;
53 static struct inode_operations btrfs_special_inode_operations;
54 static struct inode_operations btrfs_file_inode_operations;
55 static struct address_space_operations btrfs_aops;
56 static struct address_space_operations btrfs_symlink_aops;
57 static struct file_operations btrfs_dir_file_operations;
58 static struct extent_io_ops btrfs_extent_io_ops;
59
/*
 * kmem_cache pointers.  Only btrfs_inode_cachep is private to this file;
 * the other four have external linkage.
 */
60 static struct kmem_cache *btrfs_inode_cachep;
61 struct kmem_cache *btrfs_trans_handle_cachep;
62 struct kmem_cache *btrfs_transaction_cachep;
63 struct kmem_cache *btrfs_bit_radix_cachep;
64 struct kmem_cache *btrfs_path_cachep;
65
/*
 * Translation table from a file type, expressed as the S_IFMT bits of a
 * mode shifted right by S_SHIFT, to the matching BTRFS_FT_* constant.
 * Slots that are not listed are zero-initialized.
 */
66 #define S_SHIFT 12
67 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
68         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
69         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
70         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
71         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
72         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
73         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
74         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
75 };
76
77 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
78                            int for_del)
79 {
80         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
81         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
82         u64 thresh;
83         int ret = 0;
84
85         if (for_del)
86                 thresh = total * 90;
87         else
88                 thresh = total * 85;
89
90         do_div(thresh, 100);
91
92         spin_lock(&root->fs_info->delalloc_lock);
93         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
94                 ret = -ENOSPC;
95         spin_unlock(&root->fs_info->delalloc_lock);
96         return ret;
97 }
98
99 static int cow_file_range(struct inode *inode, u64 start, u64 end)
100 {
101         struct btrfs_root *root = BTRFS_I(inode)->root;
102         struct btrfs_trans_handle *trans;
103         u64 alloc_hint = 0;
104         u64 num_bytes;
105         u64 cur_alloc_size;
106         u64 blocksize = root->sectorsize;
107         u64 orig_start = start;
108         u64 orig_num_bytes;
109         struct btrfs_key ins;
110         int ret;
111
112         trans = btrfs_start_transaction(root, 1);
113         BUG_ON(!trans);
114         btrfs_set_trans_block_group(trans, inode);
115
116         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
117         num_bytes = max(blocksize,  num_bytes);
118         ret = btrfs_drop_extents(trans, root, inode,
119                                  start, start + num_bytes, start, &alloc_hint);
120         orig_num_bytes = num_bytes;
121
122         if (alloc_hint == EXTENT_MAP_INLINE)
123                 goto out;
124
125         while(num_bytes > 0) {
126                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
127                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
128                                          root->root_key.objectid,
129                                          trans->transid,
130                                          inode->i_ino, start, 0,
131                                          alloc_hint, (u64)-1, &ins, 1);
132                 if (ret) {
133                         WARN_ON(1);
134                         goto out;
135                 }
136                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
137                                                start, ins.objectid, ins.offset,
138                                                ins.offset);
139                 inode->i_blocks += ins.offset >> 9;
140                 btrfs_check_file(root, inode);
141                 num_bytes -= cur_alloc_size;
142                 alloc_hint = ins.objectid + ins.offset;
143                 start += cur_alloc_size;
144         }
145         btrfs_drop_extent_cache(inode, orig_start,
146                                 orig_start + orig_num_bytes - 1);
147         btrfs_add_ordered_inode(inode);
148         btrfs_update_inode(trans, root, inode);
149 out:
150         btrfs_end_transaction(trans, root);
151         return ret;
152 }
153
/*
 * Delalloc handler for the NODATACOW case.
 *
 * Walks the byte range [start, end] of the inode one file extent at a
 * time.  A regular extent that covers the current position, has a disk
 * location, lies inside the filesystem size and for which
 * btrfs_count_snapshots_in_path() returns exactly 1 is left in place.
 * Everything else is passed to cow_file_range().
 *
 * Returns 0 once the whole range has been walked, or the negative result
 * of btrfs_lookup_file_extent().  The return value of cow_file_range() is
 * not examined.
 */
154 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
155 {
156         u64 extent_start;
157         u64 extent_end;
158         u64 bytenr;
159         u64 cow_end;
160         u64 loops = 0;
161         u64 total_fs_bytes;
162         struct btrfs_root *root = BTRFS_I(inode)->root;
163         struct extent_buffer *leaf;
164         int found_type;
165         struct btrfs_path *path;
166         struct btrfs_file_extent_item *item;
167         int ret;
            /* err is assigned below but never read */
168         int err;
169         struct btrfs_key found_key;
170
171         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
172         path = btrfs_alloc_path();
173         BUG_ON(!path);
174 again:
175         ret = btrfs_lookup_file_extent(NULL, root, path,
176                                        inode->i_ino, start, 0);
177         if (ret < 0) {
178                 btrfs_free_path(path);
179                 return ret;
180         }
181
            /* unless an extent shortens it, a fallback COW covers up to end */
182         cow_end = end;
183         if (ret != 0) {
                    /* no exact match: step back to the preceding item, if any */
184                 if (path->slots[0] == 0)
185                         goto not_found;
186                 path->slots[0]--;
187         }
188
189         leaf = path->nodes[0];
190         item = btrfs_item_ptr(leaf, path->slots[0],
191                               struct btrfs_file_extent_item);
192
193         /* are we inside the extent that was found? */
194         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
195         found_type = btrfs_key_type(&found_key);
196         if (found_key.objectid != inode->i_ino ||
197             found_type != BTRFS_EXTENT_DATA_KEY) {
198                 goto not_found;
199         }
200
            /* found_type is reused: from here on it holds the extent type */
201         found_type = btrfs_file_extent_type(leaf, item);
202         extent_start = found_key.offset;
203         if (found_type == BTRFS_FILE_EXTENT_REG) {
204                 u64 extent_num_bytes;
205
206                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
207                 extent_end = extent_start + extent_num_bytes;
208                 err = 0;
209
                    /* after the first pass the extent must begin exactly at start */
210                 if (loops && start != extent_start)
211                         goto not_found;
212
213                 if (start < extent_start || start >= extent_end)
214                         goto not_found;
215
                    /* a COW from here on stops at the end of this extent */
216                 cow_end = min(end, extent_end - 1);
217                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                    /* a zero disk bytenr cannot be written in place */
218                 if (bytenr == 0)
219                         goto not_found;
220
221                 /*
222                  * we may be called by the resizer, make sure we're inside
223                  * the limits of the FS
224                  */
225                 if (bytenr + extent_num_bytes > total_fs_bytes)
226                         goto not_found;
227
228                 if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
229                         goto not_found;
230                 }
231
                    /* extent can be reused as is; continue right after it */
232                 start = extent_end;
233         } else {
234                 goto not_found;
235         }
236 loop:
237         if (start > end) {
238                 btrfs_free_path(path);
239                 return 0;
240         }
241         btrfs_release_path(root, path);
242         loops++;
243         goto again;
244
245 not_found:
            /* fall back to COW for [start, cow_end], then resume behind it */
246         cow_file_range(inode, start, cow_end);
247         start = cow_end + 1;
248         goto loop;
249 }
250
251 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
252 {
253         struct btrfs_root *root = BTRFS_I(inode)->root;
254         int ret;
255         mutex_lock(&root->fs_info->fs_mutex);
256         if (btrfs_test_opt(root, NODATACOW) ||
257             btrfs_test_flag(inode, NODATACOW))
258                 ret = run_delalloc_nocow(inode, start, end);
259         else
260                 ret = cow_file_range(inode, start, end);
261
262         mutex_unlock(&root->fs_info->fs_mutex);
263         return ret;
264 }
265
266 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
267                        unsigned long old, unsigned long bits)
268 {
269         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
270                 struct btrfs_root *root = BTRFS_I(inode)->root;
271                 spin_lock(&root->fs_info->delalloc_lock);
272                 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
273                 root->fs_info->delalloc_bytes += end - start + 1;
274                 spin_unlock(&root->fs_info->delalloc_lock);
275         }
276         return 0;
277 }
278
279 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
280                          unsigned long old, unsigned long bits)
281 {
282         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
283                 struct btrfs_root *root = BTRFS_I(inode)->root;
284                 spin_lock(&root->fs_info->delalloc_lock);
285                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
286                         printk("warning: delalloc account %Lu %Lu\n",
287                                end - start + 1, root->fs_info->delalloc_bytes);
288                         root->fs_info->delalloc_bytes = 0;
289                         BTRFS_I(inode)->delalloc_bytes = 0;
290                 } else {
291                         root->fs_info->delalloc_bytes -= end - start + 1;
292                         BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
293                 }
294                 spin_unlock(&root->fs_info->delalloc_lock);
295         }
296         return 0;
297 }
298
299 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
300                          size_t size, struct bio *bio)
301 {
302         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
303         struct btrfs_mapping_tree *map_tree;
304         u64 logical = bio->bi_sector << 9;
305         u64 length = 0;
306         u64 map_length;
307         struct bio_vec *bvec;
308         int i;
309         int ret;
310
311         bio_for_each_segment(bvec, bio, i) {
312                 length += bvec->bv_len;
313         }
314         map_tree = &root->fs_info->mapping_tree;
315         map_length = length;
316         ret = btrfs_map_block(map_tree, READ, logical,
317                               &map_length, NULL, 0);
318
319         if (map_length < length + size) {
320                 return 1;
321         }
322         return 0;
323 }
324
325 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
326                           int mirror_num)
327 {
328         struct btrfs_root *root = BTRFS_I(inode)->root;
329         struct btrfs_trans_handle *trans;
330         int ret = 0;
331
332         if (!(rw & (1 << BIO_RW))) {
333                 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
334                 BUG_ON(ret);
335                 goto mapit;
336         }
337
338         if (btrfs_test_opt(root, NODATASUM) ||
339             btrfs_test_flag(inode, NODATASUM)) {
340                 goto mapit;
341         }
342
343         mutex_lock(&root->fs_info->fs_mutex);
344         trans = btrfs_start_transaction(root, 1);
345         btrfs_set_trans_block_group(trans, inode);
346         btrfs_csum_file_blocks(trans, root, inode, bio);
347         ret = btrfs_end_transaction(trans, root);
348         BUG_ON(ret);
349         mutex_unlock(&root->fs_info->fs_mutex);
350 mapit:
351         return btrfs_map_bio(root, rw, bio, mirror_num);
352 }
353
354 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
355 {
356         int ret = 0;
357         struct inode *inode = page->mapping->host;
358         struct btrfs_root *root = BTRFS_I(inode)->root;
359         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
360         struct btrfs_csum_item *item;
361         struct btrfs_path *path = NULL;
362         u32 csum;
363         if (btrfs_test_opt(root, NODATASUM) ||
364             btrfs_test_flag(inode, NODATASUM))
365                 return 0;
366         mutex_lock(&root->fs_info->fs_mutex);
367         path = btrfs_alloc_path();
368         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
369         if (IS_ERR(item)) {
370                 ret = PTR_ERR(item);
371                 /* a csum that isn't present is a preallocated region. */
372                 if (ret == -ENOENT || ret == -EFBIG)
373                         ret = 0;
374                 csum = 0;
375                 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
376                 goto out;
377         }
378         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
379                            BTRFS_CRC32_SIZE);
380         set_state_private(io_tree, start, csum);
381 out:
382         if (path)
383                 btrfs_free_path(path);
384         mutex_unlock(&root->fs_info->fs_mutex);
385         return ret;
386 }
387
/*
 * Bookkeeping for one failed read.  btrfs_readpage_io_failed_hook() stores
 * a pointer to the record as the private value of the inode's
 * io_failure_tree.
 */
388 struct io_failure_record {
        /* not assigned by the code visible in this part of the file */
389         struct page *page;
        /* file offset of the first byte of the failed range */
390         u64 start;
        /* length of the failed range in bytes (end - start + 1) */
391         u64 len;
        /* em->block_start plus the offset of start inside the extent map */
392         u64 logical;
        /* incremented before each retry and passed on as mirror number */
393         int last_mirror;
394 };
395
/*
 * Called for a read of [start, end] in page that failed; retries the read
 * with the next mirror number.
 *
 * On the first failure of a range an io_failure_record is allocated and
 * registered in the inode's io_failure_tree.  On every call the record's
 * last_mirror is incremented.  While it does not exceed the value returned
 * by btrfs_num_copies(), and a matching locked extent state exists, a new
 * single page read bio is submitted with that mirror number.
 *
 * Returns 0 when a retry was submitted, -ENOMEM when the record cannot be
 * allocated, and -EIO when no extent map is found or the retry conditions
 * are not met.  The record is freed on the -EIO paths.
 */
396 int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
397                                   struct page *page, u64 start, u64 end,
398                                   struct extent_state *state)
399 {
400         struct io_failure_record *failrec = NULL;
401         u64 private;
402         struct extent_map *em;
403         struct inode *inode = page->mapping->host;
404         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
405         struct bio *bio;
406         int num_copies;
407         int ret;
408         u64 logical;
409
410         ret = get_state_private(failure_tree, start, &private);
411         if (ret) {
                    /* no record for this range yet: build and register one */
412                 size_t pg_offset = start - page_offset(page);
413                 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
414                 if (!failrec)
415                         return -ENOMEM;
416                 failrec->start = start;
417                 failrec->len = end - start + 1;
418                 failrec->last_mirror = 0;
419
420                 em = btrfs_get_extent(inode, NULL, pg_offset, start,
421                                       failrec->len, 0);
422
423                 if (!em || IS_ERR(em)) {
424                         kfree(failrec);
425                         return -EIO;
426                 }
                    /* translate the file offset through the extent map */
427                 logical = start - em->start;
428                 logical = em->block_start + logical;
429                 failrec->logical = logical;
430                 free_extent_map(em);
431                 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
432                                 EXTENT_DIRTY, GFP_NOFS);
                    /* the record pointer itself is stored as the private value */
433                 set_state_private(failure_tree, start, (u64)failrec);
434         } else {
435                 failrec = (struct io_failure_record *)private;
436         }
437         num_copies = btrfs_num_copies(
438                               &BTRFS_I(inode)->root->fs_info->mapping_tree,
439                               failrec->logical, failrec->len);
440         failrec->last_mirror++;
441         if (!state) {
                    /*
                     * the caller passed no state: find the locked state in
                     * the inode's io_tree that starts exactly at this range
                     */
442                 spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
443                 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
444                                                     failrec->start,
445                                                     EXTENT_LOCKED);
446                 if (state && state->start != failrec->start)
447                         state = NULL;
448                 spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
449         }
450         if (!state || failrec->last_mirror > num_copies) {
                    /* give up: unregister and free the record */
451                 set_state_private(failure_tree, failrec->start, 0);
452                 clear_extent_bits(failure_tree, failrec->start,
453                                   failrec->start + failrec->len - 1,
454                                   EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
455                 kfree(failrec);
456                 return -EIO;
457         }
            /* new one page bio reusing the failed bio's end_io and bdev */
458         bio = bio_alloc(GFP_NOFS, 1);
459         bio->bi_private = state;
460         bio->bi_end_io = failed_bio->bi_end_io;
461         bio->bi_sector = failrec->logical >> 9;
462         bio->bi_bdev = failed_bio->bi_bdev;
463         bio_add_page(bio, page, failrec->len, start - page_offset(page));
464         btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror);
465         return 0;
466 }
467
/*
 * Verify the checksum of the data in [start, end] of page after a read.
 *
 * The expected checksum is the private value stored for start, taken from
 * state when it begins at start and from the inode's io_tree otherwise.
 * Nothing is checked when NODATASUM is set as a mount option or inode
 * flag.
 *
 * Returns 0 when the checksum matches (or checking is disabled).  On a
 * mismatch, or when no private value is found, a message is printed, the
 * range in the page is overwritten and -EIO is returned.
 * After a successful check, a failure record registered at start in the
 * io_failure_tree is removed and freed.
 */
468 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
469                                struct extent_state *state)
470 {
            /* byte offset of start inside the page */
471         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
472         struct inode *inode = page->mapping->host;
473         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
474         char *kaddr;
475         u64 private = ~(u32)0;
476         int ret;
477         struct btrfs_root *root = BTRFS_I(inode)->root;
478         u32 csum = ~(u32)0;
479         unsigned long flags;
480
481         if (btrfs_test_opt(root, NODATASUM) ||
482             btrfs_test_flag(inode, NODATASUM))
483                 return 0;
484         if (state && state->start == start) {
485                 private = state->private;
486                 ret = 0;
487         } else {
488                 ret = get_state_private(io_tree, start, &private);
489         }
            /* interrupts stay off for as long as the KM_IRQ0 mapping is held */
490         local_irq_save(flags);
491         kaddr = kmap_atomic(page, KM_IRQ0);
            /* no stored checksum to compare against */
492         if (ret) {
493                 goto zeroit;
494         }
495         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
496         btrfs_csum_final(csum, (char *)&csum);
497         if (csum != private) {
498                 goto zeroit;
499         }
500         kunmap_atomic(kaddr, KM_IRQ0);
501         local_irq_restore(flags);
502
503         /* if the io failure tree for this inode is non-empty,
504          * check to see if we've recovered from a failed IO
505          */
506         private = 0;
507         if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
508                              (u64)-1, 1, EXTENT_DIRTY)) {
509                 u64 private_failure;
510                 struct io_failure_record *failure;
511                 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
512                                         start, &private_failure);
513                 if (ret == 0) {
                            /* this read succeeded, so drop its failure record */
514                         failure = (struct io_failure_record *)private_failure;
515                         set_state_private(&BTRFS_I(inode)->io_failure_tree,
516                                           failure->start, 0);
517                         clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
518                                           failure->start,
519                                           failure->start + failure->len - 1,
520                                           EXTENT_DIRTY | EXTENT_LOCKED,
521                                           GFP_NOFS);
522                         kfree(failure);
523                 }
524         }
525         return 0;
526
527 zeroit:
528         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
529                page->mapping->host->i_ino, (unsigned long long)start, csum,
530                private);
            /* despite the label name, the range is filled with byte value 1 */
531         memset(kaddr + offset, 1, end - start + 1);
532         flush_dcache_page(page);
533         kunmap_atomic(kaddr, KM_IRQ0);
534         local_irq_restore(flags);
535         return -EIO;
536 }
537
538 void btrfs_read_locked_inode(struct inode *inode)
539 {
540         struct btrfs_path *path;
541         struct extent_buffer *leaf;
542         struct btrfs_inode_item *inode_item;
543         struct btrfs_timespec *tspec;
544         struct btrfs_root *root = BTRFS_I(inode)->root;
545         struct btrfs_key location;
546         u64 alloc_group_block;
547         u32 rdev;
548         int ret;
549
550         path = btrfs_alloc_path();
551         BUG_ON(!path);
552         mutex_lock(&root->fs_info->fs_mutex);
553         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
554
555         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
556         if (ret)
557                 goto make_bad;
558
559         leaf = path->nodes[0];
560         inode_item = btrfs_item_ptr(leaf, path->slots[0],
561                                     struct btrfs_inode_item);
562
563         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
564         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
565         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
566         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
567         inode->i_size = btrfs_inode_size(leaf, inode_item);
568
569         tspec = btrfs_inode_atime(inode_item);
570         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
571         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
572
573         tspec = btrfs_inode_mtime(inode_item);
574         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
575         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
576
577         tspec = btrfs_inode_ctime(inode_item);
578         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
579         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
580
581         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
582         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
583         inode->i_rdev = 0;
584         rdev = btrfs_inode_rdev(leaf, inode_item);
585
586         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
587         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
588                                                        alloc_group_block);
589         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
590         if (!BTRFS_I(inode)->block_group) {
591                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
592                                                  NULL, 0,
593                                                  BTRFS_BLOCK_GROUP_METADATA, 0);
594         }
595         btrfs_free_path(path);
596         inode_item = NULL;
597
598         mutex_unlock(&root->fs_info->fs_mutex);
599
600         switch (inode->i_mode & S_IFMT) {
601         case S_IFREG:
602                 inode->i_mapping->a_ops = &btrfs_aops;
603                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
604                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
605                 inode->i_fop = &btrfs_file_operations;
606                 inode->i_op = &btrfs_file_inode_operations;
607                 break;
608         case S_IFDIR:
609                 inode->i_fop = &btrfs_dir_file_operations;
610                 if (root == root->fs_info->tree_root)
611                         inode->i_op = &btrfs_dir_ro_inode_operations;
612                 else
613                         inode->i_op = &btrfs_dir_inode_operations;
614                 break;
615         case S_IFLNK:
616                 inode->i_op = &btrfs_symlink_inode_operations;
617                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
618                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
619                 break;
620         default:
621                 init_special_inode(inode, inode->i_mode, rdev);
622                 break;
623         }
624         return;
625
626 make_bad:
627         btrfs_release_path(root, path);
628         btrfs_free_path(path);
629         mutex_unlock(&root->fs_info->fs_mutex);
630         make_bad_inode(inode);
631 }
632
633 static void fill_inode_item(struct extent_buffer *leaf,
634                             struct btrfs_inode_item *item,
635                             struct inode *inode)
636 {
637         btrfs_set_inode_uid(leaf, item, inode->i_uid);
638         btrfs_set_inode_gid(leaf, item, inode->i_gid);
639         btrfs_set_inode_size(leaf, item, inode->i_size);
640         btrfs_set_inode_mode(leaf, item, inode->i_mode);
641         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
642
643         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
644                                inode->i_atime.tv_sec);
645         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
646                                 inode->i_atime.tv_nsec);
647
648         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
649                                inode->i_mtime.tv_sec);
650         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
651                                 inode->i_mtime.tv_nsec);
652
653         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
654                                inode->i_ctime.tv_sec);
655         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
656                                 inode->i_ctime.tv_nsec);
657
658         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
659         btrfs_set_inode_generation(leaf, item, inode->i_generation);
660         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
661         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
662         btrfs_set_inode_block_group(leaf, item,
663                                     BTRFS_I(inode)->block_group->key.objectid);
664 }
665
666 int btrfs_update_inode(struct btrfs_trans_handle *trans,
667                               struct btrfs_root *root,
668                               struct inode *inode)
669 {
670         struct btrfs_inode_item *inode_item;
671         struct btrfs_path *path;
672         struct extent_buffer *leaf;
673         int ret;
674
675         path = btrfs_alloc_path();
676         BUG_ON(!path);
677         ret = btrfs_lookup_inode(trans, root, path,
678                                  &BTRFS_I(inode)->location, 1);
679         if (ret) {
680                 if (ret > 0)
681                         ret = -ENOENT;
682                 goto failed;
683         }
684
685         leaf = path->nodes[0];
686         inode_item = btrfs_item_ptr(leaf, path->slots[0],
687                                   struct btrfs_inode_item);
688
689         fill_inode_item(leaf, inode_item, inode);
690         btrfs_mark_buffer_dirty(leaf);
691         btrfs_set_inode_last_trans(trans, inode);
692         ret = 0;
693 failed:
694         btrfs_release_path(root, path);
695         btrfs_free_path(path);
696         return ret;
697 }
698
699
700 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
701                               struct btrfs_root *root,
702                               struct inode *dir,
703                               struct dentry *dentry)
704 {
705         struct btrfs_path *path;
706         const char *name = dentry->d_name.name;
707         int name_len = dentry->d_name.len;
708         int ret = 0;
709         struct extent_buffer *leaf;
710         struct btrfs_dir_item *di;
711         struct btrfs_key key;
712
713         path = btrfs_alloc_path();
714         if (!path) {
715                 ret = -ENOMEM;
716                 goto err;
717         }
718
719         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
720                                     name, name_len, -1);
721         if (IS_ERR(di)) {
722                 ret = PTR_ERR(di);
723                 goto err;
724         }
725         if (!di) {
726                 ret = -ENOENT;
727                 goto err;
728         }
729         leaf = path->nodes[0];
730         btrfs_dir_item_key_to_cpu(leaf, di, &key);
731         ret = btrfs_delete_one_dir_name(trans, root, path, di);
732         if (ret)
733                 goto err;
734         btrfs_release_path(root, path);
735
736         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
737                                          key.objectid, name, name_len, -1);
738         if (IS_ERR(di)) {
739                 ret = PTR_ERR(di);
740                 goto err;
741         }
742         if (!di) {
743                 ret = -ENOENT;
744                 goto err;
745         }
746         ret = btrfs_delete_one_dir_name(trans, root, path, di);
747
748         dentry->d_inode->i_ctime = dir->i_ctime;
749         ret = btrfs_del_inode_ref(trans, root, name, name_len,
750                                   dentry->d_inode->i_ino,
751                                   dentry->d_parent->d_inode->i_ino);
752         if (ret) {
753                 printk("failed to delete reference to %.*s, "
754                        "inode %lu parent %lu\n", name_len, name,
755                        dentry->d_inode->i_ino,
756                        dentry->d_parent->d_inode->i_ino);
757         }
758 err:
759         btrfs_free_path(path);
760         if (!ret) {
761                 dir->i_size -= name_len * 2;
762                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
763                 btrfs_update_inode(trans, root, dir);
764 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
765                 dentry->d_inode->i_nlink--;
766 #else
767                 drop_nlink(dentry->d_inode);
768 #endif
769                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
770                 dir->i_sb->s_dirt = 1;
771         }
772         return ret;
773 }
774
775 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
776 {
777         struct btrfs_root *root;
778         struct btrfs_trans_handle *trans;
779         struct inode *inode = dentry->d_inode;
780         int ret;
781         unsigned long nr = 0;
782
783         root = BTRFS_I(dir)->root;
784         mutex_lock(&root->fs_info->fs_mutex);
785
786         ret = btrfs_check_free_space(root, 1, 1);
787         if (ret)
788                 goto fail;
789
790         trans = btrfs_start_transaction(root, 1);
791
792         btrfs_set_trans_block_group(trans, dir);
793         ret = btrfs_unlink_trans(trans, root, dir, dentry);
794         nr = trans->blocks_used;
795
796         if (inode->i_nlink == 0) {
797                 int found;
798                 /* if the inode isn't linked anywhere,
799                  * we don't need to worry about
800                  * data=ordered
801                  */
802                 found = btrfs_del_ordered_inode(inode);
803                 if (found == 1) {
804                         atomic_dec(&inode->i_count);
805                 }
806         }
807
808         btrfs_end_transaction(trans, root);
809 fail:
810         mutex_unlock(&root->fs_info->fs_mutex);
811         btrfs_btree_balance_dirty(root, nr);
812         btrfs_throttle(root);
813         return ret;
814 }
815
816 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
817 {
818         struct inode *inode = dentry->d_inode;
819         int err = 0;
820         int ret;
821         struct btrfs_root *root = BTRFS_I(dir)->root;
822         struct btrfs_trans_handle *trans;
823         unsigned long nr = 0;
824
825         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
826                 return -ENOTEMPTY;
827
828         mutex_lock(&root->fs_info->fs_mutex);
829         ret = btrfs_check_free_space(root, 1, 1);
830         if (ret)
831                 goto fail;
832
833         trans = btrfs_start_transaction(root, 1);
834         btrfs_set_trans_block_group(trans, dir);
835
836         /* now the directory is empty */
837         err = btrfs_unlink_trans(trans, root, dir, dentry);
838         if (!err) {
839                 inode->i_size = 0;
840         }
841
842         nr = trans->blocks_used;
843         ret = btrfs_end_transaction(trans, root);
844 fail:
845         mutex_unlock(&root->fs_info->fs_mutex);
846         btrfs_btree_balance_dirty(root, nr);
847         btrfs_throttle(root);
848
849         if (ret && !err)
850                 err = ret;
851         return err;
852 }
853
854 /*
855  * this can truncate away extent items, csum items and directory items.
856  * It starts at a high offset and removes keys until it can't find
857  * any higher than i_size.
858  *
859  * csum items that cross the new i_size are truncated to the new size
860  * as well.
861  */
862 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
863                                    struct btrfs_root *root,
864                                    struct inode *inode,
865                                    u32 min_type)
866 {
867         int ret;
868         struct btrfs_path *path;
869         struct btrfs_key key;
870         struct btrfs_key found_key;
871         u32 found_type;
872         struct extent_buffer *leaf;
873         struct btrfs_file_extent_item *fi;
874         u64 extent_start = 0;
875         u64 extent_num_bytes = 0;
876         u64 item_end = 0;
877         u64 root_gen = 0;
878         u64 root_owner = 0;
879         int found_extent;
880         int del_item;
881         int pending_del_nr = 0;
882         int pending_del_slot = 0;
883         int extent_type = -1;
884
885         btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
886         path = btrfs_alloc_path();
887         path->reada = -1;
888         BUG_ON(!path);
889
890         /* FIXME, add redo link to tree so we don't leak on crash */
891         key.objectid = inode->i_ino;
892         key.offset = (u64)-1;
893         key.type = (u8)-1;
894
895         btrfs_init_path(path);
896 search_again:
897         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
898         if (ret < 0) {
899                 goto error;
900         }
901         if (ret > 0) {
902                 BUG_ON(path->slots[0] == 0);
903                 path->slots[0]--;
904         }
905
906         while(1) {
907                 fi = NULL;
908                 leaf = path->nodes[0];
909                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
910                 found_type = btrfs_key_type(&found_key);
911
912                 if (found_key.objectid != inode->i_ino)
913                         break;
914
915                 if (found_type < min_type)
916                         break;
917
918                 item_end = found_key.offset;
919                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
920                         fi = btrfs_item_ptr(leaf, path->slots[0],
921                                             struct btrfs_file_extent_item);
922                         extent_type = btrfs_file_extent_type(leaf, fi);
923                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
924                                 item_end +=
925                                     btrfs_file_extent_num_bytes(leaf, fi);
926                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
927                                 struct btrfs_item *item = btrfs_item_nr(leaf,
928                                                                 path->slots[0]);
929                                 item_end += btrfs_file_extent_inline_len(leaf,
930                                                                          item);
931                         }
932                         item_end--;
933                 }
934                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
935                         ret = btrfs_csum_truncate(trans, root, path,
936                                                   inode->i_size);
937                         BUG_ON(ret);
938                 }
939                 if (item_end < inode->i_size) {
940                         if (found_type == BTRFS_DIR_ITEM_KEY) {
941                                 found_type = BTRFS_INODE_ITEM_KEY;
942                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
943                                 found_type = BTRFS_CSUM_ITEM_KEY;
944                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
945                                 found_type = BTRFS_XATTR_ITEM_KEY;
946                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
947                                 found_type = BTRFS_INODE_REF_KEY;
948                         } else if (found_type) {
949                                 found_type--;
950                         } else {
951                                 break;
952                         }
953                         btrfs_set_key_type(&key, found_type);
954                         goto next;
955                 }
956                 if (found_key.offset >= inode->i_size)
957                         del_item = 1;
958                 else
959                         del_item = 0;
960                 found_extent = 0;
961
962                 /* FIXME, shrink the extent if the ref count is only 1 */
963                 if (found_type != BTRFS_EXTENT_DATA_KEY)
964                         goto delete;
965
966                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
967                         u64 num_dec;
968                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
969                         if (!del_item) {
970                                 u64 orig_num_bytes =
971                                         btrfs_file_extent_num_bytes(leaf, fi);
972                                 extent_num_bytes = inode->i_size -
973                                         found_key.offset + root->sectorsize - 1;
974                                 extent_num_bytes = extent_num_bytes &
975                                         ~((u64)root->sectorsize - 1);
976                                 btrfs_set_file_extent_num_bytes(leaf, fi,
977                                                          extent_num_bytes);
978                                 num_dec = (orig_num_bytes -
979                                            extent_num_bytes);
980                                 if (extent_start != 0)
981                                         dec_i_blocks(inode, num_dec);
982                                 btrfs_mark_buffer_dirty(leaf);
983                         } else {
984                                 extent_num_bytes =
985                                         btrfs_file_extent_disk_num_bytes(leaf,
986                                                                          fi);
987                                 /* FIXME blocksize != 4096 */
988                                 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
989                                 if (extent_start != 0) {
990                                         found_extent = 1;
991                                         dec_i_blocks(inode, num_dec);
992                                 }
993                                 root_gen = btrfs_header_generation(leaf);
994                                 root_owner = btrfs_header_owner(leaf);
995                         }
996                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
997                         if (!del_item) {
998                                 u32 newsize = inode->i_size - found_key.offset;
999                                 dec_i_blocks(inode, item_end + 1 -
1000                                             found_key.offset - newsize);
1001                                 newsize =
1002                                     btrfs_file_extent_calc_inline_size(newsize);
1003                                 ret = btrfs_truncate_item(trans, root, path,
1004                                                           newsize, 1);
1005                                 BUG_ON(ret);
1006                         } else {
1007                                 dec_i_blocks(inode, item_end + 1 -
1008                                              found_key.offset);
1009                         }
1010                 }
1011 delete:
1012                 if (del_item) {
1013                         if (!pending_del_nr) {
1014                                 /* no pending yet, add ourselves */
1015                                 pending_del_slot = path->slots[0];
1016                                 pending_del_nr = 1;
1017                         } else if (pending_del_nr &&
1018                                    path->slots[0] + 1 == pending_del_slot) {
1019                                 /* hop on the pending chunk */
1020                                 pending_del_nr++;
1021                                 pending_del_slot = path->slots[0];
1022                         } else {
1023                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
1024                         }
1025                 } else {
1026                         break;
1027                 }
1028                 if (found_extent) {
1029                         ret = btrfs_free_extent(trans, root, extent_start,
1030                                                 extent_num_bytes,
1031                                                 root_owner,
1032                                                 root_gen, inode->i_ino,
1033                                                 found_key.offset, 0);
1034                         BUG_ON(ret);
1035                 }
1036 next:
1037                 if (path->slots[0] == 0) {
1038                         if (pending_del_nr)
1039                                 goto del_pending;
1040                         btrfs_release_path(root, path);
1041                         goto search_again;
1042                 }
1043
1044                 path->slots[0]--;
1045                 if (pending_del_nr &&
1046                     path->slots[0] + 1 != pending_del_slot) {
1047                         struct btrfs_key debug;
1048 del_pending:
1049                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
1050                                               pending_del_slot);
1051                         ret = btrfs_del_items(trans, root, path,
1052                                               pending_del_slot,
1053                                               pending_del_nr);
1054                         BUG_ON(ret);
1055                         pending_del_nr = 0;
1056                         btrfs_release_path(root, path);
1057                         goto search_again;
1058                 }
1059         }
1060         ret = 0;
1061 error:
1062         if (pending_del_nr) {
1063                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
1064                                       pending_del_nr);
1065         }
1066         btrfs_release_path(root, path);
1067         btrfs_free_path(path);
1068         inode->i_sb->s_dirt = 1;
1069         return ret;
1070 }
1071
1072 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
1073                               size_t zero_start)
1074 {
1075         char *kaddr;
1076         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1077         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1078         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1079         int ret = 0;
1080
1081         WARN_ON(!PageLocked(page));
1082         set_page_extent_mapped(page);
1083
1084         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1085         set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1086                             page_end, GFP_NOFS);
1087
1088         if (zero_start != PAGE_CACHE_SIZE) {
1089                 kaddr = kmap(page);
1090                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
1091                 flush_dcache_page(page);
1092                 kunmap(page);
1093         }
1094         set_page_dirty(page);
1095         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1096
1097         return ret;
1098 }
1099
1100 /*
1101  * taken from block_truncate_page, but does cow as it zeros out
1102  * any bytes left in the last page in the file.
1103  */
1104 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1105 {
1106         struct inode *inode = mapping->host;
1107         struct btrfs_root *root = BTRFS_I(inode)->root;
1108         u32 blocksize = root->sectorsize;
1109         pgoff_t index = from >> PAGE_CACHE_SHIFT;
1110         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1111         struct page *page;
1112         int ret = 0;
1113         u64 page_start;
1114
1115         if ((offset & (blocksize - 1)) == 0)
1116                 goto out;
1117
1118         ret = -ENOMEM;
1119         page = grab_cache_page(mapping, index);
1120         if (!page)
1121                 goto out;
1122         if (!PageUptodate(page)) {
1123                 ret = btrfs_readpage(NULL, page);
1124                 lock_page(page);
1125                 if (!PageUptodate(page)) {
1126                         ret = -EIO;
1127                         goto out;
1128                 }
1129         }
1130         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1131
1132         ret = btrfs_cow_one_page(inode, page, offset);
1133
1134         unlock_page(page);
1135         page_cache_release(page);
1136 out:
1137         return ret;
1138 }
1139
1140 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1141 {
1142         struct inode *inode = dentry->d_inode;
1143         int err;
1144
1145         err = inode_change_ok(inode, attr);
1146         if (err)
1147                 return err;
1148
1149         if (S_ISREG(inode->i_mode) &&
1150             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1151                 struct btrfs_trans_handle *trans;
1152                 struct btrfs_root *root = BTRFS_I(inode)->root;
1153                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1154
1155                 u64 mask = root->sectorsize - 1;
1156                 u64 hole_start = (inode->i_size + mask) & ~mask;
1157                 u64 block_end = (attr->ia_size + mask) & ~mask;
1158                 u64 hole_size;
1159                 u64 alloc_hint = 0;
1160
1161                 if (attr->ia_size <= hole_start)
1162                         goto out;
1163
1164                 mutex_lock(&root->fs_info->fs_mutex);
1165                 err = btrfs_check_free_space(root, 1, 0);
1166                 mutex_unlock(&root->fs_info->fs_mutex);
1167                 if (err)
1168                         goto fail;
1169
1170                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1171
1172                 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1173                 hole_size = block_end - hole_start;
1174
1175                 mutex_lock(&root->fs_info->fs_mutex);
1176                 trans = btrfs_start_transaction(root, 1);
1177                 btrfs_set_trans_block_group(trans, inode);
1178                 err = btrfs_drop_extents(trans, root, inode,
1179                                          hole_start, block_end, hole_start,
1180                                          &alloc_hint);
1181
1182                 if (alloc_hint != EXTENT_MAP_INLINE) {
1183                         err = btrfs_insert_file_extent(trans, root,
1184                                                        inode->i_ino,
1185                                                        hole_start, 0, 0,
1186                                                        hole_size);
1187                         btrfs_drop_extent_cache(inode, hole_start,
1188                                                 hole_size - 1);
1189                         btrfs_check_file(root, inode);
1190                 }
1191                 btrfs_end_transaction(trans, root);
1192                 mutex_unlock(&root->fs_info->fs_mutex);
1193                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1194                 if (err)
1195                         return err;
1196         }
1197 out:
1198         err = inode_setattr(inode, attr);
1199 fail:
1200         return err;
1201 }
1202
1203 void btrfs_put_inode(struct inode *inode)
1204 {
1205         int ret;
1206
1207         if (!BTRFS_I(inode)->ordered_trans) {
1208                 return;
1209         }
1210
1211         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1212             mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1213                 return;
1214
1215         ret = btrfs_del_ordered_inode(inode);
1216         if (ret == 1) {
1217                 atomic_dec(&inode->i_count);
1218         }
1219 }
1220
1221 void btrfs_delete_inode(struct inode *inode)
1222 {
1223         struct btrfs_trans_handle *trans;
1224         struct btrfs_root *root = BTRFS_I(inode)->root;
1225         unsigned long nr;
1226         int ret;
1227
1228         truncate_inode_pages(&inode->i_data, 0);
1229         if (is_bad_inode(inode)) {
1230                 goto no_delete;
1231         }
1232
1233         inode->i_size = 0;
1234         mutex_lock(&root->fs_info->fs_mutex);
1235         trans = btrfs_start_transaction(root, 1);
1236
1237         btrfs_set_trans_block_group(trans, inode);
1238         ret = btrfs_truncate_in_trans(trans, root, inode, 0);
1239         if (ret)
1240                 goto no_delete_lock;
1241
1242         nr = trans->blocks_used;
1243         clear_inode(inode);
1244
1245         btrfs_end_transaction(trans, root);
1246         mutex_unlock(&root->fs_info->fs_mutex);
1247         btrfs_btree_balance_dirty(root, nr);
1248         btrfs_throttle(root);
1249         return;
1250
1251 no_delete_lock:
1252         nr = trans->blocks_used;
1253         btrfs_end_transaction(trans, root);
1254         mutex_unlock(&root->fs_info->fs_mutex);
1255         btrfs_btree_balance_dirty(root, nr);
1256         btrfs_throttle(root);
1257 no_delete:
1258         clear_inode(inode);
1259 }
1260
1261 /*
1262  * this returns the key found in the dir entry in the location pointer.
1263  * If no dir entries were found, location->objectid is 0.
1264  */
1265 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1266                                struct btrfs_key *location)
1267 {
1268         const char *name = dentry->d_name.name;
1269         int namelen = dentry->d_name.len;
1270         struct btrfs_dir_item *di;
1271         struct btrfs_path *path;
1272         struct btrfs_root *root = BTRFS_I(dir)->root;
1273         int ret = 0;
1274
1275         if (namelen == 1 && strcmp(name, ".") == 0) {
1276                 location->objectid = dir->i_ino;
1277                 location->type = BTRFS_INODE_ITEM_KEY;
1278                 location->offset = 0;
1279                 return 0;
1280         }
1281         path = btrfs_alloc_path();
1282         BUG_ON(!path);
1283
1284         if (namelen == 2 && strcmp(name, "..") == 0) {
1285                 struct btrfs_key key;
1286                 struct extent_buffer *leaf;
1287                 u32 nritems;
1288                 int slot;
1289
1290                 key.objectid = dir->i_ino;
1291                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1292                 key.offset = 0;
1293                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1294                 BUG_ON(ret == 0);
1295                 ret = 0;
1296
1297                 leaf = path->nodes[0];
1298                 slot = path->slots[0];
1299                 nritems = btrfs_header_nritems(leaf);
1300                 if (slot >= nritems)
1301                         goto out_err;
1302
1303                 btrfs_item_key_to_cpu(leaf, &key, slot);
1304                 if (key.objectid != dir->i_ino ||
1305                     key.type != BTRFS_INODE_REF_KEY) {
1306                         goto out_err;
1307                 }
1308                 location->objectid = key.offset;
1309                 location->type = BTRFS_INODE_ITEM_KEY;
1310                 location->offset = 0;
1311                 goto out;
1312         }
1313
1314         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1315                                     namelen, 0);
1316         if (IS_ERR(di))
1317                 ret = PTR_ERR(di);
1318         if (!di || IS_ERR(di)) {
1319                 goto out_err;
1320         }
1321         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1322 out:
1323         btrfs_free_path(path);
1324         return ret;
1325 out_err:
1326         location->objectid = 0;
1327         goto out;
1328 }
1329
1330 /*
1331  * when we hit a tree root in a directory, the btrfs part of the inode
1332  * needs to be changed to reflect the root directory of the tree root.  This
1333  * is kind of like crossing a mount point.
1334  */
1335 static int fixup_tree_root_location(struct btrfs_root *root,
1336                              struct btrfs_key *location,
1337                              struct btrfs_root **sub_root,
1338                              struct dentry *dentry)
1339 {
1340         struct btrfs_path *path;
1341         struct btrfs_root_item *ri;
1342
1343         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1344                 return 0;
1345         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1346                 return 0;
1347
1348         path = btrfs_alloc_path();
1349         BUG_ON(!path);
1350         mutex_lock(&root->fs_info->fs_mutex);
1351
1352         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1353                                         dentry->d_name.name,
1354                                         dentry->d_name.len);
1355         if (IS_ERR(*sub_root))
1356                 return PTR_ERR(*sub_root);
1357
1358         ri = &(*sub_root)->root_item;
1359         location->objectid = btrfs_root_dirid(ri);
1360         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1361         location->offset = 0;
1362
1363         btrfs_free_path(path);
1364         mutex_unlock(&root->fs_info->fs_mutex);
1365         return 0;
1366 }
1367
1368 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1369 {
1370         struct btrfs_iget_args *args = p;
1371         inode->i_ino = args->ino;
1372         BTRFS_I(inode)->root = args->root;
1373         BTRFS_I(inode)->delalloc_bytes = 0;
1374         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1375         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1376                              inode->i_mapping, GFP_NOFS);
1377         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1378                              inode->i_mapping, GFP_NOFS);
1379         return 0;
1380 }
1381
1382 static int btrfs_find_actor(struct inode *inode, void *opaque)
1383 {
1384         struct btrfs_iget_args *args = opaque;
1385         return (args->ino == inode->i_ino &&
1386                 args->root == BTRFS_I(inode)->root);
1387 }
1388
1389 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1390                             u64 root_objectid)
1391 {
1392         struct btrfs_iget_args args;
1393         args.ino = objectid;
1394         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1395
1396         if (!args.root)
1397                 return NULL;
1398
1399         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1400 }
1401
1402 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1403                                 struct btrfs_root *root)
1404 {
1405         struct inode *inode;
1406         struct btrfs_iget_args args;
1407         args.ino = objectid;
1408         args.root = root;
1409
1410         inode = iget5_locked(s, objectid, btrfs_find_actor,
1411                              btrfs_init_locked_inode,
1412                              (void *)&args);
1413         return inode;
1414 }
1415
1416 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1417                                    struct nameidata *nd)
1418 {
1419         struct inode * inode;
1420         struct btrfs_inode *bi = BTRFS_I(dir);
1421         struct btrfs_root *root = bi->root;
1422         struct btrfs_root *sub_root = root;
1423         struct btrfs_key location;
1424         int ret;
1425
1426         if (dentry->d_name.len > BTRFS_NAME_LEN)
1427                 return ERR_PTR(-ENAMETOOLONG);
1428
1429         mutex_lock(&root->fs_info->fs_mutex);
1430         ret = btrfs_inode_by_name(dir, dentry, &location);
1431         mutex_unlock(&root->fs_info->fs_mutex);
1432
1433         if (ret < 0)
1434                 return ERR_PTR(ret);
1435
1436         inode = NULL;
1437         if (location.objectid) {
1438                 ret = fixup_tree_root_location(root, &location, &sub_root,
1439                                                 dentry);
1440                 if (ret < 0)
1441                         return ERR_PTR(ret);
1442                 if (ret > 0)
1443                         return ERR_PTR(-ENOENT);
1444                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1445                                           sub_root);
1446                 if (!inode)
1447                         return ERR_PTR(-EACCES);
1448                 if (inode->i_state & I_NEW) {
1449                         /* the inode and parent dir are two different roots */
1450                         if (sub_root != root) {
1451                                 igrab(inode);
1452                                 sub_root->inode = inode;
1453                         }
1454                         BTRFS_I(inode)->root = sub_root;
1455                         memcpy(&BTRFS_I(inode)->location, &location,
1456                                sizeof(location));
1457                         btrfs_read_locked_inode(inode);
1458                         unlock_new_inode(inode);
1459                 }
1460         }
1461         return d_splice_alias(inode, dentry);
1462 }
1463
1464 static unsigned char btrfs_filetype_table[] = {
1465         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1466 };
1467
1468 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1469 {
1470         struct inode *inode = filp->f_dentry->d_inode;
1471         struct btrfs_root *root = BTRFS_I(inode)->root;
1472         struct btrfs_item *item;
1473         struct btrfs_dir_item *di;
1474         struct btrfs_key key;
1475         struct btrfs_key found_key;
1476         struct btrfs_path *path;
1477         int ret;
1478         u32 nritems;
1479         struct extent_buffer *leaf;
1480         int slot;
1481         int advance;
1482         unsigned char d_type;
1483         int over = 0;
1484         u32 di_cur;
1485         u32 di_total;
1486         u32 di_len;
1487         int key_type = BTRFS_DIR_INDEX_KEY;
1488         char tmp_name[32];
1489         char *name_ptr;
1490         int name_len;
1491
1492         /* FIXME, use a real flag for deciding about the key type */
1493         if (root->fs_info->tree_root == root)
1494                 key_type = BTRFS_DIR_ITEM_KEY;
1495
1496         /* special case for "." */
1497         if (filp->f_pos == 0) {
1498                 over = filldir(dirent, ".", 1,
1499                                1, inode->i_ino,
1500                                DT_DIR);
1501                 if (over)
1502                         return 0;
1503                 filp->f_pos = 1;
1504         }
1505
1506         mutex_lock(&root->fs_info->fs_mutex);
1507         key.objectid = inode->i_ino;
1508         path = btrfs_alloc_path();
1509         path->reada = 2;
1510
1511         /* special case for .., just use the back ref */
1512         if (filp->f_pos == 1) {
1513                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1514                 key.offset = 0;
1515                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1516                 BUG_ON(ret == 0);
1517                 leaf = path->nodes[0];
1518                 slot = path->slots[0];
1519                 nritems = btrfs_header_nritems(leaf);
1520                 if (slot >= nritems) {
1521                         btrfs_release_path(root, path);
1522                         goto read_dir_items;
1523                 }
1524                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1525                 btrfs_release_path(root, path);
1526                 if (found_key.objectid != key.objectid ||
1527                     found_key.type != BTRFS_INODE_REF_KEY)
1528                         goto read_dir_items;
1529                 over = filldir(dirent, "..", 2,
1530                                2, found_key.offset, DT_DIR);
1531                 if (over)
1532                         goto nopos;
1533                 filp->f_pos = 2;
1534         }
1535
1536 read_dir_items:
1537         btrfs_set_key_type(&key, key_type);
1538         key.offset = filp->f_pos;
1539
1540         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1541         if (ret < 0)
1542                 goto err;
1543         advance = 0;
1544         while(1) {
1545                 leaf = path->nodes[0];
1546                 nritems = btrfs_header_nritems(leaf);
1547                 slot = path->slots[0];
1548                 if (advance || slot >= nritems) {
1549                         if (slot >= nritems -1) {
1550                                 ret = btrfs_next_leaf(root, path);
1551                                 if (ret)
1552                                         break;
1553                                 leaf = path->nodes[0];
1554                                 nritems = btrfs_header_nritems(leaf);
1555                                 slot = path->slots[0];
1556                         } else {
1557                                 slot++;
1558                                 path->slots[0]++;
1559                         }
1560                 }
1561                 advance = 1;
1562                 item = btrfs_item_nr(leaf, slot);
1563                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1564
1565                 if (found_key.objectid != key.objectid)
1566                         break;
1567                 if (btrfs_key_type(&found_key) != key_type)
1568                         break;
1569                 if (found_key.offset < filp->f_pos)
1570                         continue;
1571
1572                 filp->f_pos = found_key.offset;
1573                 advance = 1;
1574                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1575                 di_cur = 0;
1576                 di_total = btrfs_item_size(leaf, item);
1577                 while(di_cur < di_total) {
1578                         struct btrfs_key location;
1579
1580                         name_len = btrfs_dir_name_len(leaf, di);
1581                         if (name_len < 32) {
1582                                 name_ptr = tmp_name;
1583                         } else {
1584                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1585                                 BUG_ON(!name_ptr);
1586                         }
1587                         read_extent_buffer(leaf, name_ptr,
1588                                            (unsigned long)(di + 1), name_len);
1589
1590                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1591                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1592                         over = filldir(dirent, name_ptr, name_len,
1593                                        found_key.offset,
1594                                        location.objectid,
1595                                        d_type);
1596
1597                         if (name_ptr != tmp_name)
1598                                 kfree(name_ptr);
1599
1600                         if (over)
1601                                 goto nopos;
1602                         di_len = btrfs_dir_name_len(leaf, di) +
1603                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1604                         di_cur += di_len;
1605                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1606                 }
1607         }
1608         if (key_type == BTRFS_DIR_INDEX_KEY)
1609                 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
1610         else
1611                 filp->f_pos++;
1612 nopos:
1613         ret = 0;
1614 err:
1615         btrfs_release_path(root, path);
1616         btrfs_free_path(path);
1617         mutex_unlock(&root->fs_info->fs_mutex);
1618         return ret;
1619 }
1620
1621 int btrfs_write_inode(struct inode *inode, int wait)
1622 {
1623         struct btrfs_root *root = BTRFS_I(inode)->root;
1624         struct btrfs_trans_handle *trans;
1625         int ret = 0;
1626
1627         if (wait) {
1628                 mutex_lock(&root->fs_info->fs_mutex);
1629                 trans = btrfs_start_transaction(root, 1);
1630                 btrfs_set_trans_block_group(trans, inode);
1631                 ret = btrfs_commit_transaction(trans, root);
1632                 mutex_unlock(&root->fs_info->fs_mutex);
1633         }
1634         return ret;
1635 }
1636
1637 /*
1638  * This is somewhat expensive, updating the tree every time the
1639  * inode changes.  But, it is most likely to find the inode in cache.
1640  * FIXME, needs more benchmarking...there are no reasons other than performance
1641  * to keep or drop this code.
1642  */
1643 void btrfs_dirty_inode(struct inode *inode)
1644 {
1645         struct btrfs_root *root = BTRFS_I(inode)->root;
1646         struct btrfs_trans_handle *trans;
1647
1648         mutex_lock(&root->fs_info->fs_mutex);
1649         trans = btrfs_start_transaction(root, 1);
1650         btrfs_set_trans_block_group(trans, inode);
1651         btrfs_update_inode(trans, root, inode);
1652         btrfs_end_transaction(trans, root);
1653         mutex_unlock(&root->fs_info->fs_mutex);
1654 }
1655
1656 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1657                                      struct btrfs_root *root,
1658                                      const char *name, int name_len,
1659                                      u64 ref_objectid,
1660                                      u64 objectid,
1661                                      struct btrfs_block_group_cache *group,
1662                                      int mode)
1663 {
1664         struct inode *inode;
1665         struct btrfs_inode_item *inode_item;
1666         struct btrfs_block_group_cache *new_inode_group;
1667         struct btrfs_key *location;
1668         struct btrfs_path *path;
1669         struct btrfs_inode_ref *ref;
1670         struct btrfs_key key[2];
1671         u32 sizes[2];
1672         unsigned long ptr;
1673         int ret;
1674         int owner;
1675
1676         path = btrfs_alloc_path();
1677         BUG_ON(!path);
1678
1679         inode = new_inode(root->fs_info->sb);
1680         if (!inode)
1681                 return ERR_PTR(-ENOMEM);
1682
1683         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1684         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1685                              inode->i_mapping, GFP_NOFS);
1686         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1687                              inode->i_mapping, GFP_NOFS);
1688         BTRFS_I(inode)->delalloc_bytes = 0;
1689         BTRFS_I(inode)->root = root;
1690
1691         if (mode & S_IFDIR)
1692                 owner = 0;
1693         else
1694                 owner = 1;
1695         new_inode_group = btrfs_find_block_group(root, group, 0,
1696                                        BTRFS_BLOCK_GROUP_METADATA, owner);
1697         if (!new_inode_group) {
1698                 printk("find_block group failed\n");
1699                 new_inode_group = group;
1700         }
1701         BTRFS_I(inode)->block_group = new_inode_group;
1702         BTRFS_I(inode)->flags = 0;
1703
1704         key[0].objectid = objectid;
1705         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
1706         key[0].offset = 0;
1707
1708         key[1].objectid = objectid;
1709         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
1710         key[1].offset = ref_objectid;
1711
1712         sizes[0] = sizeof(struct btrfs_inode_item);
1713         sizes[1] = name_len + sizeof(*ref);
1714
1715         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
1716         if (ret != 0)
1717                 goto fail;
1718
1719         if (objectid > root->highest_inode)
1720                 root->highest_inode = objectid;
1721
1722         inode->i_uid = current->fsuid;
1723         inode->i_gid = current->fsgid;
1724         inode->i_mode = mode;
1725         inode->i_ino = objectid;
1726         inode->i_blocks = 0;
1727         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1728         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1729                                   struct btrfs_inode_item);
1730         fill_inode_item(path->nodes[0], inode_item, inode);
1731
1732         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1733                              struct btrfs_inode_ref);
1734         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
1735         ptr = (unsigned long)(ref + 1);
1736         write_extent_buffer(path->nodes[0], name, ptr, name_len);
1737
1738         btrfs_mark_buffer_dirty(path->nodes[0]);
1739         btrfs_free_path(path);
1740
1741         location = &BTRFS_I(inode)->location;
1742         location->objectid = objectid;
1743         location->offset = 0;
1744         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1745
1746         insert_inode_hash(inode);
1747         return inode;
1748 fail:
1749         btrfs_free_path(path);
1750         return ERR_PTR(ret);
1751 }
1752
1753 static inline u8 btrfs_inode_type(struct inode *inode)
1754 {
1755         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1756 }
1757
1758 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1759                             struct dentry *dentry, struct inode *inode,
1760                             int add_backref)
1761 {
1762         int ret;
1763         struct btrfs_key key;
1764         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1765         struct inode *parent_inode;
1766
1767         key.objectid = inode->i_ino;
1768         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1769         key.offset = 0;
1770
1771         ret = btrfs_insert_dir_item(trans, root,
1772                                     dentry->d_name.name, dentry->d_name.len,
1773                                     dentry->d_parent->d_inode->i_ino,
1774                                     &key, btrfs_inode_type(inode));
1775         if (ret == 0) {
1776                 if (add_backref) {
1777                         ret = btrfs_insert_inode_ref(trans, root,
1778                                              dentry->d_name.name,
1779                                              dentry->d_name.len,
1780                                              inode->i_ino,
1781                                              dentry->d_parent->d_inode->i_ino);
1782                 }
1783                 parent_inode = dentry->d_parent->d_inode;
1784                 parent_inode->i_size += dentry->d_name.len * 2;
1785                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1786                 ret = btrfs_update_inode(trans, root,
1787                                          dentry->d_parent->d_inode);
1788         }
1789         return ret;
1790 }
1791
1792 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1793                             struct dentry *dentry, struct inode *inode,
1794                             int backref)
1795 {
1796         int err = btrfs_add_link(trans, dentry, inode, backref);
1797         if (!err) {
1798                 d_instantiate(dentry, inode);
1799                 return 0;
1800         }
1801         if (err > 0)
1802                 err = -EEXIST;
1803         return err;
1804 }
1805
1806 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1807                         int mode, dev_t rdev)
1808 {
1809         struct btrfs_trans_handle *trans;
1810         struct btrfs_root *root = BTRFS_I(dir)->root;
1811         struct inode *inode = NULL;
1812         int err;
1813         int drop_inode = 0;
1814         u64 objectid;
1815         unsigned long nr = 0;
1816
1817         if (!new_valid_dev(rdev))
1818                 return -EINVAL;
1819
1820         mutex_lock(&root->fs_info->fs_mutex);
1821         err = btrfs_check_free_space(root, 1, 0);
1822         if (err)
1823                 goto fail;
1824
1825         trans = btrfs_start_transaction(root, 1);
1826         btrfs_set_trans_block_group(trans, dir);
1827
1828         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1829         if (err) {
1830                 err = -ENOSPC;
1831                 goto out_unlock;
1832         }
1833
1834         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1835                                 dentry->d_name.len,
1836                                 dentry->d_parent->d_inode->i_ino, objectid,
1837                                 BTRFS_I(dir)->block_group, mode);
1838         err = PTR_ERR(inode);
1839         if (IS_ERR(inode))
1840                 goto out_unlock;
1841
1842         btrfs_set_trans_block_group(trans, inode);
1843         err = btrfs_add_nondir(trans, dentry, inode, 0);
1844         if (err)
1845                 drop_inode = 1;
1846         else {
1847                 inode->i_op = &btrfs_special_inode_operations;
1848                 init_special_inode(inode, inode->i_mode, rdev);
1849                 btrfs_update_inode(trans, root, inode);
1850         }
1851         dir->i_sb->s_dirt = 1;
1852         btrfs_update_inode_block_group(trans, inode);
1853         btrfs_update_inode_block_group(trans, dir);
1854 out_unlock:
1855         nr = trans->blocks_used;
1856         btrfs_end_transaction(trans, root);
1857 fail:
1858         mutex_unlock(&root->fs_info->fs_mutex);
1859
1860         if (drop_inode) {
1861                 inode_dec_link_count(inode);
1862                 iput(inode);
1863         }
1864         btrfs_btree_balance_dirty(root, nr);
1865         btrfs_throttle(root);
1866         return err;
1867 }
1868
1869 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1870                         int mode, struct nameidata *nd)
1871 {
1872         struct btrfs_trans_handle *trans;
1873         struct btrfs_root *root = BTRFS_I(dir)->root;
1874         struct inode *inode = NULL;
1875         int err;
1876         int drop_inode = 0;
1877         unsigned long nr = 0;
1878         u64 objectid;
1879
1880         mutex_lock(&root->fs_info->fs_mutex);
1881         err = btrfs_check_free_space(root, 1, 0);
1882         if (err)
1883                 goto fail;
1884         trans = btrfs_start_transaction(root, 1);
1885         btrfs_set_trans_block_group(trans, dir);
1886
1887         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1888         if (err) {
1889                 err = -ENOSPC;
1890                 goto out_unlock;
1891         }
1892
1893         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1894                                 dentry->d_name.len,
1895                                 dentry->d_parent->d_inode->i_ino,
1896                                 objectid, BTRFS_I(dir)->block_group, mode);
1897         err = PTR_ERR(inode);
1898         if (IS_ERR(inode))
1899                 goto out_unlock;
1900
1901         btrfs_set_trans_block_group(trans, inode);
1902         err = btrfs_add_nondir(trans, dentry, inode, 0);
1903         if (err)
1904                 drop_inode = 1;
1905         else {
1906                 inode->i_mapping->a_ops = &btrfs_aops;
1907                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1908                 inode->i_fop = &btrfs_file_operations;
1909                 inode->i_op = &btrfs_file_inode_operations;
1910                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1911                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1912                                      inode->i_mapping, GFP_NOFS);
1913                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1914                                      inode->i_mapping, GFP_NOFS);
1915                 BTRFS_I(inode)->delalloc_bytes = 0;
1916                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1917         }
1918         dir->i_sb->s_dirt = 1;
1919         btrfs_update_inode_block_group(trans, inode);
1920         btrfs_update_inode_block_group(trans, dir);
1921 out_unlock:
1922         nr = trans->blocks_used;
1923         btrfs_end_transaction(trans, root);
1924 fail:
1925         mutex_unlock(&root->fs_info->fs_mutex);
1926
1927         if (drop_inode) {
1928                 inode_dec_link_count(inode);
1929                 iput(inode);
1930         }
1931         btrfs_btree_balance_dirty(root, nr);
1932         btrfs_throttle(root);
1933         return err;
1934 }
1935
1936 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1937                       struct dentry *dentry)
1938 {
1939         struct btrfs_trans_handle *trans;
1940         struct btrfs_root *root = BTRFS_I(dir)->root;
1941         struct inode *inode = old_dentry->d_inode;
1942         unsigned long nr = 0;
1943         int err;
1944         int drop_inode = 0;
1945
1946         if (inode->i_nlink == 0)
1947                 return -ENOENT;
1948
1949 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1950         inode->i_nlink++;
1951 #else
1952         inc_nlink(inode);
1953 #endif
1954         mutex_lock(&root->fs_info->fs_mutex);
1955         err = btrfs_check_free_space(root, 1, 0);
1956         if (err)
1957                 goto fail;
1958         trans = btrfs_start_transaction(root, 1);
1959
1960         btrfs_set_trans_block_group(trans, dir);
1961         atomic_inc(&inode->i_count);
1962         err = btrfs_add_nondir(trans, dentry, inode, 1);
1963
1964         if (err)
1965                 drop_inode = 1;
1966
1967         dir->i_sb->s_dirt = 1;
1968         btrfs_update_inode_block_group(trans, dir);
1969         err = btrfs_update_inode(trans, root, inode);
1970
1971         if (err)
1972                 drop_inode = 1;
1973
1974         nr = trans->blocks_used;
1975         btrfs_end_transaction(trans, root);
1976 fail:
1977         mutex_unlock(&root->fs_info->fs_mutex);
1978
1979         if (drop_inode) {
1980                 inode_dec_link_count(inode);
1981                 iput(inode);
1982         }
1983         btrfs_btree_balance_dirty(root, nr);
1984         btrfs_throttle(root);
1985         return err;
1986 }
1987
1988 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1989 {
1990         struct inode *inode;
1991         struct btrfs_trans_handle *trans;
1992         struct btrfs_root *root = BTRFS_I(dir)->root;
1993         int err = 0;
1994         int drop_on_err = 0;
1995         u64 objectid;
1996         unsigned long nr = 1;
1997
1998         mutex_lock(&root->fs_info->fs_mutex);
1999         err = btrfs_check_free_space(root, 1, 0);
2000         if (err)
2001                 goto out_unlock;
2002
2003         trans = btrfs_start_transaction(root, 1);
2004         btrfs_set_trans_block_group(trans, dir);
2005
2006         if (IS_ERR(trans)) {
2007                 err = PTR_ERR(trans);
2008                 goto out_unlock;
2009         }
2010
2011         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2012         if (err) {
2013                 err = -ENOSPC;
2014                 goto out_unlock;
2015         }
2016
2017         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
2018                                 dentry->d_name.len,
2019                                 dentry->d_parent->d_inode->i_ino, objectid,
2020                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
2021         if (IS_ERR(inode)) {
2022                 err = PTR_ERR(inode);
2023                 goto out_fail;
2024         }
2025
2026         drop_on_err = 1;
2027         inode->i_op = &btrfs_dir_inode_operations;
2028         inode->i_fop = &btrfs_dir_file_operations;
2029         btrfs_set_trans_block_group(trans, inode);
2030
2031         inode->i_size = 0;
2032         err = btrfs_update_inode(trans, root, inode);
2033         if (err)
2034                 goto out_fail;
2035
2036         err = btrfs_add_link(trans, dentry, inode, 0);
2037         if (err)
2038                 goto out_fail;
2039
2040         d_instantiate(dentry, inode);
2041         drop_on_err = 0;
2042         dir->i_sb->s_dirt = 1;
2043         btrfs_update_inode_block_group(trans, inode);
2044         btrfs_update_inode_block_group(trans, dir);
2045
2046 out_fail:
2047         nr = trans->blocks_used;
2048         btrfs_end_transaction(trans, root);
2049
2050 out_unlock:
2051         mutex_unlock(&root->fs_info->fs_mutex);
2052         if (drop_on_err)
2053                 iput(inode);
2054         btrfs_btree_balance_dirty(root, nr);
2055         btrfs_throttle(root);
2056         return err;
2057 }
2058
/*
 * Return the extent map that covers file offset 'start' of 'inode'.
 *
 * The inode's cached extent map tree is checked first.  On a miss the
 * file extent item is looked up with btrfs_lookup_file_extent() and a new
 * extent_map is filled in as a regular extent, an inline extent or a
 * hole, then added to the extent map tree.
 *
 * For an inline extent with a non-NULL 'page', the inline data is also
 * copied: from the leaf into the page when create == 0 and the page is
 * not uptodate, or from the page into the leaf when 'create' is set and
 * the page is uptodate (a transaction is started on demand for that).
 *
 * fs_mutex is held from entry until just before returning.
 *
 * Returns the extent map on success, or ERR_PTR(err) on failure.
 */
2059 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
2060                                     size_t pg_offset, u64 start, u64 len,
2061                                     int create)
2062 {
2063         int ret;
2064         int err = 0;
2065         u64 bytenr;
2066         u64 extent_start = 0;
2067         u64 extent_end = 0;
2068         u64 objectid = inode->i_ino;
2069         u32 found_type;
2070         struct btrfs_path *path;
2071         struct btrfs_root *root = BTRFS_I(inode)->root;
2072         struct btrfs_file_extent_item *item;
2073         struct extent_buffer *leaf;
2074         struct btrfs_key found_key;
2075         struct extent_map *em = NULL;
2076         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2077         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2078         struct btrfs_trans_handle *trans = NULL;
2079
2080         path = btrfs_alloc_path();
2081         BUG_ON(!path);
2082         mutex_lock(&root->fs_info->fs_mutex);
2083
/* restart point: re-entered once a transaction has been started below */
2084 again:
2085         spin_lock(&em_tree->lock);
2086         em = lookup_extent_mapping(em_tree, start, len);
2087         spin_unlock(&em_tree->lock);
2088
2089         if (em) {
2090                 if (em->start > start) {
2091                         printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n",
2092                                start, len, em->start, em->len);
2093                         WARN_ON(1);
2094                 }
/*
 * A cached inline mapping is dropped when a page was supplied, so the
 * lookup below runs again; any other cached hit is returned as-is.
 */
2095                 if (em->block_start == EXTENT_MAP_INLINE && page)
2096                         free_extent_map(em);
2097                 else
2098                         goto out;
2099         }
2100         em = alloc_extent_map(GFP_NOFS);
2101         if (!em) {
2102                 err = -ENOMEM;
2103                 goto out;
2104         }
2105
2106         em->start = EXTENT_MAP_HOLE;
2107         em->len = (u64)-1;
2108         em->bdev = inode->i_sb->s_bdev;
/* the last argument is non-zero only once a transaction is running */
2109         ret = btrfs_lookup_file_extent(trans, root, path,
2110                                        objectid, start, trans != NULL);
2111         if (ret < 0) {
2112                 err = ret;
2113                 goto out;
2114         }
2115
/* no exact match: step back to the previous slot, if there is one */
2116         if (ret != 0) {
2117                 if (path->slots[0] == 0)
2118                         goto not_found;
2119                 path->slots[0]--;
2120         }
2121
2122         leaf = path->nodes[0];
2123         item = btrfs_item_ptr(leaf, path->slots[0],
2124                               struct btrfs_file_extent_item);
2125         /* are we inside the extent that was found? */
2126         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2127         found_type = btrfs_key_type(&found_key);
2128         if (found_key.objectid != objectid ||
2129             found_type != BTRFS_EXTENT_DATA_KEY) {
2130                 goto not_found;
2131         }
2132
/* found_type is reused: from here on it holds the file extent type */
2133         found_type = btrfs_file_extent_type(leaf, item);
2134         extent_start = found_key.offset;
2135         if (found_type == BTRFS_FILE_EXTENT_REG) {
2136                 extent_end = extent_start +
2137                        btrfs_file_extent_num_bytes(leaf, item);
2138                 err = 0;
/* 'start' lies outside the extent that was found: map it as a hole */
2139                 if (start < extent_start || start >= extent_end) {
2140                         em->start = start;
2141                         if (start < extent_start) {
2142                                 if (start + len <= extent_start)
2143                                         goto not_found;
2144                                 em->len = extent_end - extent_start;
2145                         } else {
2146                                 em->len = len;
2147                         }
2148                         goto not_found_em;
2149                 }
2150                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
/* a disk bytenr of zero is mapped as a hole spanning the extent */
2151                 if (bytenr == 0) {
2152                         em->start = extent_start;
2153                         em->len = extent_end - extent_start;
2154                         em->block_start = EXTENT_MAP_HOLE;
2155                         goto insert;
2156                 }
2157                 bytenr += btrfs_file_extent_offset(leaf, item);
2158                 em->block_start = bytenr;
2159                 em->start = extent_start;
2160                 em->len = extent_end - extent_start;
2161                 goto insert;
2162         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
2163                 u64 page_start;
2164                 unsigned long ptr;
2165                 char *map;
2166                 size_t size;
2167                 size_t extent_offset;
2168                 size_t copy_size;
2169
2170                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
2171                                                     path->slots[0]));
/* extent_end is the inline end rounded up to a multiple of sectorsize */
2172                 extent_end = (extent_start + size + root->sectorsize - 1) &
2173                         ~((u64)root->sectorsize - 1);
2174                 if (start < extent_start || start >= extent_end) {
2175                         em->start = start;
2176                         if (start < extent_start) {
2177                                 if (start + len <= extent_start)
2178                                         goto not_found;
2179                                 em->len = extent_end - extent_start;
2180                         } else {
2181                                 em->len = len;
2182                         }
2183                         goto not_found_em;
2184                 }
2185                 em->block_start = EXTENT_MAP_INLINE;
2186
/*
 * No page to copy into: hand back the inline mapping directly.  This
 * path jumps to 'out' and so does not add the map to the tree.
 */
2187                 if (!page) {
2188                         em->start = extent_start;
2189                         em->len = size;
2190                         goto out;
2191                 }
2192
2193                 page_start = page_offset(page) + pg_offset;
2194                 extent_offset = page_start - extent_start;
/* copy no more than fits in the page or remains of the inline data */
2195                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
2196                                 size - extent_offset);
2197                 em->start = extent_start + extent_offset;
2198                 em->len = (copy_size + root->sectorsize - 1) &
2199                         ~((u64)root->sectorsize - 1);
2200                 map = kmap(page);
2201                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
2202                 if (create == 0 && !PageUptodate(page)) {
/* read: leaf -> page */
2203                         read_extent_buffer(leaf, map + pg_offset, ptr,
2204                                            copy_size);
2205                         flush_dcache_page(page);
2206                 } else if (create && PageUptodate(page)) {
/*
 * write: page -> leaf.  This needs a transaction; if none is running,
 * undo what was set up, start one and redo the lookup from 'again'.
 */
2207                         if (!trans) {
2208                                 kunmap(page);
2209                                 free_extent_map(em);
2210                                 em = NULL;
2211                                 btrfs_release_path(root, path);
2212                                 trans = btrfs_start_transaction(root, 1);
2213                                 goto again;
2214                         }
2215                         write_extent_buffer(leaf, map + pg_offset, ptr,
2216                                             copy_size);
2217                         btrfs_mark_buffer_dirty(leaf);
2218                 }
2219                 kunmap(page);
2220                 set_extent_uptodate(io_tree, em->start,
2221                                     extent_map_end(em) - 1, GFP_NOFS);
2222                 goto insert;
2223         } else {
/* unknown extent type: warn, then fall through and treat it as a hole */
2224                 printk("unkknown found_type %d\n", found_type);
2225                 WARN_ON(1);
2226         }
/* nothing usable found: map exactly the requested range as a hole */
2227 not_found:
2228         em->start = start;
2229         em->len = len;
/* start/len were already set by the caller of this label */
2230 not_found_em:
2231         em->block_start = EXTENT_MAP_HOLE;
2232 insert:
2233         btrfs_release_path(root, path);
/* sanity check: the finished mapping must contain 'start' */
2234         if (em->start > start || extent_map_end(em) <= start) {
2235                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
2236                 err = -EIO;
2237                 goto out;
2238         }
2239
2240         err = 0;
2241         spin_lock(&em_tree->lock);
2242         ret = add_extent_mapping(em_tree, em);
/* on -EEXIST drop our map and look the range up in the tree again */
2243         if (ret == -EEXIST) {
2244                 free_extent_map(em);
2245                 em = lookup_extent_mapping(em_tree, start, len);
2246                 if (!em) {
2247                         err = -EIO;
2248                         printk("failing to insert %Lu %Lu\n", start, len);
2249                 }
2250         }
2251         spin_unlock(&em_tree->lock);
2252 out:
2253         btrfs_free_path(path);
/* end the on-demand transaction; its error counts only if none is set */
2254         if (trans) {
2255                 ret = btrfs_end_transaction(trans, root);
2256                 if (!err)
2257                         err = ret;
2258         }
2259         mutex_unlock(&root->fs_info->fs_mutex);
2260         if (err) {
2261                 free_extent_map(em);
2262                 WARN_ON(1);
2263                 return ERR_PTR(err);
2264         }
2265         return em;
2266 }
2267
2268 static int btrfs_get_block(struct inode *inode, sector_t iblock,
2269                         struct buffer_head *bh_result, int create)
2270 {
2271         struct extent_map *em;
2272         u64 start = (u64)iblock << inode->i_blkbits;
2273         struct btrfs_multi_bio *multi = NULL;
2274         struct btrfs_root *root = BTRFS_I(inode)->root;
2275         u64 len;
2276         u64 logical;
2277         u64 map_length;
2278         int ret = 0;
2279
2280         em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0);
2281
2282         if (!em || IS_ERR(em))
2283                 goto out;
2284
2285         if (em->start > start || em->start + em->len <= start)
2286             goto out;
2287
2288         if (em->block_start == EXTENT_MAP_INLINE) {
2289                 ret = -EINVAL;
2290                 goto out;
2291         }
2292
2293         if (em->block_start == EXTENT_MAP_HOLE ||
2294             em->block_start == EXTENT_MAP_DELALLOC) {
2295                 goto out;
2296         }
2297
2298         len = em->start + em->len - start;
2299         len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size)));
2300
2301         logical = start - em->start;
2302         logical = em->block_start + logical;
2303
2304         map_length = len;
2305         ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
2306                               logical, &map_length, &multi, 0);
2307         BUG_ON(ret);
2308         bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits;
2309         bh_result->b_size = min(map_length, len);
2310         bh_result->b_bdev = multi->stripes[0].dev->bdev;
2311         set_buffer_mapped(bh_result);
2312         kfree(multi);
2313 out:
2314         free_extent_map(em);
2315         return ret;
2316 }
2317
2318 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
2319                         const struct iovec *iov, loff_t offset,
2320                         unsigned long nr_segs)
2321 {
2322         struct file *file = iocb->ki_filp;
2323         struct inode *inode = file->f_mapping->host;
2324
2325         if (rw == WRITE)
2326                 return -EINVAL;
2327
2328         return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2329                                   offset, nr_segs, btrfs_get_block, NULL);
2330 }
2331
2332 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2333 {
2334         return extent_bmap(mapping, iblock, btrfs_get_extent);
2335 }
2336
2337 int btrfs_readpage(struct file *file, struct page *page)
2338 {
2339         struct extent_io_tree *tree;
2340         tree = &BTRFS_I(page->mapping->host)->io_tree;
2341         return extent_read_full_page(tree, page, btrfs_get_extent);
2342 }
2343
2344 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2345 {
2346         struct extent_io_tree *tree;
2347
2348
2349         if (current->flags & PF_MEMALLOC) {
2350                 redirty_page_for_writepage(wbc, page);
2351                 unlock_page(page);
2352                 return 0;
2353         }
2354         tree = &BTRFS_I(page->mapping->host)->io_tree;
2355         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2356 }
2357
2358 static int btrfs_writepages(struct address_space *mapping,
2359                             struct writeback_control *wbc)
2360 {
2361         struct extent_io_tree *tree;
2362         tree = &BTRFS_I(mapping->host)->io_tree;
2363         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2364 }
2365
2366 static int
2367 btrfs_readpages(struct file *file, struct address_space *mapping,
2368                 struct list_head *pages, unsigned nr_pages)
2369 {
2370         struct extent_io_tree *tree;
2371         tree = &BTRFS_I(mapping->host)->io_tree;
2372         return extent_readpages(tree, mapping, pages, nr_pages,
2373                                 btrfs_get_extent);
2374 }
2375
2376 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2377 {
2378         struct extent_io_tree *tree;
2379         struct extent_map_tree *map;
2380         int ret;
2381
2382         tree = &BTRFS_I(page->mapping->host)->io_tree;
2383         map = &BTRFS_I(page->mapping->host)->extent_tree;
2384         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
2385         if (ret == 1) {
2386                 ClearPagePrivate(page);
2387                 set_page_private(page, 0);
2388                 page_cache_release(page);
2389         }
2390         return ret;
2391 }
2392
2393 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2394 {
2395         struct extent_io_tree *tree;
2396
2397         tree = &BTRFS_I(page->mapping->host)->io_tree;
2398         extent_invalidatepage(tree, page, offset);
2399         btrfs_releasepage(page, GFP_NOFS);
2400 }
2401
2402 /*
2403  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2404  * called from a page fault handler when a page is first dirtied. Hence we must
2405  * be careful to check for EOF conditions here. We set the page up correctly
2406  * for a written page which means we get ENOSPC checking when writing into
2407  * holes and correct delalloc and unwritten extent mapping on filesystems that
2408  * support these features.
2409  *
2410  * We are not allowed to take the i_mutex here so we have to play games to
2411  * protect against truncate races as the page could now be beyond EOF.  Because
2412  * vmtruncate() writes the inode size before removing pages, once we have the
2413  * page lock we can determine safely if the page is beyond EOF. If it is not
2414  * beyond EOF, then the page is guaranteed safe against truncation until we
2415  * unlock the page.
2416  */
2417 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2418 {
2419         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2420         struct btrfs_root *root = BTRFS_I(inode)->root;
2421         unsigned long end;
2422         loff_t size;
2423         int ret;
2424         u64 page_start;
2425
2426         mutex_lock(&root->fs_info->fs_mutex);
2427         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2428         mutex_unlock(&root->fs_info->fs_mutex);
2429         if (ret)
2430                 goto out;
2431
2432         ret = -EINVAL;
2433
2434         lock_page(page);
2435         wait_on_page_writeback(page);
2436         size = i_size_read(inode);
2437         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2438
2439         if ((page->mapping != inode->i_mapping) ||
2440             (page_start > size)) {
2441                 /* page got truncated out from underneath us */
2442                 goto out_unlock;
2443         }
2444
2445         /* page is wholly or partially inside EOF */
2446         if (page_start + PAGE_CACHE_SIZE > size)
2447                 end = size & ~PAGE_CACHE_MASK;
2448         else
2449                 end = PAGE_CACHE_SIZE;
2450
2451         ret = btrfs_cow_one_page(inode, page, end);
2452
2453 out_unlock:
2454         unlock_page(page);
2455 out:
2456         return ret;
2457 }
2458
2459 static void btrfs_truncate(struct inode *inode)
2460 {
2461         struct btrfs_root *root = BTRFS_I(inode)->root;
2462         int ret;
2463         struct btrfs_trans_handle *trans;
2464         unsigned long nr;
2465
2466         if (!S_ISREG(inode->i_mode))
2467                 return;
2468         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2469                 return;
2470
2471         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2472
2473         mutex_lock(&root->fs_info->fs_mutex);
2474         trans = btrfs_start_transaction(root, 1);
2475         btrfs_set_trans_block_group(trans, inode);
2476
2477         /* FIXME, add redo link to tree so we don't leak on crash */
2478         ret = btrfs_truncate_in_trans(trans, root, inode,
2479                                       BTRFS_EXTENT_DATA_KEY);
2480         btrfs_update_inode(trans, root, inode);
2481         nr = trans->blocks_used;
2482
2483         ret = btrfs_end_transaction(trans, root);
2484         BUG_ON(ret);
2485         mutex_unlock(&root->fs_info->fs_mutex);
2486         btrfs_btree_balance_dirty(root, nr);
2487         btrfs_throttle(root);
2488 }
2489
2490 static int noinline create_subvol(struct btrfs_root *root, char *name,
2491                                   int namelen)
2492 {
2493         struct btrfs_trans_handle *trans;
2494         struct btrfs_key key;
2495         struct btrfs_root_item root_item;
2496         struct btrfs_inode_item *inode_item;
2497         struct extent_buffer *leaf;
2498         struct btrfs_root *new_root = root;
2499         struct inode *inode;
2500         struct inode *dir;
2501         int ret;
2502         int err;
2503         u64 objectid;
2504         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2505         unsigned long nr = 1;
2506
2507         mutex_lock(&root->fs_info->fs_mutex);
2508         ret = btrfs_check_free_space(root, 1, 0);
2509         if (ret)
2510                 goto fail_commit;
2511
2512         trans = btrfs_start_transaction(root, 1);
2513         BUG_ON(!trans);
2514
2515         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2516                                        0, &objectid);
2517         if (ret)
2518                 goto fail;
2519
2520         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2521                                         objectid, trans->transid, 0, 0,
2522                                         0, 0);
2523         if (IS_ERR(leaf))
2524                 return PTR_ERR(leaf);
2525
2526         btrfs_set_header_nritems(leaf, 0);
2527         btrfs_set_header_level(leaf, 0);
2528         btrfs_set_header_bytenr(leaf, leaf->start);
2529         btrfs_set_header_generation(leaf, trans->transid);
2530         btrfs_set_header_owner(leaf, objectid);
2531
2532         write_extent_buffer(leaf, root->fs_info->fsid,
2533                             (unsigned long)btrfs_header_fsid(leaf),
2534                             BTRFS_FSID_SIZE);
2535         btrfs_mark_buffer_dirty(leaf);
2536
2537         inode_item = &root_item.inode;
2538         memset(inode_item, 0, sizeof(*inode_item));
2539         inode_item->generation = cpu_to_le64(1);
2540         inode_item->size = cpu_to_le64(3);
2541         inode_item->nlink = cpu_to_le32(1);
2542         inode_item->nblocks = cpu_to_le64(1);
2543         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2544
2545         btrfs_set_root_bytenr(&root_item, leaf->start);
2546         btrfs_set_root_level(&root_item, 0);
2547         btrfs_set_root_refs(&root_item, 1);
2548         btrfs_set_root_used(&root_item, 0);
2549
2550         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2551         root_item.drop_level = 0;
2552
2553         free_extent_buffer(leaf);
2554         leaf = NULL;
2555
2556         btrfs_set_root_dirid(&root_item, new_dirid);
2557
2558         key.objectid = objectid;
2559         key.offset = 1;
2560         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2561         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2562                                 &root_item);
2563         if (ret)
2564                 goto fail;
2565
2566         /*
2567          * insert the directory item
2568          */
2569         key.offset = (u64)-1;
2570         dir = root->fs_info->sb->s_root->d_inode;
2571         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2572                                     name, namelen, dir->i_ino, &key,
2573                                     BTRFS_FT_DIR);
2574         if (ret)
2575                 goto fail;
2576
2577         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2578                              name, namelen, objectid,
2579                              root->fs_info->sb->s_root->d_inode->i_ino);
2580         if (ret)
2581                 goto fail;
2582
2583         ret = btrfs_commit_transaction(trans, root);
2584         if (ret)
2585                 goto fail_commit;
2586
2587         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2588         BUG_ON(!new_root);
2589
2590         trans = btrfs_start_transaction(new_root, 1);
2591         BUG_ON(!trans);
2592
2593         inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid,
2594                                 new_dirid,
2595                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2596         if (IS_ERR(inode))
2597                 goto fail;
2598         inode->i_op = &btrfs_dir_inode_operations;
2599         inode->i_fop = &btrfs_dir_file_operations;
2600         new_root->inode = inode;
2601
2602         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2603                                      new_dirid);
2604         inode->i_nlink = 1;
2605         inode->i_size = 0;
2606         ret = btrfs_update_inode(trans, new_root, inode);
2607         if (ret)
2608                 goto fail;
2609 fail:
2610         nr = trans->blocks_used;
2611         err = btrfs_commit_transaction(trans, new_root);
2612         if (err && !ret)
2613                 ret = err;
2614 fail_commit:
2615         mutex_unlock(&root->fs_info->fs_mutex);
2616         btrfs_btree_balance_dirty(root, nr);
2617         btrfs_throttle(root);
2618         return ret;
2619 }
2620
2621 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2622 {
2623         struct btrfs_pending_snapshot *pending_snapshot;
2624         struct btrfs_trans_handle *trans;
2625         int ret;
2626         int err;
2627         unsigned long nr = 0;
2628
2629         if (!root->ref_cows)
2630                 return -EINVAL;
2631
2632         mutex_lock(&root->fs_info->fs_mutex);
2633         ret = btrfs_check_free_space(root, 1, 0);
2634         if (ret)
2635                 goto fail_unlock;
2636
2637         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2638         if (!pending_snapshot) {
2639                 ret = -ENOMEM;
2640                 goto fail_unlock;
2641         }
2642         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2643         if (!pending_snapshot->name) {
2644                 ret = -ENOMEM;
2645                 kfree(pending_snapshot);
2646                 goto fail_unlock;
2647         }
2648         memcpy(pending_snapshot->name, name, namelen);
2649         pending_snapshot->name[namelen] = '\0';
2650         trans = btrfs_start_transaction(root, 1);
2651         BUG_ON(!trans);
2652         pending_snapshot->root = root;
2653         list_add(&pending_snapshot->list,
2654                  &trans->transaction->pending_snapshots);
2655         ret = btrfs_update_inode(trans, root, root->inode);
2656         err = btrfs_commit_transaction(trans, root);
2657
2658 fail_unlock:
2659         mutex_unlock(&root->fs_info->fs_mutex);
2660         btrfs_btree_balance_dirty(root, nr);
2661         btrfs_throttle(root);
2662         return ret;
2663 }
2664
2665 unsigned long btrfs_force_ra(struct address_space *mapping,
2666                               struct file_ra_state *ra, struct file *file,
2667                               pgoff_t offset, pgoff_t last_index)
2668 {
2669         pgoff_t req_size;
2670
2671 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2672         req_size = last_index - offset + 1;
2673         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2674         return offset;
2675 #else
2676         req_size = min(last_index - offset + 1, (pgoff_t)128);
2677         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2678         return offset + req_size;
2679 #endif
2680 }
2681
2682 int btrfs_defrag_file(struct file *file) {
2683         struct inode *inode = fdentry(file)->d_inode;
2684         struct btrfs_root *root = BTRFS_I(inode)->root;
2685         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2686         struct page *page;
2687         unsigned long last_index;
2688         unsigned long ra_index = 0;
2689         u64 page_start;
2690         u64 page_end;
2691         unsigned long i;
2692         int ret;
2693
2694         mutex_lock(&root->fs_info->fs_mutex);
2695         ret = btrfs_check_free_space(root, inode->i_size, 0);
2696         mutex_unlock(&root->fs_info->fs_mutex);
2697         if (ret)
2698                 return -ENOSPC;
2699
2700         mutex_lock(&inode->i_mutex);
2701         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2702         for (i = 0; i <= last_index; i++) {
2703                 if (i == ra_index) {
2704                         ra_index = btrfs_force_ra(inode->i_mapping,
2705                                                   &file->f_ra,
2706                                                   file, ra_index, last_index);
2707                 }
2708                 page = grab_cache_page(inode->i_mapping, i);
2709                 if (!page)
2710                         goto out_unlock;
2711                 if (!PageUptodate(page)) {
2712                         btrfs_readpage(NULL, page);
2713                         lock_page(page);
2714                         if (!PageUptodate(page)) {
2715                                 unlock_page(page);
2716                                 page_cache_release(page);
2717                                 goto out_unlock;
2718                         }
2719                 }
2720                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2721                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2722
2723                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2724                 set_extent_delalloc(io_tree, page_start,
2725                                     page_end, GFP_NOFS);
2726
2727                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2728                 set_page_dirty(page);
2729                 unlock_page(page);
2730                 page_cache_release(page);
2731                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2732         }
2733
2734 out_unlock:
2735         mutex_unlock(&inode->i_mutex);
2736         return 0;
2737 }
2738
2739 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2740 {
2741         u64 new_size;
2742         u64 old_size;
2743         struct btrfs_ioctl_vol_args *vol_args;
2744         struct btrfs_trans_handle *trans;
2745         char *sizestr;
2746         int ret = 0;
2747         int namelen;
2748         int mod = 0;
2749
2750         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2751
2752         if (!vol_args)
2753                 return -ENOMEM;
2754
2755         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2756                 ret = -EFAULT;
2757                 goto out;
2758         }
2759         namelen = strlen(vol_args->name);
2760         if (namelen > BTRFS_VOL_NAME_MAX) {
2761                 ret = -EINVAL;
2762                 goto out;
2763         }
2764
2765         sizestr = vol_args->name;
2766         if (!strcmp(sizestr, "max"))
2767                 new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
2768         else {
2769                 if (sizestr[0] == '-') {
2770                         mod = -1;
2771                         sizestr++;
2772                 } else if (sizestr[0] == '+') {
2773                         mod = 1;
2774                         sizestr++;
2775                 }
2776                 new_size = btrfs_parse_size(sizestr);
2777                 if (new_size == 0) {
2778                         ret = -EINVAL;
2779                         goto out;
2780                 }
2781         }
2782
2783         mutex_lock(&root->fs_info->fs_mutex);
2784         old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
2785
2786         if (mod < 0) {
2787                 if (new_size > old_size) {
2788                         ret = -EINVAL;
2789                         goto out_unlock;
2790                 }
2791                 new_size = old_size - new_size;
2792         } else if (mod > 0) {
2793                 new_size = old_size + new_size;
2794         }
2795
2796         if (new_size < 256 * 1024 * 1024) {
2797                 ret = -EINVAL;
2798                 goto out_unlock;
2799         }
2800         if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
2801                 ret = -EFBIG;
2802                 goto out_unlock;
2803         }
2804
2805         do_div(new_size, root->sectorsize);
2806         new_size *= root->sectorsize;
2807
2808 printk("new size is %Lu\n", new_size);
2809         if (new_size > old_size) {
2810                 trans = btrfs_start_transaction(root, 1);
2811                 ret = btrfs_grow_extent_tree(trans, root, new_size);
2812                 btrfs_commit_transaction(trans, root);
2813         } else {
2814                 ret = btrfs_shrink_extent_tree(root, new_size);
2815         }
2816
2817 out_unlock:
2818         mutex_unlock(&root->fs_info->fs_mutex);
2819 out:
2820         kfree(vol_args);
2821         return ret;
2822 }
2823
2824 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
2825                                             void __user *arg)
2826 {
2827         struct btrfs_ioctl_vol_args *vol_args;
2828         struct btrfs_dir_item *di;
2829         struct btrfs_path *path;
2830         u64 root_dirid;
2831         int namelen;
2832         int ret;
2833
2834         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2835
2836         if (!vol_args)
2837                 return -ENOMEM;
2838
2839         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2840                 ret = -EFAULT;
2841                 goto out;
2842         }
2843
2844         namelen = strlen(vol_args->name);
2845         if (namelen > BTRFS_VOL_NAME_MAX) {
2846                 ret = -EINVAL;
2847                 goto out;
2848         }
2849         if (strchr(vol_args->name, '/')) {
2850                 ret = -EINVAL;
2851                 goto out;
2852         }
2853
2854         path = btrfs_alloc_path();
2855         if (!path) {
2856                 ret = -ENOMEM;
2857                 goto out;
2858         }
2859
2860         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2861         mutex_lock(&root->fs_info->fs_mutex);
2862         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2863                             path, root_dirid,
2864                             vol_args->name, namelen, 0);
2865         mutex_unlock(&root->fs_info->fs_mutex);
2866         btrfs_free_path(path);
2867
2868         if (di && !IS_ERR(di)) {
2869                 ret = -EEXIST;
2870                 goto out;
2871         }
2872
2873         if (IS_ERR(di)) {
2874                 ret = PTR_ERR(di);
2875                 goto out;
2876         }
2877
2878         if (root == root->fs_info->tree_root)
2879                 ret = create_subvol(root, vol_args->name, namelen);
2880         else
2881                 ret = create_snapshot(root, vol_args->name, namelen);
2882 out:
2883         kfree(vol_args);
2884         return ret;
2885 }
2886
2887 static int btrfs_ioctl_defrag(struct file *file)
2888 {
2889         struct inode *inode = fdentry(file)->d_inode;
2890         struct btrfs_root *root = BTRFS_I(inode)->root;
2891
2892         switch (inode->i_mode & S_IFMT) {
2893         case S_IFDIR:
2894                 mutex_lock(&root->fs_info->fs_mutex);
2895                 btrfs_defrag_root(root, 0);
2896                 btrfs_defrag_root(root->fs_info->extent_root, 0);
2897                 mutex_unlock(&root->fs_info->fs_mutex);
2898                 break;
2899         case S_IFREG:
2900                 btrfs_defrag_file(file);
2901                 break;
2902         }
2903
2904         return 0;
2905 }
2906
2907 long btrfs_ioctl(struct file *file, unsigned int
2908                 cmd, unsigned long arg)
2909 {
2910         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
2911
2912         switch (cmd) {
2913         case BTRFS_IOC_SNAP_CREATE:
2914                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
2915         case BTRFS_IOC_DEFRAG:
2916                 return btrfs_ioctl_defrag(file);
2917         case BTRFS_IOC_RESIZE:
2918                 return btrfs_ioctl_resize(root, (void __user *)arg);
2919         }
2920
2921         return -ENOTTY;
2922 }
2923
2924 /*
2925  * Called inside transaction, so use GFP_NOFS
2926  */
2927 struct inode *btrfs_alloc_inode(struct super_block *sb)
2928 {
2929         struct btrfs_inode *ei;
2930
2931         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2932         if (!ei)
2933                 return NULL;
2934         ei->last_trans = 0;
2935         ei->ordered_trans = 0;
2936         return &ei->vfs_inode;
2937 }
2938
2939 void btrfs_destroy_inode(struct inode *inode)
2940 {
2941         WARN_ON(!list_empty(&inode->i_dentry));
2942         WARN_ON(inode->i_data.nrpages);
2943
2944         btrfs_drop_extent_cache(inode, 0, (u64)-1);
2945         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2946 }
2947
2948 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2949 static void init_once(struct kmem_cache * cachep, void *foo)
2950 #else
2951 static void init_once(void * foo, struct kmem_cache * cachep,
2952                       unsigned long flags)
2953 #endif
2954 {
2955         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2956
2957         inode_init_once(&ei->vfs_inode);
2958 }
2959
2960 void btrfs_destroy_cachep(void)
2961 {
2962         if (btrfs_inode_cachep)
2963                 kmem_cache_destroy(btrfs_inode_cachep);
2964         if (btrfs_trans_handle_cachep)
2965                 kmem_cache_destroy(btrfs_trans_handle_cachep);
2966         if (btrfs_transaction_cachep)
2967                 kmem_cache_destroy(btrfs_transaction_cachep);
2968         if (btrfs_bit_radix_cachep)
2969                 kmem_cache_destroy(btrfs_bit_radix_cachep);
2970         if (btrfs_path_cachep)
2971                 kmem_cache_destroy(btrfs_path_cachep);
2972 }
2973
2974 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
2975                                        unsigned long extra_flags,
2976 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2977                                        void (*ctor)(struct kmem_cache *, void *)
2978 #else
2979                                        void (*ctor)(void *, struct kmem_cache *,
2980                                                     unsigned long)
2981 #endif
2982                                      )
2983 {
2984         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2985                                  SLAB_MEM_SPREAD | extra_flags), ctor
2986 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2987                                  ,NULL
2988 #endif
2989                                 );
2990 }
2991
2992 int btrfs_init_cachep(void)
2993 {
2994         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
2995                                           sizeof(struct btrfs_inode),
2996                                           0, init_once);
2997         if (!btrfs_inode_cachep)
2998                 goto fail;
2999         btrfs_trans_handle_cachep =
3000                         btrfs_cache_create("btrfs_trans_handle_cache",
3001                                            sizeof(struct btrfs_trans_handle),
3002                                            0, NULL);
3003         if (!btrfs_trans_handle_cachep)
3004                 goto fail;
3005         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
3006                                              sizeof(struct btrfs_transaction),
3007                                              0, NULL);
3008         if (!btrfs_transaction_cachep)
3009                 goto fail;
3010         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
3011                                          sizeof(struct btrfs_path),
3012                                          0, NULL);
3013         if (!btrfs_path_cachep)
3014                 goto fail;
3015         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
3016                                               SLAB_DESTROY_BY_RCU, NULL);
3017         if (!btrfs_bit_radix_cachep)
3018                 goto fail;
3019         return 0;
3020 fail:
3021         btrfs_destroy_cachep();
3022         return -ENOMEM;
3023 }
3024
3025 static int btrfs_getattr(struct vfsmount *mnt,
3026                          struct dentry *dentry, struct kstat *stat)
3027 {
3028         struct inode *inode = dentry->d_inode;
3029         generic_fillattr(inode, stat);
3030         stat->blksize = PAGE_CACHE_SIZE;
3031         stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9);
3032         return 0;
3033 }
3034
3035 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
3036                            struct inode * new_dir,struct dentry *new_dentry)
3037 {
3038         struct btrfs_trans_handle *trans;
3039         struct btrfs_root *root = BTRFS_I(old_dir)->root;
3040         struct inode *new_inode = new_dentry->d_inode;
3041         struct inode *old_inode = old_dentry->d_inode;
3042         struct timespec ctime = CURRENT_TIME;
3043         struct btrfs_path *path;
3044         int ret;
3045
3046         if (S_ISDIR(old_inode->i_mode) && new_inode &&
3047             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
3048                 return -ENOTEMPTY;
3049         }
3050
3051         mutex_lock(&root->fs_info->fs_mutex);
3052         ret = btrfs_check_free_space(root, 1, 0);
3053         if (ret)
3054                 goto out_unlock;
3055
3056         trans = btrfs_start_transaction(root, 1);
3057
3058         btrfs_set_trans_block_group(trans, new_dir);
3059         path = btrfs_alloc_path();
3060         if (!path) {
3061                 ret = -ENOMEM;
3062                 goto out_fail;
3063         }
3064
3065         old_dentry->d_inode->i_nlink++;
3066         old_dir->i_ctime = old_dir->i_mtime = ctime;
3067         new_dir->i_ctime = new_dir->i_mtime = ctime;
3068         old_inode->i_ctime = ctime;
3069
3070         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
3071         if (ret)
3072                 goto out_fail;
3073
3074         if (new_inode) {
3075                 new_inode->i_ctime = CURRENT_TIME;
3076                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
3077                 if (ret)
3078                         goto out_fail;
3079         }
3080         ret = btrfs_add_link(trans, new_dentry, old_inode, 1);
3081         if (ret)
3082                 goto out_fail;
3083
3084 out_fail:
3085         btrfs_free_path(path);
3086         btrfs_end_transaction(trans, root);
3087 out_unlock:
3088         mutex_unlock(&root->fs_info->fs_mutex);
3089         return ret;
3090 }
3091
3092 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
3093                          const char *symname)
3094 {
3095         struct btrfs_trans_handle *trans;
3096         struct btrfs_root *root = BTRFS_I(dir)->root;
3097         struct btrfs_path *path;
3098         struct btrfs_key key;
3099         struct inode *inode = NULL;
3100         int err;
3101         int drop_inode = 0;
3102         u64 objectid;
3103         int name_len;
3104         int datasize;
3105         unsigned long ptr;
3106         struct btrfs_file_extent_item *ei;
3107         struct extent_buffer *leaf;
3108         unsigned long nr = 0;
3109
3110         name_len = strlen(symname) + 1;
3111         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
3112                 return -ENAMETOOLONG;
3113
3114         mutex_lock(&root->fs_info->fs_mutex);
3115         err = btrfs_check_free_space(root, 1, 0);
3116         if (err)
3117                 goto out_fail;
3118
3119         trans = btrfs_start_transaction(root, 1);
3120         btrfs_set_trans_block_group(trans, dir);
3121
3122         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3123         if (err) {
3124                 err = -ENOSPC;
3125                 goto out_unlock;
3126         }
3127
3128         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
3129                                 dentry->d_name.len,
3130                                 dentry->d_parent->d_inode->i_ino, objectid,
3131                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
3132         err = PTR_ERR(inode);
3133         if (IS_ERR(inode))
3134                 goto out_unlock;
3135
3136         btrfs_set_trans_block_group(trans, inode);
3137         err = btrfs_add_nondir(trans, dentry, inode, 0);
3138         if (err)
3139                 drop_inode = 1;
3140         else {
3141                 inode->i_mapping->a_ops = &btrfs_aops;
3142                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3143                 inode->i_fop = &btrfs_file_operations;
3144                 inode->i_op = &btrfs_file_inode_operations;
3145                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3146                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3147                                      inode->i_mapping, GFP_NOFS);
3148                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3149                                      inode->i_mapping, GFP_NOFS);
3150                 BTRFS_I(inode)->delalloc_bytes = 0;
3151                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3152         }
3153         dir->i_sb->s_dirt = 1;
3154         btrfs_update_inode_block_group(trans, inode);
3155         btrfs_update_inode_block_group(trans, dir);
3156         if (drop_inode)
3157                 goto out_unlock;
3158
3159         path = btrfs_alloc_path();
3160         BUG_ON(!path);
3161         key.objectid = inode->i_ino;
3162         key.offset = 0;
3163         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3164         datasize = btrfs_file_extent_calc_inline_size(name_len);
3165         err = btrfs_insert_empty_item(trans, root, path, &key,
3166                                       datasize);
3167         if (err) {
3168                 drop_inode = 1;
3169                 goto out_unlock;
3170         }
3171         leaf = path->nodes[0];
3172         ei = btrfs_item_ptr(leaf, path->slots[0],
3173                             struct btrfs_file_extent_item);
3174         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
3175         btrfs_set_file_extent_type(leaf, ei,
3176                                    BTRFS_FILE_EXTENT_INLINE);
3177         ptr = btrfs_file_extent_inline_start(ei);
3178         write_extent_buffer(leaf, symname, ptr, name_len);
3179         btrfs_mark_buffer_dirty(leaf);
3180         btrfs_free_path(path);
3181
3182         inode->i_op = &btrfs_symlink_inode_operations;
3183         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3184         inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3185         inode->i_size = name_len - 1;
3186         err = btrfs_update_inode(trans, root, inode);
3187         if (err)
3188                 drop_inode = 1;
3189
3190 out_unlock:
3191         nr = trans->blocks_used;
3192         btrfs_end_transaction(trans, root);
3193 out_fail:
3194         mutex_unlock(&root->fs_info->fs_mutex);
3195         if (drop_inode) {
3196                 inode_dec_link_count(inode);
3197                 iput(inode);
3198         }
3199         btrfs_btree_balance_dirty(root, nr);
3200         btrfs_throttle(root);
3201         return err;
3202 }
3203
/*
 * Permission hook referenced by the btrfs inode_operations tables.
 *
 * Returns -EACCES when write access (MAY_WRITE) is requested on an inode
 * that has the btrfs READONLY flag set; every other request is handed to
 * generic_permission().  @nd is accepted to fit the hook signature but is
 * not used here, and NULL is passed as generic_permission()'s third
 * argument (presumably the ACL-check callback -- confirm against the
 * kernel headers this is built with).
 */
3204 static int btrfs_permission(struct inode *inode, int mask,
3205                             struct nameidata *nd)
3206 {
3207         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
3208                 return -EACCES;
3209         return generic_permission(inode, mask, NULL);
3210 }
3211
/*
 * Inode operations table carrying the full set of namespace hooks:
 * lookup, create, link/unlink, mkdir/rmdir, rename, symlink and mknod,
 * plus setattr, the xattr hooks and the permission check.
 *
 * xattr get/set/remove go through the generic_* helpers; only listxattr
 * and the remaining hooks are btrfs-specific functions.
 *
 * NOTE(review): going by the name this is the table for writable
 * directory inodes -- confirm at the site where i_op is assigned.
 */
3212 static struct inode_operations btrfs_dir_inode_operations = {
3213         .lookup         = btrfs_lookup,
3214         .create         = btrfs_create,
3215         .unlink         = btrfs_unlink,
3216         .link           = btrfs_link,
3217         .mkdir          = btrfs_mkdir,
3218         .rmdir          = btrfs_rmdir,
3219         .rename         = btrfs_rename,
3220         .symlink        = btrfs_symlink,
3221         .setattr        = btrfs_setattr,
3222         .mknod          = btrfs_mknod,
3223         .setxattr       = generic_setxattr,
3224         .getxattr       = generic_getxattr,
3225         .listxattr      = btrfs_listxattr,
3226         .removexattr    = generic_removexattr,
3227         .permission     = btrfs_permission,
3228 };
/*
 * Reduced inode operations table: only lookup and the permission check
 * are provided, so none of the hooks that create, remove or rename
 * entries are available through it.
 *
 * NOTE(review): presumably installed on directories that must not be
 * modified -- confirm at the site where i_op is assigned.
 */
3229 static struct inode_operations btrfs_dir_ro_inode_operations = {
3230         .lookup         = btrfs_lookup,
3231         .permission     = btrfs_permission,
3232 };
/*
 * File operations table whose read/llseek entries are the generic
 * helpers (generic_read_dir, generic_file_llseek) and whose readdir and
 * ioctl entries are btrfs functions.  When CONFIG_COMPAT is defined the
 * same btrfs_ioctl handler is also registered as the compat ioctl entry.
 *
 * NOTE(review): going by the name this is for open directories --
 * confirm at the site where i_fop is assigned.
 */
3233 static struct file_operations btrfs_dir_file_operations = {
3234         .llseek         = generic_file_llseek,
3235         .read           = generic_read_dir,
3236         .readdir        = btrfs_readdir,
3237         .unlocked_ioctl = btrfs_ioctl,
3238 #ifdef CONFIG_COMPAT
3239         .compat_ioctl   = btrfs_ioctl,
3240 #endif
3241 };
3242
/*
 * Callback table handed to the extent_io layer.  It is installed on a
 * per-inode io_tree (BTRFS_I(inode)->io_tree.ops), e.g. by
 * btrfs_symlink() when it sets up the new inode.
 *
 * Hooks provided: delalloc filling, bio submit/merge, the readpage
 * start/end/failure hooks, and set/clear-bit notifications.
 */
3243 static struct extent_io_ops btrfs_extent_io_ops = {
3244         .fill_delalloc = run_delalloc_range,
3245         .submit_bio_hook = btrfs_submit_bio_hook,
3246         .merge_bio_hook = btrfs_merge_bio_hook,
3247         .readpage_io_hook = btrfs_readpage_io_hook,
3248         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3249         .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
3250         .set_bit_hook = btrfs_set_bit_hook,
3251         .clear_bit_hook = btrfs_clear_bit_hook,
3252 };
3253
/*
 * Main address-space operations table: single-page and multi-page
 * read/write entry points, bmap, direct I/O (btrfs_direct_IO), and page
 * invalidate/release hooks.  sync_page and set_page_dirty point at
 * non-btrfs helpers (block_sync_page, __set_page_dirty_nobuffers).
 *
 * NOTE(review): presumably used for regular file data mappings --
 * confirm at the site where a_ops is assigned.
 */
3254 static struct address_space_operations btrfs_aops = {
3255         .readpage       = btrfs_readpage,
3256         .writepage      = btrfs_writepage,
3257         .writepages     = btrfs_writepages,
3258         .readpages      = btrfs_readpages,
3259         .sync_page      = block_sync_page,
3260         .bmap           = btrfs_bmap,
3261         .direct_IO      = btrfs_direct_IO,
3262         .invalidatepage = btrfs_invalidatepage,
3263         .releasepage    = btrfs_releasepage,
3264         .set_page_dirty = __set_page_dirty_nobuffers,
3265 };
3266
/*
 * Address-space operations that btrfs_symlink() installs on a new
 * symlink's mapping (inode->i_mapping->a_ops).  Only the single-page
 * read/write hooks and the invalidate/release hooks are provided; the
 * multi-page, bmap, direct I/O, sync_page and set_page_dirty entries
 * present in btrfs_aops are left unset here.
 */
3267 static struct address_space_operations btrfs_symlink_aops = {
3268         .readpage       = btrfs_readpage,
3269         .writepage      = btrfs_writepage,
3270         .invalidatepage = btrfs_invalidatepage,
3271         .releasepage    = btrfs_releasepage,
3272 };
3273
/*
 * Inode operations table providing truncate, getattr/setattr, the xattr
 * hooks and the permission check.  As in the directory table, xattr
 * get/set/remove use the generic_* helpers while listxattr is a btrfs
 * function.
 *
 * NOTE(review): going by the name this is for regular files -- confirm
 * at the site where i_op is assigned.
 */
3274 static struct inode_operations btrfs_file_inode_operations = {
3275         .truncate       = btrfs_truncate,
3276         .getattr        = btrfs_getattr,
3277         .setattr        = btrfs_setattr,
3278         .setxattr       = generic_setxattr,
3279         .getxattr       = generic_getxattr,
3280         .listxattr      = btrfs_listxattr,
3281         .removexattr    = generic_removexattr,
3282         .permission     = btrfs_permission,
3283 };
/*
 * Minimal inode operations table: attribute get/set and the permission
 * check only.  No xattr hooks are wired up here, unlike the file and
 * directory tables.
 *
 * NOTE(review): presumably for special inodes (device nodes, FIFOs,
 * sockets) -- confirm at the site where i_op is assigned.
 */
3284 static struct inode_operations btrfs_special_inode_operations = {
3285         .getattr        = btrfs_getattr,
3286         .setattr        = btrfs_setattr,
3287         .permission     = btrfs_permission,
3288 };
/*
 * Inode operations that btrfs_symlink() installs on a newly created
 * symlink (inode->i_op).  Link reading and following are delegated to
 * non-btrfs helpers (generic_readlink, page_follow_link_light,
 * page_put_link); only the permission check is a btrfs function.
 */
3289 static struct inode_operations btrfs_symlink_inode_operations = {
3290         .readlink       = generic_readlink,
3291         .follow_link    = page_follow_link_light,
3292         .put_link       = page_put_link,
3293         .permission     = btrfs_permission,
3294 };