Btrfs: Add readahead to the online shrinker, and a mount -o alloc_start= for testing
[safe/jmp/linux-2.6] / fs / btrfs / super.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/blkdev.h>
20 #include <linux/module.h>
21 #include <linux/buffer_head.h>
22 #include <linux/fs.h>
23 #include <linux/pagemap.h>
24 #include <linux/highmem.h>
25 #include <linux/time.h>
26 #include <linux/init.h>
27 #include <linux/string.h>
28 #include <linux/smp_lock.h>
29 #include <linux/backing-dev.h>
30 #include <linux/mount.h>
31 #include <linux/mpage.h>
32 #include <linux/swap.h>
33 #include <linux/writeback.h>
34 #include <linux/statfs.h>
35 #include <linux/compat.h>
36 #include <linux/parser.h>
37 #include <linux/ctype.h>
38 #include <linux/namei.h>
39 #include "ctree.h"
40 #include "disk-io.h"
41 #include "transaction.h"
42 #include "btrfs_inode.h"
43 #include "ioctl.h"
44 #include "print-tree.h"
45 #include "xattr.h"
46
/* Magic number stored in sb->s_magic and reported as f_type by statfs. */
#define BTRFS_SUPER_MAGIC 0x9123683E

/* Forward declaration; the table itself is defined further down this file. */
static struct super_operations btrfs_super_ops;
50
51 static void btrfs_put_super (struct super_block * sb)
52 {
53         struct btrfs_root *root = btrfs_sb(sb);
54         struct btrfs_fs_info *fs = root->fs_info;
55         int ret;
56
57         ret = close_ctree(root);
58         if (ret) {
59                 printk("close ctree returns %d\n", ret);
60         }
61         btrfs_sysfs_del_super(fs);
62         sb->s_fs_info = NULL;
63 }
64
/* Token ids for the mount options understood by parse_options(). */
enum {
        Opt_subvol,
        Opt_nodatasum,
        Opt_nodatacow,
        Opt_max_extent,
        Opt_alloc_start,
        Opt_err,        /* paired with the NULL pattern ending the table */
};
69
70 static match_table_t tokens = {
71         {Opt_subvol, "subvol=%s"},
72         {Opt_nodatasum, "nodatasum"},
73         {Opt_nodatacow, "nodatacow"},
74         {Opt_max_extent, "max_extent=%s"},
75         {Opt_alloc_start, "alloc_start=%s"},
76         {Opt_err, NULL}
77 };
78
79 u64 btrfs_parse_size(char *str)
80 {
81         u64 res;
82         int mult = 1;
83         char *end;
84         char last;
85
86         res = simple_strtoul(str, &end, 10);
87
88         last = end[0];
89         if (isalpha(last)) {
90                 last = tolower(last);
91                 switch (last) {
92                 case 'g':
93                         mult *= 1024;
94                 case 'm':
95                         mult *= 1024;
96                 case 'k':
97                         mult *= 1024;
98                 }
99                 res = res * mult;
100         }
101         return res;
102 }
103
104 static int parse_options (char * options,
105                           struct btrfs_root *root,
106                           char **subvol_name)
107 {
108         char * p;
109         struct btrfs_fs_info *info = NULL;
110         substring_t args[MAX_OPT_ARGS];
111
112         if (!options)
113                 return 1;
114
115         /*
116          * strsep changes the string, duplicate it because parse_options
117          * gets called twice
118          */
119         options = kstrdup(options, GFP_NOFS);
120         if (!options)
121                 return -ENOMEM;
122
123         if (root)
124                 info = root->fs_info;
125
126         while ((p = strsep (&options, ",")) != NULL) {
127                 int token;
128                 if (!*p)
129                         continue;
130
131                 token = match_token(p, tokens, args);
132                 switch (token) {
133                 case Opt_subvol:
134                         if (subvol_name) {
135                                 *subvol_name = match_strdup(&args[0]);
136                         }
137                         break;
138                 case Opt_nodatasum:
139                         if (info) {
140                                 printk("btrfs: setting nodatacsum\n");
141                                 btrfs_set_opt(info->mount_opt, NODATASUM);
142                         }
143                         break;
144                 case Opt_nodatacow:
145                         if (info) {
146                                 printk("btrfs: setting nodatacow\n");
147                                 btrfs_set_opt(info->mount_opt, NODATACOW);
148                                 btrfs_set_opt(info->mount_opt, NODATASUM);
149                         }
150                         break;
151                 case Opt_max_extent:
152                         if (info) {
153                                 char *num = match_strdup(&args[0]);
154                                 if (num) {
155                                         info->max_extent =
156                                                 btrfs_parse_size(num);
157                                         kfree(num);
158
159                                         info->max_extent = max_t(u64,
160                                                          info->max_extent,
161                                                          root->sectorsize);
162                                         printk("btrfs: max_extent at %Lu\n",
163                                                info->max_extent);
164                                 }
165                         }
166                         break;
167                 case Opt_alloc_start:
168                         if (info) {
169                                 char *num = match_strdup(&args[0]);
170                                 if (num) {
171                                         info->alloc_start =
172                                                 btrfs_parse_size(num);
173                                         kfree(num);
174                                         printk("btrfs: allocations start at "
175                                                "%Lu\n", info->alloc_start);
176                                 }
177                         }
178                         break;
179                 default:
180                         break;
181                 }
182         }
183         kfree(options);
184         return 1;
185 }
186
187 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
188 {
189         struct inode * inode;
190         struct dentry * root_dentry;
191         struct btrfs_super_block *disk_super;
192         struct btrfs_root *tree_root;
193         struct btrfs_inode *bi;
194         int err;
195
196         sb->s_maxbytes = MAX_LFS_FILESIZE;
197         sb->s_magic = BTRFS_SUPER_MAGIC;
198         sb->s_op = &btrfs_super_ops;
199         sb->s_xattr = btrfs_xattr_handlers;
200         sb->s_time_gran = 1;
201
202         tree_root = open_ctree(sb);
203
204         if (!tree_root || IS_ERR(tree_root)) {
205                 printk("btrfs: open_ctree failed\n");
206                 return -EIO;
207         }
208         sb->s_fs_info = tree_root;
209         disk_super = &tree_root->fs_info->super_copy;
210         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
211                                   tree_root);
212         bi = BTRFS_I(inode);
213         bi->location.objectid = inode->i_ino;
214         bi->location.offset = 0;
215         bi->root = tree_root;
216
217         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
218
219         if (!inode) {
220                 err = -ENOMEM;
221                 goto fail_close;
222         }
223         if (inode->i_state & I_NEW) {
224                 btrfs_read_locked_inode(inode);
225                 unlock_new_inode(inode);
226         }
227
228         root_dentry = d_alloc_root(inode);
229         if (!root_dentry) {
230                 iput(inode);
231                 err = -ENOMEM;
232                 goto fail_close;
233         }
234
235         parse_options((char *)data, tree_root, NULL);
236
237         /* this does the super kobj at the same time */
238         err = btrfs_sysfs_add_super(tree_root->fs_info);
239         if (err)
240                 goto fail_close;
241
242         sb->s_root = root_dentry;
243         btrfs_transaction_queue_work(tree_root, HZ * 30);
244         return 0;
245
246 fail_close:
247         close_ctree(tree_root);
248         return err;
249 }
250
251 static int btrfs_sync_fs(struct super_block *sb, int wait)
252 {
253         struct btrfs_trans_handle *trans;
254         struct btrfs_root *root;
255         int ret;
256         root = btrfs_sb(sb);
257
258         sb->s_dirt = 0;
259         if (!wait) {
260                 filemap_flush(root->fs_info->btree_inode->i_mapping);
261                 return 0;
262         }
263         btrfs_clean_old_snapshots(root);
264         mutex_lock(&root->fs_info->fs_mutex);
265         btrfs_defrag_dirty_roots(root->fs_info);
266         trans = btrfs_start_transaction(root, 1);
267         ret = btrfs_commit_transaction(trans, root);
268         sb->s_dirt = 0;
269         mutex_unlock(&root->fs_info->fs_mutex);
270         return ret;
271 }
272
/* ->write_super hook: only clears the super block's dirty flag. */
static void btrfs_write_super(struct super_block *sb)
{
        sb->s_dirt = 0;
}
277
278 /*
279  * This is almost a copy of get_sb_bdev in fs/super.c.
280  * We need the local copy to allow direct mounting of
281  * subvolumes, but this could be easily integrated back
282  * into the generic version.  --hch
283  */
284
285 /* start copy & paste */
286 static int set_bdev_super(struct super_block *s, void *data)
287 {
288         s->s_bdev = data;
289         s->s_dev = s->s_bdev->bd_dev;
290         return 0;
291 }
292
293 static int test_bdev_super(struct super_block *s, void *data)
294 {
295         return (void *)s->s_bdev == data;
296 }
297
298 int btrfs_get_sb_bdev(struct file_system_type *fs_type,
299         int flags, const char *dev_name, void *data,
300         int (*fill_super)(struct super_block *, void *, int),
301         struct vfsmount *mnt, const char *subvol)
302 {
303         struct block_device *bdev = NULL;
304         struct super_block *s;
305         struct dentry *root;
306         int error = 0;
307
308         bdev = open_bdev_excl(dev_name, flags, fs_type);
309         if (IS_ERR(bdev))
310                 return PTR_ERR(bdev);
311
312         /*
313          * once the super is inserted into the list by sget, s_umount
314          * will protect the lockfs code from trying to start a snapshot
315          * while we are mounting
316          */
317         down(&bdev->bd_mount_sem);
318         s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
319         up(&bdev->bd_mount_sem);
320         if (IS_ERR(s))
321                 goto error_s;
322
323         if (s->s_root) {
324                 if ((flags ^ s->s_flags) & MS_RDONLY) {
325                         up_write(&s->s_umount);
326                         deactivate_super(s);
327                         error = -EBUSY;
328                         goto error_bdev;
329                 }
330
331                 close_bdev_excl(bdev);
332         } else {
333                 char b[BDEVNAME_SIZE];
334
335                 s->s_flags = flags;
336                 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
337                 sb_set_blocksize(s, block_size(bdev));
338                 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
339                 if (error) {
340                         up_write(&s->s_umount);
341                         deactivate_super(s);
342                         goto error;
343                 }
344
345                 s->s_flags |= MS_ACTIVE;
346         }
347
348         if (subvol) {
349                 root = lookup_one_len(subvol, s->s_root, strlen(subvol));
350                 if (IS_ERR(root)) {
351                         up_write(&s->s_umount);
352                         deactivate_super(s);
353                         error = PTR_ERR(root);
354                         goto error;
355                 }
356                 if (!root->d_inode) {
357                         dput(root);
358                         up_write(&s->s_umount);
359                         deactivate_super(s);
360                         error = -ENXIO;
361                         goto error;
362                 }
363         } else {
364                 root = dget(s->s_root);
365         }
366
367         mnt->mnt_sb = s;
368         mnt->mnt_root = root;
369         return 0;
370
371 error_s:
372         error = PTR_ERR(s);
373 error_bdev:
374         close_bdev_excl(bdev);
375 error:
376         return error;
377 }
378 /* end copy & paste */
379
380 static int btrfs_get_sb(struct file_system_type *fs_type,
381         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
382 {
383         int ret;
384         char *subvol_name = NULL;
385
386         parse_options((char *)data, NULL, &subvol_name);
387         ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data,
388                         btrfs_fill_super, mnt,
389                         subvol_name ? subvol_name : "default");
390         if (subvol_name)
391                 kfree(subvol_name);
392         return ret;
393 }
394
395 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
396 {
397         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
398         struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
399         int bits = dentry->d_sb->s_blocksize_bits;
400
401         buf->f_namelen = BTRFS_NAME_LEN;
402         buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
403         buf->f_bfree = buf->f_blocks -
404                 (btrfs_super_bytes_used(disk_super) >> bits);
405         buf->f_bavail = buf->f_bfree;
406         buf->f_bsize = dentry->d_sb->s_blocksize;
407         buf->f_type = BTRFS_SUPER_MAGIC;
408         return 0;
409 }
410
411 static struct file_system_type btrfs_fs_type = {
412         .owner          = THIS_MODULE,
413         .name           = "btrfs",
414         .get_sb         = btrfs_get_sb,
415         .kill_sb        = kill_block_super,
416         .fs_flags       = FS_REQUIRES_DEV,
417 };
418
419 static struct super_operations btrfs_super_ops = {
420         .delete_inode   = btrfs_delete_inode,
421         .put_super      = btrfs_put_super,
422         .read_inode     = btrfs_read_locked_inode,
423         .write_super    = btrfs_write_super,
424         .sync_fs        = btrfs_sync_fs,
425         .write_inode    = btrfs_write_inode,
426         .dirty_inode    = btrfs_dirty_inode,
427         .alloc_inode    = btrfs_alloc_inode,
428         .destroy_inode  = btrfs_destroy_inode,
429         .statfs         = btrfs_statfs,
430 };
431
432 static int __init init_btrfs_fs(void)
433 {
434         int err;
435
436         err = btrfs_init_sysfs();
437         if (err)
438                 return err;
439
440         btrfs_init_transaction_sys();
441         err = btrfs_init_cachep();
442         if (err)
443                 goto free_transaction_sys;
444         err = extent_map_init();
445         if (err)
446                 goto free_cachep;
447
448         err = register_filesystem(&btrfs_fs_type);
449         if (err)
450                 goto free_extent_map;
451         return 0;
452
453 free_extent_map:
454         extent_map_exit();
455 free_cachep:
456         btrfs_destroy_cachep();
457 free_transaction_sys:
458         btrfs_exit_transaction_sys();
459         btrfs_exit_sysfs();
460         return err;
461 }
462
463 static void __exit exit_btrfs_fs(void)
464 {
465         btrfs_exit_transaction_sys();
466         btrfs_destroy_cachep();
467         extent_map_exit();
468         unregister_filesystem(&btrfs_fs_type);
469         btrfs_exit_sysfs();
470 }
471
/* Module entry and exit points. */
module_init(init_btrfs_fs)
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");